diff --git a/Cargo.lock b/Cargo.lock index 54ebb4ae5..f15930a1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -481,6 +481,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "micro_http" version = "0.1.0" @@ -1204,6 +1213,7 @@ name = "virtio-queue" version = "0.1.0" dependencies = [ "log", + "memoffset", "vm-memory", "vmm-sys-util", ] diff --git a/block_util/src/lib.rs b/block_util/src/lib.rs index 76f069195..0611137e2 100644 --- a/block_util/src/lib.rs +++ b/block_util/src/lib.rs @@ -36,7 +36,7 @@ use virtio_bindings::bindings::virtio_blk::*; use virtio_queue::DescriptorChain; use vm_memory::{ bitmap::AtomicBitmap, bitmap::Bitmap, ByteValued, Bytes, GuestAddress, GuestMemory, - GuestMemoryAtomic, GuestMemoryError, + GuestMemoryError, GuestMemoryLoadGuard, }; use vmm_sys_util::eventfd::EventFd; @@ -176,7 +176,7 @@ pub struct Request { impl Request { pub fn parse( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, ) -> result::Result { let hdr_desc = desc_chain .next() diff --git a/fuzz/fuzz_targets/block.rs b/fuzz/fuzz_targets/block.rs index 2c7d1c4b6..7773a6ee6 100644 --- a/fuzz/fuzz_targets/block.rs +++ b/fuzz/fuzz_targets/block.rs @@ -79,7 +79,7 @@ fuzz_target!(|bytes| { let mut q = Queue::< GuestMemoryAtomic, - QueueState>, + QueueState, >::new(guest_memory.clone(), QUEUE_SIZE); q.state.ready = true; q.state.size = QUEUE_SIZE / 2; diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index 1b8ef1c16..d91a927af 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -25,7 +25,10 @@ use versionize::{VersionMap, Versionize, VersionizeResult}; 
use versionize_derive::Versionize; use virtio_queue::{AccessPlatform, DescriptorChain, Queue}; use vm_device::dma_mapping::ExternalDmaMapping; -use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError}; +use vm_memory::{ + Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError, + GuestMemoryLoadGuard, +}; use vm_migration::VersionMapped; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; @@ -339,7 +342,7 @@ impl Request { // is created based on the information provided from the guest driver for // virtio-iommu (giving the link device_id <=> domain). fn parse( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, mapping: &Arc, ext_mapping: &BTreeMap>, ext_domain_mapping: &mut BTreeMap>, diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index bd3ee1c3d..6f384114a 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -38,7 +38,7 @@ use virtio_queue::{DescriptorChain, Queue}; use vm_device::dma_mapping::ExternalDmaMapping; use vm_memory::{ Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError, - GuestMemoryRegion, + GuestMemoryLoadGuard, GuestMemoryRegion, }; use vm_migration::protocol::MemoryRangeTable; use vm_migration::{ @@ -277,7 +277,7 @@ struct Request { impl Request { fn parse( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, ) -> result::Result { let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; // The descriptor contains the request type which MUST be readable. 
diff --git a/virtio-devices/src/pmem.rs b/virtio-devices/src/pmem.rs index eb9489fdb..3eb0df170 100644 --- a/virtio-devices/src/pmem.rs +++ b/virtio-devices/src/pmem.rs @@ -28,7 +28,10 @@ use std::sync::{Arc, Barrier}; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; use virtio_queue::{DescriptorChain, Queue}; -use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError}; +use vm_memory::{ + Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError, + GuestMemoryLoadGuard, +}; use vm_migration::VersionMapped; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; @@ -114,7 +117,7 @@ struct Request { impl Request { fn parse( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, ) -> result::Result { let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; // The descriptor contains the request type which MUST be readable. diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index 7f3e2414d..db2b143bd 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -362,10 +362,11 @@ impl VirtioPciDevice { .queue_max_sizes() .iter() .map(|&s| { - let mut queue = Queue::< - GuestMemoryAtomic, - virtio_queue::QueueState>, - >::new(memory.clone(), s); + let mut queue = + Queue::, virtio_queue::QueueState>::new( + memory.clone(), + s, + ); queue.state.access_platform = access_platform.clone(); queue }) diff --git a/virtio-devices/src/vhost_user/vu_common_ctrl.rs b/virtio-devices/src/vhost_user/vu_common_ctrl.rs index 133cd8cbc..1a128b63c 100644 --- a/virtio-devices/src/vhost_user/vu_common_ctrl.rs +++ b/virtio-devices/src/vhost_user/vu_common_ctrl.rs @@ -173,7 +173,7 @@ impl VhostUserHandle { // at early stage. 
for (queue_index, queue) in queues.iter().enumerate() { self.vu - .set_vring_num(queue_index, queue.actual_size()) + .set_vring_num(queue_index, queue.max_size()) .map_err(Error::VhostUserSetVringNum)?; } @@ -184,7 +184,7 @@ impl VhostUserHandle { mmap_size: 0, mmap_offset: 0, num_queues: queues.len() as u16, - queue_size: queues[0].actual_size(), + queue_size: queues[0].max_size(), }; let (info, fd) = self .vu @@ -203,11 +203,11 @@ impl VhostUserHandle { let mut vrings_info = Vec::new(); for (queue_index, queue) in queues.into_iter().enumerate() { - let actual_size: usize = queue.actual_size().try_into().unwrap(); + let actual_size: usize = queue.max_size().try_into().unwrap(); let config_data = VringConfigData { queue_max_size: queue.max_size(), - queue_size: queue.actual_size(), + queue_size: queue.max_size(), flags: 0u32, desc_table_addr: get_host_address_range( mem, diff --git a/virtio-devices/src/vsock/packet.rs b/virtio-devices/src/vsock/packet.rs index 17ec59ccd..6f6b97647 100644 --- a/virtio-devices/src/vsock/packet.rs +++ b/virtio-devices/src/vsock/packet.rs @@ -21,7 +21,7 @@ use super::defs; use super::{Result, VsockError}; use crate::{get_host_address_range, GuestMemoryMmap}; use virtio_queue::DescriptorChain; -use vm_memory::GuestMemoryAtomic; +use vm_memory::GuestMemoryLoadGuard; // The vsock packet header is defined by the C struct: // @@ -106,7 +106,7 @@ impl VsockPacket { /// creating the wrapper. /// pub fn from_tx_virtq_head( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, ) -> Result { let head = desc_chain.next().ok_or(VsockError::HdrDescMissing)?; @@ -168,7 +168,7 @@ impl VsockPacket { /// descriptor. Bounds and pointer checks are performed when creating the wrapper. 
/// pub fn from_rx_virtq_head( - desc_chain: &mut DescriptorChain>, + desc_chain: &mut DescriptorChain>, ) -> Result { let head = desc_chain.next().ok_or(VsockError::HdrDescMissing)?; diff --git a/virtio-queue/Cargo.toml b/virtio-queue/Cargo.toml index 24ec399cd..63300d1d3 100644 --- a/virtio-queue/Cargo.toml +++ b/virtio-queue/Cargo.toml @@ -6,13 +6,14 @@ description = "virtio queue implementation" repository = "https://github.com/rust-vmm/vm-virtio" keywords = ["virtio"] readme = "README.md" -license = "Apache-2.0 OR MIT" +license = "Apache-2.0 OR BSD-3-Clause" edition = "2018" -[features] -test-utils = [] - [dependencies] vm-memory = "0.7.0" vmm-sys-util = ">=0.8.0" -log = ">=0.4.6" \ No newline at end of file +log = ">=0.4.6" + +[dev-dependencies] +vm-memory = { version = "0.7.0", features = ["backend-mmap", "backend-atomic"] } +memoffset = "~0" diff --git a/virtio-queue/src/chain.rs b/virtio-queue/src/chain.rs new file mode 100644 index 000000000..5f5c0a795 --- /dev/null +++ b/virtio-queue/src/chain.rs @@ -0,0 +1,491 @@ +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Copyright © 2019 Intel Corporation +// +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::convert::TryFrom; +use std::fmt::{self, Debug}; +use std::mem::size_of; +use std::ops::Deref; +use std::sync::Arc; + +use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; + +use crate::defs::VIRTQ_DESCRIPTOR_SIZE; +use crate::{AccessPlatform, Descriptor, Error}; + +/// A virtio descriptor chain. 
+#[derive(Clone, Debug)] +pub struct DescriptorChain { + mem: M, + desc_table: GuestAddress, + queue_size: u16, + head_index: u16, + next_index: u16, + ttl: u16, + is_indirect: bool, + access_platform: Option>, +} + +impl DescriptorChain +where + M: Deref, + M::Target: GuestMemory, +{ + fn with_ttl( + mem: M, + desc_table: GuestAddress, + queue_size: u16, + ttl: u16, + head_index: u16, + access_platform: Option>, + ) -> Self { + DescriptorChain { + mem, + desc_table, + queue_size, + head_index, + next_index: head_index, + ttl, + is_indirect: false, + access_platform, + } + } + + /// Create a new `DescriptorChain` instance. + /// + /// # Arguments + /// * `mem` - the `GuestMemory` object that can be used to access the buffers pointed to by the + /// descriptor chain. + /// * `desc_table` - the address of the descriptor table. + /// * `queue_size` - the size of the queue, which is also the maximum size of a descriptor + /// chain. + /// * `head_index` - the descriptor index of the chain head. + pub(crate) fn new( + mem: M, + desc_table: GuestAddress, + queue_size: u16, + head_index: u16, + access_platform: Option>, + ) -> Self { + Self::with_ttl( + mem, + desc_table, + queue_size, + queue_size, + head_index, + access_platform, + ) + } + + /// Get the descriptor index of the chain head. + pub fn head_index(&self) -> u16 { + self.head_index + } + + /// Return a `GuestMemory` object that can be used to access the buffers pointed to by the + /// descriptor chain. + pub fn memory(&self) -> &M::Target { + self.mem.deref() + } + + /// Return an iterator that only yields the readable descriptors in the chain. + pub fn readable(self) -> DescriptorChainRwIter { + DescriptorChainRwIter { + chain: self, + writable: false, + } + } + + /// Return an iterator that only yields the writable descriptors in the chain. 
+ pub fn writable(self) -> DescriptorChainRwIter { + DescriptorChainRwIter { + chain: self, + writable: true, + } + } + + // Alters the internal state of the `DescriptorChain` to switch iterating over an + // indirect descriptor table defined by `desc`. + fn switch_to_indirect_table(&mut self, desc: Descriptor) -> Result<(), Error> { + // Check the VIRTQ_DESC_F_INDIRECT flag (i.e., is_indirect) is not set inside + // an indirect descriptor. + // (see VIRTIO Spec, Section 2.6.5.3.1 Driver Requirements: Indirect Descriptors) + if self.is_indirect { + return Err(Error::InvalidIndirectDescriptor); + } + + // Check the target indirect descriptor table is correctly aligned. + if desc.addr().raw_value() & (VIRTQ_DESCRIPTOR_SIZE as u64 - 1) != 0 + || desc.len() & (VIRTQ_DESCRIPTOR_SIZE as u32 - 1) != 0 + { + return Err(Error::InvalidIndirectDescriptorTable); + } + + // It is safe to do a plain division since we checked above that desc.len() is a multiple of + // VIRTQ_DESCRIPTOR_SIZE, and VIRTQ_DESCRIPTOR_SIZE is != 0. + let table_len = (desc.len() as usize) / VIRTQ_DESCRIPTOR_SIZE; + if table_len > usize::from(u16::MAX) { + return Err(Error::InvalidIndirectDescriptorTable); + } + + self.desc_table = desc.addr(); + // try_from cannot fail as we've checked table_len above + self.queue_size = u16::try_from(table_len).expect("invalid table_len"); + self.next_index = 0; + self.ttl = self.queue_size; + self.is_indirect = true; + + Ok(()) + } +} + +impl Iterator for DescriptorChain +where + M: Deref, + M::Target: GuestMemory, +{ + type Item = Descriptor; + + /// Return the next descriptor in this descriptor chain, if there is one. + /// + /// Note that this is distinct from the next descriptor chain returned by + /// [`AvailIter`](struct.AvailIter.html), which is the head of the next + /// _available_ descriptor chain. 
+ fn next(&mut self) -> Option { + if self.ttl == 0 || self.next_index >= self.queue_size { + return None; + } + + let desc_addr = self + .desc_table + // The multiplication can not overflow an u64 since we are multiplying an u16 with a + // small number. + .checked_add(self.next_index as u64 * size_of::() as u64)?; + + // The guest device driver should not touch the descriptor once submitted, so it's safe + // to use read_obj() here. + let mut desc = self.mem.read_obj::(desc_addr).ok()?; + // When needed, it's very important to translate the descriptor address + // before returning the Descriptor to the consumer. + if let Some(access_platform) = &self.access_platform { + desc.set_addr( + access_platform + .translate(desc.addr().0, u64::from(desc.len())) + .ok()?, + ); + } + + if desc.refers_to_indirect_table() { + self.switch_to_indirect_table(desc).ok()?; + return self.next(); + } + + if desc.has_next() { + self.next_index = desc.next(); + // It's ok to decrement `self.ttl` here because we check at the start of the method + // that it's greater than 0. + self.ttl -= 1; + } else { + self.ttl = 0; + } + + Some(desc) + } +} + +/// An iterator for readable or writable descriptors. +#[derive(Clone)] +pub struct DescriptorChainRwIter { + chain: DescriptorChain, + writable: bool, +} + +impl Iterator for DescriptorChainRwIter +where + M: Deref, + M::Target: GuestMemory, +{ + type Item = Descriptor; + + /// Return the next readable/writeable descriptor (depending on the `writable` value) in this + /// descriptor chain, if there is one. + /// + /// Note that this is distinct from the next descriptor chain returned by + /// [`AvailIter`](struct.AvailIter.html), which is the head of the next + /// _available_ descriptor chain. 
+ fn next(&mut self) -> Option { + loop { + match self.chain.next() { + Some(v) => { + if v.is_write_only() == self.writable { + return Some(v); + } + } + None => return None, + } + } + } +} + +// We can't derive Debug, because rustc doesn't generate the `M::T: Debug` constraint +impl Debug for DescriptorChainRwIter +where + M: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DescriptorChainRwIter") + .field("chain", &self.chain) + .field("writable", &self.writable) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::{VIRTQ_DESC_F_INDIRECT, VIRTQ_DESC_F_NEXT}; + use crate::mock::{DescriptorTable, MockSplitQueue}; + use vm_memory::GuestMemoryMmap; + + #[test] + fn test_checked_new_descriptor_chain() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + assert!(vq.end().0 < 0x1000); + + // index >= queue_size + assert!( + DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 16, None) + .next() + .is_none() + ); + + // desc_table address is way off + assert!(DescriptorChain::<&GuestMemoryMmap>::new( + m, + GuestAddress(0x00ff_ffff_ffff), + 16, + 0, + None + ) + .next() + .is_none()); + + { + // the first desc has a normal len, and the next_descriptor flag is set + // but the index of the next descriptor is too large + let desc = Descriptor::new(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 16); + vq.desc_table().store(0, desc); + + let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0, None); + c.next().unwrap(); + assert!(c.next().is_none()); + } + + // finally, let's test an ok chain + { + let desc = Descriptor::new(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.desc_table().store(0, desc); + + let desc = Descriptor::new(0x2000, 0x1000, 0, 0); + vq.desc_table().store(1, desc); + + let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0, None); + + assert_eq!( + c.memory() as *const 
GuestMemoryMmap, + m as *const GuestMemoryMmap + ); + + assert_eq!(c.desc_table, vq.start()); + assert_eq!(c.queue_size, 16); + assert_eq!(c.ttl, c.queue_size); + + let desc = c.next().unwrap(); + assert_eq!(desc.addr(), GuestAddress(0x1000)); + assert_eq!(desc.len(), 0x1000); + assert_eq!(desc.flags(), VIRTQ_DESC_F_NEXT); + assert_eq!(desc.next(), 1); + assert_eq!(c.ttl, c.queue_size - 1); + + assert!(c.next().is_some()); + // The descriptor above was the last from the chain, so `ttl` should be 0 now. + assert_eq!(c.ttl, 0); + assert!(c.next().is_none()); + assert_eq!(c.ttl, 0); + } + } + + #[test] + fn test_ttl_wrap_around() { + const QUEUE_SIZE: u16 = 16; + + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x100000)]).unwrap(); + let vq = MockSplitQueue::new(m, QUEUE_SIZE); + + // Populate the entire descriptor table with entries. Only the last one should not have the + // VIRTQ_DESC_F_NEXT set. + for i in 0..QUEUE_SIZE - 1 { + let desc = Descriptor::new(0x1000 * (i + 1) as u64, 0x1000, VIRTQ_DESC_F_NEXT, i + 1); + vq.desc_table().store(i, desc); + } + let desc = Descriptor::new((0x1000 * 16) as u64, 0x1000, 0, 0); + vq.desc_table().store(QUEUE_SIZE - 1, desc); + + let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), QUEUE_SIZE, 0, None); + assert_eq!(c.ttl, c.queue_size); + + // Validate that `ttl` wraps around even when the entire descriptor table is populated. + for i in 0..QUEUE_SIZE { + let _desc = c.next().unwrap(); + assert_eq!(c.ttl, c.queue_size - i - 1); + } + assert!(c.next().is_none()); + } + + #[test] + fn test_new_from_indirect_descriptor() { + // This is testing that chaining an indirect table works as expected. It is also a negative + // test for the following requirement from the spec: + // `A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags.`. In + // case the driver is setting both of these flags, we check that the device doesn't panic. 
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let dtable = vq.desc_table(); + + // Create a chain with one normal descriptor and one pointing to an indirect table. + let desc = Descriptor::new(0x6000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + dtable.store(0, desc); + // The spec forbids setting both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags. We do + // not currently enforce this rule, we just ignore the VIRTQ_DESC_F_NEXT flag. + let desc = Descriptor::new(0x7000, 0x1000, VIRTQ_DESC_F_INDIRECT | VIRTQ_DESC_F_NEXT, 2); + dtable.store(1, desc); + let desc = Descriptor::new(0x8000, 0x1000, 0, 0); + dtable.store(2, desc); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + + // create an indirect table with 4 chained descriptors + let idtable = DescriptorTable::new(m, GuestAddress(0x7000), 4); + for i in 0..4u16 { + let desc: Descriptor; + if i < 3 { + desc = Descriptor::new(0x1000 * i as u64, 0x1000, VIRTQ_DESC_F_NEXT, i + 1); + } else { + desc = Descriptor::new(0x1000 * i as u64, 0x1000, 0, 0); + } + idtable.store(i, desc); + } + + assert_eq!(c.head_index(), 0); + // Consume the first descriptor. + c.next().unwrap(); + + // The chain logic hasn't parsed the indirect descriptor yet. + assert!(!c.is_indirect); + + // Try to iterate through the indirect descriptor chain. + for i in 0..4 { + let desc = c.next().unwrap(); + assert!(c.is_indirect); + if i < 3 { + assert_eq!(desc.flags(), VIRTQ_DESC_F_NEXT); + assert_eq!(desc.next(), i + 1); + } + } + // Even though we added a new descriptor after the one that is pointing to the indirect + // table, this descriptor won't be available when parsing the chain. + assert!(c.next().is_none()); + } + + #[test] + fn test_indirect_descriptor_err() { + // We are testing here different misconfigurations of the indirect table. 
For these error + // case scenarios, the iterator over the descriptor chain won't return a new descriptor. + { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an invalid indirect table: addr not a + // multiple of descriptor size. + let desc = Descriptor::new(0x1001, 0x1000, VIRTQ_DESC_F_INDIRECT, 0); + vq.desc_table().store(0, desc); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + + assert!(c.next().is_none()); + } + + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an invalid indirect table: len not a + // multiple of descriptor size. + let desc = Descriptor::new(0x1000, 0x1001, VIRTQ_DESC_F_INDIRECT, 0); + vq.desc_table().store(0, desc); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + + assert!(c.next().is_none()); + } + + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an invalid indirect table: table len > + // u16::MAX. + let desc = Descriptor::new( + 0x1000, + (u16::MAX as u32 + 1) * VIRTQ_DESCRIPTOR_SIZE as u32, + VIRTQ_DESC_F_INDIRECT, + 0, + ); + vq.desc_table().store(0, desc); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + + assert!(c.next().is_none()); + } + + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an indirect table. 
+ let desc = Descriptor::new(0x1000, 0x1000, VIRTQ_DESC_F_INDIRECT, 0); + vq.desc_table().store(0, desc); + // It's ok for an indirect descriptor to have flags = 0. + let desc = Descriptor::new(0x3000, 0x1000, 0, 0); + m.write_obj(desc, GuestAddress(0x1000)).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + assert!(c.next().is_some()); + + // But it's not allowed to have an indirect descriptor that points to another indirect + // table. + let desc = Descriptor::new(0x3000, 0x1000, VIRTQ_DESC_F_INDIRECT, 0); + m.write_obj(desc, GuestAddress(0x1000)).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0, None); + + assert!(c.next().is_none()); + } + } +} diff --git a/virtio-queue/src/defs.rs b/virtio-queue/src/defs.rs index d47563f38..1d5acbe20 100644 --- a/virtio-queue/src/defs.rs +++ b/virtio-queue/src/defs.rs @@ -10,37 +10,50 @@ pub const VIRTQ_DESC_F_NEXT: u16 = 0x1; /// Marks a buffer as device write-only. pub const VIRTQ_DESC_F_WRITE: u16 = 0x2; -/// Shows that the buffer contains a list of buffer descriptors. +/// Marks a buffer as containing a list of buffer descriptors. pub const VIRTQ_DESC_F_INDIRECT: u16 = 0x4; -/// Used flags +/// Flag to disable guest notification for used descriptors. pub const VIRTQ_USED_F_NO_NOTIFY: u16 = 0x1; -/// This is the size of one element in the used ring, id (le32) + len (le32). +/// Size of one element in the used ring, id (le32) + len (le32). pub(crate) const VIRTQ_USED_ELEMENT_SIZE: u64 = 8; -/// Used ring header: flags (u16) + idx (u16) +/// Size of used ring header: flags (u16) + idx (u16) pub(crate) const VIRTQ_USED_RING_HEADER_SIZE: u64 = 4; -/// This is the size of the used ring metadata: header + avail_event (le16). +/// Size of the used ring metadata: header + avail_event (le16). 
+/// /// The total size of the used ring is: -/// VIRTQ_USED_RING_HMETA_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size +/// VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size. pub(crate) const VIRTQ_USED_RING_META_SIZE: u64 = VIRTQ_USED_RING_HEADER_SIZE + 2; -/// This is the size of one element in the available ring (le16). +/// Size of one element in the available ring (le16). pub(crate) const VIRTQ_AVAIL_ELEMENT_SIZE: u64 = 2; -/// Avail ring header: flags(u16) + idx(u16) +/// Size of available ring header: flags(u16) + idx(u16) pub(crate) const VIRTQ_AVAIL_RING_HEADER_SIZE: u64 = 4; -/// This is the size of the available ring metadata: header + used_event (le16). +/// Size of the available ring metadata: header + used_event (le16). +/// /// The total size of the available ring is: -/// VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size +/// VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size. pub(crate) const VIRTQ_AVAIL_RING_META_SIZE: u64 = VIRTQ_AVAIL_RING_HEADER_SIZE + 2; +/// Size of virtio descriptor. +/// /// The Virtio Spec 1.0 defines the alignment of VirtIO descriptor is 16 bytes, /// which fulfills the explicit constraint of GuestMemory::read_obj(). pub(crate) const VIRTQ_DESCRIPTOR_SIZE: usize = 16; +/// Default guest physical address for descriptor table. +pub(crate) const DEFAULT_DESC_TABLE_ADDR: u64 = 0x0; + +/// Default guest physical address for available ring. +pub(crate) const DEFAULT_AVAIL_RING_ADDR: u64 = 0x0; + +/// Default guest physical address for used ring. +pub(crate) const DEFAULT_USED_RING_ADDR: u64 = 0x0; + /// Vector value used to disable MSI for a queue. pub const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff; diff --git a/virtio-queue/src/descriptor.rs b/virtio-queue/src/descriptor.rs new file mode 100644 index 000000000..e7f7a293f --- /dev/null +++ b/virtio-queue/src/descriptor.rs @@ -0,0 +1,270 @@ +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Copyright © 2019 Intel Corporation +// +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use vm_memory::{ByteValued, GuestAddress, Le16, Le32, Le64}; + +use crate::defs::{VIRTQ_DESC_F_INDIRECT, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + +/// A virtio descriptor with C representation. +/// +/// # Example +/// +/// ```rust +/// # use virtio_queue::defs::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; +/// # use virtio_queue::mock::MockSplitQueue; +/// use virtio_queue::{Descriptor, Queue}; +/// use vm_memory::{GuestAddress, GuestMemoryMmap}; +/// +/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue<&GuestMemoryMmap> { +/// # let vq = MockSplitQueue::new(m, 16); +/// # let mut q = vq.create_queue(m); +/// # +/// # // We have only one chain: (0, 1). +/// # let desc = Descriptor::new(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); +/// # vq.desc_table().store(0, desc); +/// # let desc = Descriptor::new(0x2000, 0x1000, VIRTQ_DESC_F_WRITE, 0); +/// # vq.desc_table().store(1, desc); +/// # +/// # vq.avail().ring().ref_at(0).store(u16::to_le(0)); +/// # vq.avail().idx().store(u16::to_le(1)); +/// # q +/// # } +/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); +/// // Populate the queue with descriptor chains and update the available ring accordingly. +/// let mut queue = populate_queue(m); +/// let mut i = queue.iter().unwrap(); +/// let mut c = i.next().unwrap(); +/// +/// // Get the first descriptor and access its fields. 
+/// let desc = c.next().unwrap(); +/// let _addr = desc.addr(); +/// let _len = desc.len(); +/// let _flags = desc.flags(); +/// let _next = desc.next(); +/// let _is_write_only = desc.is_write_only(); +/// let _has_next = desc.has_next(); +/// let _refers_to_ind_table = desc.refers_to_indirect_table(); +/// ``` +// Note that the `ByteValued` implementation of this structure expects the `Descriptor` to store +// only plain old data types. +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct Descriptor { + /// Guest physical address of device specific data. + addr: Le64, + + /// Length of device specific data. + len: Le32, + + /// Includes next, write, and indirect bits. + flags: Le16, + + /// Index into the descriptor table of the next descriptor if flags has the `next` bit set. + next: Le16, +} + +#[allow(clippy::len_without_is_empty)] +impl Descriptor { + /// Return the guest physical address of the descriptor buffer. + pub fn addr(&self) -> GuestAddress { + GuestAddress(self.addr.into()) + } + + /// Return the length of the descriptor buffer. + pub fn len(&self) -> u32 { + self.len.into() + } + + /// Return the flags for this descriptor, including next, write and indirect bits. + pub fn flags(&self) -> u16 { + self.flags.into() + } + + /// Return the value stored in the `next` field of the descriptor. + pub fn next(&self) -> u16 { + self.next.into() + } + + /// Check whether this descriptor refers to a buffer containing an indirect descriptor table. + pub fn refers_to_indirect_table(&self) -> bool { + self.flags() & VIRTQ_DESC_F_INDIRECT != 0 + } + + /// Check whether the `VIRTQ_DESC_F_NEXT` is set for the descriptor. + pub fn has_next(&self) -> bool { + self.flags() & VIRTQ_DESC_F_NEXT != 0 + } + + /// Check if the driver designated this as a write only descriptor. + /// + /// If this is false, this descriptor is read only. + /// Write only means the emulated device can write and the driver can read. 
+ pub fn is_write_only(&self) -> bool { + self.flags() & VIRTQ_DESC_F_WRITE != 0 + } + + /// Set the guest physical address of the descriptor buffer. + pub fn set_addr(&mut self, addr: u64) { + self.addr = addr.into(); + } +} + +impl Descriptor { + /// Create a new descriptor. + /// + /// # Arguments + /// * `addr` - the guest physical address of the descriptor buffer. + /// * `len` - the length of the descriptor buffer. + /// * `flags` - the `flags` for the descriptor. + /// * `next` - the `next` field of the descriptor. + pub fn new(addr: u64, len: u32, flags: u16, next: u16) -> Self { + Descriptor { + addr: addr.into(), + len: len.into(), + flags: flags.into(), + next: next.into(), + } + } + + /// Set the length of the descriptor buffer. + pub fn set_len(&mut self, len: u32) { + self.len = len.into(); + } + + /// Set the flags for this descriptor. + pub fn set_flags(&mut self, flags: u16) { + self.flags = flags.into(); + } + + /// Set the value stored in the `next` field of the descriptor. + pub fn set_next(&mut self, next: u16) { + self.next = next.into(); + } +} + +// This is safe because `Descriptor` contains only wrappers over POD types and all accesses through +// safe `vm-memory` API will validate any garbage that could be included in there. +unsafe impl ByteValued for Descriptor {} + +/// Represents the contents of an element from the used virtqueue ring. +// Note that the `ByteValued` implementation of this structure expects the `VirtqUsedElem` to store +// only plain old data types. +#[repr(C)] +#[derive(Clone, Copy, Default, Debug)] +pub struct VirtqUsedElem { + id: Le32, + len: Le32, +} + +impl VirtqUsedElem { + /// Create a new `VirtqUsedElem` instance. + /// + /// # Arguments + /// * `id` - the index of the used descriptor chain. + /// * `len` - the total length of the descriptor chain which was used (written to). 
+ pub(crate) fn new(id: u32, len: u32) -> Self { + VirtqUsedElem { + id: id.into(), + len: len.into(), + } + } +} + +#[allow(clippy::len_without_is_empty)] +impl VirtqUsedElem { + /// Get the index of the used descriptor chain. + pub fn id(&self) -> u32 { + self.id.into() + } + + /// Get `length` field of the used ring entry. + pub fn len(&self) -> u32 { + self.len.into() + } +} + +// This is safe because `VirtqUsedElem` contains only wrappers over POD types and all accesses +// through safe `vm-memory` API will validate any garbage that could be included in there. +unsafe impl ByteValued for VirtqUsedElem {} + +#[cfg(test)] +mod tests { + use super::*; + use memoffset::offset_of; + use std::mem::{align_of, size_of}; + + #[test] + fn test_descriptor_offset() { + assert_eq!(size_of::(), 16); + assert_eq!(offset_of!(Descriptor, addr), 0); + assert_eq!(offset_of!(Descriptor, len), 8); + assert_eq!(offset_of!(Descriptor, flags), 12); + assert_eq!(offset_of!(Descriptor, next), 14); + assert!(align_of::() <= 16); + } + + #[test] + fn test_descriptor_getter_setter() { + let mut desc = Descriptor::new(0, 0, 0, 0); + + desc.set_addr(0x1000); + assert_eq!(desc.addr(), GuestAddress(0x1000)); + desc.set_len(0x2000); + assert_eq!(desc.len(), 0x2000); + desc.set_flags(VIRTQ_DESC_F_NEXT); + assert_eq!(desc.flags(), VIRTQ_DESC_F_NEXT); + assert!(desc.has_next()); + assert!(!desc.is_write_only()); + assert!(!desc.refers_to_indirect_table()); + desc.set_flags(VIRTQ_DESC_F_WRITE); + assert_eq!(desc.flags(), VIRTQ_DESC_F_WRITE); + assert!(!desc.has_next()); + assert!(desc.is_write_only()); + assert!(!desc.refers_to_indirect_table()); + desc.set_flags(VIRTQ_DESC_F_INDIRECT); + assert_eq!(desc.flags(), VIRTQ_DESC_F_INDIRECT); + assert!(!desc.has_next()); + assert!(!desc.is_write_only()); + assert!(desc.refers_to_indirect_table()); + desc.set_next(3); + assert_eq!(desc.next(), 3); + } + + #[test] + fn test_descriptor_copy() { + let e1 = Descriptor::new(1, 2, VIRTQ_DESC_F_NEXT, 3); + let 
mut e2 = Descriptor::default(); + + e2.as_mut_slice().copy_from_slice(e1.as_slice()); + assert_eq!(e1.addr(), e2.addr()); + assert_eq!(e1.len(), e2.len()); + assert_eq!(e1.flags(), e2.flags()); + assert_eq!(e1.next(), e2.next()); + } + + #[test] + fn test_used_elem_offset() { + assert_eq!(offset_of!(VirtqUsedElem, id), 0); + assert_eq!(offset_of!(VirtqUsedElem, len), 4); + assert_eq!(size_of::(), 8); + } + + #[test] + fn test_used_elem_copy() { + let e1 = VirtqUsedElem::new(3, 15); + let mut e2 = VirtqUsedElem::new(0, 0); + + e2.as_mut_slice().copy_from_slice(e1.as_slice()); + assert_eq!(e1.id, e2.id); + assert_eq!(e1.len, e2.len); + } +} diff --git a/virtio-queue/src/iterator.rs b/virtio-queue/src/iterator.rs new file mode 100644 index 000000000..45fb7cf6a --- /dev/null +++ b/virtio-queue/src/iterator.rs @@ -0,0 +1,323 @@ +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Copyright © 2019 Intel Corporation +// +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::num::Wrapping; +use std::ops::Deref; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; + +use crate::defs::{VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE}; +use crate::{error, AccessPlatform, DescriptorChain, QueueState}; + +/// Consuming iterator over all available descriptor chain heads in the queue. 
+/// +/// # Example +/// +/// ```rust +/// # use virtio_queue::defs::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; +/// # use virtio_queue::mock::MockSplitQueue; +/// use virtio_queue::{Descriptor, Queue}; +/// use vm_memory::{GuestAddress, GuestMemoryMmap}; +/// +/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue<&GuestMemoryMmap> { +/// # let vq = MockSplitQueue::new(m, 16); +/// # let mut q = vq.create_queue(m); +/// # +/// # // The chains are (0, 1), (2, 3, 4) and (5, 6). +/// # for i in 0..7 { +/// # let flags = match i { +/// # 1 | 6 => 0, +/// # 2 | 5 => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, +/// # 4 => VIRTQ_DESC_F_WRITE, +/// # _ => VIRTQ_DESC_F_NEXT, +/// # }; +/// # +/// # let desc = Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags, i + 1); +/// # vq.desc_table().store(i, desc); +/// # } +/// # +/// # vq.avail().ring().ref_at(0).store(u16::to_le(0)); +/// # vq.avail().ring().ref_at(1).store(u16::to_le(2)); +/// # vq.avail().ring().ref_at(2).store(u16::to_le(5)); +/// # vq.avail().idx().store(u16::to_le(3)); +/// # q +/// # } +/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); +/// // Populate the queue with descriptor chains and update the available ring accordingly. +/// let mut queue = populate_queue(m); +/// let mut i = queue.iter().unwrap(); +/// +/// { +/// let mut c = i.next().unwrap(); +/// let _first_head_index = c.head_index(); +/// // We should have two descriptors in the first chain. +/// let _desc1 = c.next().unwrap(); +/// let _desc2 = c.next().unwrap(); +/// } +/// +/// { +/// let c = i.next().unwrap(); +/// let _second_head_index = c.head_index(); +/// +/// let mut iter = c.writable(); +/// // We should have two writable descriptors in the second chain. 
+/// let _desc1 = iter.next().unwrap(); +/// let _desc2 = iter.next().unwrap(); +/// } +/// +/// { +/// let c = i.next().unwrap(); +/// let _third_head_index = c.head_index(); +/// +/// let mut iter = c.readable(); +/// // We should have one readable descriptor in the third chain. +/// let _desc1 = iter.next().unwrap(); +/// } +/// // Let's go back one position in the available ring. +/// i.go_to_previous_position(); +/// // We should be able to access the third descriptor chain again. +/// let c = i.next().unwrap(); +/// let _third_head_index = c.head_index(); +/// ``` +#[derive(Debug)] +pub struct AvailIter<'b, M> { + mem: M, + desc_table: GuestAddress, + avail_ring: GuestAddress, + queue_size: u16, + last_index: Wrapping, + next_avail: &'b mut Wrapping, + access_platform: &'b Option>, +} + +impl<'b, M> AvailIter<'b, M> +where + M: Deref, + M::Target: GuestMemory + Sized, +{ + /// Create a new instance of `AvailIter`. + /// + /// # Arguments + /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers. + /// * `idx` - the index of the available ring entry where the driver would put the next + /// available descriptor chain. + /// * `state` - the `QueueState` object from which the needed data to create the `AvailIter` can + /// be retrieved. + pub(crate) fn new(mem: M, idx: Wrapping, state: &'b mut QueueState) -> Self { + AvailIter { + mem, + desc_table: state.desc_table, + avail_ring: state.avail_ring, + queue_size: state.size, + last_index: idx, + next_avail: &mut state.next_avail, + access_platform: &state.access_platform, + } + } + + /// Goes back one position in the available descriptor chain offered by the driver. + /// + /// Rust does not support bidirectional iterators. This is the only way to revert the effect + /// of an iterator increment on the queue. + /// + /// Note: this method assumes there's only one thread manipulating the queue, so it should only + /// be invoked in single-threaded context. 
+ pub fn go_to_previous_position(&mut self) { + *self.next_avail -= Wrapping(1); + } +} + +impl<'b, M> Iterator for AvailIter<'b, M> +where + M: Clone + Deref, + M::Target: GuestMemory, +{ + type Item = DescriptorChain; + + fn next(&mut self) -> Option { + if *self.next_avail == self.last_index { + return None; + } + + // These two operations cannot overflow a u64 since they're working with relatively small + // numbers compared to u64::MAX. + let elem_off = u64::from(self.next_avail.0 % self.queue_size) * VIRTQ_AVAIL_ELEMENT_SIZE; + let offset = VIRTQ_AVAIL_RING_HEADER_SIZE + elem_off; + + let addr = self.avail_ring.checked_add(offset)?; + let head_index: u16 = self + .mem + .load(addr, Ordering::Acquire) + .map(u16::from_le) + .map_err(|_| error!("Failed to read from memory {:x}", addr.raw_value())) + .ok()?; + + *self.next_avail += Wrapping(1); + + Some(DescriptorChain::new( + self.mem.clone(), + self.desc_table, + self.queue_size, + head_index, + self.access_platform.clone(), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::mock::MockSplitQueue; + use crate::Descriptor; + use vm_memory::GuestMemoryMmap; + + #[test] + fn test_descriptor_and_iterator() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + let mut q = vq.create_queue(m); + + // q is currently valid + assert!(q.is_valid()); + + // the chains are (0, 1), (2, 3, 4) and (5, 6) + for j in 0..7 { + let flags = match j { + 1 | 6 => 0, + 2 | 5 => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, + 4 => VIRTQ_DESC_F_WRITE, + _ => VIRTQ_DESC_F_NEXT, + }; + + let desc = Descriptor::new((0x1000 * (j + 1)) as u64, 0x1000, flags, j + 1); + vq.desc_table().store(j, desc); + } + + vq.avail().ring().ref_at(0).store(u16::to_le(0)); + vq.avail().ring().ref_at(1).store(u16::to_le(2)); + vq.avail().ring().ref_at(2).store(u16::to_le(5)); 
vq.avail().idx().store(u16::to_le(3)); + + let mut i = q.iter().unwrap(); + + { + let c = i.next().unwrap(); + assert_eq!(c.head_index(), 0); + + let mut iter = c; + assert!(iter.next().is_some()); + assert!(iter.next().is_some()); + assert!(iter.next().is_none()); + assert!(iter.next().is_none()); + } + + { + let c = i.next().unwrap(); + assert_eq!(c.head_index(), 2); + + let mut iter = c.writable(); + assert!(iter.next().is_some()); + assert!(iter.next().is_some()); + assert!(iter.next().is_none()); + assert!(iter.next().is_none()); + } + + { + let c = i.next().unwrap(); + assert_eq!(c.head_index(), 5); + + let mut iter = c.readable(); + assert!(iter.next().is_some()); + assert!(iter.next().is_none()); + assert!(iter.next().is_none()); + } + } + + #[test] + fn test_iterator() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + let mut q = vq.create_queue(m); + + q.state.size = q.state.max_size; + q.state.desc_table = vq.desc_table_addr(); + q.state.avail_ring = vq.avail_addr(); + q.state.used_ring = vq.used_addr(); + assert!(q.is_valid()); + + { + // an invalid queue should return an iterator with no next + q.state.ready = false; + let mut i = q.iter().unwrap(); + assert!(i.next().is_none()); + } + + q.state.ready = true; + + // now let's create two simple descriptor chains + // the chains are (0, 1) and (2, 3, 4) + { + for j in 0..5u16 { + let flags = match j { + 1 | 4 => 0, + _ => VIRTQ_DESC_F_NEXT, + }; + + let desc = Descriptor::new((0x1000 * (j + 1)) as u64, 0x1000, flags, j + 1); + vq.desc_table().store(j, desc); + } + + vq.avail().ring().ref_at(0).store(u16::to_le(0)); + vq.avail().ring().ref_at(1).store(u16::to_le(2)); + vq.avail().idx().store(u16::to_le(2)); + + let mut i = q.iter().unwrap(); + + { + let mut c = i.next().unwrap(); + assert_eq!(c.head_index(), 0); + + c.next().unwrap(); + assert!(c.next().is_some()); + assert!(c.next().is_none()); + assert_eq!(c.head_index(), 
0); + } + + { + let mut c = i.next().unwrap(); + assert_eq!(c.head_index(), 2); + + c.next().unwrap(); + c.next().unwrap(); + c.next().unwrap(); + assert!(c.next().is_none()); + assert_eq!(c.head_index(), 2); + } + + // also test go_to_previous_position() works as expected + { + assert!(i.next().is_none()); + i.go_to_previous_position(); + let mut c = q.iter().unwrap().next().unwrap(); + c.next().unwrap(); + c.next().unwrap(); + c.next().unwrap(); + assert!(c.next().is_none()); + } + } + } +} diff --git a/virtio-queue/src/lib.rs b/virtio-queue/src/lib.rs index 353901439..27731ac61 100644 --- a/virtio-queue/src/lib.rs +++ b/virtio-queue/src/lib.rs @@ -14,29 +14,32 @@ #![deny(missing_docs)] -pub mod defs; - -use std::cmp::min; -use std::convert::TryFrom; use std::fmt::{self, Debug, Display}; -use std::marker::PhantomData; -use std::mem::size_of; use std::num::Wrapping; use std::ops::{Deref, DerefMut}; -use std::sync::atomic::{fence, Ordering}; -use std::sync::{Arc, Mutex, MutexGuard}; +use std::sync::atomic::Ordering; use log::error; -use vm_memory::{ - Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryError, -}; +use vm_memory::{GuestMemory, GuestMemoryError}; -use self::defs::{ - VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE, VIRTQ_AVAIL_RING_META_SIZE, - VIRTQ_DESCRIPTOR_SIZE, VIRTQ_DESC_F_INDIRECT, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE, - VIRTQ_MSI_NO_VECTOR, VIRTQ_USED_ELEMENT_SIZE, VIRTQ_USED_F_NO_NOTIFY, - VIRTQ_USED_RING_HEADER_SIZE, VIRTQ_USED_RING_META_SIZE, -}; +pub use self::chain::{DescriptorChain, DescriptorChainRwIter}; +pub use self::descriptor::{Descriptor, VirtqUsedElem}; +pub use self::iterator::AvailIter; +pub use self::queue::Queue; +pub use self::queue_guard::QueueGuard; +pub use self::state::QueueState; +pub use self::state_sync::QueueStateSync; + +pub mod defs; +pub mod mock; + +mod chain; +mod descriptor; +mod iterator; +mod queue; +mod queue_guard; +mod state; +mod state_sync; /// Trait for devices 
with access to data in memory being limited and/or /// translated. @@ -48,6 +51,8 @@ pub trait AccessPlatform: Send + Sync + Debug { /// Virtio Queue related errors. #[derive(Debug)] pub enum Error { + /// Address overflow. + AddressOverflow, /// Failed to access guest memory. GuestMemory(GuestMemoryError), /// Invalid indirect descriptor. @@ -65,6 +70,7 @@ impl Display for Error { use self::Error::*; match self { + AddressOverflow => write!(f, "address overflow"), GuestMemory(_) => write!(f, "error accessing guest memory"), InvalidChain => write!(f, "invalid descriptor chain"), InvalidIndirectDescriptor => write!(f, "invalid indirect descriptor"), @@ -76,403 +82,25 @@ impl Display for Error { impl std::error::Error for Error {} -/// A virtio descriptor constraints with C representation. -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct Descriptor { - /// Guest physical address of device specific data - addr: u64, - - /// Length of device specific data - len: u32, - - /// Includes next, write, and indirect bits - flags: u16, - - /// Index into the descriptor table of the next descriptor if flags has - /// the next bit set - next: u16, -} - -// SAFETY: Descriptor only contains a series of integers and has no implicit padding -unsafe impl ByteValued for Descriptor {} - -#[allow(clippy::len_without_is_empty)] -impl Descriptor { - /// Creates a new descriptor - #[cfg(any(test, feature = "test-utils"))] - pub fn new(addr: u64, len: u32, flags: u16, next: u16) -> Self { - Descriptor { - addr, - len, - flags, - next, - } - } - - /// Return the guest physical address of descriptor buffer - pub fn addr(&self) -> GuestAddress { - GuestAddress(self.addr) - } - - /// Return the length of descriptor buffer - pub fn len(&self) -> u32 { - self.len - } - - /// Return the flags for this descriptor, including next, write and indirect - /// bits - pub fn flags(&self) -> u16 { - self.flags - } - - /// Return the value stored in the `next` field of the descriptor. 
- pub fn next(&self) -> u16 { - self.next - } - - /// Check whether this is an indirect descriptor. - pub fn is_indirect(&self) -> bool { - // TODO: The are a couple of restrictions in terms of which flags combinations are - // actually valid for indirect descriptors. Implement those checks as well somewhere. - self.flags() & VIRTQ_DESC_F_INDIRECT != 0 - } - - /// Check whether the `VIRTQ_DESC_F_NEXT` is set for the descriptor. - pub fn has_next(&self) -> bool { - self.flags() & VIRTQ_DESC_F_NEXT != 0 - } - - /// Checks if the driver designated this as a write only descriptor. - /// - /// If this is false, this descriptor is read only. - /// Write only means the the emulated device can write and the driver can read. - pub fn is_write_only(&self) -> bool { - self.flags & VIRTQ_DESC_F_WRITE != 0 - } -} - -/// A virtio descriptor chain. -#[derive(Clone, Debug)] -pub struct DescriptorChain { - mem: M::T, - desc_table: GuestAddress, - queue_size: u16, - head_index: u16, - next_index: u16, - ttl: u16, - is_indirect: bool, - access_platform: Option>, -} - -impl DescriptorChain { - fn with_ttl( - mem: M::T, - desc_table: GuestAddress, - queue_size: u16, - ttl: u16, - head_index: u16, - access_platform: Option>, - ) -> Self { - DescriptorChain { - mem, - desc_table, - queue_size, - head_index, - next_index: head_index, - ttl, - is_indirect: false, - access_platform, - } - } - - /// Create a new `DescriptorChain` instance. - fn new( - mem: M::T, - desc_table: GuestAddress, - queue_size: u16, - head_index: u16, - access_platform: Option>, - ) -> Self { - Self::with_ttl( - mem, - desc_table, - queue_size, - queue_size, - head_index, - access_platform, - ) - } - - /// Get the descriptor index of the chain header - pub fn head_index(&self) -> u16 { - self.head_index - } - - /// Return a `GuestMemory` object that can be used to access the buffers - /// pointed to by the descriptor chain. 
- pub fn memory(&self) -> &M::M { - &*self.mem - } - - /// Returns an iterator that only yields the readable descriptors in the chain. - pub fn readable(self) -> DescriptorChainRwIter { - DescriptorChainRwIter { - chain: self, - writable: false, - } - } - - /// Returns an iterator that only yields the writable descriptors in the chain. - pub fn writable(self) -> DescriptorChainRwIter { - DescriptorChainRwIter { - chain: self, - writable: true, - } - } - - // Alters the internal state of the `DescriptorChain` to switch iterating over an - // indirect descriptor table defined by `desc`. - fn process_indirect_descriptor(&mut self, desc: Descriptor) -> Result<(), Error> { - if self.is_indirect { - return Err(Error::InvalidIndirectDescriptor); - } - - let table_len = (desc.len as usize) / VIRTQ_DESCRIPTOR_SIZE; - // Check the target indirect descriptor table is correctly aligned. - if desc.addr().raw_value() & (VIRTQ_DESCRIPTOR_SIZE as u64 - 1) != 0 - || (desc.len as usize) & (VIRTQ_DESCRIPTOR_SIZE - 1) != 0 - || table_len > usize::from(u16::MAX) - { - return Err(Error::InvalidIndirectDescriptorTable); - } - - self.desc_table = desc.addr(); - // try_from cannot fail as we've checked table_len above - self.queue_size = u16::try_from(table_len).expect("invalid table_len"); - self.next_index = 0; - self.ttl = self.queue_size; - self.is_indirect = true; - - Ok(()) - } -} - -impl Iterator for DescriptorChain { - type Item = Descriptor; - - /// Returns the next descriptor in this descriptor chain, if there is one. - /// - /// Note that this is distinct from the next descriptor chain returned by - /// [`AvailIter`](struct.AvailIter.html), which is the head of the next - /// _available_ descriptor chain. 
- fn next(&mut self) -> Option { - if self.ttl == 0 || self.next_index >= self.queue_size { - return None; - } - - // It's ok to use `unchecked_add` here because we previously verify the index does not - // exceed the queue size, and the descriptor table location is expected to have been - // validate before (for example, before activating a device). Moreover, this cannot - // lead to unsafety because the actual memory accesses are always checked. - let desc_addr = self - .desc_table - .unchecked_add(self.next_index as u64 * size_of::() as u64); - - // The guest device driver should not touch the descriptor once submitted, so it's safe - // to use read_obj() here. - let mut desc = self.mem.read_obj::(desc_addr).ok()?; - // When needed, it's very important to translate the decriptor address - // before returning the Descriptor to the consumer. - if let Some(access_platform) = &self.access_platform { - desc.addr = access_platform - .translate(desc.addr, u64::from(desc.len)) - .ok()?; - } - - if desc.is_indirect() { - self.process_indirect_descriptor(desc).ok()?; - return self.next(); - } - - if desc.has_next() { - self.next_index = desc.next(); - // It's ok to decrement `self.ttl` here because we check at the start of the method - // that it's greater than 0. - self.ttl -= 1; - } else { - self.ttl = 0; - } - - Some(desc) - } -} - -/// An iterator for readable or writable descriptors. -#[derive(Clone)] -pub struct DescriptorChainRwIter { - chain: DescriptorChain, - writable: bool, -} - -impl Iterator for DescriptorChainRwIter { - type Item = Descriptor; - - /// Returns the next descriptor in this descriptor chain, if there is one. - /// - /// Note that this is distinct from the next descriptor chain returned by - /// [`AvailIter`](struct.AvailIter.html), which is the head of the next - /// _available_ descriptor chain. 
- fn next(&mut self) -> Option { - loop { - match self.chain.next() { - Some(v) => { - if v.is_write_only() == self.writable { - return Some(v); - } - } - None => return None, - } - } - } -} - -// We can't derive Debug, because rustc doesn't generate the M::T: Debug -// constraint -impl Debug for DescriptorChainRwIter -where - M::T: Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("DescriptorChainRwIter") - .field("chain", &self.chain) - .field("writable", &self.writable) - .finish() - } -} - -/// Consuming iterator over all available descriptor chain heads in the queue. -#[derive(Debug)] -pub struct AvailIter<'b, M: GuestAddressSpace> { - mem: M::T, - desc_table: GuestAddress, - avail_ring: GuestAddress, - last_index: Wrapping, - queue_size: u16, - next_avail: &'b mut Wrapping, - access_platform: &'b Option>, -} - -impl<'b, M: GuestAddressSpace> AvailIter<'b, M> { - /// Goes back one position in the available descriptor chain offered by the driver. - /// - /// Rust does not support bidirectional iterators. This is the only way to revert the effect - /// of an iterator increment on the queue. - /// - /// Note: this method assumes there's only one thread manipulating the queue, so it should only - /// be invoked in single-threaded context. - pub fn go_to_previous_position(&mut self) { - *self.next_avail -= Wrapping(1); - } -} - -impl<'b, M: GuestAddressSpace> Iterator for AvailIter<'b, M> { - type Item = DescriptorChain; - - fn next(&mut self) -> Option { - if *self.next_avail == self.last_index { - return None; - } - - // This computation cannot overflow because all the values involved are actually - // `u16`s cast to `u64`. - let elem_off = u64::from(self.next_avail.0 % self.queue_size) * VIRTQ_AVAIL_ELEMENT_SIZE; - let offset = VIRTQ_AVAIL_RING_HEADER_SIZE + elem_off; - - // The logic in `Queue::is_valid` ensures it's ok to use `unchecked_add` as long - // as the index is within bounds. 
We do not currently enforce that a queue is only used - // after checking `is_valid`, but rather expect the device implementations to do so - // before activation. The standard also forbids drivers to change queue parameters - // while the device is "running". A warp-around cannot lead to unsafe memory accesses - // because the memory model performs its own validations. - let addr = self.avail_ring.unchecked_add(offset); - let head_index: u16 = self - .mem - .load(addr, Ordering::Acquire) - .map_err(|_| error!("Failed to read from memory {:x}", addr.raw_value())) - .ok()?; - - *self.next_avail += Wrapping(1); - - Some(DescriptorChain::new( - self.mem.clone(), - self.desc_table, - self.queue_size, - head_index, - self.access_platform.clone(), - )) - } -} - -/// Represents the contents of an element from the used virtqueue ring. -#[repr(C)] -#[derive(Clone, Copy, Default, Debug)] -pub struct VirtqUsedElem { - id: u32, - len: u32, -} - -// SAFETY: VirtqUsedElem only contains a series of integers and has no implicit padding -unsafe impl ByteValued for VirtqUsedElem {} - -impl VirtqUsedElem { - /// Create a new `VirtqUsedElem` instance. - pub fn new(id: u16, len: u32) -> Self { - VirtqUsedElem { - id: u32::from(id), - len, - } - } -} - -/// Struct to hold an exclusive reference to the underlying `QueueState` object. -pub enum QueueStateGuard<'a, M: GuestAddressSpace> { - /// A reference to a `QueueState` object. - StateObject(&'a mut QueueState), - /// A `MutexGuard` for a `QueueState` object. 
- MutexGuard(MutexGuard<'a, QueueState>), -} - -impl<'a, M: GuestAddressSpace> Deref for QueueStateGuard<'a, M> { - type Target = QueueState; - - fn deref(&self) -> &Self::Target { - match self { - QueueStateGuard::StateObject(v) => v, - QueueStateGuard::MutexGuard(v) => v.deref(), - } - } -} - -impl<'a, M: GuestAddressSpace> DerefMut for QueueStateGuard<'a, M> { - fn deref_mut(&mut self) -> &mut Self::Target { - match self { - QueueStateGuard::StateObject(v) => v, - QueueStateGuard::MutexGuard(v) => v.deref_mut(), - } - } +/// Trait for objects returned by `QueueStateT::lock()`. +pub trait QueueStateGuard<'a> { + /// Type for guard returned by `Self::lock()`. + type G: DerefMut; } /// Trait to access and manipulate a virtio queue. /// /// To optimize for performance, different implementations of the `QueueStateT` trait may be /// provided for single-threaded context and multi-threaded context. -pub trait QueueStateT { +/// +/// Using Higher-Rank Trait Bounds (HRTBs) to effectively define an associated type that has a +/// lifetime parameter, without tagging the `QueueStateT` trait with a lifetime as well. +pub trait QueueStateT: for<'a> QueueStateGuard<'a> { /// Construct an empty virtio queue state object with the given `max_size`. fn new(max_size: u16) -> Self; /// Check whether the queue configuration is valid. - fn is_valid(&self, mem: &M::T) -> bool; + fn is_valid(&self, mem: &M) -> bool; /// Reset the queue to the initial state. fn reset(&mut self); @@ -481,67 +109,73 @@ pub trait QueueStateT { /// /// Logically this method will acquire the underlying lock protecting the `QueueState` Object. /// The lock will be released when the returned object gets dropped. - fn lock(&mut self) -> QueueStateGuard<'_, M>; + fn lock(&mut self) -> ::G; + + /// Get an exclusive reference to the underlying `QueueState` object with an associated + /// `GuestMemory` object. + /// + /// Logically this method will acquire the underlying lock protecting the `QueueState` Object. 
+ /// The lock will be released when the returned object gets dropped. + fn lock_with_memory(&mut self, mem: M) -> QueueGuard::G> + where + M: Deref + Clone, + M::Target: GuestMemory + Sized, + { + QueueGuard::new(self.lock(), mem) + } /// Get the maximum size of the virtio queue. fn max_size(&self) -> u16; - /// Return the actual size of the queue. - /// - /// The virtio driver may configure queue size smaller than the value reported by `max_size()`. - fn actual_size(&self) -> u16; - /// Configure the queue size for the virtio queue. - /// - /// The `size` should power of two and less than or equal to value reported by `max_size()`, - /// otherwise it will panic. fn set_size(&mut self, size: u16); /// Check whether the queue is ready to be processed. fn ready(&self) -> bool; - /// Configure the queue to ready for processing. + /// Configure the queue to `ready for processing` state. fn set_ready(&mut self, ready: bool); - /// Set descriptor table address for the queue. + /// Set the descriptor table address for the queue. /// /// The descriptor table address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. + /// and/or `high` is `Some` and valid. fn set_desc_table_address(&mut self, low: Option, high: Option); - /// Set available ring address for the queue. + /// Set the available ring address for the queue. /// /// The available ring address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. + /// and/or `high` is `Some` and valid. fn set_avail_ring_address(&mut self, low: Option, high: Option); - /// Set used ring address for the queue. + /// Set the used ring address for the queue. /// /// The used ring address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. + /// and/or `high` is `Some` and valid. fn set_used_ring_address(&mut self, low: Option, high: Option); /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing. 
fn set_event_idx(&mut self, enabled: bool); /// Read the `idx` field from the available ring. - fn avail_idx(&self, mem: &M::T, order: Ordering) -> Result, Error>; + fn avail_idx(&self, mem: &M, order: Ordering) -> Result, Error>; /// Read the `idx` field from the used ring. - fn used_idx(&self, mem: &M::T, order: Ordering) -> Result, Error>; + fn used_idx(&self, mem: &M, order: Ordering) -> Result, Error>; /// Put a used descriptor head into the used ring. - fn add_used(&mut self, mem: &M::T, head_index: u16, len: u32) -> Result<(), Error>; + fn add_used(&mut self, mem: &M, head_index: u16, len: u32) + -> Result<(), Error>; /// Enable notification events from the guest driver. /// /// Return true if one or more descriptors can be consumed from the available ring after /// notifications were enabled (and thus it's possible there will be no corresponding /// notification). - fn enable_notification(&mut self, mem: &M::T) -> Result; + fn enable_notification(&mut self, mem: &M) -> Result; /// Disable notification events from the guest driver. - fn disable_notification(&mut self, mem: &M::T) -> Result<(), Error>; + fn disable_notification(&mut self, mem: &M) -> Result<(), Error>; /// Check whether a notification to the guest is needed. /// @@ -549,692 +183,17 @@ pub trait QueueStateT { /// driver will actually be notified, remember the associated index in the used ring, and /// won't return `true` again until the driver updates `used_event` and/or the notification /// conditions hold once more. - fn needs_notification(&mut self, mem: &M::T) -> Result; + fn needs_notification(&mut self, mem: &M) -> Result; - /// Return the index for the next descriptor in the available ring. + /// Return the index of the next entry in the available ring. fn next_avail(&self) -> u16; /// Return the index for the next descriptor in the used ring. fn next_used(&self) -> u16; - /// Set the index for the next descriptor in the available ring. 
+ /// Set the index of the next entry in the available ring. fn set_next_avail(&mut self, next_avail: u16); /// Set the index for the next descriptor in the used ring. fn set_next_used(&mut self, next_used: u16); } - -/// Struct to maintain information and manipulate state of a virtio queue. -#[derive(Clone, Debug)] -pub struct QueueState { - /// The maximal size in elements offered by the device - pub max_size: u16, - - /// Tail position of the available ring. - pub next_avail: Wrapping, - - /// Head position of the used ring. - pub next_used: Wrapping, - - /// VIRTIO_F_RING_EVENT_IDX negotiated - pub event_idx_enabled: bool, - - /// The last used value when using EVENT_IDX - pub signalled_used: Option>, - - /// The queue size in elements the driver selected - pub size: u16, - - /// Indicates if the queue is finished with configuration - pub ready: bool, - - /// Guest physical address of the descriptor table - pub desc_table: GuestAddress, - - /// Guest physical address of the available ring - pub avail_ring: GuestAddress, - - /// Guest physical address of the used ring - pub used_ring: GuestAddress, - - phantom: PhantomData, - - /// Interrupt vector - pub vector: u16, - - /// Access platform handler - pub access_platform: Option>, -} - -impl QueueState { - /// Get a consuming iterator over all available descriptor chain heads offered by the driver. - pub fn iter(&mut self, mem: M::T) -> Result, Error> { - self.avail_idx(&mem, Ordering::Acquire) - .map(move |idx| AvailIter { - mem, - desc_table: self.desc_table, - avail_ring: self.avail_ring, - last_index: idx, - queue_size: self.actual_size(), - next_avail: &mut self.next_avail, - access_platform: &self.access_platform, - }) - } - - // Helper method that writes `val` to the `avail_event` field of the used ring, using - // the provided ordering. 
- fn set_avail_event(&self, mem: &M::T, val: u16, order: Ordering) -> Result<(), Error> { - let elem_sz = VIRTQ_USED_ELEMENT_SIZE * u64::from(self.actual_size()); - let offset = VIRTQ_USED_RING_HEADER_SIZE + elem_sz; - let addr = self.used_ring.unchecked_add(offset); - - mem.store(val, addr, order).map_err(Error::GuestMemory) - } - - // Set the value of the `flags` field of the used ring, applying the specified ordering. - fn set_used_flags(&mut self, mem: &M::T, val: u16, order: Ordering) -> Result<(), Error> { - mem.store(val, self.used_ring, order) - .map_err(Error::GuestMemory) - } - - // Write the appropriate values to enable or disable notifications from the driver. - // - // Every access in this method uses `Relaxed` ordering because a fence is added by the caller - // when appropriate. - fn set_notification(&mut self, mem: &M::T, enable: bool) -> Result<(), Error> { - if enable { - if self.event_idx_enabled { - // We call `set_avail_event` using the `next_avail` value, instead of reading - // and using the current `avail_idx` to avoid missing notifications. More - // details in `enable_notification`. - self.set_avail_event(mem, self.next_avail.0, Ordering::Relaxed) - } else { - self.set_used_flags(mem, 0, Ordering::Relaxed) - } - } else if !self.event_idx_enabled { - self.set_used_flags(mem, VIRTQ_USED_F_NO_NOTIFY, Ordering::Relaxed) - } else { - // Notifications are effectively disabled by default after triggering once when - // `VIRTIO_F_EVENT_IDX` is negotiated, so we don't do anything in that case. - Ok(()) - } - } - - /// Return the value present in the used_event field of the avail ring. - /// - /// If the VIRTIO_F_EVENT_IDX feature bit is not negotiated, the flags field in the available - /// ring offers a crude mechanism for the driver to inform the device that it doesn’t want - /// interrupts when buffers are used. 
Otherwise virtq_avail.used_event is a more performant - /// alternative where the driver specifies how far the device can progress before interrupting. - /// - /// Neither of these interrupt suppression methods are reliable, as they are not synchronized - /// with the device, but they serve as useful optimizations. So we only ensure access to the - /// virtq_avail.used_event is atomic, but do not need to synchronize with other memory accesses. - fn used_event(&self, mem: &M::T, order: Ordering) -> Result, Error> { - // Safe because we have validated the queue and access guest - // memory through GuestMemory interfaces. - let elem_sz = u64::from(self.actual_size()) * VIRTQ_AVAIL_ELEMENT_SIZE; - let offset = VIRTQ_AVAIL_RING_HEADER_SIZE + elem_sz; - let used_event_addr = self.avail_ring.unchecked_add(offset); - - mem.load(used_event_addr, order) - .map(Wrapping) - .map_err(Error::GuestMemory) - } - - /// Set the queue to "ready", and update desc_table, avail_ring and - /// used_ring addresses based on the AccessPlatform handler. - fn enable(&mut self, set: bool) { - self.ready = set; - - if set { - // Translate address of descriptor table and vrings. 
- if let Some(access_platform) = &self.access_platform { - self.desc_table = - GuestAddress(access_platform.translate(self.desc_table.0, 0).unwrap()); - self.avail_ring = - GuestAddress(access_platform.translate(self.avail_ring.0, 0).unwrap()); - self.used_ring = - GuestAddress(access_platform.translate(self.used_ring.0, 0).unwrap()); - } - } else { - self.desc_table = GuestAddress(0); - self.avail_ring = GuestAddress(0); - self.used_ring = GuestAddress(0); - } - } -} - -impl QueueStateT for QueueState { - fn new(max_size: u16) -> Self { - QueueState { - max_size, - size: max_size, - ready: false, - desc_table: GuestAddress(0), - avail_ring: GuestAddress(0), - used_ring: GuestAddress(0), - next_avail: Wrapping(0), - next_used: Wrapping(0), - event_idx_enabled: false, - signalled_used: None, - phantom: PhantomData, - vector: VIRTQ_MSI_NO_VECTOR, - access_platform: None, - } - } - - fn is_valid(&self, mem: &M::T) -> bool { - let queue_size = self.actual_size() as u64; - let desc_table = self.desc_table; - let desc_table_size = size_of::() as u64 * queue_size; - let avail_ring = self.avail_ring; - let avail_ring_size = VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size; - let used_ring = self.used_ring; - let used_ring_size = VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size; - if !self.ready { - error!("attempt to use virtio queue that is not marked ready"); - false - } else if self.size > self.max_size || self.size == 0 || (self.size & (self.size - 1)) != 0 - { - error!("virtio queue with invalid size: {}", self.size); - false - } else if desc_table - .checked_add(desc_table_size) - .map_or(true, |v| !mem.address_in_range(v)) - { - error!( - "virtio queue descriptor table goes out of bounds: start:0x{:08x} size:0x{:08x}", - desc_table.raw_value(), - desc_table_size - ); - false - } else if avail_ring - .checked_add(avail_ring_size) - .map_or(true, |v| !mem.address_in_range(v)) - { - error!( - "virtio queue available ring goes out of 
bounds: start:0x{:08x} size:0x{:08x}", - avail_ring.raw_value(), - avail_ring_size - ); - false - } else if used_ring - .checked_add(used_ring_size) - .map_or(true, |v| !mem.address_in_range(v)) - { - error!( - "virtio queue used ring goes out of bounds: start:0x{:08x} size:0x{:08x}", - used_ring.raw_value(), - used_ring_size - ); - false - } else if desc_table.mask(0xf) != 0 { - error!("virtio queue descriptor table breaks alignment contraints"); - false - } else if avail_ring.mask(0x1) != 0 { - error!("virtio queue available ring breaks alignment contraints"); - false - } else if used_ring.mask(0x3) != 0 { - error!("virtio queue used ring breaks alignment contraints"); - false - } else { - true - } - } - - fn reset(&mut self) { - self.ready = false; - self.size = self.max_size; - self.desc_table = GuestAddress(0); - self.avail_ring = GuestAddress(0); - self.used_ring = GuestAddress(0); - self.next_avail = Wrapping(0); - self.next_used = Wrapping(0); - self.signalled_used = None; - self.event_idx_enabled = false; - self.vector = VIRTQ_MSI_NO_VECTOR; - } - - fn lock(&mut self) -> QueueStateGuard<'_, M> { - QueueStateGuard::StateObject(self) - } - - fn max_size(&self) -> u16 { - self.max_size - } - - fn actual_size(&self) -> u16 { - min(self.size, self.max_size) - } - - fn set_size(&mut self, size: u16) { - self.size = size; - } - - fn ready(&self) -> bool { - self.ready - } - - fn set_ready(&mut self, ready: bool) { - self.ready = ready; - } - - fn set_desc_table_address(&mut self, low: Option, high: Option) { - let low = low.unwrap_or(self.desc_table.0 as u32) as u64; - let high = high.unwrap_or((self.desc_table.0 >> 32) as u32) as u64; - - self.desc_table = GuestAddress((high << 32) | low); - } - - fn set_avail_ring_address(&mut self, low: Option, high: Option) { - let low = low.unwrap_or(self.avail_ring.0 as u32) as u64; - let high = high.unwrap_or((self.avail_ring.0 >> 32) as u32) as u64; - - self.avail_ring = GuestAddress((high << 32) | low); - } - - fn 
set_used_ring_address(&mut self, low: Option, high: Option) { - let low = low.unwrap_or(self.used_ring.0 as u32) as u64; - let high = high.unwrap_or((self.used_ring.0 >> 32) as u32) as u64; - - self.used_ring = GuestAddress((high << 32) | low); - } - - fn set_event_idx(&mut self, enabled: bool) { - self.signalled_used = None; - self.event_idx_enabled = enabled; - } - - fn avail_idx(&self, mem: &M::T, order: Ordering) -> Result, Error> { - let addr = self.avail_ring.unchecked_add(2); - - mem.load(addr, order) - .map(Wrapping) - .map_err(Error::GuestMemory) - } - - fn used_idx(&self, mem: &M::T, order: Ordering) -> Result, Error> { - let addr = self.used_ring.unchecked_add(2); - - mem.load(addr, order) - .map(Wrapping) - .map_err(Error::GuestMemory) - } - - fn add_used(&mut self, mem: &M::T, head_index: u16, len: u32) -> Result<(), Error> { - if head_index >= self.actual_size() { - error!( - "attempted to add out of bounds descriptor to used ring: {}", - head_index - ); - return Err(Error::InvalidDescriptorIndex); - } - - let next_used_index = u64::from(self.next_used.0 % self.actual_size()); - let elem_sz = next_used_index * VIRTQ_USED_ELEMENT_SIZE; - let offset = VIRTQ_USED_RING_HEADER_SIZE + elem_sz; - let addr = self.used_ring.unchecked_add(offset); - mem.write_obj(VirtqUsedElem::new(head_index, len), addr) - .map_err(Error::GuestMemory)?; - - self.next_used += Wrapping(1); - - mem.store( - self.next_used.0, - self.used_ring.unchecked_add(2), - Ordering::Release, - ) - .map_err(Error::GuestMemory) - } - - // TODO: Turn this into a doc comment/example. - // With the current implementation, a common way of consuming entries from the available ring - // while also leveraging notification suppression is to use a loop, for example: - // - // loop { - // // We have to explicitly disable notifications if `VIRTIO_F_EVENT_IDX` has not been - // // negotiated. - // self.disable_notification()?; - // - // for chain in self.iter()? { - // // Do something with each chain ... 
- // // Let's assume we process all available chains here. - // } - // - // // If `enable_notification` returns `true`, the driver has added more entries to the - // // available ring. - // if !self.enable_notification()? { - // break; - // } - // } - fn enable_notification(&mut self, mem: &M::T) -> Result { - self.set_notification(mem, true)?; - // Ensures the following read is not reordered before any previous write operation. - fence(Ordering::SeqCst); - - // We double check here to avoid the situation where the available ring has been updated - // just before we re-enabled notifications, and it's possible to miss one. We compare the - // current `avail_idx` value to `self.next_avail` because it's where we stopped processing - // entries. There are situations where we intentionally avoid processing everything in the - // available ring (which will cause this method to return `true`), but in that case we'll - // probably not re-enable notifications as we already know there are pending entries. - self.avail_idx(mem, Ordering::Relaxed) - .map(|idx| idx != self.next_avail) - } - - fn disable_notification(&mut self, mem: &M::T) -> Result<(), Error> { - self.set_notification(mem, false) - } - - fn needs_notification(&mut self, mem: &M::T) -> Result { - let used_idx = self.next_used; - - // Complete all the writes in add_used() before reading the event. - fence(Ordering::SeqCst); - - // The VRING_AVAIL_F_NO_INTERRUPT flag isn't supported yet. - if self.event_idx_enabled { - if let Some(old_idx) = self.signalled_used.replace(used_idx) { - let used_event = self.used_event(mem, Ordering::Relaxed)?; - // This check looks at `used_idx`, `used_event`, and `old_idx` as if they are on - // an axis that wraps around. If `used_idx - used_used - Wrapping(1)` is greater - // than or equal to the difference between `used_idx` and `old_idx`, then - // `old_idx` is closer to `used_idx` than `used_event` (and thus more recent), so - // we don't need to elicit another notification. 
- if (used_idx - used_event - Wrapping(1u16)) >= (used_idx - old_idx) { - return Ok(false); - } - } - } - - Ok(true) - } - - fn next_avail(&self) -> u16 { - self.next_avail.0 - } - - fn next_used(&self) -> u16 { - self.next_used.0 - } - - fn set_next_avail(&mut self, next_avail: u16) { - self.next_avail = Wrapping(next_avail); - } - - fn set_next_used(&mut self, next_used: u16) { - self.next_used = Wrapping(next_used); - } -} - -/// Struct to maintain information and manipulate state of a virtio queue for multi-threaded -/// context. -#[derive(Clone, Debug)] -pub struct QueueStateSync { - state: Arc>>, -} - -impl QueueStateT for QueueStateSync { - fn new(max_size: u16) -> Self { - QueueStateSync { - state: Arc::new(Mutex::new(QueueState::new(max_size))), - } - } - - fn is_valid(&self, mem: &M::T) -> bool { - self.state.lock().unwrap().is_valid(mem) - } - - fn reset(&mut self) { - self.state.lock().unwrap().reset(); - } - - fn lock(&mut self) -> QueueStateGuard<'_, M> { - QueueStateGuard::MutexGuard(self.state.lock().unwrap()) - } - - fn max_size(&self) -> u16 { - self.state.lock().unwrap().max_size() - } - - fn actual_size(&self) -> u16 { - self.state.lock().unwrap().actual_size() - } - - fn set_size(&mut self, size: u16) { - self.state.lock().unwrap().set_size(size) - } - - fn ready(&self) -> bool { - self.state.lock().unwrap().ready - } - - fn set_ready(&mut self, ready: bool) { - self.state.lock().unwrap().set_ready(ready) - } - - fn set_desc_table_address(&mut self, low: Option, high: Option) { - self.state.lock().unwrap().set_desc_table_address(low, high); - } - - fn set_avail_ring_address(&mut self, low: Option, high: Option) { - self.state.lock().unwrap().set_avail_ring_address(low, high); - } - - fn set_used_ring_address(&mut self, low: Option, high: Option) { - self.state.lock().unwrap().set_used_ring_address(low, high); - } - - fn set_event_idx(&mut self, enabled: bool) { - self.state.lock().unwrap().set_event_idx(enabled); - } - - fn avail_idx(&self, 
mem: &M::T, order: Ordering) -> Result, Error> { - self.state.lock().unwrap().avail_idx(mem, order) - } - - fn used_idx(&self, mem: &M::T, order: Ordering) -> Result, Error> { - self.state.lock().unwrap().used_idx(mem, order) - } - - fn add_used(&mut self, mem: &M::T, head_index: u16, len: u32) -> Result<(), Error> { - self.state.lock().unwrap().add_used(mem, head_index, len) - } - - fn enable_notification(&mut self, mem: &M::T) -> Result { - self.state.lock().unwrap().enable_notification(mem) - } - - fn disable_notification(&mut self, mem: &M::T) -> Result<(), Error> { - self.state.lock().unwrap().disable_notification(mem) - } - - fn needs_notification(&mut self, mem: &M::T) -> Result { - self.state.lock().unwrap().needs_notification(mem) - } - - fn next_avail(&self) -> u16 { - self.state.lock().unwrap().next_avail() - } - - fn next_used(&self) -> u16 { - self.state.lock().unwrap().next_used() - } - - fn set_next_avail(&mut self, next_avail: u16) { - self.state.lock().unwrap().set_next_avail(next_avail); - } - - fn set_next_used(&mut self, next_used: u16) { - self.state.lock().unwrap().set_next_used(next_used); - } -} - -/// A convenient wrapper struct for a virtio queue, with associated GuestMemory object. -#[derive(Clone, Debug)] -pub struct Queue = QueueState> { - /// Guest memory object associated with the queue. - pub mem: M, - /// Virtio queue state. - pub state: S, -} - -impl> Queue { - /// Construct an empty virtio queue with the given `max_size`. - pub fn new(mem: M, max_size: u16) -> Self { - Queue { - mem, - state: S::new(max_size), - } - } - - /// Check whether the queue configuration is valid. - pub fn is_valid(&self) -> bool { - self.state.is_valid(&self.mem.memory()) - } - - /// Reset the queue to the initial state. - pub fn reset(&mut self) { - self.state.reset() - } - - /// Get an exclusive reference to the underlying `QueueState` object. - /// - /// Logically this method will acquire the underlying lock protecting the `QueueState` Object. 
- /// The lock will be released when the returned object gets dropped. - pub fn lock(&mut self) -> QueueStateGuard<'_, M> { - self.state.lock() - } - - /// Get the maximum size of the virtio queue. - pub fn max_size(&self) -> u16 { - self.state.max_size() - } - - /// Return the actual size of the queue. - /// - /// The virtio driver may configure queue size smaller than the value reported by `max_size()`. - pub fn actual_size(&self) -> u16 { - self.state.actual_size() - } - - /// Configure the queue size for the virtio queue. - /// - /// The `size` should power of two and less than or equal to value reported by `max_size()`, - /// otherwise it will panic. - pub fn set_size(&mut self, size: u16) { - self.state.set_size(size) - } - - /// Check whether the queue is ready to be processed. - pub fn ready(&self) -> bool { - self.state.ready() - } - - /// Configure the queue to ready for processing. - pub fn set_ready(&mut self, ready: bool) { - self.state.set_ready(ready) - } - - /// Set descriptor table address for the queue. - /// - /// The descriptor table address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. - pub fn set_desc_table_address(&mut self, low: Option, high: Option) { - self.state.set_desc_table_address(low, high); - } - - /// Set available ring address for the queue. - /// - /// The available ring address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. - pub fn set_avail_ring_address(&mut self, low: Option, high: Option) { - self.state.set_avail_ring_address(low, high); - } - - /// Set used ring address for the queue. - /// - /// The used ring address is 64-bit, the corresponding part will be updated if 'low' - /// and/or `high` is valid. - pub fn set_used_ring_address(&mut self, low: Option, high: Option) { - self.state.set_used_ring_address(low, high) - } - - /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing. 
- pub fn set_event_idx(&mut self, enabled: bool) { - self.state.set_event_idx(enabled) - } - - /// Read the `idx` field from the available ring. - pub fn avail_idx(&self, order: Ordering) -> Result, Error> { - self.state.avail_idx(&self.mem.memory(), order) - } - - /// Reads the `idx` field from the used ring. - pub fn used_idx(&self, order: Ordering) -> Result, Error> { - self.state.used_idx(&self.mem.memory(), order) - } - - /// Put a used descriptor head into the used ring. - pub fn add_used(&mut self, head_index: u16, len: u32) -> Result<(), Error> { - self.state.add_used(&self.mem.memory(), head_index, len) - } - - /// Enable notification events from the guest driver. - /// - /// Return true if one or more descriptors can be consumed from the available ring after - /// notifications were enabled (and thus it's possible there will be no corresponding - /// notification). - pub fn enable_notification(&mut self) -> Result { - self.state.enable_notification(&self.mem.memory()) - } - - /// Disable notification events from the guest driver. - pub fn disable_notification(&mut self) -> Result<(), Error> { - self.state.disable_notification(&self.mem.memory()) - } - - /// Check whether a notification to the guest is needed. - /// - /// Please note this method has side effects: once it returns `true`, it considers the - /// driver will actually be notified, remember the associated index in the used ring, and - /// won't return `true` again until the driver updates `used_event` and/or the notification - /// conditions hold once more. - pub fn needs_notification(&mut self) -> Result { - self.state.needs_notification(&self.mem.memory()) - } - - /// Return the index for the next descriptor in the available ring. - pub fn next_avail(&self) -> u16 { - self.state.next_avail() - } - - /// Returns the index for the next descriptor in the used ring. - pub fn next_used(&self) -> u16 { - self.state.next_used() - } - - /// Sets the index for the next descriptor in the available ring. 
- pub fn set_next_avail(&mut self, next_avail: u16) { - self.state.set_next_avail(next_avail); - } - - /// Sets the index for the next descriptor in the used ring. - pub fn set_next_used(&mut self, next_used: u16) { - self.state.set_next_used(next_used); - } -} - -impl Queue> { - /// A consuming iterator over all available descriptor chain heads offered by the driver. - pub fn iter(&mut self) -> Result, Error> { - self.state.iter(self.mem.memory()) - } - - /// Set the queue to "ready", and update desc_table, avail_ring and - /// used_ring addresses based on the AccessPlatform handler. - pub fn enable(&mut self, set: bool) { - self.state.enable(set) - } -} diff --git a/virtio-queue/src/mock.rs b/virtio-queue/src/mock.rs new file mode 100644 index 000000000..663dc0b53 --- /dev/null +++ b/virtio-queue/src/mock.rs @@ -0,0 +1,370 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Utilities used by unit tests and benchmarks for mocking the driver side +//! of the virtio protocol. + +use std::marker::PhantomData; +use std::mem::size_of; + +use vm_memory::{ + Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsize, +}; + +use crate::defs::{VIRTQ_DESC_F_INDIRECT, VIRTQ_DESC_F_NEXT}; +use crate::{Descriptor, Queue, QueueState, VirtqUsedElem}; + +/// Wrapper struct used for accessing a particular address of a GuestMemory area. +pub struct Ref<'a, M, T> { + mem: &'a M, + addr: GuestAddress, + phantom: PhantomData<*const T>, +} + +impl<'a, M: GuestMemory, T: ByteValued> Ref<'a, M, T> { + fn new(mem: &'a M, addr: GuestAddress) -> Self { + Ref { + mem, + addr, + phantom: PhantomData, + } + } + + /// Read an object of type T from the underlying memory found at self.addr. + pub fn load(&self) -> T { + self.mem.read_obj(self.addr).unwrap() + } + + /// Write an object of type T from the underlying memory found at self.addr. 
+ pub fn store(&self, val: T) { + self.mem.write_obj(val, self.addr).unwrap() + } +} + +/// Wrapper struct used for accessing a subregion of a GuestMemory area. +pub struct ArrayRef<'a, M, T> { + mem: &'a M, + addr: GuestAddress, + len: usize, + phantom: PhantomData<*const T>, +} + +impl<'a, M: GuestMemory, T: ByteValued> ArrayRef<'a, M, T> { + fn new(mem: &'a M, addr: GuestAddress, len: usize) -> Self { + ArrayRef { + mem, + addr, + len, + phantom: PhantomData, + } + } + + /// Return a `Ref` object pointing to an address defined by a particular + /// index offset in the region. + pub fn ref_at(&self, index: usize) -> Ref<'a, M, T> { + // TODO: add better error handling to the mock logic. + assert!(index < self.len); + + let addr = self + .addr + .checked_add((index * size_of::()) as u64) + .unwrap(); + + Ref::new(self.mem, addr) + } +} + +/// Represents a virtio queue ring. The only difference between the used and available rings, +/// is the ring element type. +pub struct SplitQueueRing<'a, M, T: ByteValued> { + flags: Ref<'a, M, u16>, + // The value stored here should more precisely be a `Wrapping`, but that would require a + // `ByteValued` impl for this type, which is not provided in vm-memory. Implementing the trait + // here would require defining a wrapper for `Wrapping` and that would be too much for a + // mock framework that is only used in tests. + idx: Ref<'a, M, u16>, + ring: ArrayRef<'a, M, T>, + // `used_event` for `AvailRing`, `avail_event` for `UsedRing`. 
+ event: Ref<'a, M, u16>, +} + +impl<'a, M: GuestMemory, T: ByteValued> SplitQueueRing<'a, M, T> { + /// Create a new `SplitQueueRing` instance + pub fn new(mem: &'a M, base: GuestAddress, len: u16) -> Self { + let event_addr = base + .checked_add(4) + .and_then(|a| a.checked_add((size_of::() * len as usize) as u64)) + .unwrap(); + + let split_queue_ring = SplitQueueRing { + flags: Ref::new(mem, base), + idx: Ref::new(mem, base.checked_add(2).unwrap()), + ring: ArrayRef::new(mem, base.checked_add(4).unwrap(), len as usize), + event: Ref::new(mem, event_addr), + }; + + split_queue_ring.flags.store(0); + split_queue_ring.idx.store(0); + split_queue_ring.event.store(0); + + split_queue_ring + } + + /// Return the starting address of the `SplitQueueRing`. + pub fn start(&self) -> GuestAddress { + self.ring.addr + } + + /// Return the end address of the `SplitQueueRing`. + pub fn end(&self) -> GuestAddress { + self.start() + .checked_add(self.ring.len as GuestUsize) + .unwrap() + } + + /// Return a reference to the idx field. + pub fn idx(&self) -> &Ref<'a, M, u16> { + &self.idx + } + + /// Return a reference to the ring field. + pub fn ring(&self) -> &ArrayRef<'a, M, T> { + &self.ring + } +} + +/// The available ring is used by the driver to offer buffers to the device. +pub type AvailRing<'a, M> = SplitQueueRing<'a, M, u16>; +/// The used ring is where the device returns buffers once it is done with them. +pub type UsedRing<'a, M> = SplitQueueRing<'a, M, VirtqUsedElem>; + +/// Refers to the buffers the driver is using for the device. 
+pub struct DescriptorTable<'a, M> { + table: ArrayRef<'a, M, Descriptor>, + len: u16, + free_descriptors: Vec, +} + +impl<'a, M: GuestMemory> DescriptorTable<'a, M> { + /// Create a new `DescriptorTable` instance + pub fn new(mem: &'a M, addr: GuestAddress, len: u16) -> Self { + let table = ArrayRef::new(mem, addr, len as usize); + let free_descriptors = (0..len).rev().collect(); + + DescriptorTable { + table, + len, + free_descriptors, + } + } + + /// Read one descriptor from the specified index. + pub fn load(&self, index: u16) -> Descriptor { + self.table.ref_at(index as usize).load() + } + + /// Write one descriptor at the specified index. + pub fn store(&self, index: u16, value: Descriptor) { + self.table.ref_at(index as usize).store(value) + } + + /// Return the total size of the DescriptorTable in bytes. + pub fn total_size(&self) -> u64 { + (self.len as usize * size_of::()) as u64 + } + + /// Create a chain of descriptors. + pub fn build_chain(&mut self, len: u16) -> u16 { + let indices = self + .free_descriptors + .iter() + .copied() + .rev() + .take(usize::from(len)) + .collect::>(); + + assert_eq!(indices.len(), len as usize); + + for (pos, index_value) in indices.iter().copied().enumerate() { + // Addresses and lens constant for now. + let mut desc = Descriptor::new(0x1000, 0x1000, 0, 0); + + // It's not the last descriptor in the chain. + if pos < indices.len() - 1 { + desc.set_flags(VIRTQ_DESC_F_NEXT); + desc.set_next(indices[pos + 1]); + } else { + desc.set_flags(0); + } + self.store(index_value, desc); + } + + indices[0] + } +} + +trait GuestAddressExt { + fn align_up(&self, x: GuestUsize) -> GuestAddress; +} + +impl GuestAddressExt for GuestAddress { + fn align_up(&self, x: GuestUsize) -> GuestAddress { + Self((self.0 + (x - 1)) & !(x - 1)) + } +} + +/// A mock version of the virtio queue implemented from the perspective of the driver. 
+pub struct MockSplitQueue<'a, M> { + mem: &'a M, + len: u16, + desc_table_addr: GuestAddress, + desc_table: DescriptorTable<'a, M>, + avail_addr: GuestAddress, + avail: AvailRing<'a, M>, + used_addr: GuestAddress, + used: UsedRing<'a, M>, + indirect_addr: GuestAddress, +} + +impl<'a, M: GuestMemory> MockSplitQueue<'a, M> { + /// Create a new `MockSplitQueue` instance with 0 as the default guest + /// physical starting address. + pub fn new(mem: &'a M, len: u16) -> Self { + Self::create(mem, GuestAddress(0), len) + } + + /// Create a new `MockSplitQueue` instance. + pub fn create(mem: &'a M, start: GuestAddress, len: u16) -> Self { + const AVAIL_ALIGN: GuestUsize = 2; + const USED_ALIGN: GuestUsize = 4; + + let desc_table_addr = start; + let desc_table = DescriptorTable::new(mem, desc_table_addr, len); + + let avail_addr = start + .checked_add(16 * len as GuestUsize) + .unwrap() + .align_up(AVAIL_ALIGN); + let avail = AvailRing::new(mem, avail_addr, len); + + let used_addr = avail.end().align_up(USED_ALIGN); + let used = UsedRing::new(mem, used_addr, len); + + let indirect_addr = GuestAddress(0x3000_0000); + + MockSplitQueue { + mem, + len, + desc_table_addr, + desc_table, + avail_addr, + avail, + used_addr, + used, + indirect_addr, + } + } + + /// Return the starting address of the queue. + pub fn start(&self) -> GuestAddress { + self.desc_table_addr + } + + /// Return the end address of the queue. + pub fn end(&self) -> GuestAddress { + self.used.end() + } + + /// Descriptor table accessor. + pub fn desc_table(&self) -> &DescriptorTable<'a, M> { + &self.desc_table + } + + /// Available ring accessor. + pub fn avail(&self) -> &AvailRing { + &self.avail + } + + /// Used ring accessor. + pub fn used(&self) -> &UsedRing { + &self.used + } + + /// Return the starting address of the descriptor table. + pub fn desc_table_addr(&self) -> GuestAddress { + self.desc_table_addr + } + + /// Return the starting address of the available ring. 
+ pub fn avail_addr(&self) -> GuestAddress { + self.avail_addr + } + + /// Return the starting address of the used ring. + pub fn used_addr(&self) -> GuestAddress { + self.used_addr + } + + fn update_avail_idx(&mut self, value: u16) { + let avail_idx = self.avail.idx.load(); + self.avail.ring.ref_at(avail_idx as usize).store(value); + self.avail.idx.store(avail_idx.wrapping_add(1)); + } + + fn alloc_indirect_chain(&mut self, len: u16) -> GuestAddress { + // To simplify things for now, we round up the table len as a multiple of 16. When this is + // no longer the case, we should make sure the starting address of the descriptor table + // we're creating below is properly aligned. + + let table_len = if len % 16 == 0 { + len + } else { + 16 * (len / 16 + 1) + }; + + let mut table = DescriptorTable::new(self.mem, self.indirect_addr, table_len); + let head_decriptor_index = table.build_chain(len); + // When building indirect descriptor tables, the descriptor at index 0 is supposed to be + // first in the resulting chain. Just making sure our logic actually makes that happen. + assert_eq!(head_decriptor_index, 0); + + let table_addr = self.indirect_addr; + self.indirect_addr = self.indirect_addr.checked_add(table.total_size()).unwrap(); + table_addr + } + + /// Add a descriptor chain to the table. + pub fn add_chain(&mut self, len: u16) { + let head_idx = self.desc_table.build_chain(len); + self.update_avail_idx(head_idx); + } + + /// Add an indirect descriptor chain to the table. + pub fn add_indirect_chain(&mut self, len: u16) { + let head_idx = self.desc_table.build_chain(1); + + // We just allocate the indirect table and forget about it for now. 
+ let indirect_addr = self.alloc_indirect_chain(len); + + let mut desc = self.desc_table.load(head_idx); + desc.set_flags(VIRTQ_DESC_F_INDIRECT); + desc.set_addr(indirect_addr.raw_value()); + desc.set_len(u32::from(len) * size_of::() as u32); + + self.desc_table.store(head_idx, desc); + self.update_avail_idx(head_idx); + } + + /// Creates a new `Queue`, using the underlying memory regions represented + /// by the `MockSplitQueue`. + pub fn create_queue(&self, a: A) -> Queue { + let mut q = Queue::::new(a, self.len); + + q.state.size = self.len; + q.state.ready = true; + q.state.desc_table = self.desc_table_addr; + q.state.avail_ring = self.avail_addr; + q.state.used_ring = self.used_addr; + q + } +} diff --git a/virtio-queue/src/queue.rs b/virtio-queue/src/queue.rs new file mode 100644 index 000000000..a2918c50e --- /dev/null +++ b/virtio-queue/src/queue.rs @@ -0,0 +1,683 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright © 2019 Intel Corporation +// +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::num::Wrapping; +use std::ops::Deref; +use std::sync::atomic::Ordering; + +use vm_memory::GuestAddressSpace; + +use crate::{AvailIter, Error, QueueGuard, QueueState, QueueStateGuard, QueueStateT}; + +/// A convenient wrapper struct for a virtio queue, with associated `GuestMemory` object. 
+/// +/// # Example +/// +/// ```rust +/// use virtio_queue::{Queue, QueueState}; +/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap}; +/// +/// let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); +/// let mut queue = Queue::<&GuestMemoryMmap, QueueState>::new(&m, 1024); +/// +/// // First, the driver sets up the queue; this set up is done via writes on the bus (PCI, MMIO). +/// queue.set_size(8); +/// queue.set_desc_table_address(Some(0x1000), None); +/// queue.set_avail_ring_address(Some(0x2000), None); +/// queue.set_used_ring_address(Some(0x3000), None); +/// queue.set_event_idx(true); +/// queue.set_ready(true); +/// // The user should check if the queue is valid before starting to use it. +/// assert!(queue.is_valid()); +/// +/// // Here the driver would add entries in the available ring and then update the `idx` field of +/// // the available ring (address = 0x2000 + 2). +/// m.write_obj(3, GuestAddress(0x2002)); +/// +/// loop { +/// queue.disable_notification().unwrap(); +/// +/// // Consume entries from the available ring. +/// while let Some(chain) = queue.iter().unwrap().next() { +/// // Process the descriptor chain, and then add an entry in the used ring and optionally +/// // notify the driver. +/// queue.add_used(chain.head_index(), 0x100).unwrap(); +/// +/// if queue.needs_notification().unwrap() { +/// // Here we would notify the driver it has new entries in the used ring to consume. +/// } +/// } +/// if !queue.enable_notification().unwrap() { +/// break; +/// } +/// } +/// +/// // We can reset the queue at some point. +/// queue.reset(); +/// // The queue should not be ready after reset. +/// assert!(!queue.ready()); +/// ``` +#[derive(Clone, Debug)] +pub struct Queue { + /// Guest memory object associated with the queue. + pub mem: M, + /// Virtio queue state. + pub state: S, +} + +impl Queue { + /// Construct an empty virtio queue with the given `max_size`. 
+ /// + /// # Arguments + /// * `mem` - the guest memory object that can be used to access the queue buffers. + /// * `max_size` - the maximum size (and the default one) of the queue. + pub fn new(mem: M, max_size: u16) -> Self { + Queue { + mem, + state: S::new(max_size), + } + } + + /// Check whether the queue configuration is valid. + pub fn is_valid(&self) -> bool { + self.state.is_valid(self.mem.memory().deref()) + } + + /// Reset the queue to the initial state. + pub fn reset(&mut self) { + self.state.reset() + } + + /// Get an exclusive reference to the underlying `QueueState` object. + /// + /// Logically this method will acquire the underlying lock protecting the `QueueState` Object. + /// The lock will be released when the returned object gets dropped. + pub fn lock(&mut self) -> ::G { + self.state.lock() + } + + /// Get an exclusive reference to the underlying `QueueState` object with an associated + /// `GuestMemory` object. + /// + /// Logically this method will acquire the underlying lock protecting the `QueueState` Object. + /// The lock will be released when the returned object gets dropped. + pub fn lock_with_memory( + &mut self, + ) -> QueueGuard<::T, ::G> { + QueueGuard::new(self.state.lock(), self.mem.memory()) + } + + /// Get the maximum size of the virtio queue. + pub fn max_size(&self) -> u16 { + self.state.max_size() + } + + /// Configure the queue size for the virtio queue. + /// + /// # Arguments + /// * `size` - the queue size; it should be a power of two, different than 0 and less than or + /// equal to the value reported by `max_size()`, otherwise the queue size remains the + /// default one (which is the maximum one). + pub fn set_size(&mut self, size: u16) { + self.state.set_size(size); + } + + /// Check whether the queue is ready to be processed. + pub fn ready(&self) -> bool { + self.state.ready() + } + + /// Configure the queue to the `ready for processing` state. 
+ /// + /// # Arguments + /// * `ready` - a boolean to indicate whether the queue is ready to be used or not. + pub fn set_ready(&mut self, ready: bool) { + self.state.set_ready(ready) + } + + /// Set the descriptor table address for the queue. + /// + /// The descriptor table address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + /// + /// # Arguments + /// * `low` - an optional value for the lowest 32 bits of the address. + /// * `high` - an optional value for the highest 32 bits of the address. + pub fn set_desc_table_address(&mut self, low: Option, high: Option) { + self.state.set_desc_table_address(low, high); + } + + /// Set the available ring address for the queue. + /// + /// The available ring address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + /// + /// # Arguments + /// * `low` - an optional value for the lowest 32 bits of the address. + /// * `high` - an optional value for the highest 32 bits of the address. + pub fn set_avail_ring_address(&mut self, low: Option, high: Option) { + self.state.set_avail_ring_address(low, high); + } + + /// Set the used ring address for the queue. + /// + /// The used ring address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + /// + /// # Arguments + /// * `low` - an optional value for the lowest 32 bits of the address. + /// * `high` - an optional value for the highest 32 bits of the address. + pub fn set_used_ring_address(&mut self, low: Option, high: Option) { + self.state.set_used_ring_address(low, high); + } + + /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing. + /// + /// # Arguments + /// * `enabled` - a boolean to indicate whether the VIRTIO_F_RING_EVENT_IDX feature was + /// successfully negotiated or not. 
+ pub fn set_event_idx(&mut self, enabled: bool) { + self.state.set_event_idx(enabled) + } + + /// Read the `idx` field from the available ring. + /// + /// # Arguments + /// * `order` - the memory ordering used to access the `idx` field from memory. + pub fn avail_idx(&self, order: Ordering) -> Result, Error> { + self.state.avail_idx(self.mem.memory().deref(), order) + } + + /// Reads the `idx` field from the used ring. + /// + /// # Arguments + /// * `order` - the memory ordering used to access the `idx` field from memory. + pub fn used_idx(&self, order: Ordering) -> Result, Error> { + self.state.used_idx(self.mem.memory().deref(), order) + } + + /// Put a used descriptor head into the used ring. + /// + /// # Arguments + /// * `head_index` - the index of the used descriptor chain. + /// * `len` - the total length of the descriptor chain which was used (written to). + pub fn add_used(&mut self, head_index: u16, len: u32) -> Result<(), Error> { + self.state + .add_used(self.mem.memory().deref(), head_index, len) + } + + /// Enable notification events from the guest driver. + /// + /// Return true if one or more descriptors can be consumed from the available ring after + /// notifications were enabled (and thus it's possible there will be no corresponding + /// notification). + pub fn enable_notification(&mut self) -> Result { + self.state.enable_notification(self.mem.memory().deref()) + } + + /// Disable notification events from the guest driver. + pub fn disable_notification(&mut self) -> Result<(), Error> { + self.state.disable_notification(self.mem.memory().deref()) + } + + /// Check whether a notification to the guest is needed. + /// + /// Please note this method has side effects: once it returns `true`, it considers the + /// driver will actually be notified, remember the associated index in the used ring, and + /// won't return `true` again until the driver updates `used_event` and/or the notification + /// conditions hold once more. 
+ pub fn needs_notification(&mut self) -> Result { + self.state.needs_notification(self.mem.memory().deref()) + } + + /// Return the index of the next entry in the available ring. + pub fn next_avail(&self) -> u16 { + self.state.next_avail() + } + + /// Returns the index for the next descriptor in the used ring. + pub fn next_used(&self) -> u16 { + self.state.next_used() + } + + /// Set the index of the next entry in the available ring. + /// + /// # Arguments + /// * `next_avail` - the index of the next available ring entry. + pub fn set_next_avail(&mut self, next_avail: u16) { + self.state.set_next_avail(next_avail); + } + + /// Sets the index for the next descriptor in the used ring. + /// + /// # Arguments + /// * `next_used` - the index of the next used ring entry. + pub fn set_next_used(&mut self, next_used: u16) { + self.state.set_next_used(next_used); + } +} + +impl Queue { + /// A consuming iterator over all available descriptor chain heads offered by the driver. + pub fn iter(&mut self) -> Result, Error> { + self.state.iter(self.mem.memory()) + } + + /// Set the queue to "ready", and update desc_table, avail_ring and + /// used_ring addresses based on the AccessPlatform handler. 
+ pub fn enable(&mut self, set: bool) { + self.state.enable(set) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::{ + DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR, + VIRTQ_DESC_F_NEXT, VIRTQ_USED_F_NO_NOTIFY, + }; + use crate::mock::MockSplitQueue; + use crate::Descriptor; + + use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryMmap}; + + #[test] + fn test_queue_is_valid() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let mut q = vq.create_queue(m); + + // q is currently valid + assert!(q.is_valid()); + + // shouldn't be valid when not marked as ready + q.set_ready(false); + assert!(!q.ready()); + assert!(!q.is_valid()); + q.set_ready(true); + + // shouldn't be allowed to set a size > max_size + q.set_size(q.max_size() << 1); + assert_eq!(q.state.size, q.max_size()); + + // or set the size to 0 + q.set_size(0); + assert_eq!(q.state.size, q.max_size()); + + // or set a size which is not a power of 2 + q.set_size(11); + assert_eq!(q.state.size, q.max_size()); + + // but should be allowed to set a size if 0 < size <= max_size and size is a power of two + q.set_size(4); + assert_eq!(q.state.size, 4); + q.state.size = q.max_size(); + + // shouldn't be allowed to set an address that breaks the alignment constraint + q.set_desc_table_address(Some(0xf), None); + assert_eq!(q.state.desc_table.0, vq.desc_table_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_desc_table_address(Some(0xffff_fff0), None); + assert_eq!(q.state.desc_table.0, 0xffff_fff0); + // but shouldn't be valid + assert!(!q.is_valid()); + // but should be allowed to set a valid description table address + q.set_desc_table_address(Some(0x10), None); + assert_eq!(q.state.desc_table.0, 0x10); + assert!(q.is_valid()); + let addr = vq.desc_table_addr().0; + q.set_desc_table_address(Some(addr as u32), Some((addr >> 32) as u32)); + + // shouldn't 
be allowed to set an address that breaks the alignment constraint + q.set_avail_ring_address(Some(0x1), None); + assert_eq!(q.state.avail_ring.0, vq.avail_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_avail_ring_address(Some(0xffff_fffe), None); + assert_eq!(q.state.avail_ring.0, 0xffff_fffe); + // but shouldn't be valid + assert!(!q.is_valid()); + // but should be allowed to set a valid available ring address + q.set_avail_ring_address(Some(0x2), None); + assert_eq!(q.state.avail_ring.0, 0x2); + assert!(q.is_valid()); + let addr = vq.avail_addr().0; + q.set_avail_ring_address(Some(addr as u32), Some((addr >> 32) as u32)); + + // shouldn't be allowed to set an address that breaks the alignment constraint + q.set_used_ring_address(Some(0x3), None); + assert_eq!(q.state.used_ring.0, vq.used_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_used_ring_address(Some(0xffff_fffc), None); + assert_eq!(q.state.used_ring.0, 0xffff_fffc); + // but shouldn't be valid + assert!(!q.is_valid()); + // but should be allowed to set a valid used ring address + q.set_used_ring_address(Some(0x4), None); + assert_eq!(q.state.used_ring.0, 0x4); + let addr = vq.used_addr().0; + q.set_used_ring_address(Some(addr as u32), Some((addr >> 32) as u32)); + assert!(q.is_valid()); + } + + #[test] + fn test_add_used() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let mut q = vq.create_queue(m); + + assert_eq!(u16::from_le(vq.used().idx().load()), 0); + + // index too large + assert!(q.add_used(16, 0x1000).is_err()); + assert_eq!(u16::from_le(vq.used().idx().load()), 0); + + // should be ok + q.add_used(1, 0x1000).unwrap(); + assert_eq!(q.state.next_used, Wrapping(1)); + assert_eq!(u16::from_le(vq.used().idx().load()), 1); + + let x = vq.used().ring().ref_at(0).load(); + assert_eq!(x.id(), 1); + assert_eq!(x.len(), 0x1000); + } + + #[test] + fn 
test_reset_queue() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let mut q = vq.create_queue(m); + + q.set_size(8); + // The address set by `MockSplitQueue` for the descriptor table is DEFAULT_DESC_TABLE_ADDR, + // so let's change it for testing the reset. + q.set_desc_table_address(Some(0x5000), None); + // Same for `event_idx_enabled`, `next_avail`, `next_used` and `signalled_used`. + q.set_event_idx(true); + q.set_next_avail(2); + q.add_used(1, 200).unwrap(); + q.state.signalled_used = Some(Wrapping(15)); + assert_eq!(q.state.size, 8); + // `create_queue` also marks the queue as ready. + assert!(q.state.ready); + assert_ne!(q.state.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR)); + assert_ne!(q.state.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR)); + assert_ne!(q.state.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR)); + assert_ne!(q.state.next_avail, Wrapping(0)); + assert_ne!(q.state.next_used, Wrapping(0)); + assert_ne!(q.state.signalled_used, None); + assert!(q.state.event_idx_enabled); + + q.reset(); + assert_eq!(q.state.size, 16); + assert!(!q.state.ready); + assert_eq!(q.state.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR)); + assert_eq!(q.state.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR)); + assert_eq!(q.state.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR)); + assert_eq!(q.state.next_avail, Wrapping(0)); + assert_eq!(q.state.next_used, Wrapping(0)); + assert_eq!(q.state.signalled_used, None); + assert!(!q.state.event_idx_enabled); + } + + #[test] + fn test_needs_notification() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let qsize = 16; + let vq = MockSplitQueue::new(m, qsize); + let mut q = vq.create_queue(m); + let avail_addr = vq.avail_addr(); + + // It should always return true when EVENT_IDX isn't enabled. 
+ for i in 0..qsize { + q.state.next_used = Wrapping(i); + assert!(q.needs_notification().unwrap()); + } + + m.write_obj::( + u16::to_le(4), + avail_addr.unchecked_add(4 + qsize as u64 * 2), + ) + .unwrap(); + q.state.set_event_idx(true); + + // Incrementing up to this value causes an `u16` to wrap back to 0. + let wrap = u32::from(u16::MAX) + 1; + + for i in 0..wrap + 12 { + q.state.next_used = Wrapping(i as u16); + // Let's test wrapping around the maximum index value as well. + let expected = i == 5 || i == (5 + wrap) || q.state.signalled_used.is_none(); + assert_eq!(q.needs_notification().unwrap(), expected); + } + + m.write_obj::(8, avail_addr.unchecked_add(4 + qsize as u64 * 2)) + .unwrap(); + + // Returns `false` because `signalled_used` already passed this value. + assert!(!q.needs_notification().unwrap()); + + m.write_obj::(15, avail_addr.unchecked_add(4 + qsize as u64 * 2)) + .unwrap(); + + assert!(!q.needs_notification().unwrap()); + q.state.next_used = Wrapping(15); + assert!(!q.needs_notification().unwrap()); + q.state.next_used = Wrapping(0); + assert!(q.needs_notification().unwrap()); + assert!(!q.needs_notification().unwrap()); + + m.write_obj::(u16::MAX - 3, avail_addr.unchecked_add(4 + qsize as u64 * 2)) + .unwrap(); + q.state.next_used = Wrapping(u16::MAX - 2); + // Returns `true` because the value we wrote in the `used_event` < the next used value and + // the last `signalled_used` is 0. 
+ assert!(q.needs_notification().unwrap()); + } + + #[test] + fn test_enable_disable_notification() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + let mut q = vq.create_queue(m); + let used_addr = vq.used_addr(); + + assert!(!q.state.event_idx_enabled); + + q.enable_notification().unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, 0); + + q.disable_notification().unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, VIRTQ_USED_F_NO_NOTIFY); + + q.enable_notification().unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, 0); + + q.set_event_idx(true); + let avail_addr = vq.avail_addr(); + m.write_obj::(u16::to_le(2), avail_addr.unchecked_add(2)) + .unwrap(); + + assert!(q.enable_notification().unwrap()); + q.state.next_avail = Wrapping(2); + assert!(!q.enable_notification().unwrap()); + + m.write_obj::(u16::to_le(8), avail_addr.unchecked_add(2)) + .unwrap(); + + assert!(q.enable_notification().unwrap()); + q.state.next_avail = Wrapping(8); + assert!(!q.enable_notification().unwrap()); + } + + #[test] + fn test_consume_chains_with_notif() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + let mut q = vq.create_queue(m); + + // q is currently valid. + assert!(q.is_valid()); + + // The chains are (0, 1), (2, 3, 4), (5, 6), (7, 8), (9, 10, 11, 12). 
+ for i in 0..13 { + let flags = match i { + 1 | 4 | 6 | 8 | 12 => 0, + _ => VIRTQ_DESC_F_NEXT, + }; + + let desc = Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags, i + 1); + vq.desc_table().store(i, desc); + } + + vq.avail().ring().ref_at(0).store(u16::to_le(0)); + vq.avail().ring().ref_at(1).store(u16::to_le(2)); + vq.avail().ring().ref_at(2).store(u16::to_le(5)); + vq.avail().ring().ref_at(3).store(u16::to_le(7)); + vq.avail().ring().ref_at(4).store(u16::to_le(9)); + // Let the device know it can consume chains with the index < 2. + vq.avail().idx().store(u16::to_le(2)); + // No descriptor chains are consumed at this point. + assert_eq!(q.next_avail(), 0); + + let mut i = 0; + + loop { + i += 1; + q.disable_notification().unwrap(); + + while let Some(_chain) = q.iter().unwrap().next() { + // Here the device would consume entries from the available ring, add an entry in + // the used ring and optionally notify the driver. For the purpose of this test, we + // don't need to do anything with the chain, only consume it. + } + if !q.enable_notification().unwrap() { + break; + } + } + // The chains should be consumed in a single loop iteration because there's nothing updating + // the `idx` field of the available ring in the meantime. + assert_eq!(i, 1); + // The next chain that can be consumed should have index 2. + assert_eq!(q.next_avail(), 2); + // Let the device know it can consume one more chain. + vq.avail().idx().store(u16::to_le(3)); + i = 0; + + loop { + i += 1; + q.disable_notification().unwrap(); + + while let Some(_chain) = q.iter().unwrap().next() { + // In a real use case, we would do something with the chain here. + } + + // For the simplicity of the test we are updating here the `idx` value of the available + // ring. Ideally this should be done on a separate thread. + // Because of this update, the loop should be iterated again to consume the new + // available descriptor chains. 
+ vq.avail().idx().store(u16::to_le(4)); + if !q.enable_notification().unwrap() { + break; + } + } + assert_eq!(i, 2); + // The next chain that can be consumed should have index 4. + assert_eq!(q.next_avail(), 4); + + // Set an `idx` that is bigger than the number of entries added in the ring. + // This is an allowed scenario, but the indexes of the chain will have unexpected values. + vq.avail().idx().store(u16::to_le(7)); + loop { + q.disable_notification().unwrap(); + + while let Some(_chain) = q.iter().unwrap().next() { + // In a real use case, we would do something with the chain here. + } + if !q.enable_notification().unwrap() { + break; + } + } + assert_eq!(q.next_avail(), 7); + } + + #[test] + fn test_invalid_avail_idx() { + // This is a negative test for the following MUST from the spec: `A driver MUST NOT + // decrement the available idx on a virtqueue (ie. there is no way to “unexpose” buffers).`. + // We validate that for this misconfiguration, the device does not panic. + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + let mut q = vq.create_queue(m); + + // q is currently valid. + assert!(q.is_valid()); + + // The chains are (0, 1), (2, 3, 4), (5, 6). + for i in 0..7 { + let flags = match i { + 1 | 4 | 6 => 0, + _ => VIRTQ_DESC_F_NEXT, + }; + + let desc = Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags, i + 1); + vq.desc_table().store(i, desc); + } + + vq.avail().ring().ref_at(0).store(u16::to_le(0)); + vq.avail().ring().ref_at(1).store(u16::to_le(2)); + vq.avail().ring().ref_at(2).store(u16::to_le(5)); + // Let the device know it can consume chains with the index < 3. + vq.avail().idx().store(u16::to_le(3)); + // No descriptor chains are consumed at this point. 
+ assert_eq!(q.next_avail(), 0); + + loop { + q.disable_notification().unwrap(); + + while let Some(_chain) = q.iter().unwrap().next() { + // Here the device would consume entries from the available ring, add an entry in + // the used ring and optionally notify the driver. For the purpose of this test, we + // don't need to do anything with the chain, only consume it. + } + if !q.enable_notification().unwrap() { + break; + } + } + // The next chain that can be consumed should have index 3. + assert_eq!(q.next_avail(), 3); + assert_eq!(q.avail_idx(Ordering::Acquire).unwrap(), Wrapping(3)); + assert!(q.lock().ready()); + + // Decrement `idx` which should be forbidden. We don't enforce this thing, but we should + // test that we don't panic in case the driver decrements it. + vq.avail().idx().store(u16::to_le(1)); + + loop { + q.disable_notification().unwrap(); + + while let Some(_chain) = q.iter().unwrap().next() { + // In a real use case, we would do something with the chain here. + } + + if !q.enable_notification().unwrap() { + break; + } + } + } +} diff --git a/virtio-queue/src/queue_guard.rs b/virtio-queue/src/queue_guard.rs new file mode 100644 index 000000000..b9fa284cc --- /dev/null +++ b/virtio-queue/src/queue_guard.rs @@ -0,0 +1,260 @@ +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::num::Wrapping; +use std::ops::{Deref, DerefMut}; +use std::sync::atomic::Ordering; + +use vm_memory::GuestMemory; + +use crate::{AvailIter, Error, QueueState, QueueStateT}; + +/// A guard object to exclusively access a `Queue` object. +/// +/// The guard object holds an exclusive lock to the underlying `QueueState` object, with an +/// associated guest memory object. It helps to guarantee that the whole session is served +/// with the same guest memory object. 
+/// +/// # Example +/// +/// ```rust +/// use virtio_queue::{Queue, QueueState}; +/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap}; +/// +/// let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); +/// let mut queue = Queue::<&GuestMemoryMmap, QueueState>::new(&m, 1024); +/// let mut queue_guard = queue.lock_with_memory(); +/// +/// // First, the driver sets up the queue; this set up is done via writes on the bus (PCI, MMIO). +/// queue_guard.set_size(8); +/// queue_guard.set_desc_table_address(Some(0x1000), None); +/// queue_guard.set_avail_ring_address(Some(0x2000), None); +/// queue_guard.set_used_ring_address(Some(0x3000), None); +/// queue_guard.set_event_idx(true); +/// queue_guard.set_ready(true); +/// // The user should check if the queue is valid before starting to use it. +/// assert!(queue_guard.is_valid()); +/// +/// // Here the driver would add entries in the available ring and then update the `idx` field of +/// // the available ring (address = 0x2000 + 2). +/// m.write_obj(3, GuestAddress(0x2002)); +/// +/// loop { +/// queue_guard.disable_notification().unwrap(); +/// +/// // Consume entries from the available ring. +/// while let Some(chain) = queue_guard.iter().unwrap().next() { +/// // Process the descriptor chain, and then add an entry in the used ring and optionally +/// // notify the driver. +/// queue_guard.add_used(chain.head_index(), 0x100).unwrap(); +/// +/// if queue_guard.needs_notification().unwrap() { +/// // Here we would notify the driver it has new entries in the used ring to consume. +/// } +/// } +/// if !queue_guard.enable_notification().unwrap() { +/// break; +/// } +/// } +/// ``` +pub struct QueueGuard { + state: S, + mem: M, +} + +impl QueueGuard +where + M: Deref + Clone, + M::Target: GuestMemory + Sized, + S: DerefMut, +{ + /// Create a new instance of `QueueGuard`. 
+ pub fn new(state: S, mem: M) -> Self { + QueueGuard { state, mem } + } + + /// Check whether the queue configuration is valid. + pub fn is_valid(&self) -> bool { + self.state.is_valid(self.mem.deref()) + } + + /// Reset the queue to the initial state. + pub fn reset(&mut self) { + self.state.reset() + } + + /// Get the maximum size of the virtio queue. + pub fn max_size(&self) -> u16 { + self.state.max_size() + } + + /// Configure the queue size for the virtio queue. + pub fn set_size(&mut self, size: u16) { + self.state.set_size(size); + } + + /// Check whether the queue is ready to be processed. + pub fn ready(&self) -> bool { + self.state.ready() + } + + /// Configure the queue to `ready for processing` state. + pub fn set_ready(&mut self, ready: bool) { + self.state.set_ready(ready) + } + + /// Set the descriptor table address for the queue. + /// + /// The descriptor table address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + pub fn set_desc_table_address(&mut self, low: Option, high: Option) { + self.state.set_desc_table_address(low, high); + } + + /// Set the available ring address for the queue. + /// + /// The available ring address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + pub fn set_avail_ring_address(&mut self, low: Option, high: Option) { + self.state.set_avail_ring_address(low, high); + } + + /// Set the used ring address for the queue. + /// + /// The used ring address is 64-bit, the corresponding part will be updated if 'low' + /// and/or `high` is `Some` and valid. + pub fn set_used_ring_address(&mut self, low: Option, high: Option) { + self.state.set_used_ring_address(low, high); + } + + /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing. + pub fn set_event_idx(&mut self, enabled: bool) { + self.state.set_event_idx(enabled) + } + + /// Read the `idx` field from the available ring. 
+ pub fn avail_idx(&self, order: Ordering) -> Result, Error> { + self.state.avail_idx(self.mem.deref(), order) + } + + /// Put a used descriptor head into the used ring. + pub fn add_used(&mut self, head_index: u16, len: u32) -> Result<(), Error> { + self.state.add_used(self.mem.deref(), head_index, len) + } + + /// Enable notification events from the guest driver. + /// + /// Return true if one or more descriptors can be consumed from the available ring after + /// notifications were enabled (and thus it's possible there will be no corresponding + /// notification). + pub fn enable_notification(&mut self) -> Result { + self.state.enable_notification(self.mem.deref()) + } + + /// Disable notification events from the guest driver. + pub fn disable_notification(&mut self) -> Result<(), Error> { + self.state.disable_notification(self.mem.deref()) + } + + /// Check whether a notification to the guest is needed. + /// + /// Please note this method has side effects: once it returns `true`, it considers the + /// driver will actually be notified, remember the associated index in the used ring, and + /// won't return `true` again until the driver updates `used_event` and/or the notification + /// conditions hold once more. + pub fn needs_notification(&mut self) -> Result { + self.state.needs_notification(self.mem.deref()) + } + + /// Return the index of the next entry in the available ring. + pub fn next_avail(&self) -> u16 { + self.state.next_avail() + } + + /// Set the index of the next entry in the available ring. + pub fn set_next_avail(&mut self, next_avail: u16) { + self.state.set_next_avail(next_avail); + } + + /// Get a consuming iterator over all available descriptor chain heads offered by the driver. 
+ pub fn iter(&mut self) -> Result, Error> { + self.state.deref_mut().iter(self.mem.clone()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::VIRTQ_DESC_F_NEXT; + use crate::mock::MockSplitQueue; + use crate::Descriptor; + + use vm_memory::{GuestAddress, GuestMemoryMmap}; + + #[test] + fn test_queue_guard_object() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 0x100); + let mut q = vq.create_queue(m); + let mut g = q.lock_with_memory(); + + // g is currently valid. + assert!(g.is_valid()); + assert!(g.ready()); + assert_eq!(g.max_size(), 0x100); + g.set_size(16); + + // The chains are (0, 1), (2, 3, 4), (5, 6). + for i in 0..7 { + let flags = match i { + 1 | 4 | 6 => 0, + _ => VIRTQ_DESC_F_NEXT, + }; + + let desc = Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags, i + 1); + vq.desc_table().store(i, desc); + } + + vq.avail().ring().ref_at(0).store(0); + vq.avail().ring().ref_at(1).store(2); + vq.avail().ring().ref_at(2).store(5); + // Let the device know it can consume chains with the index < 3. + vq.avail().idx().store(3); + // No descriptor chains are consumed at this point. + assert_eq!(g.next_avail(), 0); + + loop { + g.disable_notification().unwrap(); + + while let Some(_chain) = g.iter().unwrap().next() { + // Here the device would consume entries from the available ring, add an entry in + // the used ring and optionally notify the driver. For the purpose of this test, we + // don't need to do anything with the chain, only consume it. + } + if !g.enable_notification().unwrap() { + break; + } + } + // The next chain that can be consumed should have index 3. + assert_eq!(g.next_avail(), 3); + assert_eq!(g.avail_idx(Ordering::Acquire).unwrap(), Wrapping(3)); + assert!(g.ready()); + + // Decrement `idx` which should be forbidden. We don't enforce this thing, but we should + // test that we don't panic in case the driver decrements it. 
+ vq.avail().idx().store(1); + + loop { + g.disable_notification().unwrap(); + + while let Some(_chain) = g.iter().unwrap().next() { + // In a real use case, we would do something with the chain here. + } + + if !g.enable_notification().unwrap() { + break; + } + } + } +} diff --git a/virtio-queue/src/state.rs b/virtio-queue/src/state.rs new file mode 100644 index 000000000..36979c0e5 --- /dev/null +++ b/virtio-queue/src/state.rs @@ -0,0 +1,472 @@ +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::mem::size_of; +use std::num::Wrapping; +use std::ops::Deref; +use std::sync::atomic::{fence, Ordering}; +use std::sync::Arc; + +use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; + +use crate::defs::{ + DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR, + VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE, VIRTQ_AVAIL_RING_META_SIZE, + VIRTQ_MSI_NO_VECTOR, VIRTQ_USED_ELEMENT_SIZE, VIRTQ_USED_F_NO_NOTIFY, + VIRTQ_USED_RING_HEADER_SIZE, VIRTQ_USED_RING_META_SIZE, +}; +use crate::{ + error, AccessPlatform, AvailIter, Descriptor, Error, QueueStateGuard, QueueStateT, + VirtqUsedElem, +}; + +/// Struct to maintain information and manipulate state of a virtio queue. +#[derive(Clone, Debug)] +pub struct QueueState { + /// The maximum size in elements offered by the device. + pub max_size: u16, + + /// Tail position of the available ring. + pub next_avail: Wrapping, + + /// Head position of the used ring. + pub next_used: Wrapping, + + /// VIRTIO_F_RING_EVENT_IDX negotiated. + pub event_idx_enabled: bool, + + /// The last used value when using VIRTIO_F_EVENT_IDX. + pub signalled_used: Option>, + + /// The queue size in elements the driver selected. 
+ pub size: u16, + + /// Indicates if the queue is finished with configuration. + pub ready: bool, + + /// Guest physical address of the descriptor table. + pub desc_table: GuestAddress, + + /// Guest physical address of the available ring. + pub avail_ring: GuestAddress, + + /// Guest physical address of the used ring. + pub used_ring: GuestAddress, + + /// Interrupt vector + pub vector: u16, + + /// Access platform handler + pub access_platform: Option>, +} + +impl QueueState { + /// Get a consuming iterator over all available descriptor chain heads offered by the driver. + /// + /// # Arguments + /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers. + pub fn iter(&mut self, mem: M) -> Result, Error> + where + M: Deref, + M::Target: GuestMemory + Sized, + { + self.avail_idx(mem.deref(), Ordering::Acquire) + .map(move |idx| AvailIter::new(mem, idx, self)) + } + + // Helper method that writes `val` to the `avail_event` field of the used ring, using + // the provided ordering. + fn set_avail_event( + &self, + mem: &M, + val: u16, + order: Ordering, + ) -> Result<(), Error> { + // This can not overflow an u64 since it is working with relatively small numbers compared + // to u64::MAX. + let avail_event_offset = + VIRTQ_USED_RING_HEADER_SIZE + VIRTQ_USED_ELEMENT_SIZE * u64::from(self.size); + let addr = self + .used_ring + .checked_add(avail_event_offset) + .ok_or(Error::AddressOverflow)?; + + mem.store(u16::to_le(val), addr, order) + .map_err(Error::GuestMemory) + } + + // Set the value of the `flags` field of the used ring, applying the specified ordering. + fn set_used_flags( + &mut self, + mem: &M, + val: u16, + order: Ordering, + ) -> Result<(), Error> { + mem.store(u16::to_le(val), self.used_ring, order) + .map_err(Error::GuestMemory) + } + + // Write the appropriate values to enable or disable notifications from the driver. 
+ // + // Every access in this method uses `Relaxed` ordering because a fence is added by the caller + // when appropriate. + fn set_notification(&mut self, mem: &M, enable: bool) -> Result<(), Error> { + if enable { + if self.event_idx_enabled { + // We call `set_avail_event` using the `next_avail` value, instead of reading + // and using the current `avail_idx` to avoid missing notifications. More + // details in `enable_notification`. + self.set_avail_event(mem, self.next_avail.0, Ordering::Relaxed) + } else { + self.set_used_flags(mem, 0, Ordering::Relaxed) + } + } else if !self.event_idx_enabled { + self.set_used_flags(mem, VIRTQ_USED_F_NO_NOTIFY, Ordering::Relaxed) + } else { + // Notifications are effectively disabled by default after triggering once when + // `VIRTIO_F_EVENT_IDX` is negotiated, so we don't do anything in that case. + Ok(()) + } + } + + // Return the value present in the used_event field of the avail ring. + // + // If the VIRTIO_F_EVENT_IDX feature bit is not negotiated, the flags field in the available + // ring offers a crude mechanism for the driver to inform the device that it doesn’t want + // interrupts when buffers are used. Otherwise virtq_avail.used_event is a more performant + // alternative where the driver specifies how far the device can progress before interrupting. + // + // Neither of these interrupt suppression methods are reliable, as they are not synchronized + // with the device, but they serve as useful optimizations. So we only ensure access to the + // virtq_avail.used_event is atomic, but do not need to synchronize with other memory accesses. + fn used_event(&self, mem: &M, order: Ordering) -> Result, Error> { + // This can not overflow an u64 since it is working with relatively small numbers compared + // to u64::MAX. 
+ let used_event_offset = + VIRTQ_AVAIL_RING_HEADER_SIZE + u64::from(self.size) * VIRTQ_AVAIL_ELEMENT_SIZE; + let used_event_addr = self + .avail_ring + .checked_add(used_event_offset) + .ok_or(Error::AddressOverflow)?; + + mem.load(used_event_addr, order) + .map(u16::from_le) + .map(Wrapping) + .map_err(Error::GuestMemory) + } + + /// Set the queue's `ready` flag to `set` and update the ring addresses accordingly. + /// + /// When `set` is `true`, `desc_table`, `avail_ring` and `used_ring` are translated through + /// the `AccessPlatform` handler, if one is configured. When `set` is `false`, the three + /// addresses are reset to `GuestAddress(0)`. + /// + /// # Panics + /// + /// Panics if the handler's `translate` call fails for any of the three addresses, because + /// its result is unwrapped. + pub fn enable(&mut self, set: bool) { + self.ready = set; + + if set { + // Translate address of descriptor table and vrings. + if let Some(access_platform) = &self.access_platform { + self.desc_table = + GuestAddress(access_platform.translate(self.desc_table.0, 0).unwrap()); + self.avail_ring = + GuestAddress(access_platform.translate(self.avail_ring.0, 0).unwrap()); + self.used_ring = + GuestAddress(access_platform.translate(self.used_ring.0, 0).unwrap()); + } + } else { + self.desc_table = GuestAddress(0); + self.avail_ring = GuestAddress(0); + self.used_ring = GuestAddress(0); + } + } +} + +impl<'a> QueueStateGuard<'a> for QueueState { + type G = &'a mut Self; +} + +impl QueueStateT for QueueState { + fn new(max_size: u16) -> Self { + QueueState { + max_size, + size: max_size, + ready: false, + desc_table: GuestAddress(DEFAULT_DESC_TABLE_ADDR), + avail_ring: GuestAddress(DEFAULT_AVAIL_RING_ADDR), + used_ring: GuestAddress(DEFAULT_USED_RING_ADDR), + next_avail: Wrapping(0), + next_used: Wrapping(0), + event_idx_enabled: false, + signalled_used: None, + vector: VIRTQ_MSI_NO_VECTOR, + access_platform: None, + } + } + + fn is_valid(&self, mem: &M) -> bool { + let queue_size = self.size as u64; + let desc_table = self.desc_table; + // The multiplication can not overflow an u64 since we are multiplying an u16 with a + // small number.
+ let desc_table_size = size_of::() as u64 * queue_size; + let avail_ring = self.avail_ring; + // The operations below can not overflow an u64 since they're working with relatively small + // numbers compared to u64::MAX. + let avail_ring_size = VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size; + let used_ring = self.used_ring; + let used_ring_size = VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size; + + if !self.ready { + error!("attempt to use virtio queue that is not marked ready"); + false + } else if desc_table + .checked_add(desc_table_size) + .map_or(true, |v| !mem.address_in_range(v)) + { + error!( + "virtio queue descriptor table goes out of bounds: start:0x{:08x} size:0x{:08x}", + desc_table.raw_value(), + desc_table_size + ); + false + } else if avail_ring + .checked_add(avail_ring_size) + .map_or(true, |v| !mem.address_in_range(v)) + { + error!( + "virtio queue available ring goes out of bounds: start:0x{:08x} size:0x{:08x}", + avail_ring.raw_value(), + avail_ring_size + ); + false + } else if used_ring + .checked_add(used_ring_size) + .map_or(true, |v| !mem.address_in_range(v)) + { + error!( + "virtio queue used ring goes out of bounds: start:0x{:08x} size:0x{:08x}", + used_ring.raw_value(), + used_ring_size + ); + false + } else { + true + } + } + + fn reset(&mut self) { + self.ready = false; + self.size = self.max_size; + self.desc_table = GuestAddress(DEFAULT_DESC_TABLE_ADDR); + self.avail_ring = GuestAddress(DEFAULT_AVAIL_RING_ADDR); + self.used_ring = GuestAddress(DEFAULT_USED_RING_ADDR); + self.next_avail = Wrapping(0); + self.next_used = Wrapping(0); + self.signalled_used = None; + self.event_idx_enabled = false; + self.vector = VIRTQ_MSI_NO_VECTOR; + } + + fn lock(&mut self) -> ::G { + self + } + + fn max_size(&self) -> u16 { + self.max_size + } + + fn set_size(&mut self, size: u16) { + if size > self.max_size() || size == 0 || (size & (size - 1)) != 0 { + error!("virtio queue with invalid size: {}", size); + 
return; + } + self.size = size; + } + + fn ready(&self) -> bool { + self.ready + } + + fn set_ready(&mut self, ready: bool) { + self.ready = ready; + } + + fn set_desc_table_address(&mut self, low: Option, high: Option) { + let low = low.unwrap_or(self.desc_table.0 as u32) as u64; + let high = high.unwrap_or((self.desc_table.0 >> 32) as u32) as u64; + + let desc_table = GuestAddress((high << 32) | low); + if desc_table.mask(0xf) != 0 { + error!("virtio queue descriptor table breaks alignment constraints"); + return; + } + self.desc_table = desc_table; + } + + fn set_avail_ring_address(&mut self, low: Option, high: Option) { + let low = low.unwrap_or(self.avail_ring.0 as u32) as u64; + let high = high.unwrap_or((self.avail_ring.0 >> 32) as u32) as u64; + + let avail_ring = GuestAddress((high << 32) | low); + if avail_ring.mask(0x1) != 0 { + error!("virtio queue available ring breaks alignment constraints"); + return; + } + self.avail_ring = avail_ring; + } + + fn set_used_ring_address(&mut self, low: Option, high: Option) { + let low = low.unwrap_or(self.used_ring.0 as u32) as u64; + let high = high.unwrap_or((self.used_ring.0 >> 32) as u32) as u64; + + let used_ring = GuestAddress((high << 32) | low); + if used_ring.mask(0x3) != 0 { + error!("virtio queue used ring breaks alignment constraints"); + return; + } + self.used_ring = used_ring; + } + + fn set_event_idx(&mut self, enabled: bool) { + self.signalled_used = None; + self.event_idx_enabled = enabled; + } + + fn avail_idx(&self, mem: &M, order: Ordering) -> Result, Error> { + let addr = self + .avail_ring + .checked_add(2) + .ok_or(Error::AddressOverflow)?; + + mem.load(addr, order) + .map(u16::from_le) + .map(Wrapping) + .map_err(Error::GuestMemory) + } + + fn used_idx(&self, mem: &M, order: Ordering) -> Result, Error> { + // Mirror `avail_idx`: report an address overflow as an error instead of using an + // unchecked add, and convert the little-endian ring value to host endianness. + let addr = self + .used_ring + .checked_add(2) + .ok_or(Error::AddressOverflow)?; + + mem.load(addr, order) + .map(u16::from_le) + .map(Wrapping) + .map_err(Error::GuestMemory) + } + + fn add_used( + &mut self, + mem: &M, + head_index: u16, + len:
u32, + ) -> Result<(), Error> { + if head_index >= self.size { + error!( + "attempted to add out of bounds descriptor to used ring: {}", + head_index + ); + return Err(Error::InvalidDescriptorIndex); + } + + let next_used_index = u64::from(self.next_used.0 % self.size); + // This can not overflow an u64 since it is working with relatively small numbers compared + // to u64::MAX. + let offset = VIRTQ_USED_RING_HEADER_SIZE + next_used_index * VIRTQ_USED_ELEMENT_SIZE; + let addr = self + .used_ring + .checked_add(offset) + .ok_or(Error::AddressOverflow)?; + mem.write_obj(VirtqUsedElem::new(head_index.into(), len), addr) + .map_err(Error::GuestMemory)?; + + self.next_used += Wrapping(1); + + mem.store( + u16::to_le(self.next_used.0), + self.used_ring + .checked_add(2) + .ok_or(Error::AddressOverflow)?, + Ordering::Release, + ) + .map_err(Error::GuestMemory) + } + + // TODO: Turn this into a doc comment/example. + // With the current implementation, a common way of consuming entries from the available ring + // while also leveraging notification suppression is to use a loop, for example: + // + // loop { + // // We have to explicitly disable notifications if `VIRTIO_F_EVENT_IDX` has not been + // // negotiated. + // self.disable_notification()?; + // + // for chain in self.iter()? { + // // Do something with each chain ... + // // Let's assume we process all available chains here. + // } + // + // // If `enable_notification` returns `true`, the driver has added more entries to the + // // available ring. + // if !self.enable_notification()? { + // break; + // } + // } + fn enable_notification(&mut self, mem: &M) -> Result { + self.set_notification(mem, true)?; + // Ensures the following read is not reordered before any previous write operation. + fence(Ordering::SeqCst); + + // We double check here to avoid the situation where the available ring has been updated + // just before we re-enabled notifications, and it's possible to miss one. 
We compare the + // current `avail_idx` value to `self.next_avail` because it's where we stopped processing + // entries. There are situations where we intentionally avoid processing everything in the + // available ring (which will cause this method to return `true`), but in that case we'll + // probably not re-enable notifications as we already know there are pending entries. + self.avail_idx(mem, Ordering::Relaxed) + .map(|idx| idx != self.next_avail) + } + + fn disable_notification(&mut self, mem: &M) -> Result<(), Error> { + self.set_notification(mem, false) + } + + fn needs_notification(&mut self, mem: &M) -> Result { + let used_idx = self.next_used; + + // Complete all the writes in add_used() before reading the event. + fence(Ordering::SeqCst); + + // The VRING_AVAIL_F_NO_INTERRUPT flag isn't supported yet. + if self.event_idx_enabled { + if let Some(old_idx) = self.signalled_used.replace(used_idx) { + let used_event = self.used_event(mem, Ordering::Relaxed)?; + // This check looks at `used_idx`, `used_event`, and `old_idx` as if they are on + // an axis that wraps around. If `used_idx - used_event - Wrapping(1)` is greater + // than or equal to the difference between `used_idx` and `old_idx`, then + // `old_idx` is closer to `used_idx` than `used_event` (and thus more recent), so + // we don't need to elicit another notification.
+ if (used_idx - used_event - Wrapping(1u16)) >= (used_idx - old_idx) { + return Ok(false); + } + } + } + + Ok(true) + } + + fn next_avail(&self) -> u16 { + self.next_avail.0 + } + + fn next_used(&self) -> u16 { + self.next_used.0 + } + + fn set_next_avail(&mut self, next_avail: u16) { + self.next_avail = Wrapping(next_avail); + } + + fn set_next_used(&mut self, next_used: u16) { + self.next_used = Wrapping(next_used); + } +} diff --git a/virtio-queue/src/state_sync.rs b/virtio-queue/src/state_sync.rs new file mode 100644 index 000000000..fcb304126 --- /dev/null +++ b/virtio-queue/src/state_sync.rs @@ -0,0 +1,333 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::num::Wrapping; +use std::sync::atomic::Ordering; +use std::sync::{Arc, Mutex, MutexGuard}; + +use vm_memory::GuestMemory; + +use crate::{Error, QueueState, QueueStateGuard, QueueStateT}; + +/// Struct to maintain information and manipulate state of a virtio queue for multi-threaded +/// context. +/// +/// # Example +/// +/// ```rust +/// use virtio_queue::{Queue, QueueState, QueueStateSync, QueueStateT}; +/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap}; +/// +/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); +/// let mut queue = QueueStateSync::new(1024); +/// +/// // First, the driver sets up the queue; this set up is done via writes on the bus (PCI, MMIO). +/// queue.set_size(8); +/// queue.set_desc_table_address(Some(0x1000), None); +/// queue.set_avail_ring_address(Some(0x2000), None); +/// queue.set_used_ring_address(Some(0x3000), None); +/// queue.set_ready(true); +/// // The user should check if the queue is valid before starting to use it. +/// assert!(queue.is_valid(m.memory())); +/// +/// // The memory object is not embedded in the `QueueStateSync`, so we have to pass it as a +/// // parameter to the methods that access the guest memory. 
Examples would be: +/// queue.add_used(m.memory(), 1, 0x100).unwrap(); +/// queue.needs_notification(m.memory()).unwrap(); +/// ``` +#[derive(Clone, Debug)] +pub struct QueueStateSync { + state: Arc>, +} + +impl QueueStateSync { + fn lock_state(&self) -> MutexGuard { + // Do not expect poisoned lock. + self.state.lock().unwrap() + } +} + +impl<'a> QueueStateGuard<'a> for QueueStateSync { + type G = MutexGuard<'a, QueueState>; +} + +impl QueueStateT for QueueStateSync { + fn new(max_size: u16) -> Self { + QueueStateSync { + state: Arc::new(Mutex::new(QueueState::new(max_size))), + } + } + + fn is_valid(&self, mem: &M) -> bool { + self.lock_state().is_valid(mem) + } + + fn reset(&mut self) { + self.lock_state().reset(); + } + + fn lock(&mut self) -> ::G { + self.lock_state() + } + + fn max_size(&self) -> u16 { + self.lock_state().max_size() + } + + fn set_size(&mut self, size: u16) { + self.lock_state().set_size(size); + } + + fn ready(&self) -> bool { + self.lock_state().ready + } + + fn set_ready(&mut self, ready: bool) { + self.lock_state().set_ready(ready) + } + + fn set_desc_table_address(&mut self, low: Option, high: Option) { + self.lock_state().set_desc_table_address(low, high); + } + + fn set_avail_ring_address(&mut self, low: Option, high: Option) { + self.lock_state().set_avail_ring_address(low, high); + } + + fn set_used_ring_address(&mut self, low: Option, high: Option) { + self.lock_state().set_used_ring_address(low, high); + } + + fn set_event_idx(&mut self, enabled: bool) { + self.lock_state().set_event_idx(enabled); + } + + fn avail_idx(&self, mem: &M, order: Ordering) -> Result, Error> { + self.lock_state().avail_idx(mem, order) + } + + fn used_idx(&self, mem: &M, order: Ordering) -> Result, Error> { + self.lock_state().used_idx(mem, order) + } + + fn add_used( + &mut self, + mem: &M, + head_index: u16, + len: u32, + ) -> Result<(), Error> { + self.lock_state().add_used(mem, head_index, len) + } + + fn enable_notification(&mut self, mem: &M) -> 
Result { + self.lock_state().enable_notification(mem) + } + + fn disable_notification(&mut self, mem: &M) -> Result<(), Error> { + self.lock_state().disable_notification(mem) + } + + fn needs_notification(&mut self, mem: &M) -> Result { + self.lock_state().needs_notification(mem) + } + + fn next_avail(&self) -> u16 { + self.lock_state().next_avail() + } + + fn next_used(&self) -> u16 { + self.lock_state().next_used() + } + + fn set_next_avail(&mut self, next_avail: u16) { + self.lock_state().set_next_avail(next_avail); + } + + fn set_next_used(&mut self, next_used: u16) { + self.lock_state().set_next_used(next_used); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::{ + DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR, + VIRTQ_USED_F_NO_NOTIFY, + }; + use std::sync::Barrier; + use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap}; + + #[test] + fn test_queue_state_sync() { + let mut q = QueueStateSync::new(0x1000); + let mut q2 = q.clone(); + let q3 = q.clone(); + let barrier = Arc::new(Barrier::new(3)); + let b2 = barrier.clone(); + let b3 = barrier.clone(); + + let t1 = std::thread::spawn(move || { + { + let guard = q2.lock(); + assert!(!guard.ready()); + } + b2.wait(); + b2.wait(); + { + let guard = q2.lock(); + assert!(guard.ready()); + } + }); + + let t2 = std::thread::spawn(move || { + assert!(!q3.ready()); + b3.wait(); + b3.wait(); + assert!(q3.ready()); + }); + + barrier.wait(); + q.set_ready(true); + barrier.wait(); + + t1.join().unwrap(); + t2.join().unwrap(); + } + + #[test] + fn test_state_sync_add_used() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mut q = QueueStateSync::new(0x100); + + q.set_desc_table_address(Some(0x1000), None); + q.set_avail_ring_address(Some(0x2000), None); + q.set_used_ring_address(Some(0x3000), None); + q.set_event_idx(true); + q.set_ready(true); + assert!(q.is_valid(m.memory())); + 
assert_eq!(q.lock().size, 0x100); + + assert_eq!(q.max_size(), 0x100); + q.set_size(0x80); + assert_eq!(q.max_size(), 0x100); + q.set_next_avail(5); + assert_eq!(q.next_avail(), 5); + assert_eq!( + q.avail_idx(m.memory(), Ordering::Acquire).unwrap(), + Wrapping(0) + ); + + assert_eq!(q.lock_state().next_used, Wrapping(0)); + + // index too large + assert!(q.add_used(m.memory(), 0x200, 0x1000).is_err()); + assert_eq!(q.lock_state().next_used, Wrapping(0)); + + // should be ok + q.add_used(m.memory(), 1, 0x1000).unwrap(); + assert_eq!(q.lock_state().next_used, Wrapping(1)); + } + + #[test] + fn test_sync_state_reset_queue() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mut q = QueueStateSync::new(0x100); + + q.set_desc_table_address(Some(0x1000), None); + q.set_avail_ring_address(Some(0x2000), None); + q.set_used_ring_address(Some(0x3000), None); + q.set_event_idx(true); + q.set_next_avail(2); + q.set_size(0x8); + q.set_ready(true); + assert!(q.is_valid(m.memory())); + + q.add_used(m.memory(), 1, 0x100).unwrap(); + q.needs_notification(m.memory()).unwrap(); + + assert_eq!(q.lock_state().size, 0x8); + assert!(q.lock_state().ready); + assert_ne!( + q.lock_state().desc_table, + GuestAddress(DEFAULT_DESC_TABLE_ADDR) + ); + assert_ne!( + q.lock_state().avail_ring, + GuestAddress(DEFAULT_AVAIL_RING_ADDR) + ); + assert_ne!( + q.lock_state().used_ring, + GuestAddress(DEFAULT_USED_RING_ADDR) + ); + assert_ne!(q.lock_state().next_avail, Wrapping(0)); + assert_ne!(q.lock_state().next_used, Wrapping(0)); + assert_ne!(q.lock_state().signalled_used, None); + assert!(q.lock_state().event_idx_enabled); + + q.reset(); + assert_eq!(q.lock_state().size, 0x100); + assert!(!q.lock_state().ready); + assert_eq!( + q.lock_state().desc_table, + GuestAddress(DEFAULT_DESC_TABLE_ADDR) + ); + assert_eq!( + q.lock_state().avail_ring, + GuestAddress(DEFAULT_AVAIL_RING_ADDR) + ); + assert_eq!( + q.lock_state().used_ring, + 
GuestAddress(DEFAULT_USED_RING_ADDR) + ); + assert_eq!(q.lock_state().next_avail, Wrapping(0)); + assert_eq!(q.lock_state().next_used, Wrapping(0)); + assert_eq!(q.lock_state().signalled_used, None); + assert!(!q.lock_state().event_idx_enabled); + } + + #[test] + fn test_enable_disable_notification() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mem = m.memory(); + let mut q = QueueStateSync::new(0x100); + + q.set_desc_table_address(Some(0x1000), None); + q.set_avail_ring_address(Some(0x2000), None); + q.set_used_ring_address(Some(0x3000), None); + q.set_ready(true); + assert!(q.is_valid(mem)); + + let used_addr = q.lock_state().used_ring; + + assert!(!q.lock_state().event_idx_enabled); + q.enable_notification(mem).unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, 0); + + q.disable_notification(m.memory()).unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, VIRTQ_USED_F_NO_NOTIFY); + + q.enable_notification(mem).unwrap(); + let v = m.read_obj::(used_addr).map(u16::from_le).unwrap(); + assert_eq!(v, 0); + + q.set_event_idx(true); + let avail_addr = q.lock_state().avail_ring; + m.write_obj::(u16::to_le(2), avail_addr.unchecked_add(2)) + .unwrap(); + + assert!(q.enable_notification(mem).unwrap()); + q.lock_state().next_avail = Wrapping(2); + assert!(!q.enable_notification(mem).unwrap()); + + m.write_obj::(u16::to_le(8), avail_addr.unchecked_add(2)) + .unwrap(); + + assert!(q.enable_notification(mem).unwrap()); + q.lock_state().next_avail = Wrapping(8); + assert!(!q.enable_notification(mem).unwrap()); + } +} diff --git a/vm-virtio/src/queue.rs b/vm-virtio/src/queue.rs index 1d8b29e7e..e037dd5f4 100644 --- a/vm-virtio/src/queue.rs +++ b/vm-virtio/src/queue.rs @@ -227,10 +227,8 @@ pub mod testing { // Creates a new Queue, using the underlying memory regions represented by the VirtQueue. 
pub fn create_queue(&self) -> Queue> { let mem = GuestMemoryAtomic::new(self.mem.clone()); - let mut q = Queue::< - GuestMemoryAtomic, - QueueState>, - >::new(mem, self.size()); + let mut q = + Queue::, QueueState>::new(mem, self.size()); q.state.size = self.size(); q.state.ready = true;