From 09392f0530bcb8e7e880123de0f809b4e2d776a8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 24 Sep 2019 18:53:34 -0700 Subject: [PATCH] vhost_user_backend: Fix remaining issues This commit fixes all the remaining issues that were found as part of the integration with vhost-user-net. It fixes the way to notify that a vring is used, by using the proper EventFd. It removes the process_queue() function from the trait, since the complexity it was introducing was leading to deadlocks with mutexes. It moves the register/unregister functions for registering custom events from the backend, from the VringEpollHandler to the VringWorker. This allows for a lot of simplification and solve a deadlock issue. Signed-off-by: Sebastien Boeuf --- vhost_user_backend/src/lib.rs | 361 +++++++++++++--------------------- 1 file changed, 134 insertions(+), 227 deletions(-) diff --git a/vhost_user_backend/src/lib.rs b/vhost_user_backend/src/lib.rs index 9b82b6699..eae8107d0 100644 --- a/vhost_user_backend/src/lib.rs +++ b/vhost_user_backend/src/lib.rs @@ -22,7 +22,7 @@ use vhost_rs::vhost_user::{ }; use vm_memory::guest_memory::FileOffset; use vm_memory::{GuestAddress, GuestMemoryMmap}; -use vm_virtio::{DescriptorChain, Queue}; +use vm_virtio::Queue; use vmm_sys_util::eventfd::EventFd; #[derive(Debug)] @@ -63,6 +63,9 @@ pub trait VhostUserBackend: Send + Sync + 'static { /// Virtio features. fn features(&self) -> u64; + /// Update guest memory regions. + fn update_memory(&mut self, mem: GuestMemoryMmap) -> result::Result<(), io::Error>; + /// This function gets called if the backend registered some additional /// listeners onto specific file descriptors. The library can handle /// virtqueues on its own, but does not know what to do with events @@ -71,28 +74,20 @@ pub trait VhostUserBackend: Send + Sync + 'static { &mut self, device_event: u16, evset: epoll::Events, + vrings: &Vec>>, ) -> result::Result; - /// This function is responsible for the actual processing that needs to - /// happen when one of the virtqueues is available. - fn process_queue( - &mut self, - q_idx: u16, - avail_desc: &DescriptorChain, - mem: &GuestMemoryMmap, - ) -> result::Result; - /// Get virtio device configuration. /// A default implementation is provided as we cannot expect all backends /// to implement this function. - fn get_config(&self, offset: u32, size: u32) -> Vec { + fn get_config(&self, _offset: u32, _size: u32) -> Vec { Vec::new() } /// Set virtio device configuration. /// A default implementation is provided as we cannot expect all backends /// to implement this function. - fn set_config(&mut self, offset: u32, buf: &[u8]) -> result::Result<(), io::Error> { + fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> result::Result<(), io::Error> { Ok(()) } } @@ -132,7 +127,7 @@ impl VhostUserDaemon { /// disconnects. pub fn start(&mut self) -> Result<()> { let mut slave_listener = - SlaveListener::new(self.sock_path.as_str(), false, self.handler.clone()) + SlaveListener::new(self.sock_path.as_str(), true, self.handler.clone()) .map_err(Error::CreateSlaveListener)?; let mut slave_handler = slave_listener .accept() @@ -161,11 +156,11 @@ impl VhostUserDaemon { Ok(()) } - /// Retrieve the vring handler. This is necessary to perform further + /// Retrieve the vring worker. This is necessary to perform further /// actions like registering and unregistering some extra event file - /// descriptors, as well as forcing some vring to be processed. - pub fn get_vring_handler(&self) -> Arc>> { - self.handler.lock().unwrap().get_vring_handler() + /// descriptors. + pub fn get_vring_worker(&self) -> Arc { + self.handler.lock().unwrap().get_vring_worker() } } @@ -179,7 +174,7 @@ struct Memory { mappings: Vec, } -struct Vring { +pub struct Vring { queue: Queue, kick: Option, call: Option, @@ -197,6 +192,18 @@ impl Vring { enabled: false, } } + + pub fn mut_queue(&mut self) -> &mut Queue { + &mut self.queue + } + + pub fn signal_used_queue(&self) -> result::Result<(), io::Error> { + if let Some(call) = self.call.as_ref() { + return call.write(1); + } + + Ok(()) + } } #[derive(Debug)] @@ -210,143 +217,104 @@ pub enum VringEpollHandlerError { HandleEventReadKick(io::Error), /// Failed to handle the event from the backend. HandleEventBackendHandling(io::Error), - /// Failed to register vring listener. - RegisterVringListener(io::Error), - /// Failed to unregister vring listener. - UnregisterVringListener(io::Error), } -impl std::fmt::Display for VringEpollHandlerError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - VringEpollHandlerError::ProcessQueueBackendProcessing(e) => { - write!(f, "failed processing queue from backend: {}", e) - } - VringEpollHandlerError::SignalUsedQueue(e) => { - write!(f, "failed signalling used queue: {}", e) - } - VringEpollHandlerError::HandleEventReadKick(e) => { - write!(f, "failed reading from kick eventfd: {}", e) - } - VringEpollHandlerError::HandleEventBackendHandling(e) => { - write!(f, "failed handling event from backend: {}", e) - } - VringEpollHandlerError::RegisterVringListener(e) => { - write!(f, "failed registering vring listener: {}", e) - } - VringEpollHandlerError::UnregisterVringListener(e) => { - write!(f, "failed unregistering vring listener: {}", e) - } - } - } -} - -impl error::Error for VringEpollHandlerError {} - /// Result of vring epoll handler operations. type VringEpollHandlerResult = std::result::Result; -pub struct VringEpollHandler { +struct VringEpollHandler { backend: Arc>, vrings: Vec>>, - mem: Option, - epoll_fd: RawFd, } impl VringEpollHandler { - fn update_memory(&mut self, mem: Option) { - self.mem = mem; - } - - /// Trigger the processing of a virtqueue. This function is meant to be - /// used by the caller whenever it might need some available queues to - /// send data back to the guest. - /// A concrete example is a backend registering one extra listener for - /// data that needs to be sent to the guest. When the associated event - /// is triggered, the backend will be invoked through its `handle_event` - /// implementation. And in this case, the way to handle the event is to - /// call into `process_queue` to let it invoke the backend implementation - /// of `process_queue`. With this twisted trick, all common parts related - /// to the virtqueues can remain part of the library. - pub fn process_queue(&mut self, q_idx: u16) -> VringEpollHandlerResult<()> { - let vring = &mut self.vrings[q_idx as usize].write().unwrap(); - let mut used_desc_heads = vec![(0, 0); vring.queue.size as usize]; - let mut used_count = 0; - if let Some(mem) = &self.mem { - for avail_desc in vring.queue.iter(&mem) { - let used_len = self - .backend - .write() - .unwrap() - .process_queue(q_idx, &avail_desc, &mem) - .map_err(VringEpollHandlerError::ProcessQueueBackendProcessing)?; - - used_desc_heads[used_count] = (avail_desc.index, used_len); - used_count += 1; - } - - for &(desc_index, len) in &used_desc_heads[..used_count] { - vring.queue.add_used(&mem, desc_index, len); - } - } - - if used_count > 0 { - if let Some(call) = &vring.call { - call.write(1) - .map_err(VringEpollHandlerError::SignalUsedQueue)?; - } - } - - Ok(()) - } - fn handle_event( - &mut self, + &self, device_event: u16, evset: epoll::Events, ) -> VringEpollHandlerResult { let num_queues = self.vrings.len(); - match device_event as usize { - x if x < num_queues => { - if let Some(kick) = &self.vrings[device_event as usize].read().unwrap().kick { - kick.read() - .map_err(VringEpollHandlerError::HandleEventReadKick)?; - } - - // If the vring is not enabled, it should not be processed. - // The event is only read to be discarded. - if !self.vrings[device_event as usize].read().unwrap().enabled { - return Ok(false); - } - - self.process_queue(device_event)?; - Ok(false) + if (device_event as usize) < num_queues { + if let Some(kick) = &self.vrings[device_event as usize].read().unwrap().kick { + kick.read() + .map_err(VringEpollHandlerError::HandleEventReadKick)?; } - _ => self - .backend - .write() - .unwrap() - .handle_event(device_event, evset) - .map_err(VringEpollHandlerError::HandleEventBackendHandling), - } - } - fn register_vring_listener(&self, q_idx: usize) -> VringEpollHandlerResult<()> { - if let Some(fd) = &self.vrings[q_idx].read().unwrap().kick { - self.register_listener(fd.as_raw_fd(), epoll::Events::EPOLLIN, q_idx as u64) - .map_err(VringEpollHandlerError::RegisterVringListener) - } else { - Ok(()) + // If the vring is not enabled, it should not be processed. + // The event is only read to be discarded. + if !self.vrings[device_event as usize].read().unwrap().enabled { + return Ok(false); + } } - } - fn unregister_vring_listener(&self, q_idx: usize) -> VringEpollHandlerResult<()> { - if let Some(fd) = &self.vrings[q_idx].read().unwrap().kick { - self.unregister_listener(fd.as_raw_fd(), epoll::Events::EPOLLIN, q_idx as u64) - .map_err(VringEpollHandlerError::UnregisterVringListener) - } else { - Ok(()) + self.backend + .write() + .unwrap() + .handle_event(device_event, evset, &self.vrings) + .map_err(VringEpollHandlerError::HandleEventBackendHandling) + } +} + +#[derive(Debug)] +/// Errors related to vring worker. +enum VringWorkerError { + /// Failed while waiting for events. + EpollWait(io::Error), +} + +/// Result of vring worker operations. +type VringWorkerResult = std::result::Result; + +pub struct VringWorker { + epoll_fd: RawFd, +} + +impl VringWorker { + fn run(&self, handler: VringEpollHandler) -> VringWorkerResult<()> { + const EPOLL_EVENTS_LEN: usize = 100; + let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; + + 'epoll: loop { + let num_events = match epoll::wait(self.epoll_fd, -1, &mut events[..]) { + Ok(res) => res, + Err(e) => { + if e.kind() == io::ErrorKind::Interrupted { + // It's well defined from the epoll_wait() syscall + // documentation that the epoll loop can be interrupted + // before any of the requested events occurred or the + // timeout expired. In both those cases, epoll_wait() + // returns an error of type EINTR, but this should not + // be considered as a regular error. Instead it is more + // appropriate to retry, by calling into epoll_wait(). + continue; + } + return Err(VringWorkerError::EpollWait(e)); + } + }; + + for event in events.iter().take(num_events) { + let evset = match epoll::Events::from_bits(event.events) { + Some(evset) => evset, + None => { + let evbits = event.events; + println!("epoll: ignoring unknown event set: 0x{:x}", evbits); + continue; + } + }; + + let ev_type = event.data as u16; + + if let Err(e) = handler.handle_event(ev_type, evset) { + println!( + "vring handler handle event {} with error {:?}\n", + ev_type, e + ); + break 'epoll; + } + } } + + Ok(()) } /// Register a custom event only meaningful to the caller. When this event @@ -387,71 +355,6 @@ impl VringEpollHandler { } } -#[derive(Debug)] -/// Errors related to vring worker. -enum VringWorkerError { - /// Failed while waiting for events. - EpollWait(io::Error), - /// Failed to handle event. - HandleEvent(VringEpollHandlerError), -} - -/// Result of vring worker operations. -type VringWorkerResult = std::result::Result; - -struct VringWorker { - handler: Arc>>, -} - -impl VringWorker { - fn run(&self, epoll_fd: RawFd) -> VringWorkerResult<()> { - const EPOLL_EVENTS_LEN: usize = 100; - let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; - - 'epoll: loop { - let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { - Ok(res) => res, - Err(e) => { - if e.kind() == io::ErrorKind::Interrupted { - // It's well defined from the epoll_wait() syscall - // documentation that the epoll loop can be interrupted - // before any of the requested events occurred or the - // timeout expired. In both those cases, epoll_wait() - // returns an error of type EINTR, but this should not - // be considered as a regular error. Instead it is more - // appropriate to retry, by calling into epoll_wait(). - continue; - } - return Err(VringWorkerError::EpollWait(e)); - } - }; - - for event in events.iter().take(num_events) { - let evset = match epoll::Events::from_bits(event.events) { - Some(evset) => evset, - None => { - let evbits = event.events; - println!("epoll: ignoring unknown event set: 0x{:x}", evbits); - continue; - } - }; - - let ev_type = event.data as u16; - - if let Err(e) = self.handler.write().unwrap().handle_event(ev_type, evset) { - println!( - "vring handler handle event {} with error {:?}\n", - ev_type, e - ); - break 'epoll; - } - } - } - - Ok(()) - } -} - #[derive(Debug)] /// Errors related to vhost-user handler. pub enum VhostUserHandlerError { @@ -482,7 +385,7 @@ type VhostUserHandlerResult = std::result::Result; struct VhostUserHandler { backend: Arc>, - vring_handler: Arc>>, + worker: Arc, owned: bool, features_acked: bool, acked_features: u64, @@ -498,28 +401,30 @@ impl VhostUserHandler { let num_queues = backend.read().unwrap().num_queues(); let max_queue_size = backend.read().unwrap().max_queue_size(); - let vrings = vec![Arc::new(RwLock::new(Vring::new(max_queue_size as u16))); num_queues]; + let mut vrings: Vec>> = Vec::new(); + for _ in 0..num_queues { + let vring = Arc::new(RwLock::new(Vring::new(max_queue_size as u16))); + vrings.push(vring); + } + // Create the epoll file descriptor let epoll_fd = epoll::create(true).map_err(VhostUserHandlerError::EpollCreateFd)?; - let vring_handler = Arc::new(RwLock::new(VringEpollHandler { + let vring_handler = VringEpollHandler { backend: backend.clone(), vrings: vrings.clone(), - mem: None, - epoll_fd, - })); - let worker = VringWorker { - handler: vring_handler.clone(), }; + let vring_worker = Arc::new(VringWorker { epoll_fd }); + let worker = vring_worker.clone(); thread::Builder::new() .name("vring_worker".to_string()) - .spawn(move || worker.run(epoll_fd)) + .spawn(move || vring_worker.run(vring_handler)) .map_err(VhostUserHandlerError::SpawnVringWorker)?; Ok(VhostUserHandler { backend, - vring_handler, + worker, owned: false, features_acked: false, acked_features: 0, @@ -531,8 +436,8 @@ impl VhostUserHandler { }) } - fn get_vring_handler(&self) -> Arc>> { - self.vring_handler.clone() + fn get_vring_worker(&self) -> Arc { + self.worker.clone() } fn vmm_va_to_gpa(&self, vmm_va: u64) -> VhostUserHandlerResult { @@ -634,7 +539,13 @@ impl VhostUserSlaveReqHandler for VhostUserHandler { let mem = GuestMemoryMmap::with_files(regions).map_err(|e| { VhostUserError::ReqHandlerError(io::Error::new(io::ErrorKind::Other, e)) })?; - self.vring_handler.write().unwrap().update_memory(Some(mem)); + self.backend + .write() + .unwrap() + .update_memory(mem) + .map_err(|e| { + VhostUserError::ReqHandlerError(io::Error::new(io::ErrorKind::Other, e)) + })?; self.memory = Some(Memory { mappings }); Ok(()) @@ -712,13 +623,11 @@ impl VhostUserSlaveReqHandler for VhostUserHandler { // VHOST_USER_SET_VRING_KICK, and stop ring upon receiving // VHOST_USER_GET_VRING_BASE. self.vrings[index as usize].write().unwrap().queue.ready = false; - self.vring_handler - .read() - .unwrap() - .unregister_vring_listener(index as usize) - .map_err(|e| { - VhostUserError::ReqHandlerError(io::Error::new(io::ErrorKind::Other, e)) - })?; + if let Some(fd) = self.vrings[index as usize].read().unwrap().kick.as_ref() { + self.worker + .unregister_listener(fd.as_raw_fd(), epoll::Events::EPOLLIN, index as u64) + .map_err(VhostUserError::ReqHandlerError)?; + } let next_avail = self.vrings[index as usize] .read() @@ -748,13 +657,11 @@ impl VhostUserSlaveReqHandler for VhostUserHandler { // VHOST_USER_SET_VRING_KICK, and stop ring upon receiving // VHOST_USER_GET_VRING_BASE. self.vrings[index as usize].write().unwrap().queue.ready = true; - self.vring_handler - .read() - .unwrap() - .register_vring_listener(index as usize) - .map_err(|e| { - VhostUserError::ReqHandlerError(io::Error::new(io::ErrorKind::Other, e)) - })?; + if let Some(fd) = self.vrings[index as usize].read().unwrap().kick.as_ref() { + self.worker + .register_listener(fd.as_raw_fd(), epoll::Events::EPOLLIN, index as u64) + .map_err(VhostUserError::ReqHandlerError)?; + } Ok(()) }