vmm: cpu: Retry signalling the vCPU thread if it doesn't acknowledge

Resignal every 10ms the thread if it has not acknowledged the signal via setting the atomic when the vCPU thread was acknowledged. Further, avoid an infinite loop by generating an error if it takes more than 1000ms to interrupt the thread. The retry helps mitigate a race condition where the signal is received between checking the pause atomic and entering KVM_RUN ioctl when pausing. Hitting this race condition would leave the wait_untial_signal_acknowledged() method spinning indefinitely. The timeout error avoids the VMM process being blocked indefinitely. See: #7427 Signed-off-by: Rob Bradford <rbradford@rivosinc.com>
2025-11-04 19:41:04 +00:00 · 2025-11-04 19:41:04 +00:00 · e023efce3d
commit e023efce3d
parent bb7730e00f
1 changed files with 26 additions and 10 deletions
--- a/vmm/src/cpu.rs
+++ b/vmm/src/cpu.rs
@ -153,6 +153,9 @@ pub enum Error {
    #[error("Failed to join on vCPU threads: {0:?}")]
    ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),

+    #[error("Timeout when waiting for signal to be acknowledged")]
+    SignalAcknowledgeTimeout,
+
    #[error("Error adding CpuManager to MMIO bus")]
    BusError(#[source] vm_device::BusError),

@ -707,21 +710,31 @@ impl VcpuState {
        }
    }

-    /// Blocks until the vCPU thread has acknowledged the signal.
+    /// Blocks until the vCPU thread has acknowledged the signal. It retries to send
+    /// the signal every 10ms. Times out after 1000ms.
    ///
    /// This is the counterpart of [`Self::signal_thread`].
-    fn wait_until_signal_acknowledged(&self) {
+    fn wait_until_signal_acknowledged(&self) -> Result<()> {
        if let Some(_handle) = self.handle.as_ref() {
+            let mut count = 0;
            loop {
                if self.vcpu_run_interrupted.load(Ordering::SeqCst) {
-                    break;
+                    return Ok(());
                } else {
                    // This is more effective than thread::yield_now() at
                    // avoiding a priority inversion with the vCPU thread
                    thread::sleep(std::time::Duration::from_millis(1));
+                    count += 1;
+                    if count >= 1000 {
+                        return Err(Error::SignalAcknowledgeTimeout);
+                    } else if count % 10 == 0 {
+                        warn!("vCPU thread did not respond in {count}ms to signal - retrying");
+                        self.signal_thread();
+                    }
                }
            }
        }
+        Ok(())
    }

    fn join_thread(&mut self) -> Result<()> {
@ -1356,7 +1369,7 @@ impl CpuManager {
        let state = &mut self.vcpu_states[usize::try_from(cpu_id).unwrap()];
        state.kill.store(true, Ordering::SeqCst);
        state.signal_thread();
-        state.wait_until_signal_acknowledged();
+        state.wait_until_signal_acknowledged()?;
        state.join_thread()?;
        state.handle = None;

@ -1428,15 +1441,17 @@ impl CpuManager {
    ///
    /// For the vCPU threads this will interrupt the KVM_RUN ioctl() allowing
    /// the loop to check the shared state booleans.
-    fn signal_vcpus(&self) {
+    fn signal_vcpus(&mut self) -> Result<()> {
        // Splitting this into two loops reduced the time to pause many vCPUs
        // massively. Example: 254 vCPUs. >254ms -> ~4ms.
        for state in self.vcpu_states.iter() {
            state.signal_thread();
        }
        for state in self.vcpu_states.iter() {
-            state.wait_until_signal_acknowledged();
+            state.wait_until_signal_acknowledged()?;
        }
+
+        Ok(())
    }

    pub fn shutdown(&mut self) -> Result<()> {
@ -1451,7 +1466,7 @@ impl CpuManager {
            state.unpark_thread();
        }

-        self.signal_vcpus();
+        self.signal_vcpus()?;

        // Wait for all the threads to finish. This removes the state from the vector.
        for mut state in self.vcpu_states.drain(..) {
@ -2065,9 +2080,9 @@ impl CpuManager {
        self.sev_snp_enabled
    }

-    pub(crate) fn nmi(&self) -> Result<()> {
+    pub(crate) fn nmi(&mut self) -> Result<()> {
        self.vcpus_kick_signalled.store(true, Ordering::SeqCst);
-        self.signal_vcpus();
+        self.signal_vcpus()?;
        self.vcpus_kick_signalled.store(false, Ordering::SeqCst);

        Ok(())
@ -2429,7 +2444,8 @@ impl Pausable for CpuManager {
        // Tell the vCPUs to pause themselves next time they exit
        self.vcpus_pause_signalled.store(true, Ordering::SeqCst);

-        self.signal_vcpus();
+        self.signal_vcpus()
+            .map_err(|e| MigratableError::Pause(anyhow!("Error signalling vCPUs: {e}")))?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        for vcpu in self.vcpus.iter() {