From de2fcc2d878db138f7c90faedd8b928910d85e87 Mon Sep 17 00:00:00 2001
From: Bo Chen <chen.bo@intel.com>
Date: Thu, 9 Nov 2023 11:26:57 -0800
Subject: [PATCH] tests: Stabilize snapshot_restore tests

Since the 'write()' to the event file was moved to its own thread
(see #5633), we have no reliable way to read the latest contents of
the event file from our integration tests, because we can't ensure
that the 'read()' from our test always happens after the 'write()'
from Cloud Hypervisor has completed. This is also why we started to
see random failures in the snapshot_restore tests (particularly when
the system workload is high).

This patch adds a 1s sleep before reading the event file to mitigate the
random failures.

Signed-off-by: Bo Chen <chen.bo@intel.com>
---
 tests/integration.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/integration.rs b/tests/integration.rs
index 0563d2c6f..46b9c380c 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -6080,6 +6080,12 @@ mod common_parallel {
         let r = std::panic::catch_unwind(|| {
             // Resume the VM
             assert!(remote_command(&api_socket_restored, "resume", None));
+            // There is no way to ensure that the 'write()' to the
+            // event file has completed by the time the 'resume'
+            // request returns successfully, because the 'write()' is
+            // done asynchronously on a different thread of Cloud
+            // Hypervisor (e.g. the event-monitor thread).
+            thread::sleep(std::time::Duration::new(1, 0));
             let latest_events = [
                 &MetaEvent {
                     event: "resuming".to_string(),