From de2fcc2d878db138f7c90faedd8b928910d85e87 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Thu, 9 Nov 2023 11:26:57 -0800 Subject: [PATCH] tests: Stabilize snapshot_restore tests Since the 'write()' to the event file was moved to its own thread (see #5633), we have no reliable way to read the latest contents of the event file from our integration tests, because we can't ensure that the 'read()' from our test always happens after the 'write()' from Cloud Hypervisor has completed. This is also why we started to see random failures on snapshot_restore tests (particularly when the system workload is high). This patch adds a 1s sleep before reading the event file to mitigate the random failures. Signed-off-by: Bo Chen --- tests/integration.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration.rs b/tests/integration.rs index 0563d2c6f..46b9c380c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -6080,6 +6080,12 @@ mod common_parallel { let r = std::panic::catch_unwind(|| { // Resume the VM assert!(remote_command(&api_socket_restored, "resume", None)); + // There is no way that we can ensure the 'write()' to the + // event file is completed when the 'resume' request is + // returned successfully, because the 'write()' was done + // asynchronously from a different thread of Cloud + // Hypervisor (e.g. the event-monitor thread). + thread::sleep(std::time::Duration::new(1, 0)); let latest_events = [ &MetaEvent { event: "resuming".to_string(),