From 52ce1c9d5352d13ffedf6f2a0a261abe07785c3c Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 4 Aug 2022 12:57:15 +0300 Subject: [PATCH] Speed up test shutdown, by polling more frequently. A fair amount of the time in our python tests is spent waiting for the pageserver and safekeeper processes to shut down. It doesn't matter so much when you're running a lot of tests in parallel, but it's quite noticeable when running them sequentially. A big part of the slowness is that after sending the SIGTERM signal, we poll to see if the process is still running, and the polling happened at 1 s intervals. Reduce it to 0.1 s. --- control_plane/src/safekeeper.rs | 10 ++++++---- control_plane/src/storage.rs | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs index d87be95b82..0cae479d71 100644 --- a/control_plane/src/safekeeper.rs +++ b/control_plane/src/safekeeper.rs @@ -247,7 +247,7 @@ impl SafekeeperNode { // Shutting down may take a long time, // if safekeeper flushes a lot of data let mut tcp_stopped = false; - for _ in 0..100 { + for i in 0..600 { if !tcp_stopped { if let Err(err) = TcpStream::connect(&address) { tcp_stopped = true; @@ -272,9 +272,11 @@ impl SafekeeperNode { } } } - print!("."); - io::stdout().flush().unwrap(); - thread::sleep(Duration::from_secs(1)); + if i % 10 == 0 { + print!("."); + io::stdout().flush().unwrap(); + } + thread::sleep(Duration::from_millis(100)); } bail!("Failed to stop safekeeper with pid {}", pid); diff --git a/control_plane/src/storage.rs b/control_plane/src/storage.rs index 13d64a79f0..c2ed3fc824 100644 --- a/control_plane/src/storage.rs +++ b/control_plane/src/storage.rs @@ -318,7 +318,7 @@ impl PageServerNode { // Shutting down may take a long time, // if pageserver checkpoints a lot of data let mut tcp_stopped = false; - for _ in 0..100 { + for i in 0..600 { if !tcp_stopped { if let Err(err) = 
TcpStream::connect(&address) { tcp_stopped = true; @@ -344,9 +344,11 @@ impl PageServerNode { } } } - print!("."); - io::stdout().flush().unwrap(); - thread::sleep(Duration::from_secs(1)); + if i % 10 == 0 { + print!("."); + io::stdout().flush().unwrap(); + } + thread::sleep(Duration::from_millis(100)); } bail!("Failed to stop pageserver with pid {}", pid);