callmemaybe refactoring

- Don't spawn a separate thread for each connection.
Instead, use one thread per safekeeper that iterates over all connections and sends callback requests for them.

- Use tokio-postgres to connect to the pageserver, to avoid spawning a new thread for each connection.

callmemaybe review fixes:
- Spawn all request_callback tasks separately.
- Remember 'last_call_time' and only send request_callback if 'recall_period' has passed.
- If a task hasn't finished by the next recall, abort it and try again.
- Add pause/resume CallmeEvents to avoid spamming the pageserver when a connection is already established.
This commit is contained in:
anastasia
2021-11-16 15:32:44 +03:00
committed by lubennikovaav
parent 7dece8e4a0
commit 41dce68bdd
9 changed files with 439 additions and 107 deletions

View File

@@ -8,15 +8,18 @@ use daemonize::Daemonize;
use std::path::{Path, PathBuf};
use std::thread;
use tracing::*;
use zenith_utils::http::endpoint;
use zenith_utils::{logging, tcp_listener, GIT_VERSION};
use tokio::sync::mpsc;
use walkeeper::callmemaybe;
use walkeeper::defaults::{DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR};
use walkeeper::http;
use walkeeper::s3_offload;
use walkeeper::wal_service;
use walkeeper::SafeKeeperConf;
use zenith_utils::http::endpoint;
use zenith_utils::shutdown::exit_now;
use zenith_utils::signals;
use zenith_utils::{logging, tcp_listener, GIT_VERSION};
fn main() -> Result<()> {
zenith_metrics::set_common_metrics_prefix("safekeeper");
@@ -181,16 +184,35 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
);
}
threads.push(
thread::Builder::new()
.name("WAL acceptor thread".into())
.spawn(|| {
let thread_result = wal_service::thread_main(conf, pg_listener);
if let Err(e) = thread_result {
info!("wal_service thread terminated: {}", e);
}
})?,
);
let (tx, rx) = mpsc::channel(100);
let conf_cloned = conf.clone();
let wal_acceptor_thread = thread::Builder::new()
.name("WAL acceptor thread".into())
.spawn(|| {
// thread code
let thread_result = wal_service::thread_main(conf_cloned, pg_listener, tx);
if let Err(e) = thread_result {
info!("wal_service thread terminated: {}", e);
}
})
.unwrap();
threads.push(wal_acceptor_thread);
let callmemaybe_thread = thread::Builder::new()
.name("callmemaybe thread".into())
.spawn(|| {
// thread code
let thread_result = callmemaybe::thread_main(conf, rx);
if let Err(e) = thread_result {
error!("callmemaybe thread terminated: {}", e);
}
})
.unwrap();
threads.push(callmemaybe_thread);
// TODO: put more thoughts into handling of failed threads
// We probably should restart them.
// NOTE: we still have to handle signals like SIGQUIT to prevent coredumps
signals.handle(|signal| {