Compare commits

...

4 Commits

Author               SHA1        Message                                        Date
Konstantin Knizhnik  24b1b412ee  Launch multiple wal-redo postgres instances    2021-12-08 13:02:57 +03:00
Konstantin Knizhnik  0ce4dca05a  Diable relish upload and backpressure          2021-12-08 10:30:45 +03:00
Konstantin Knizhnik  1530143e00  Merge branch 'main' into main_local            2021-12-06 17:22:20 +03:00
Konstantin Knizhnik  f77c9c987f  Use different default values                   2021-12-06 17:21:48 +03:00
5 changed files with 25 additions and 19 deletions

View File

@@ -287,14 +287,15 @@ impl PostgresNode {
         conf.append("max_replication_slots", "10");
         conf.append("hot_standby", "on");
         conf.append("shared_buffers", "1MB");
         conf.append("max_wal_size", "100GB");
         conf.append("fsync", "off");
         conf.append("max_connections", "100");
         conf.append("wal_level", "replica");
         // wal_sender_timeout is the maximum time to wait for WAL replication.
         // It also defines how often the walreciever will send a feedback message to the wal sender.
-        conf.append("wal_sender_timeout", "5s");
-        conf.append("max_replication_flush_lag", "160MB");
-        conf.append("max_replication_apply_lag", "1500MB");
+        //conf.append("wal_sender_timeout", "5s");
+        //conf.append("max_replication_flush_lag", "160MB");
+        //conf.append("max_replication_apply_lag", "1500MB");
         conf.append("listen_addresses", &self.address.ip().to_string());
         conf.append("port", &self.address.port().to_string());

View File

@@ -39,10 +39,10 @@ pub mod defaults {
     // would be more appropriate. But a low value forces the code to be exercised more,
     // which is good for now to trigger bugs.
     pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
-    pub const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(1);
+    pub const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(10);
     pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
-    pub const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(100);
+    pub const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(10);
     pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
     pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 100;
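
The checkpoint period is relaxed from 1 s to 10 s and the GC period tightened from 100 s to 10 s. Below is a rough sketch of how such period constants typically drive a periodic background pass; the loop is illustrative only, not the pageserver's actual checkpointer/GC scheduler.

```rust
use std::thread;
use std::time::Duration;

// Values as set by the hunk above.
const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(10);
const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(10);

// Illustrative only: a background pass that runs once per period. The period
// constant directly bounds how often checkpoint/GC work happens, which is
// what the 1 s -> 10 s and 100 s -> 10 s changes tune.
fn run_periodic(period: Duration, mut pass: impl FnMut()) {
    loop {
        thread::sleep(period);
        pass();
    }
}

fn main() {
    thread::spawn(|| run_periodic(DEFAULT_CHECKPOINT_PERIOD, || println!("checkpoint pass")));
    run_periodic(DEFAULT_GC_PERIOD, || println!("gc pass"));
}
```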

View File

@@ -106,7 +106,7 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
         conf,
         Arc::new(walredo_mgr),
         tenant_id,
-        true,
+        false,
     ));
     let mut m = access_tenants();
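
The literal passed to the repository constructor flips from `true` to `false`. The parameter's name is not visible in this hunk; judging by commit 0ce4dca05a ("Diable relish upload and backpressure") it is presumably the switch that uploads relishes to remote storage. The sketch below shows how such a flag might be threaded through; all names are hypothetical, not the repository's real API.

```rust
// Hypothetical: `upload_relishes` stands in for the boolean flipped above;
// the real constructor signature is not shown in this diff.
struct Repository {
    upload_relishes: bool,
}

impl Repository {
    fn new(upload_relishes: bool) -> Repository {
        Repository { upload_relishes }
    }

    fn after_checkpoint(&self, new_layer_files: &[String]) {
        if self.upload_relishes {
            // schedule the freshly written files for upload to remote storage
            for f in new_layer_files {
                println!("uploading {}", f);
            }
        }
        // with `false`, the data stays local to the pageserver
    }
}

fn main() {
    let repo = Repository::new(false);
    repo.after_checkpoint(&["layer-0001".to_string()]);
}
```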

View File

@@ -32,6 +32,7 @@ use std::os::unix::io::AsRawFd;
 use std::path::PathBuf;
 use std::process::Stdio;
 use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command};
+use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Mutex;
 use std::time::Duration;
 use std::time::Instant;
@@ -53,6 +54,8 @@ use postgres_ffi::nonrelfile_utils::transaction_id_set_status;
 use postgres_ffi::pg_constants;
 use postgres_ffi::XLogRecord;
 
+const N_WAL_REDO_PROCS: usize = 1;
+
 ///
 /// `RelTag` + block number (`blknum`) gives us a unique id of the page in the cluster.
 ///
@@ -139,7 +142,8 @@ pub struct PostgresRedoManager {
     tenantid: ZTenantId,
     conf: &'static PageServerConf,
 
-    process: Mutex<Option<PostgresRedoProcess>>,
+    round_robin: AtomicUsize,
+    processes: [Mutex<Option<PostgresRedoProcess>>; N_WAL_REDO_PROCS],
 }
 
 #[derive(Debug)]
@@ -209,12 +213,13 @@ impl WalRedoManager for PostgresRedoManager {
             end_time = Instant::now();
             WAL_REDO_TIME.observe(end_time.duration_since(start_time).as_secs_f64());
         } else {
-            let mut process_guard = self.process.lock().unwrap();
+            let rr = self.round_robin.fetch_add(1, Ordering::Relaxed) % N_WAL_REDO_PROCS;
+            let mut process_guard = self.processes[rr].lock().unwrap();
             let lock_time = Instant::now();
 
             // launch the WAL redo process on first use
             if process_guard.is_none() {
-                let p = PostgresRedoProcess::launch(self.conf, &self.tenantid)?;
+                let p = PostgresRedoProcess::launch(self.conf, &self.tenantid, rr)?;
                 *process_guard = Some(p);
             }
             let process = process_guard.as_mut().unwrap();
@@ -246,7 +251,8 @@ impl PostgresRedoManager {
         PostgresRedoManager {
             tenantid,
             conf,
 
-            process: Mutex::new(None),
+            round_robin: AtomicUsize::new(0),
+            processes: [(); N_WAL_REDO_PROCS].map(|_| Mutex::new(None)),
         }
     }
@@ -472,11 +478,17 @@ impl PostgresRedoProcess {
     //
     // Start postgres binary in special WAL redo mode.
     //
-    fn launch(conf: &PageServerConf, tenantid: &ZTenantId) -> Result<PostgresRedoProcess, Error> {
+    fn launch(
+        conf: &PageServerConf,
+        tenantid: &ZTenantId,
+        id: usize,
+    ) -> Result<PostgresRedoProcess, Error> {
         // FIXME: We need a dummy Postgres cluster to run the process in. Currently, we
         // just create one with constant name. That fails if you try to launch more than
         // one WAL redo manager concurrently.
-        let datadir = conf.tenant_path(tenantid).join("wal-redo-datadir");
+        let datadir = conf
+            .tenant_path(tenantid)
+            .join(format! {"wal-redo-datadir-{}", id});
 
         // Create empty data directory for wal-redo postgres, deleting old one first.
         if datadir.exists() {
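
Taken together, the walredo hunks replace the single `process` slot with an array of `N_WAL_REDO_PROCS` mutex-protected slots plus an atomic round-robin counter, and each slot lazily launches its own wal-redo postgres in its own `wal-redo-datadir-{id}`. The condensed sketch below shows the same pattern in isolation; `Process`, `apply`, and `request_redo` are simplified stand-ins for `PostgresRedoProcess` and the `WalRedoManager` trait, not the real signatures.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;

// The diff ships with N_WAL_REDO_PROCS = 1; raising it enables the fan-out.
const N_WAL_REDO_PROCS: usize = 4;

// Stand-in for PostgresRedoProcess: launched lazily, one data directory per slot.
struct Process {
    id: usize,
}

impl Process {
    fn launch(id: usize) -> Process {
        // the real code starts `postgres` in WAL-redo mode in "wal-redo-datadir-{id}"
        Process { id }
    }

    fn apply(&mut self, record: &[u8]) -> Vec<u8> {
        // the real code pipes the WAL record to the child and reads back the page
        println!("process {} applying {} bytes", self.id, record.len());
        vec![0u8; 8192]
    }
}

struct RedoManager {
    round_robin: AtomicUsize,
    processes: [Mutex<Option<Process>>; N_WAL_REDO_PROCS],
}

impl RedoManager {
    fn new() -> RedoManager {
        RedoManager {
            round_robin: AtomicUsize::new(0),
            // `[(); N].map(..)` builds an array of non-Copy Mutexes without Default
            processes: [(); N_WAL_REDO_PROCS].map(|_| Mutex::new(None)),
        }
    }

    fn request_redo(&self, record: &[u8]) -> Vec<u8> {
        // Pick a slot round-robin; callers landing on different slots redo in parallel.
        let rr = self.round_robin.fetch_add(1, Ordering::Relaxed) % N_WAL_REDO_PROCS;
        let mut guard = self.processes[rr].lock().unwrap();
        // Launch the wal-redo process for this slot on first use, as the diff does.
        let process = guard.get_or_insert_with(|| Process::launch(rr));
        process.apply(record)
    }
}

fn main() {
    let mgr = RedoManager::new();
    let page = mgr.request_redo(b"dummy wal record");
    println!("redone page: {} bytes", page.len());
}
```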

View File

@@ -36,17 +36,10 @@ pg_port = {pageserver_pg_port}
 http_port = {pageserver_http_port}
 auth_type = '{pageserver_auth_type}'
-[[safekeepers]]
-name = '{safekeeper_name}'
-pg_port = {safekeeper_pg_port}
-http_port = {safekeeper_http_port}
 "#,
         pageserver_pg_port = DEFAULT_PAGESERVER_PG_PORT,
         pageserver_http_port = DEFAULT_PAGESERVER_HTTP_PORT,
         pageserver_auth_type = AuthType::Trust,
-        safekeeper_name = DEFAULT_SAFEKEEPER_NAME,
-        safekeeper_pg_port = DEFAULT_SAFEKEEPER_PG_PORT,
-        safekeeper_http_port = DEFAULT_SAFEKEEPER_HTTP_PORT,
     )
 }
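
After this change, the generated default config describes only the pageserver; a safekeeper entry has to be added explicitly where one is wanted. Below is a rough sketch of appending such an entry to an already-generated config string; the name 'single' and the literal port numbers are placeholders, not values taken from the diff.

```rust
// Illustrative only: append a [[safekeepers]] block to an existing config
// string. The name and ports below are placeholders.
fn with_safekeeper(mut config: String, name: &str, pg_port: u16, http_port: u16) -> String {
    config.push_str(&format!(
        r#"
[[safekeepers]]
name = '{name}'
pg_port = {pg_port}
http_port = {http_port}
"#,
        name = name,
        pg_port = pg_port,
        http_port = http_port,
    ));
    config
}

fn main() {
    let base = String::from("pg_port = 64000\nhttp_port = 9898\nauth_type = 'Trust'\n");
    println!("{}", with_safekeeper(base, "single", 5454, 7676));
}
```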