From 198fc9ea53952fa67d1798a1790396462d88b0bd Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 7 Apr 2021 18:51:34 +0300 Subject: [PATCH 1/5] Capture initdb's stdout/stderr, to avoid messing with log formatting. Especially with --interactive. --- pageserver/src/walredo.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index a06c87d584..2eb1ea3a57 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -155,12 +155,14 @@ impl WalRedoProcess { Command::new("initdb") .args(&["-D", datadir.to_str().unwrap()]) .arg("-N") - .status(), + .output(), ) .expect("failed to execute initdb"); - if !initdb.success() { - panic!("initdb failed"); + if !initdb.status.success() { + panic!("initdb failed: {}\nstderr:\n{}", + std::str::from_utf8(&initdb.stdout).unwrap(), + std::str::from_utf8(&initdb.stderr).unwrap()); } // Start postgres itself From 0c6471ca0db9ccbf832fbc66c4a9a2cc1f2af3d1 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 7 Apr 2021 19:05:28 +0300 Subject: [PATCH 2/5] Print a few more LSNs in the standard format. 
--- pageserver/src/page_cache.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 7c77ca5926..8df35ea534 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -276,8 +276,8 @@ impl PageCache { shared = wait_result.0; if wait_result.1.timed_out() { return Err(format!( - "Timed out while waiting for WAL record at LSN {} to arrive", - lsn + "Timed out while waiting for WAL record at LSN {:X}/{:X} to arrive", + lsn >> 32, lsn & 0xffff_ffff ))?; } } @@ -286,7 +286,8 @@ } if lsn < shared.first_valid_lsn { - return Err(format!("LSN {} has already been removed", lsn))?; + return Err(format!("LSN {:X}/{:X} has already been removed", + lsn >> 32, lsn & 0xffff_ffff))?; } let pagecache = &shared.pagecache; From 3e09cb5718779073bd39911097ed62cda188a26b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 8 Apr 2021 13:30:10 +0300 Subject: [PATCH 3/5] Add README to give an overview of the WAL safekeeper. --- walkeeper/README | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 walkeeper/README diff --git a/walkeeper/README b/walkeeper/README new file mode 100644 index 0000000000..9672cc3b76 --- /dev/null +++ b/walkeeper/README @@ -0,0 +1,38 @@ +# WAL safekeeper + +Also known as the WAL service, WAL keeper or WAL acceptor. + +The WAL safekeeper acts as a holding area and redistribution center +for recently generated WAL. The primary Postgres server streams the +WAL to the WAL safekeeper, and treats it like a (synchronous) +replica. A replication slot is used in the primary to prevent the +primary from discarding WAL that hasn't been streamed to the +safekeeper yet. + +The primary connects to the WAL safekeeper, so it works in a "push" +fashion. That's different from how streaming replication usually +works, where the replica initiates the connection. 
To do that, there +is a component called "safekeeper_proxy". The safekeeper_proxy runs on +the same host as the primary Postgres server and connects to it to do +streaming replication. It also connects to the WAL safekeeper, and +forwards all the WAL. (PostgreSQL's archive_command works in the +"push" style, but it operates on a WAL segment granularity. If +PostgreSQL had a push style API for streaming, we wouldn't need the +proxy). + +The Page Server connects to the WAL safekeeper, using the same +streaming replication protocol that's used between Postgres primary +and standby. You can also connect the Page Server directly to a +primary PostgreSQL node for testing. + +In a production installation, there are multiple WAL safekeepers +running on different nodes, and there is a quorum mechanism using the +Paxos algorithm to ensure that a piece of WAL is considered as durable +only after it has been flushed to disk on more than half of the WAL +safekeepers. The Paxos and crash recovery algorithm ensures that only +one primary node can be actively streaming WAL to the quorum of +safekeepers. + + +See vendor/postgres/src/bin/safekeeper/README.md for a more detailed +description of the consensus protocol. (TODO: move the text here?) From ba4f8e94aa2c6703f5b0602babfc06166bd25b42 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 8 Apr 2021 13:30:42 +0300 Subject: [PATCH 4/5] Minor comment fixes. 
--- walkeeper/src/wal_service.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/walkeeper/src/wal_service.rs b/walkeeper/src/wal_service.rs index b91a966afa..d70c10ce22 100644 --- a/walkeeper/src/wal_service.rs +++ b/walkeeper/src/wal_service.rs @@ -585,7 +585,7 @@ impl Connection { fn set_system(&mut self, id: SystemId) -> Result<()> { let mut systems = SYSTEMS.lock().unwrap(); if id == 0 { - // non-multitenant configuration: just sigle instance + // non-multitenant configuration: just a single instance if let Some(system) = systems.values().next() { self.system = Some(system.clone()); return Ok(()); @@ -937,6 +937,10 @@ impl Connection { /* * Always start streaming at the beginning of a segment + * + * FIXME: It is common practice to start streaming at the beginning of + * the segment, but it should be up to the client to decide that. We + * shouldn't enforce that here. */ start_pos -= XLogSegmentOffset(start_pos, wal_seg_size) as u64; From 542dffa4a68aec026ae47b462683327300c5c355 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 8 Apr 2021 20:33:51 +0300 Subject: [PATCH 5/5] Set LD_LIBRARY_PATH for tests --- integration_tests/tests/control_plane/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/integration_tests/tests/control_plane/mod.rs b/integration_tests/tests/control_plane/mod.rs index eab3f345af..5ace192ac0 100644 --- a/integration_tests/tests/control_plane/mod.rs +++ b/integration_tests/tests/control_plane/mod.rs @@ -178,6 +178,7 @@ impl PageServerNode { .arg("--skip-recovery") .env_clear() .env("PATH", PG_BIN_DIR.to_str().unwrap()) // path to postres-wal-redo binary + .env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap()) .status() .expect("failed to start pageserver");