Normalize last_record LSN in wal receiver (#2529)

* Add test for branching on page boundary * Normalize start recovery point Co-authored-by: Heikki Linnakangas <heikki@neon.tech> Co-authored-by: Thang Pham <thang@neon.tech>
2026-01-08 22:12:56 +00:00 · 2022-10-06 09:01:56 +03:00
parent f25dd75be9
commit ff8c481777
2 changed files with 48 additions and 0 deletions
--- a/pageserver/src/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/walreceiver/walreceiver_connection.rs
@@ -12,6 +12,8 @@ use chrono::{NaiveDateTime, Utc};
 use fail::fail_point;
 use futures::StreamExt;
 use postgres::{SimpleQueryMessage, SimpleQueryRow};
+use postgres_ffi::v14::xlog_utils::normalize_lsn;
+use postgres_ffi::WAL_SEGMENT_SIZE;
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
 use tokio::{pin, select, sync::watch, time};
@@ -156,6 +158,14 @@ pub async fn handle_walreceiver_connection(
    // There might be some padding after the last full record, skip it.
    startpoint += startpoint.calc_padding(8u32);

+    // If the starting point is at a WAL page boundary, skip past the page header. We don't need the page headers
+    // for anything, and in some corner cases, the compute node might have never generated the WAL for page headers
+    //. That happens if you create a branch at page boundary: the start point of the branch is at the page boundary,
+    // but when the compute node first starts on the branch, we normalize the first REDO position to just after the page
+    // header (see generate_pg_control()), so the WAL for the page header is never streamed from the compute node
+    //  to the safekeepers.
+    startpoint = normalize_lsn(startpoint, WAL_SEGMENT_SIZE);
+
    info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}...");

    let query = format!("START_REPLICATION PHYSICAL {startpoint}");
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -6,6 +6,8 @@ from typing import List
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
+from fixtures.types import Lsn
+from fixtures.utils import query_scalar
 from performance.test_perf_pgbench import get_scales_matrix


@@ -88,3 +90,39 @@ def test_branching_with_pgbench(
    for pg in pgs:
        res = pg.safe_psql("SELECT count(*) from pgbench_accounts")
        assert res[0] == (100000 * scale,)
+
+
+# Test branching from an "unnormalized" LSN.
+#
+# Context:
+# When doing basebackup for a newly created branch, pageserver generates
+# 'pg_control' file to bootstrap WAL segment by specifying the redo position
+# a "normalized" LSN based on the timeline's starting LSN:
+#
+# checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
+#
+# This test checks if the pageserver is able to handle a "unnormalized" starting LSN.
+#
+# Related: see discussion in https://github.com/neondatabase/neon/pull/2143#issuecomment-1209092186
+def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBin):
+    XLOG_BLCKSZ = 8192
+
+    env = neon_simple_env
+
+    env.neon_cli.create_branch("b0")
+    pg0 = env.postgres.create_start("b0")
+
+    pg_bin.run_capture(["pgbench", "-i", pg0.connstr()])
+
+    with pg0.cursor() as cur:
+        curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
+
+    # Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ`
+    # and is smaller than `curr_lsn`.
+    start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
+
+    log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
+    env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
+    pg1 = env.postgres.create_start("b1")
+
+    pg_bin.run_capture(["pgbench", "-i", pg1.connstr()])