mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-03 19:42:55 +00:00
On-demand WAL download for walsender (#6872)
## Problem

There is allegedly a bug where connecting a subscriber before WAL has been downloaded from the safekeeper causes an error.

## Summary of changes

Adds support for pausing safekeepers from sending WAL to computes, then creates a compute and attaches a subscriber while the safekeepers are in this paused state. This fails to reproduce the issue, but it is probably a good test to have.

---------

Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
This commit is contained in:
@@ -7,6 +7,7 @@ import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
logical_replication_sync,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
@@ -203,6 +204,81 @@ def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
wait_until(number_of_iterations=10, interval=2, func=partial(slot_removed, endpoint))
|
||||
|
||||
|
||||
# Tests that walsender correctly blocks until WAL is downloaded from safekeepers
|
||||
def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
    """
    Exercise logical replication while safekeepers are paused from sending WAL.

    Scenario, in order:
      1. Start an environment with three safekeepers, generate WAL, publish
         table ``t`` and let a vanilla Postgres subscriber sync the first row.
      2. Stop the subscriber, enable the "sk-pause-send" failpoint on every
         safekeeper, insert a second row and recreate the endpoint.
      3. Point the subscriber at the new endpoint and check that the second
         row does not arrive while the failpoint is on.
      4. Turn the failpoint off and check that the second row, and then a
         third one inserted afterwards, are replicated.
      5. Check that the subscriber log contains no
         "could not receive data from WAL stream" message.
    """
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    def set_send_failpoint(action):
        # Apply the same "sk-pause-send" failpoint action to every safekeeper.
        for safekeeper in env.safekeepers:
            safekeeper.http_client().configure_failpoints([("sk-pause-send", action)])

    def subscriber_rows():
        # First column of every row currently present in t on the subscriber.
        return [row[0] for row in vanilla_pg.safe_psql("select * from t")]

    env.neon_cli.create_branch("init")
    endpoint = env.endpoints.create_start("init")

    # Statements are executed one by one, in this order, on a single cursor.
    setup_statements = (
        "create table wal_generator (id serial primary key, data text)",
        """
INSERT INTO wal_generator (data)
SELECT repeat('A', 1024) -- Generates a kilobyte of data per row
FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of data
""",
        "create table t(a int)",
        "create publication pub for table t",
        "insert into t values (1)",
    )
    with endpoint.connect().cursor() as cur:
        for statement in setup_statements:
            cur.execute(statement)

    # Bring up the subscriber and let it catch up with the first row.
    vanilla_pg.start()
    vanilla_pg.safe_psql("create table t(a int)")
    # Single quotes are doubled because the connstr is embedded in a SQL literal.
    connstr = endpoint.connstr().replace("'", "''")
    vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub")
    logical_replication_sync(vanilla_pg, endpoint)
    vanilla_pg.stop()

    # Pause the safekeepers so that they can't send WAL (except to pageserver)
    set_send_failpoint("return")

    # Insert a 2
    with endpoint.connect().cursor() as cur:
        cur.execute("insert into t values (2)")

    endpoint.stop_and_destroy()

    # This new endpoint should contain [1, 2], but it can't access WAL from safekeeper
    endpoint = env.endpoints.create_start("init")
    with endpoint.connect().cursor() as cur:
        cur.execute("select * from t")
        publisher_rows = [row[0] for row in cur.fetchall()]
        assert publisher_rows == [1, 2]

    # Reconnect subscriber
    vanilla_pg.start()
    connstr = endpoint.connstr().replace("'", "''")
    vanilla_pg.safe_psql(f"alter subscription sub1 connection '{connstr}'")

    # Give the subscriber a fixed window in which it could (wrongly) receive the row.
    time.sleep(5)
    # Make sure the 2 isn't replicated
    assert subscriber_rows() == [1]

    # Re-enable WAL download
    set_send_failpoint("off")

    logical_replication_sync(vanilla_pg, endpoint)
    assert subscriber_rows() == [1, 2]

    # Check that local reads also work
    with endpoint.connect().cursor() as cur:
        cur.execute("insert into t values (3)")
    logical_replication_sync(vanilla_pg, endpoint)
    assert subscriber_rows() == [1, 2, 3]

    # The subscriber must never have logged a WAL stream receive failure.
    log_path = vanilla_pg.pgdatadir / "pg.log"
    with open(log_path, "r") as log_file:
        subscriber_log = log_file.read()
        assert "could not receive data from WAL stream" not in subscriber_log
|
||||
|
||||
|
||||
# Test compute start at an LSN whose page starts with a contrecord
|
||||
# https://github.com/neondatabase/neon/issues/5749
|
||||
def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
|
||||
Reference in New Issue
Block a user