Files
neon/test_runner/regress/test_ancestor_branch.py
Alex Chi Z 3fc3666df7 make flush frozen layer an atomic operation (#4720)
## Problem

close https://github.com/neondatabase/neon/issues/4712

## Summary of changes

Previously, when flushing frozen layers, it was split into two
operations: add delta layer to disk + remove frozen layer from memory.
This would cause a short period of time where we will have the same data
both in frozen and delta layer. In this PR, we merge them into one
atomic operation in layer map manager, therefore simplifying the code.
Note that if we decide to create image layers for L0 flush, it will
still be split into two operations on layer map.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Joonas Koivunen <joonas@neon.tech>
2023-07-20 13:39:19 -04:00

101 lines
3.7 KiB
Python

from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.types import TimelineId
from fixtures.utils import query_scalar
#
# Create ancestor branches off the main branch.
#
def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
tenant, _ = env.neon_cli.create_tenant(
conf={
"checkpoint_distance": "4194304",
"gc_period": "0s",
"compaction_period": "0s",
}
)
pageserver_http.configure_failpoints(("flush-frozen-pausable", "sleep(10000)"))
endpoint_branch0 = env.endpoints.create_start("main", tenant_id=tenant)
branch0_cur = endpoint_branch0.connect().cursor()
branch0_timeline = TimelineId(query_scalar(branch0_cur, "SHOW neon.timeline_id"))
log.info(f"b0 timeline {branch0_timeline}")
# Create table, and insert 100k rows.
branch0_lsn = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b0 at lsn {branch0_lsn}")
branch0_cur.execute("CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)")
branch0_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g
FROM generate_series(1, 100000) g
"""
)
lsn_100 = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 100k rows: {lsn_100}")
# Create branch1.
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
log.info("postgres is running on 'branch1' branch")
branch1_cur = endpoint_branch1.connect().cursor()
branch1_timeline = TimelineId(query_scalar(branch1_cur, "SHOW neon.timeline_id"))
log.info(f"b1 timeline {branch1_timeline}")
branch1_lsn = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b1 at lsn {branch1_lsn}")
# Insert 100k rows.
branch1_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g
FROM generate_series(1, 100000) g
"""
)
lsn_200 = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 200k rows: {lsn_200}")
# Create branch2.
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
log.info("postgres is running on 'branch2' branch")
branch2_cur = endpoint_branch2.connect().cursor()
branch2_timeline = TimelineId(query_scalar(branch2_cur, "SHOW neon.timeline_id"))
log.info(f"b2 timeline {branch2_timeline}")
branch2_lsn = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b2 at lsn {branch2_lsn}")
# Insert 100k rows.
branch2_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g
FROM generate_series(1, 100000) g
"""
)
lsn_300 = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 300k rows: {lsn_300}")
# Run compaction on branch1.
compact = f"compact {tenant} {branch1_timeline}"
log.info(compact)
pageserver_http.timeline_compact(tenant, branch1_timeline)
assert query_scalar(branch0_cur, "SELECT count(*) FROM foo") == 100000
assert query_scalar(branch1_cur, "SELECT count(*) FROM foo") == 200000
assert query_scalar(branch2_cur, "SELECT count(*) FROM foo") == 300000