Files
neon/test_runner/regress/test_ondemand_slru_download.py
Alex Chi Z. 9e6ca2932f fix(test): convert bool to lowercase when invoking neon-cli (#12688)
## Problem

There have been some inconsistencies in how tenant config is provided via
`tenant_create` versus via other tenant config APIs, due to how the
properties are processed: in `tenant_create`, the test framework calls
neon-cli and therefore passes those properties on the command line. In other
cases, it is done via the HTTP API by serializing directly to JSON.
When using the command line, the program only accepts serde-style booleans,
i.e. lowercase `true`/`false`.

## Summary of changes

Convert Python bool into `true`/`false` when using neon-cli.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-07-23 18:56:37 +00:00

162 lines
5.4 KiB
Python

from __future__ import annotations
import pytest
from fixtures.common_types import Lsn
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, tenant_get_shards
from fixtures.utils import query_scalar
#
# Test on-demand download of the pg_xact SLRUs
#
@pytest.mark.parametrize("shard_count", [None, 4])
def test_ondemand_download_pg_xact(neon_env_builder: NeonEnvBuilder, shard_count: int | None):
    """
    Exercise on-demand download of pg_xact SLRU segments.

    Fill several pg_xact segments, restart the endpoint (so the new instance
    must fetch them lazily), advance the WAL and run checkpoint/compact/GC on
    every shard, then verify the old segments can still be downloaded on demand.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count

    tenant_conf = {
        "lazy_slru_download": True,
        # set PITR interval to be small, so we can do GC
        "pitr_interval": "0 s",
    }
    env = neon_env_builder.init_start(
        initial_tenant_conf=tenant_conf, initial_tenant_shard_count=shard_count
    )
    timeline_id = env.initial_timeline
    tenant_id = env.initial_tenant

    endpoint = env.endpoints.create_start("main")
    cur = endpoint.connect().cursor()
    cur.execute("CREATE EXTENSION neon_test_utils")

    # Create a test table
    cur.execute("CREATE TABLE clogtest (id integer)")
    cur.execute("INSERT INTO clogtest VALUES (1)")

    # Burn through a lot of XIDs to create multiple pg_xact segments,
    # committing a marker row after each batch.
    for insert_sql in (
        "INSERT INTO clogtest VALUES (2)",
        "INSERT INTO clogtest VALUES (2)",
        "INSERT INTO clogtest VALUES (3)",
    ):
        for _ in range(1000):
            cur.execute("select test_consume_xids(10000);")
        cur.execute(insert_sql)

    # Restart postgres. After restart, the new instance will download the
    # pg_xact segments lazily.
    endpoint.stop()
    endpoint.start()
    cur = endpoint.connect().cursor()

    # Generate more WAL so the pageserver can compact and GC older data,
    # including the LSN the new endpoint started at.
    cur.execute("CREATE TABLE anothertable (i int, t text)")
    cur.execute(
        "INSERT INTO anothertable SELECT g, 'long string to consume some space' || g FROM generate_series(1, 10000) g"
    )

    # Checkpoint, compact and GC every shard.
    for tenant_shard_id, pageserver in tenant_get_shards(env, tenant_id, None):
        http = pageserver.http_client()
        http.timeline_checkpoint(tenant_shard_id, timeline_id)
        http.timeline_compact(tenant_shard_id, timeline_id)
        http.timeline_gc(tenant_shard_id, timeline_id, 0)

    # The old pg_xact segments must still be downloadable on demand.
    cur.execute("select xmin, xmax, * from clogtest")
    rows = cur.fetchall()
    log.info(f"tuples = {rows}")
@pytest.mark.parametrize("shard_count", [None, 4])
def test_ondemand_download_replica(neon_env_builder: NeonEnvBuilder, shard_count: int | None):
    """
    Exercise on-demand SLRU download on a standby.

    A standby started at an LSN where a transaction was still in progress must
    not see that transaction as committed, even after the primary commits it.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count

    tenant_conf = {
        "lazy_slru_download": True,
    }
    env = neon_env_builder.init_start(
        initial_tenant_conf=tenant_conf, initial_tenant_shard_count=shard_count
    )

    endpoint = env.endpoints.create_start("main")
    primary_cur = endpoint.connect().cursor()
    primary_cur.execute("CREATE EXTENSION neon_test_utils")

    # Create a test table
    primary_cur.execute("CREATE TABLE clogtest (id integer)")
    primary_cur.execute("INSERT INTO clogtest VALUES (1)")

    # Consume a lot of XIDs, to create more pg_xact segments
    for _ in range(1000):
        primary_cur.execute("select test_consume_xids(10000);")

    # On a second connection, insert a row but leave the transaction open.
    open_txn_cur = endpoint.connect().cursor()
    open_txn_cur.execute("BEGIN")
    open_txn_cur.execute("INSERT INTO clogtest VALUES (2)")

    # Another committed insert on the first connection.
    for _ in range(1000):
        primary_cur.execute("select test_consume_xids(10000);")
    primary_cur.execute("INSERT INTO clogtest VALUES (3)")

    # Start standby at this point in time
    lsn = Lsn(query_scalar(primary_cur, "SELECT pg_current_wal_insert_lsn()"))
    endpoint_at_lsn = env.endpoints.create_start(
        branch_name="main", endpoint_id="ep-at-lsn", lsn=lsn
    )

    # Commit transaction 2, after the standby was launched.
    open_txn_cur.execute("COMMIT")

    # The replica should not see transaction 2 as committed.
    replica_cur = endpoint_at_lsn.connect().cursor()
    replica_cur.execute("SELECT * FROM clogtest")
    assert replica_cur.fetchall() == [(1,), (3,)]
def test_ondemand_download_after_wal_switch(neon_env_builder: NeonEnvBuilder):
    """
    Test on-demand SLRU download on standby, when starting right after
    WAL segment switch.

    This is a repro for a bug in how the LSN at WAL page/segment
    boundary was handled (https://github.com/neondatabase/neon/issues/8030)
    """
    env = neon_env_builder.init_start(
        initial_tenant_conf={
            "lazy_slru_download": True,
        }
    )

    endpoint = env.endpoints.create_start("main")
    cur = endpoint.connect().cursor()

    # Create a test table
    cur.execute("CREATE TABLE clogtest (id integer)")
    cur.execute("INSERT INTO clogtest VALUES (1)")

    # Start standby exactly at a WAL segment boundary.
    cur.execute("SELECT pg_switch_wal()")
    lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
    _endpoint_at_lsn = env.endpoints.create_start(
        branch_name="main", endpoint_id="ep-at-lsn", lsn=lsn
    )