mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 05:22:56 +00:00
ondemand_download_large_rel: solve flakyness (#3697)
Disable background tasks to not get compaction downloading all layers but also stop safekeepers before checkpointing, use a readonly endpoint. Fixes: #3666 Co-authored-by: Christian Schwarz <christian@neon.tech>
This commit is contained in:
@@ -64,12 +64,15 @@ def test_ondemand_download_large_rel(
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable background GC
|
||||
"gc_period": "10 m",
|
||||
"gc_period": "0s",
|
||||
"gc_horizon": f"{10 * 1024 ** 3}", # 10 GB
|
||||
# small checkpoint distance to create more delta layer files
|
||||
"checkpoint_distance": f"{10 * 1024 ** 2}", # 10 MB
|
||||
# allow compaction with the checkpoint
|
||||
"compaction_threshold": "3",
|
||||
"compaction_target_size": f"{10 * 1024 ** 2}", # 10 MB
|
||||
# but don't run compaction in background or on restart
|
||||
"compaction_period": "0s",
|
||||
}
|
||||
)
|
||||
env.initial_tenant = tenant
|
||||
@@ -96,9 +99,17 @@ def test_ondemand_download_large_rel(
|
||||
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
# wait until pageserver receives that data
|
||||
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
# stop endpoint before checkpoint to stop wal generation
|
||||
endpoint.stop()
|
||||
|
||||
# stopping of safekeepers now will help us not to calculate logical size
|
||||
# after startup, so page requests should be the only one on-demand
|
||||
# downloading the layers
|
||||
for sk in env.safekeepers:
|
||||
sk.stop()
|
||||
|
||||
# run checkpoint manually to be sure that data landed in remote storage
|
||||
client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
@@ -107,7 +118,6 @@ def test_ondemand_download_large_rel(
|
||||
log.info("uploads have finished")
|
||||
|
||||
##### Stop the first pageserver instance, erase all its data
|
||||
endpoint.stop()
|
||||
env.pageserver.stop()
|
||||
|
||||
# remove all the layer files
|
||||
@@ -118,8 +128,13 @@ def test_ondemand_download_large_rel(
|
||||
##### Second start, restore the data and ensure it's the same
|
||||
env.pageserver.start()
|
||||
|
||||
endpoint.start()
|
||||
# start a readonly endpoint which we'll use to check the database.
|
||||
# readonly (with lsn=) is required so that we don't try to connect to
|
||||
# safekeepers, that have now been shut down.
|
||||
endpoint = env.endpoints.create_start("main", lsn=current_lsn)
|
||||
|
||||
before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
|
||||
assert before_downloads != 0, "basebackup should on-demand non-zero layers"
|
||||
|
||||
# Probe in the middle of the table. There's a high chance that the beginning
|
||||
# and end of the table was stored together in the same layer files with data
|
||||
|
||||
Reference in New Issue
Block a user