Compare commits

...

1 Commits

Author SHA1 Message Date
Dmitry Rodionov
66ad58e3c8 prove timeline resurrection during attach 2023-02-20 18:08:12 +02:00

View File

@@ -636,4 +636,95 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
time.sleep(10)
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_timeline_resurrection_on_attach(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
# Use this test to check more realistic SK ids: some etcd key parsing bugs were related,
# and this test needs SK to write data to pageserver, so it will be visible
neon_env_builder.safekeepers_id_start = 12
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_timeline_resurrection",
)
##### First start, insert data and upload it to the remote storage
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
client = env.pageserver.http_client()
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
with pg.cursor() as cur:
cur.execute("CREATE TABLE f (i integer);")
cur.execute("INSERT INTO f VALUES (generate_series(1,1000));")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# wait until pageserver receives that data
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
# run checkpoint manually to be sure that data landed in remote storage
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
# wait until pageserver successfully uploaded a checkpoint to remote storage
log.info("waiting for checkpoint upload")
wait_for_upload(client, tenant_id, timeline_id, current_lsn)
log.info("upload of checkpoint is done")
new_timeline_id = env.neon_cli.create_branch("new", "main")
new_pg = env.postgres.create_start("new")
with new_pg.cursor() as cur:
cur.execute("INSERT INTO f VALUES (generate_series(1,1000));")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# wait until pageserver receives that data
wait_for_last_record_lsn(client, tenant_id, new_timeline_id, current_lsn)
# run checkpoint manually to be sure that data landed in remote storage
pageserver_http.timeline_checkpoint(tenant_id, new_timeline_id)
# wait until pageserver successfully uploaded a checkpoint to remote storage
log.info("waiting for checkpoint upload")
wait_for_upload(client, tenant_id, new_timeline_id, current_lsn)
log.info("upload of checkpoint is done")
# delete new timeline
client.timeline_delete(tenant_id=tenant_id, timeline_id=new_timeline_id)
##### Stop the pageserver instance, erase all its data
env.postgres.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)
##### Second start, restore the data and ensure it's the same
env.pageserver.start()
client.tenant_attach(tenant_id=tenant_id)
def tenant_active():
all_states = client.tenant_list()
[tenant] = [t for t in all_states if TenantId(t["id"]) == tenant_id]
print(tenant)
assert tenant["state"] == "Active"
wait_until(
number_of_iterations=5,
interval=1,
func=tenant_active,
)
timelines = client.timeline_list(tenant_id=tenant_id)
assert len(timelines) == 1
# TODO Test that we correctly handle GC of files that are stuck in upload queue.