mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
Ensure branches with no layers have their remote storage counterpart created eventually (#3857)
Discovered during writing a test for https://github.com/neondatabase/neon/pull/3843
This commit is contained in:
@@ -11,8 +11,10 @@ from typing import Dict, List, Tuple
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
LocalFsStorage,
|
||||
NeonEnvBuilder,
|
||||
PageserverApiException,
|
||||
PageserverHttpClient,
|
||||
RemoteStorageKind,
|
||||
available_remote_storages,
|
||||
wait_for_last_flush_lsn,
|
||||
@@ -421,23 +423,6 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
|
||||
def get_queued_count(file_kind, op_kind):
|
||||
val = client.get_remote_timeline_client_metric(
|
||||
"pageserver_remote_timeline_client_calls_unfinished",
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
file_kind,
|
||||
op_kind,
|
||||
)
|
||||
if val is None:
|
||||
return val
|
||||
return int(val)
|
||||
|
||||
def wait_upload_queue_empty():
|
||||
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
|
||||
wait_until(2, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
|
||||
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)
|
||||
|
||||
calls_started: Dict[Tuple[str, str], List[int]] = {
|
||||
("layer", "upload"): [0],
|
||||
("index", "upload"): [0],
|
||||
@@ -478,7 +463,7 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
# create some layers & wait for uploads to finish
|
||||
churn("a", "b")
|
||||
|
||||
wait_upload_queue_empty()
|
||||
wait_upload_queue_empty(client, tenant_id, timeline_id)
|
||||
|
||||
# ensure that we updated the calls_started metric
|
||||
fetch_calls_started()
|
||||
@@ -637,4 +622,147 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
|
||||
# Ensures that such branch is still persisted on the remote storage, and can be restored during tenant (re)attach.
|
||||
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
|
||||
def test_empty_branch_remote_storage_upload(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
):
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=remote_storage_kind,
|
||||
test_name="test_empty_branch_remote_storage_upload",
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
|
||||
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
timelines_before_detach = set(
|
||||
map(
|
||||
lambda t: TimelineId(t["timeline_id"]),
|
||||
client.timeline_list(env.initial_tenant),
|
||||
)
|
||||
)
|
||||
expected_timelines = set([env.initial_timeline, new_branch_timeline_id])
|
||||
assert (
|
||||
timelines_before_detach == expected_timelines
|
||||
), f"Expected to have an initial timeline and the branch timeline only, but got {timelines_before_detach}"
|
||||
|
||||
client.tenant_detach(env.initial_tenant)
|
||||
client.tenant_attach(env.initial_tenant)
|
||||
wait_until_tenant_state(client, env.initial_tenant, "Active", 5)
|
||||
|
||||
timelines_after_detach = set(
|
||||
map(
|
||||
lambda t: TimelineId(t["timeline_id"]),
|
||||
client.timeline_list(env.initial_tenant),
|
||||
)
|
||||
)
|
||||
|
||||
assert (
|
||||
timelines_before_detach == timelines_after_detach
|
||||
), f"Expected to have same timelines after reattach, but got {timelines_after_detach}"
|
||||
|
||||
|
||||
# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
|
||||
# Ensures the branch is not on the remote storage and restarts the pageserver — the branch should be uploaded after the restart.
|
||||
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
|
||||
def test_empty_branch_remote_storage_upload_on_restart(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
):
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=remote_storage_kind,
|
||||
test_name="test_empty_branch_remote_storage_upload_on_restart",
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
|
||||
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
env.pageserver.stop()
|
||||
|
||||
# Remove new branch from the remote storage
|
||||
assert isinstance(env.remote_storage, LocalFsStorage)
|
||||
new_branch_on_remote_storage = (
|
||||
env.remote_storage.root
|
||||
/ "tenants"
|
||||
/ str(env.initial_tenant)
|
||||
/ "timelines"
|
||||
/ str(new_branch_timeline_id)
|
||||
)
|
||||
assert (
|
||||
new_branch_on_remote_storage.is_dir()
|
||||
), f"'{new_branch_on_remote_storage}' path does not exist on the remote storage"
|
||||
shutil.rmtree(new_branch_on_remote_storage)
|
||||
|
||||
env.pageserver.start()
|
||||
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
assert (
|
||||
new_branch_on_remote_storage.is_dir()
|
||||
), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
|
||||
|
||||
|
||||
def wait_upload_queue_empty(
|
||||
client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
|
||||
):
|
||||
wait_until(
|
||||
2,
|
||||
1,
|
||||
lambda: get_queued_count(
|
||||
client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"
|
||||
)
|
||||
== 0,
|
||||
)
|
||||
wait_until(
|
||||
2,
|
||||
1,
|
||||
lambda: get_queued_count(
|
||||
client, tenant_id, timeline_id, file_kind="index", op_kind="upload"
|
||||
)
|
||||
== 0,
|
||||
)
|
||||
wait_until(
|
||||
2,
|
||||
1,
|
||||
lambda: get_queued_count(
|
||||
client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"
|
||||
)
|
||||
== 0,
|
||||
)
|
||||
|
||||
|
||||
def get_queued_count(
|
||||
client: PageserverHttpClient,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
file_kind: str,
|
||||
op_kind: str,
|
||||
):
|
||||
val = client.get_remote_timeline_client_metric(
|
||||
"pageserver_remote_timeline_client_calls_unfinished",
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
file_kind,
|
||||
op_kind,
|
||||
)
|
||||
if val is None:
|
||||
return val
|
||||
return int(val)
|
||||
|
||||
|
||||
# TODO Test that we correctly handle GC of files that are stuck in upload queue.
|
||||
|
||||
Reference in New Issue
Block a user