diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 5f1e23b873..b462c93b2d 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -478,7 +478,7 @@ impl Tenant {
         let dummy_timeline = self.create_timeline_data(
             timeline_id,
-            up_to_date_metadata.clone(),
+            up_to_date_metadata,
             ancestor.clone(),
             remote_client,
         )?;
@@ -503,7 +503,7 @@ impl Tenant {
         let broken_timeline = self
             .create_timeline_data(
                 timeline_id,
-                up_to_date_metadata.clone(),
+                up_to_date_metadata,
                 ancestor.clone(),
                 None,
             )
@@ -1142,7 +1142,7 @@ impl Tenant {
         );
         self.prepare_timeline(
             new_timeline_id,
-            new_metadata,
+            &new_metadata,
             timeline_uninit_mark,
             true,
             None,
@@ -1700,7 +1700,7 @@ impl Tenant {
     fn create_timeline_data(
         &self,
         new_timeline_id: TimelineId,
-        new_metadata: TimelineMetadata,
+        new_metadata: &TimelineMetadata,
         ancestor: Option<Arc<Timeline>>,
         remote_client: Option<RemoteTimelineClient>,
     ) -> anyhow::Result<Arc<Timeline>> {
@@ -2160,13 +2160,25 @@ impl Tenant {
         let new_timeline = self
             .prepare_timeline(
                 dst_id,
-                metadata,
+                &metadata,
                 timeline_uninit_mark,
                 false,
                 Some(Arc::clone(src_timeline)),
             )?
             .initialize_with_lock(&mut timelines, true, true)?;
         drop(timelines);
+
+        // The root timeline gets its layers during creation and uploads them along with the metadata.
+        // A branch timeline, however, may receive no writes for some time after creation, so it won't
+        // have any layers yet. We still need to upload its metadata eagerly: if other nodes `attach`
+        // the tenant and miss this timeline, their GC could get incorrect information and remove more
+        // layers than needed. See also https://github.com/neondatabase/neon/issues/3865
+        if let Some(remote_client) = new_timeline.remote_client.as_ref() {
+            remote_client
+                .schedule_index_upload_for_metadata_update(&metadata)
+                .context("branch initial metadata upload")?;
+        }

         info!("branched timeline {dst_id} from {src_id} at {start_lsn}");

         Ok(new_timeline)
@@ -2229,7 +2241,7 @@ impl Tenant {
             pg_version,
         );
         let raw_timeline =
-            self.prepare_timeline(timeline_id, new_metadata, timeline_uninit_mark, true, None)?;
+            self.prepare_timeline(timeline_id, &new_metadata, timeline_uninit_mark, true, None)?;

         let tenant_id = raw_timeline.owning_tenant.tenant_id;
         let unfinished_timeline = raw_timeline.raw_timeline()?;
@@ -2283,7 +2295,7 @@ impl Tenant {
     fn prepare_timeline(
         &self,
         new_timeline_id: TimelineId,
-        new_metadata: TimelineMetadata,
+        new_metadata: &TimelineMetadata,
         uninit_mark: TimelineUninitMark,
         init_layers: bool,
         ancestor: Option<Arc<Timeline>>,
@@ -2297,7 +2309,7 @@ impl Tenant {
                 tenant_id,
                 new_timeline_id,
             );
-            remote_client.init_upload_queue_for_empty_remote(&new_metadata)?;
+            remote_client.init_upload_queue_for_empty_remote(new_metadata)?;
             Some(remote_client)
         } else {
             None
@@ -2336,17 +2348,12 @@ impl Tenant {
         &self,
         timeline_path: &Path,
         new_timeline_id: TimelineId,
-        new_metadata: TimelineMetadata,
+        new_metadata: &TimelineMetadata,
         ancestor: Option<Arc<Timeline>>,
         remote_client: Option<RemoteTimelineClient>,
     ) -> anyhow::Result<Arc<Timeline>> {
         let timeline_data = self
-            .create_timeline_data(
-                new_timeline_id,
-                new_metadata.clone(),
-                ancestor,
-                remote_client,
-            )
+            .create_timeline_data(new_timeline_id, new_metadata, ancestor, remote_client)
             .context("Failed to create timeline data structure")?;

         crashsafe::create_dir_all(timeline_path).context("Failed to create timeline directory")?;
@@ -2358,7 +2365,7 @@ impl Tenant {
             self.conf,
             new_timeline_id,
             self.tenant_id,
-            &new_metadata,
+            new_metadata,
             true,
         )
         .context("Failed to create timeline metadata")?;
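Reviewer note: the signature changes in tenant.rs all follow one pattern: `TimelineMetadata` moves from pass-by-value to pass-by-reference, which is what lets `branch_timeline` reuse the metadata for the new eager index upload without the `.clone()` calls removed above. A minimal, self-contained sketch of that ownership pattern follows; every name in it is a hypothetical stand-in, not the pageserver's actual API.

// A toy model of the ownership pattern in this file. All names are
// illustrative stand-ins for the pageserver's real types and functions.
#[derive(Debug)]
struct TimelineMetadataModel {
    disk_consistent_lsn: u64,
}

// Stands in for `prepare_timeline`: borrows the metadata instead of consuming it.
fn prepare_timeline_model(metadata: &TimelineMetadataModel) {
    println!("init upload queue at lsn {}", metadata.disk_consistent_lsn);
}

// Stands in for the new eager `schedule_index_upload_for_metadata_update` call.
fn schedule_metadata_upload_model(metadata: &TimelineMetadataModel) {
    println!("schedule index upload at lsn {}", metadata.disk_consistent_lsn);
}

fn main() {
    let metadata = TimelineMetadataModel { disk_consistent_lsn: 42 };
    prepare_timeline_model(&metadata);
    // Before this patch the first call took the metadata by value, so reusing
    // it here required a clone; with a borrow, the value is still available.
    schedule_metadata_upload_model(&metadata);
}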
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 4d03a78883..33909e749b 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1163,7 +1163,7 @@ impl Timeline {
     pub(super) fn new(
         conf: &'static PageServerConf,
         tenant_conf: Arc<RwLock<TenantConfOpt>>,
-        metadata: TimelineMetadata,
+        metadata: &TimelineMetadata,
         ancestor: Option<Arc<Timeline>>,
         timeline_id: TimelineId,
         tenant_id: TenantId,
@@ -1629,6 +1629,8 @@ impl Timeline {
             .map(|l| (l.filename(), l))
             .collect::<HashMap<_, _>>();

+        // If no writes happen, new branches do not have any layers, only the metadata file.
+        let has_local_layers = !local_layers.is_empty();
         let local_only_layers = match index_part {
             Some(index_part) => {
                 info!(
@@ -1646,21 +1648,40 @@ impl Timeline {
             }
         };

-        // Are there local files that don't exist remotely? Schedule uploads for them
-        for (layer_name, layer) in &local_only_layers {
-            // XXX solve this in the type system
-            let layer_path = layer
-                .local_path()
-                .expect("local_only_layers only contains local layers");
-            let layer_size = layer_path
-                .metadata()
-                .with_context(|| format!("failed to get file {layer_path:?} metadata"))?
-                .len();
-            info!("scheduling {layer_path:?} for upload");
-            remote_client
-                .schedule_layer_file_upload(layer_name, &LayerFileMetadata::new(layer_size))?;
+        if has_local_layers {
+            // Are there local files that don't exist remotely? Schedule uploads for them.
+            // The local timeline metadata will get uploaded to remote storage along with the layers.
+            for (layer_name, layer) in &local_only_layers {
+                // XXX solve this in the type system
+                let layer_path = layer
+                    .local_path()
+                    .expect("local_only_layers only contains local layers");
+                let layer_size = layer_path
+                    .metadata()
+                    .with_context(|| format!("failed to get file {layer_path:?} metadata"))?
+                    .len();
+                info!("scheduling {layer_path:?} for upload");
+                remote_client
+                    .schedule_layer_file_upload(layer_name, &LayerFileMetadata::new(layer_size))?;
+            }
+            remote_client.schedule_index_upload_for_file_changes()?;
+        } else if index_part.is_none() {
+            // No data on the remote storage, no local layers, only a local metadata file.
+            //
+            // TODO https://github.com/neondatabase/neon/issues/3865
+            // Currently, the console does not wait for the timeline data upload to the remote storage
+            // and considers the timeline created, expecting other pageserver nodes to work with it.
+            // The branch metadata upload could get interrupted (e.g. the pageserver got killed),
+            // hence any locally existing branch metadata with no remote counterpart should be uploaded,
+            // otherwise other pageservers won't see the branch on `attach`.
+            //
+            // After the issue gets implemented, the pageserver should rather remove the branch instead:
+            // absence on S3 means we did not acknowledge the branch creation and the console will have
+            // to retry, so there is no need to keep the old files.
+            remote_client.schedule_index_upload_for_metadata_update(up_to_date_metadata)?;
+        } else {
+            // The local timeline has a metadata file and the remote one does too; neither has layers to sync.
         }
-        remote_client.schedule_index_upload_for_file_changes()?;

         info!("Done");
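The rewritten attach-time logic above boils down to a three-way case analysis on (local layers present, remote index_part present). Below is a hedged, self-contained model of that decision, using illustrative stand-in names rather than the real pageserver types, so the case analysis is easy to check in isolation.

// A minimal model of the attach-time upload decision introduced above; the
// enum and function names here are stand-ins, not the pageserver's real API.
#[derive(Debug, PartialEq)]
enum SyncAction {
    UploadLocalOnlyLayersAndIndex,
    UploadMetadataOnly,
    Nothing,
}

fn reconcile(has_local_layers: bool, has_remote_index: bool) -> SyncAction {
    if has_local_layers {
        // Local-only layers are scheduled for upload; the index/metadata
        // upload is scheduled afterwards and rides along with them.
        SyncAction::UploadLocalOnlyLayersAndIndex
    } else if !has_remote_index {
        // An empty branch whose creation-time upload was interrupted:
        // re-upload the metadata so other pageservers see it on `attach`.
        SyncAction::UploadMetadataOnly
    } else {
        // Metadata exists both locally and remotely, no layers to sync.
        SyncAction::Nothing
    }
}

fn main() {
    assert_eq!(reconcile(false, false), SyncAction::UploadMetadataOnly);
    assert_eq!(reconcile(false, true), SyncAction::Nothing);
    assert_eq!(reconcile(true, false), SyncAction::UploadLocalOnlyLayersAndIndex);
}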
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index 1f6f0c67cc..f6600e8974 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -11,8 +11,10 @@ from typing import Dict, List, Tuple
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
+    LocalFsStorage,
     NeonEnvBuilder,
     PageserverApiException,
+    PageserverHttpClient,
     RemoteStorageKind,
     available_remote_storages,
     wait_for_last_flush_lsn,
@@ -421,23 +423,6 @@ def test_remote_timeline_client_calls_started_metric(
     )
     wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)

-    def get_queued_count(file_kind, op_kind):
-        val = client.get_remote_timeline_client_metric(
-            "pageserver_remote_timeline_client_calls_unfinished",
-            tenant_id,
-            timeline_id,
-            file_kind,
-            op_kind,
-        )
-        if val is None:
-            return val
-        return int(val)
-
-    def wait_upload_queue_empty():
-        wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
-        wait_until(2, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
-        wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)
-
     calls_started: Dict[Tuple[str, str], List[int]] = {
         ("layer", "upload"): [0],
         ("index", "upload"): [0],
@@ -478,7 +463,7 @@ def test_remote_timeline_client_calls_started_metric(
     # create some layers & wait for uploads to finish
     churn("a", "b")

-    wait_upload_queue_empty()
+    wait_upload_queue_empty(client, tenant_id, timeline_id)

     # ensure that we updated the calls_started metric
     fetch_calls_started()
@@ -637,4 +622,147 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
         time.sleep(10)


+# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
+# Ensures that such a branch is still persisted on the remote storage and can be restored during tenant (re)attach.
+@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
+def test_empty_branch_remote_storage_upload(
+    neon_env_builder: NeonEnvBuilder,
+    remote_storage_kind: RemoteStorageKind,
+):
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_empty_branch_remote_storage_upload",
+    )
+
+    env = neon_env_builder.init_start()
+    client = env.pageserver.http_client()
+
+    new_branch_name = "new_branch"
+    new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
+
+    with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
+        wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
+        wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
+
+    timelines_before_detach = set(
+        map(
+            lambda t: TimelineId(t["timeline_id"]),
+            client.timeline_list(env.initial_tenant),
+        )
+    )
+    expected_timelines = set([env.initial_timeline, new_branch_timeline_id])
+    assert (
+        timelines_before_detach == expected_timelines
+    ), f"Expected to have an initial timeline and the branch timeline only, but got {timelines_before_detach}"
+
+    client.tenant_detach(env.initial_tenant)
+    client.tenant_attach(env.initial_tenant)
+    wait_until_tenant_state(client, env.initial_tenant, "Active", 5)
+
+    timelines_after_detach = set(
+        map(
+            lambda t: TimelineId(t["timeline_id"]),
+            client.timeline_list(env.initial_tenant),
+        )
+    )
+
+    assert (
+        timelines_before_detach == timelines_after_detach
+    ), f"Expected to have the same timelines after reattach, but got {timelines_after_detach}"
+
+
+# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
+# Removes the branch from the remote storage and restarts the pageserver; the branch should be reuploaded after the restart.
+@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
+def test_empty_branch_remote_storage_upload_on_restart(
+    neon_env_builder: NeonEnvBuilder,
+    remote_storage_kind: RemoteStorageKind,
+):
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_empty_branch_remote_storage_upload_on_restart",
+    )
+
+    env = neon_env_builder.init_start()
+    client = env.pageserver.http_client()
+
+    new_branch_name = "new_branch"
+    new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
+
+    with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
+        wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
+        wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
+
+    env.pageserver.stop()
+
+    # Remove the new branch from the remote storage
+    assert isinstance(env.remote_storage, LocalFsStorage)
+    new_branch_on_remote_storage = (
+        env.remote_storage.root
+        / "tenants"
+        / str(env.initial_tenant)
+        / "timelines"
+        / str(new_branch_timeline_id)
+    )
+    assert (
+        new_branch_on_remote_storage.is_dir()
+    ), f"'{new_branch_on_remote_storage}' path does not exist on the remote storage"
+    shutil.rmtree(new_branch_on_remote_storage)
+
+    env.pageserver.start()
+
+    wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
+    assert (
+        new_branch_on_remote_storage.is_dir()
+    ), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
+
+
+def wait_upload_queue_empty(
+    client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
+):
+    wait_until(
+        2,
+        1,
+        lambda: get_queued_count(
+            client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"
+        )
+        == 0,
+    )
+    wait_until(
+        2,
+        1,
+        lambda: get_queued_count(
+            client, tenant_id, timeline_id, file_kind="index", op_kind="upload"
+        )
+        == 0,
+    )
+    wait_until(
+        2,
+        1,
+        lambda: get_queued_count(
+            client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"
+        )
+        == 0,
+    )
+
+
+def get_queued_count(
+    client: PageserverHttpClient,
+    tenant_id: TenantId,
+    timeline_id: TimelineId,
+    file_kind: str,
+    op_kind: str,
+):
+    val = client.get_remote_timeline_client_metric(
+        "pageserver_remote_timeline_client_calls_unfinished",
+        tenant_id,
+        timeline_id,
+        file_kind,
+        op_kind,
+    )
+    if val is None:
+        return val
+    return int(val)
+
+
 # TODO Test that we correctly handle GC of files that are stuck in upload queue.