Ensure branches with no layers have their remote storage counterpart created eventually (#3857)

Discovered during writing a test for
https://github.com/neondatabase/neon/pull/3843
This commit is contained in:
Kirill Bulatov
2023-03-22 17:42:31 +02:00
committed by GitHub
parent 6033dfdf4a
commit 8bd565e09e
3 changed files with 205 additions and 49 deletions

View File

@@ -478,7 +478,7 @@ impl Tenant {
let dummy_timeline = self.create_timeline_data(
timeline_id,
up_to_date_metadata.clone(),
up_to_date_metadata,
ancestor.clone(),
remote_client,
)?;
@@ -503,7 +503,7 @@ impl Tenant {
let broken_timeline = self
.create_timeline_data(
timeline_id,
up_to_date_metadata.clone(),
up_to_date_metadata,
ancestor.clone(),
None,
)
@@ -1142,7 +1142,7 @@ impl Tenant {
);
self.prepare_timeline(
new_timeline_id,
new_metadata,
&new_metadata,
timeline_uninit_mark,
true,
None,
@@ -1700,7 +1700,7 @@ impl Tenant {
fn create_timeline_data(
&self,
new_timeline_id: TimelineId,
new_metadata: TimelineMetadata,
new_metadata: &TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
remote_client: Option<RemoteTimelineClient>,
) -> anyhow::Result<Arc<Timeline>> {
@@ -2160,13 +2160,25 @@ impl Tenant {
let new_timeline = self
.prepare_timeline(
dst_id,
metadata,
&metadata,
timeline_uninit_mark,
false,
Some(Arc::clone(src_timeline)),
)?
.initialize_with_lock(&mut timelines, true, true)?;
drop(timelines);
// Root timeline gets its layers during creation and uploads them along with the metadata.
// A branch timeline though, when created, can get no writes for some time, hence won't get any layers created.
// We still need to upload its metadata eagerly: if other nodes `attach` the tenant and miss this timeline, their GC
// could get incorrect information and remove more layers, than needed.
// See also https://github.com/neondatabase/neon/issues/3865
if let Some(remote_client) = new_timeline.remote_client.as_ref() {
remote_client
.schedule_index_upload_for_metadata_update(&metadata)
.context("branch initial metadata upload")?;
}
info!("branched timeline {dst_id} from {src_id} at {start_lsn}");
Ok(new_timeline)
@@ -2229,7 +2241,7 @@ impl Tenant {
pg_version,
);
let raw_timeline =
self.prepare_timeline(timeline_id, new_metadata, timeline_uninit_mark, true, None)?;
self.prepare_timeline(timeline_id, &new_metadata, timeline_uninit_mark, true, None)?;
let tenant_id = raw_timeline.owning_tenant.tenant_id;
let unfinished_timeline = raw_timeline.raw_timeline()?;
@@ -2283,7 +2295,7 @@ impl Tenant {
fn prepare_timeline(
&self,
new_timeline_id: TimelineId,
new_metadata: TimelineMetadata,
new_metadata: &TimelineMetadata,
uninit_mark: TimelineUninitMark,
init_layers: bool,
ancestor: Option<Arc<Timeline>>,
@@ -2297,7 +2309,7 @@ impl Tenant {
tenant_id,
new_timeline_id,
);
remote_client.init_upload_queue_for_empty_remote(&new_metadata)?;
remote_client.init_upload_queue_for_empty_remote(new_metadata)?;
Some(remote_client)
} else {
None
@@ -2336,17 +2348,12 @@ impl Tenant {
&self,
timeline_path: &Path,
new_timeline_id: TimelineId,
new_metadata: TimelineMetadata,
new_metadata: &TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
remote_client: Option<RemoteTimelineClient>,
) -> anyhow::Result<Arc<Timeline>> {
let timeline_data = self
.create_timeline_data(
new_timeline_id,
new_metadata.clone(),
ancestor,
remote_client,
)
.create_timeline_data(new_timeline_id, new_metadata, ancestor, remote_client)
.context("Failed to create timeline data structure")?;
crashsafe::create_dir_all(timeline_path).context("Failed to create timeline directory")?;
@@ -2358,7 +2365,7 @@ impl Tenant {
self.conf,
new_timeline_id,
self.tenant_id,
&new_metadata,
new_metadata,
true,
)
.context("Failed to create timeline metadata")?;

View File

@@ -1163,7 +1163,7 @@ impl Timeline {
pub(super) fn new(
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
metadata: TimelineMetadata,
metadata: &TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
timeline_id: TimelineId,
tenant_id: TenantId,
@@ -1629,6 +1629,8 @@ impl Timeline {
.map(|l| (l.filename(), l))
.collect::<HashMap<_, _>>();
// If no writes happen, new branches do not have any layers, only the metadata file.
let has_local_layers = !local_layers.is_empty();
let local_only_layers = match index_part {
Some(index_part) => {
info!(
@@ -1646,21 +1648,40 @@ impl Timeline {
}
};
// Are there local files that don't exist remotely? Schedule uploads for them
for (layer_name, layer) in &local_only_layers {
// XXX solve this in the type system
let layer_path = layer
.local_path()
.expect("local_only_layers only contains local layers");
let layer_size = layer_path
.metadata()
.with_context(|| format!("failed to get file {layer_path:?} metadata"))?
.len();
info!("scheduling {layer_path:?} for upload");
remote_client
.schedule_layer_file_upload(layer_name, &LayerFileMetadata::new(layer_size))?;
if has_local_layers {
// Are there local files that don't exist remotely? Schedule uploads for them.
// Local timeline metadata will get uploaded to remove along witht he layers.
for (layer_name, layer) in &local_only_layers {
// XXX solve this in the type system
let layer_path = layer
.local_path()
.expect("local_only_layers only contains local layers");
let layer_size = layer_path
.metadata()
.with_context(|| format!("failed to get file {layer_path:?} metadata"))?
.len();
info!("scheduling {layer_path:?} for upload");
remote_client
.schedule_layer_file_upload(layer_name, &LayerFileMetadata::new(layer_size))?;
}
remote_client.schedule_index_upload_for_file_changes()?;
} else if index_part.is_none() {
// No data on the remote storage, no local layers, local metadata file.
//
// TODO https://github.com/neondatabase/neon/issues/3865
// Currently, console does not wait for the timeline data upload to the remote storage
// and considers the timeline created, expecting other pageserver nodes to work with it.
// Branch metadata upload could get interrupted (e.g pageserver got killed),
// hence any locally existing branch metadata with no remote counterpart should be uploaded,
// otherwise any other pageserver won't see the branch on `attach`.
//
// After the issue gets implemented, pageserver should rather remove the branch,
// since absence on S3 means we did not acknowledge the branch creation and console will have to retry,
// no need to keep the old files.
remote_client.schedule_index_upload_for_metadata_update(up_to_date_metadata)?;
} else {
// Local timeline has a metadata file, remote one too, both have no layers to sync.
}
remote_client.schedule_index_upload_for_file_changes()?;
info!("Done");

View File

@@ -11,8 +11,10 @@ from typing import Dict, List, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LocalFsStorage,
NeonEnvBuilder,
PageserverApiException,
PageserverHttpClient,
RemoteStorageKind,
available_remote_storages,
wait_for_last_flush_lsn,
@@ -421,23 +423,6 @@ def test_remote_timeline_client_calls_started_metric(
)
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
def get_queued_count(file_kind, op_kind):
val = client.get_remote_timeline_client_metric(
"pageserver_remote_timeline_client_calls_unfinished",
tenant_id,
timeline_id,
file_kind,
op_kind,
)
if val is None:
return val
return int(val)
def wait_upload_queue_empty():
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
wait_until(2, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)
calls_started: Dict[Tuple[str, str], List[int]] = {
("layer", "upload"): [0],
("index", "upload"): [0],
@@ -478,7 +463,7 @@ def test_remote_timeline_client_calls_started_metric(
# create some layers & wait for uploads to finish
churn("a", "b")
wait_upload_queue_empty()
wait_upload_queue_empty(client, tenant_id, timeline_id)
# ensure that we updated the calls_started metric
fetch_calls_started()
@@ -637,4 +622,147 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
time.sleep(10)
# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
# Ensures that such branch is still persisted on the remote storage, and can be restored during tenant (re)attach.
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_empty_branch_remote_storage_upload(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_empty_branch_remote_storage_upload",
)
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
timelines_before_detach = set(
map(
lambda t: TimelineId(t["timeline_id"]),
client.timeline_list(env.initial_tenant),
)
)
expected_timelines = set([env.initial_timeline, new_branch_timeline_id])
assert (
timelines_before_detach == expected_timelines
), f"Expected to have an initial timeline and the branch timeline only, but got {timelines_before_detach}"
client.tenant_detach(env.initial_tenant)
client.tenant_attach(env.initial_tenant)
wait_until_tenant_state(client, env.initial_tenant, "Active", 5)
timelines_after_detach = set(
map(
lambda t: TimelineId(t["timeline_id"]),
client.timeline_list(env.initial_tenant),
)
)
assert (
timelines_before_detach == timelines_after_detach
), f"Expected to have same timelines after reattach, but got {timelines_after_detach}"
# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
# Ensures the branch is not on the remote storage and restarts the pageserver — the branch should be uploaded after the restart.
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_empty_branch_remote_storage_upload_on_restart(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_empty_branch_remote_storage_upload_on_restart",
)
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
env.pageserver.stop()
# Remove new branch from the remote storage
assert isinstance(env.remote_storage, LocalFsStorage)
new_branch_on_remote_storage = (
env.remote_storage.root
/ "tenants"
/ str(env.initial_tenant)
/ "timelines"
/ str(new_branch_timeline_id)
)
assert (
new_branch_on_remote_storage.is_dir()
), f"'{new_branch_on_remote_storage}' path does not exist on the remote storage"
shutil.rmtree(new_branch_on_remote_storage)
env.pageserver.start()
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
assert (
new_branch_on_remote_storage.is_dir()
), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
def wait_upload_queue_empty(
client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
):
wait_until(
2,
1,
lambda: get_queued_count(
client, tenant_id, timeline_id, file_kind="layer", op_kind="upload"
)
== 0,
)
wait_until(
2,
1,
lambda: get_queued_count(
client, tenant_id, timeline_id, file_kind="index", op_kind="upload"
)
== 0,
)
wait_until(
2,
1,
lambda: get_queued_count(
client, tenant_id, timeline_id, file_kind="layer", op_kind="delete"
)
== 0,
)
def get_queued_count(
client: PageserverHttpClient,
tenant_id: TenantId,
timeline_id: TimelineId,
file_kind: str,
op_kind: str,
):
val = client.get_remote_timeline_client_metric(
"pageserver_remote_timeline_client_calls_unfinished",
tenant_id,
timeline_id,
file_kind,
op_kind,
)
if val is None:
return val
return int(val)
# TODO Test that we correctly handle GC of files that are stuck in upload queue.