keep successfully downloaded index parts

This commit is contained in:
Dmitry Rodionov
2022-07-14 12:51:55 +03:00
committed by Dmitry Rodionov
parent 912a08317b
commit 7987889cb3
3 changed files with 30 additions and 17 deletions

View File

@@ -1557,6 +1557,7 @@ fn schedule_first_sync_tasks(
local_timeline_init_statuses
}
/// bool in return value stands for awaits_download
fn compare_local_and_remote_timeline(
new_sync_tasks: &mut VecDeque<(ZTenantTimelineId, SyncTask)>,
sync_id: ZTenantTimelineId,
@@ -1566,14 +1567,6 @@ fn compare_local_and_remote_timeline(
) -> (LocalTimelineInitStatus, bool) {
let remote_files = remote_entry.stored_files();
// TODO probably here we need more sophisticated logic,
// if more data is available remotely can we just download what's there?
// without trying to upload something. It may be tricky, needs further investigation.
// For now looks strange that we can request upload
// and download for the same timeline simultaneously.
// (upload needs to be only for previously unsynced files, not whole timeline dir).
// If one of the tasks fails they will be reordered in the queue which can lead
// to timeline being stuck in evicted state
let number_of_layers_to_download = remote_files.difference(&local_files).count();
let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
new_sync_tasks.push_back((

View File

@@ -3,6 +3,7 @@
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
mem,
path::Path,
};
@@ -32,10 +33,29 @@ pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
// Poisoned variant is returned.
// When data is received succesfully without errors Present variant is used.
pub enum TenantIndexParts {
Poisoned(ZTenantTimelineId),
Poisoned {
present: HashMap<ZTimelineId, IndexPart>,
missing: HashSet<ZTimelineId>,
},
Present(HashMap<ZTimelineId, IndexPart>),
}
impl TenantIndexParts {
fn add_poisoned(&mut self, timeline_id: ZTimelineId) {
match self {
TenantIndexParts::Poisoned { missing, .. } => {
missing.insert(timeline_id);
}
TenantIndexParts::Present(present) => {
*self = TenantIndexParts::Poisoned {
present: mem::take(present),
missing: HashSet::from([timeline_id]),
}
}
}
}
}
impl Default for TenantIndexParts {
fn default() -> Self {
TenantIndexParts::Present(HashMap::default())
@@ -63,8 +83,8 @@ where
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
match index_parts.entry(id.tenant_id).or_default() {
TenantIndexParts::Poisoned(id) => {
warn!("disgarding index part for poisoned tenant, poisoned by id: {id}")
TenantIndexParts::Poisoned { present, .. } => {
present.insert(id.timeline_id, index_part);
}
TenantIndexParts::Present(parts) => {
parts.insert(id.timeline_id, index_part);
@@ -77,8 +97,8 @@ where
// thats ok because it means that we didnt upload something we have locally for example
}
e => {
*index_parts.entry(id.tenant_id).or_default() =
TenantIndexParts::Poisoned(id);
let tenant_parts = index_parts.entry(id.tenant_id).or_default();
tenant_parts.add_poisoned(id.timeline_id);
error!(
"Failed to fetch index part for {id}: {e} poisoning tenant index parts"
);
@@ -144,8 +164,8 @@ where
.remove(&tenant_id)
.ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))?
{
TenantIndexParts::Poisoned(id) => {
anyhow::bail!("failed to download index parts for all timeline: {id}")
TenantIndexParts::Poisoned { missing, .. } => {
anyhow::bail!("Failed to download index parts for all timelines. Missing {missing:?}")
}
TenantIndexParts::Present(parts) => Ok(parts),
}

View File

@@ -97,8 +97,8 @@ impl RemoteIndex {
for (tenant_id, index_parts) in index_parts {
match index_parts {
// TODO: should we schedule a retry so it can be recovered? otherwise there is no way to revive it other restarting whole pageserver
TenantIndexParts::Poisoned(id) => warn!("skipping tenant_id set up for remote index because the index download has failed for timeline {id}"),
// TODO: should we schedule a retry so it can be recovered? otherwise we can revive it only through detach/attach or pageserver restart
TenantIndexParts::Poisoned { missing, ..} => warn!("skipping tenant_id set up for remote index because the index download has failed for timeline(s): {missing:?}"),
TenantIndexParts::Present(timelines) => {
for (timeline_id, index_part) in timelines {
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);