mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
keep successfully downloaded index parts
This commit is contained in:
committed by
Dmitry Rodionov
parent
912a08317b
commit
7987889cb3
@@ -1557,6 +1557,7 @@ fn schedule_first_sync_tasks(
|
||||
local_timeline_init_statuses
|
||||
}
|
||||
|
||||
/// bool in return value stands for awaits_download
|
||||
fn compare_local_and_remote_timeline(
|
||||
new_sync_tasks: &mut VecDeque<(ZTenantTimelineId, SyncTask)>,
|
||||
sync_id: ZTenantTimelineId,
|
||||
@@ -1566,14 +1567,6 @@ fn compare_local_and_remote_timeline(
|
||||
) -> (LocalTimelineInitStatus, bool) {
|
||||
let remote_files = remote_entry.stored_files();
|
||||
|
||||
// TODO probably here we need more sophisticated logic,
|
||||
// if more data is available remotely can we just download what's there?
|
||||
// without trying to upload something. It may be tricky, needs further investigation.
|
||||
// For now looks strange that we can request upload
|
||||
// and download for the same timeline simultaneously.
|
||||
// (upload needs to be only for previously unsynced files, not whole timeline dir).
|
||||
// If one of the tasks fails they will be reordered in the queue which can lead
|
||||
// to timeline being stuck in evicted state
|
||||
let number_of_layers_to_download = remote_files.difference(&local_files).count();
|
||||
let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
|
||||
new_sync_tasks.push_back((
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fmt::Debug,
|
||||
mem,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
@@ -32,10 +33,29 @@ pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
|
||||
// Poisoned variant is returned.
|
||||
// When data is received succesfully without errors Present variant is used.
|
||||
pub enum TenantIndexParts {
|
||||
Poisoned(ZTenantTimelineId),
|
||||
Poisoned {
|
||||
present: HashMap<ZTimelineId, IndexPart>,
|
||||
missing: HashSet<ZTimelineId>,
|
||||
},
|
||||
Present(HashMap<ZTimelineId, IndexPart>),
|
||||
}
|
||||
|
||||
impl TenantIndexParts {
|
||||
fn add_poisoned(&mut self, timeline_id: ZTimelineId) {
|
||||
match self {
|
||||
TenantIndexParts::Poisoned { missing, .. } => {
|
||||
missing.insert(timeline_id);
|
||||
}
|
||||
TenantIndexParts::Present(present) => {
|
||||
*self = TenantIndexParts::Poisoned {
|
||||
present: mem::take(present),
|
||||
missing: HashSet::from([timeline_id]),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TenantIndexParts {
|
||||
fn default() -> Self {
|
||||
TenantIndexParts::Present(HashMap::default())
|
||||
@@ -63,8 +83,8 @@ where
|
||||
Ok(index_part) => {
|
||||
debug!("Successfully fetched index part for {id}");
|
||||
match index_parts.entry(id.tenant_id).or_default() {
|
||||
TenantIndexParts::Poisoned(id) => {
|
||||
warn!("disgarding index part for poisoned tenant, poisoned by id: {id}")
|
||||
TenantIndexParts::Poisoned { present, .. } => {
|
||||
present.insert(id.timeline_id, index_part);
|
||||
}
|
||||
TenantIndexParts::Present(parts) => {
|
||||
parts.insert(id.timeline_id, index_part);
|
||||
@@ -77,8 +97,8 @@ where
|
||||
// thats ok because it means that we didnt upload something we have locally for example
|
||||
}
|
||||
e => {
|
||||
*index_parts.entry(id.tenant_id).or_default() =
|
||||
TenantIndexParts::Poisoned(id);
|
||||
let tenant_parts = index_parts.entry(id.tenant_id).or_default();
|
||||
tenant_parts.add_poisoned(id.timeline_id);
|
||||
error!(
|
||||
"Failed to fetch index part for {id}: {e} poisoning tenant index parts"
|
||||
);
|
||||
@@ -144,8 +164,8 @@ where
|
||||
.remove(&tenant_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))?
|
||||
{
|
||||
TenantIndexParts::Poisoned(id) => {
|
||||
anyhow::bail!("failed to download index parts for all timeline: {id}")
|
||||
TenantIndexParts::Poisoned { missing, .. } => {
|
||||
anyhow::bail!("Failed to download index parts for all timelines. Missing {missing:?}")
|
||||
}
|
||||
TenantIndexParts::Present(parts) => Ok(parts),
|
||||
}
|
||||
|
||||
@@ -97,8 +97,8 @@ impl RemoteIndex {
|
||||
|
||||
for (tenant_id, index_parts) in index_parts {
|
||||
match index_parts {
|
||||
// TODO: should we schedule a retry so it can be recovered? otherwise there is no way to revive it other restarting whole pageserver
|
||||
TenantIndexParts::Poisoned(id) => warn!("skipping tenant_id set up for remote index because the index download has failed for timeline {id}"),
|
||||
// TODO: should we schedule a retry so it can be recovered? otherwise we can revive it only through detach/attach or pageserver restart
|
||||
TenantIndexParts::Poisoned { missing, ..} => warn!("skipping tenant_id set up for remote index because the index download has failed for timeline(s): {missing:?}"),
|
||||
TenantIndexParts::Present(timelines) => {
|
||||
for (timeline_id, index_part) in timelines {
|
||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
||||
|
||||
Reference in New Issue
Block a user