switch to per-tenant attach/detach

download operations of all timelines for one tenant are now grouped
together so when attach is invoked pageserver downloads all of them
and registers them in a single apply_sync_status_update call so
branches can be used safely with attach/detach
This commit is contained in:
Dmitry Rodionov
2022-06-15 17:59:24 +03:00
committed by Dmitry Rodionov
parent ae116ff0a9
commit 4c54e4b37d
19 changed files with 835 additions and 333 deletions

View File

@@ -1,10 +1,14 @@
//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.
use std::{collections::HashSet, fmt::Debug, path::Path};
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
path::Path,
};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use remote_storage::{path_with_suffix_extension, RemoteStorage};
use remote_storage::{path_with_suffix_extension, RemoteObjectName, RemoteStorage};
use tokio::{
fs,
io::{self, AsyncWriteExt},
@@ -14,7 +18,7 @@ use tracing::{debug, error, info, warn};
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use utils::zid::ZTenantTimelineId;
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
use super::{
index::{IndexPart, RemoteTimeline},
@@ -23,6 +27,105 @@ use super::{
pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
/// FIXME: Needs cleanup. Currently it swallows errors. Here we need to ensure that
/// we successfully downloaded all metadata parts for one tenant.
/// And successful includes absence of index_part in the remote. Because it is valid situation
/// when timeline was just created and pageserver restarted before upload of index part was completed.
/// But currently RemoteStorage interface does not provide this knowledge because it uses
/// anyhow::Error as an error type. So this needs a refactoring.
///
/// In other words we need to yield only complete sets of tenant timelines.
/// Failure for one timeline of a tenant should exclude whole tenant from returned hashmap.
/// So there are two requirements: keep everything in one futures unordered
/// to allow higher concurrency. Mark tenants as failed independently.
/// That requires some bookeeping.
pub async fn try_download_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
keys: HashSet<ZTenantTimelineId>,
) -> HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let mut index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>> = HashMap::new();
let mut part_downloads = keys
.into_iter()
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
.collect::<FuturesUnordered<_>>();
while let Some((id, part_upload_result)) = part_downloads.next().await {
match part_upload_result {
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
index_parts
.entry(id.tenant_id)
.or_default()
.insert(id.timeline_id, index_part);
}
Err(e) => error!("Failed to fetch index part for {id}: {e}"),
}
}
index_parts
}
pub async fn download_tenant_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
tenant_id: ZTenantId,
) -> anyhow::Result<HashMap<ZTimelineId, IndexPart>>
where
P: RemoteObjectName + Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let tenant_path = conf.timelines_path(&tenant_id);
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
format!(
"Failed to get tenant storage path for local path '{}'",
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(tenant_storage_path))
.await
.with_context(|| {
format!(
"Failed to list tenant storage path to get remote timelines to download: {}",
tenant_id
)
})?;
let mut sync_ids = HashSet::new();
for timeline_remote_storage_key in timelines {
let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
})?;
let timeline_id: ZTimelineId = object_name
.parse()
.with_context(|| {
format!("failed to parse object name into timeline id for tenant {tenant_id} '{object_name}'")
})?;
sync_ids.insert(ZTenantTimelineId {
tenant_id,
timeline_id,
});
}
let index_parts = try_download_index_parts(conf, storage, sync_ids)
.await
.remove(&tenant_id)
.ok_or(anyhow::anyhow!(
"Missing tenant index parts. This is a bug."
))?;
Ok(index_parts)
}
/// Retrieves index data from the remote storage for a given timeline.
pub async fn download_index_part<P, S>(
conf: &'static PageServerConf,

View File

@@ -2,6 +2,7 @@
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
//! remote timeline layers and its metadata.
use std::ops::{Deref, DerefMut};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
@@ -14,7 +15,10 @@ use serde_with::{serde_as, DisplayFromStr};
use tokio::sync::RwLock;
use crate::{config::PageServerConf, layered_repository::metadata::TimelineMetadata};
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
use utils::{
lsn::Lsn,
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
};
/// A part of the filesystem path, that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
@@ -41,38 +45,68 @@ impl RelativePath {
}
}
#[derive(Debug, Clone, Default)]
pub struct TenantEntry(HashMap<ZTimelineId, RemoteTimeline>);
impl TenantEntry {
pub fn has_in_progress_downloads(&self) -> bool {
self.values()
.any(|remote_timeline| remote_timeline.awaits_download)
}
}
impl Deref for TenantEntry {
type Target = HashMap<ZTimelineId, RemoteTimeline>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TenantEntry {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<HashMap<ZTimelineId, RemoteTimeline>> for TenantEntry {
fn from(inner: HashMap<ZTimelineId, RemoteTimeline>) -> Self {
Self(inner)
}
}
/// An index to track tenant files that exist on the remote storage.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Default)]
pub struct RemoteTimelineIndex {
timeline_entries: HashMap<ZTenantTimelineId, RemoteTimeline>,
entries: HashMap<ZTenantId, TenantEntry>,
}
/// A wrapper to synchronize the access to the index, should be created and used before dealing with any [`RemoteTimelineIndex`].
#[derive(Default)]
pub struct RemoteIndex(Arc<RwLock<RemoteTimelineIndex>>);
impl RemoteIndex {
pub fn empty() -> Self {
Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries: HashMap::new(),
})))
}
pub fn from_parts(
conf: &'static PageServerConf,
index_parts: HashMap<ZTenantTimelineId, IndexPart>,
index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>,
) -> anyhow::Result<Self> {
let mut timeline_entries = HashMap::new();
let mut entries: HashMap<ZTenantId, TenantEntry> = HashMap::new();
for (sync_id, index_part) in index_parts {
let timeline_path = conf.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
timeline_entries.insert(sync_id, remote_timeline);
for (tenant_id, timelines) in index_parts {
for (timeline_id, index_part) in timelines {
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline =
RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, remote_timeline);
}
}
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries,
}))))
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex { entries }))))
}
pub async fn read(&self) -> tokio::sync::RwLockReadGuard<'_, RemoteTimelineIndex> {
@@ -91,20 +125,50 @@ impl Clone for RemoteIndex {
}
impl RemoteTimelineIndex {
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&RemoteTimeline> {
self.timeline_entries.get(id)
pub fn timeline_entry(
&self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&RemoteTimeline> {
self.entries.get(tenant_id)?.get(timeline_id)
}
pub fn timeline_entry_mut(&mut self, id: &ZTenantTimelineId) -> Option<&mut RemoteTimeline> {
self.timeline_entries.get_mut(id)
pub fn timeline_entry_mut(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&mut RemoteTimeline> {
self.entries.get_mut(tenant_id)?.get_mut(timeline_id)
}
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: RemoteTimeline) {
self.timeline_entries.insert(id, entry);
pub fn add_timeline_entry(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
entry: RemoteTimeline,
) {
self.entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, entry);
}
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
self.timeline_entries.keys().copied()
pub fn tenant_entry(&self, tenant_id: &ZTenantId) -> Option<&TenantEntry> {
self.entries.get(tenant_id)
}
pub fn tenant_entry_mut(&mut self, tenant_id: &ZTenantId) -> Option<&mut TenantEntry> {
self.entries.get_mut(tenant_id)
}
pub fn add_tenant_entry(&mut self, tenant_id: ZTenantId) -> &mut TenantEntry {
self.entries.entry(tenant_id).or_default()
}
pub fn set_awaits_download(