diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 5c135e4eb4..728dcb53de 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -10,10 +10,7 @@ use daemonize::Daemonize;
use pageserver::{
config::{defaults::*, PageServerConf},
- http, page_cache, page_service, profiling,
- remote_storage::{self, SyncStartupData},
- repository::{Repository, TimelineSyncStatusUpdate},
- tenant_mgr, thread_mgr,
+ http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
thread_mgr::ThreadKind,
timelines, virtual_file, LOG_FILE_NAME,
};
@@ -235,47 +232,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
let signals = signals::install_shutdown_handlers()?;
- // Initialize repositories with locally available timelines.
- // Timelines that are only partially available locally (remote storage has more data than this pageserver)
- // are scheduled for download and added to the repository once download is completed.
- let SyncStartupData {
- remote_index,
- local_timeline_init_statuses,
- } = remote_storage::start_local_timeline_sync(conf)
- .context("Failed to set up local files sync with external storage")?;
-
- for (tenant_id, local_timeline_init_statuses) in local_timeline_init_statuses {
- // initialize local tenant
- let repo = tenant_mgr::load_local_repo(conf, tenant_id, &remote_index)
- .with_context(|| format!("Failed to load repo for tenant {}", tenant_id))?;
- for (timeline_id, init_status) in local_timeline_init_statuses {
- match init_status {
- remote_storage::LocalTimelineInitStatus::LocallyComplete => {
- debug!("timeline {} for tenant {} is locally complete, registering it in repository", timeline_id, tenant_id);
- // Lets fail here loudly to be on the safe side.
- // XXX: It may be a better api to actually distinguish between repository startup
- // and processing of newly downloaded timelines.
- repo.apply_timeline_remote_sync_status_update(
- timeline_id,
- TimelineSyncStatusUpdate::Downloaded,
- )
- .with_context(|| {
- format!(
- "Failed to bootstrap timeline {} for tenant {}",
- timeline_id, tenant_id
- )
- })?
- }
- remote_storage::LocalTimelineInitStatus::NeedsSync => {
- debug!(
- "timeline {} for tenant {} needs sync, \
- so skipped for adding into repository until sync is finished",
- tenant_id, timeline_id
- );
- }
- }
- }
- }
+ // start profiler (if enabled)
+ let profiler_guard = profiling::init_profiler(conf);
// initialize authentication for incoming connections
let auth = match &conf.auth_type {
@@ -288,8 +246,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
};
info!("Using auth: {:#?}", conf.auth_type);
- // start profiler (if enabled)
- let profiler_guard = profiling::init_profiler(conf);
+ let remote_index = tenant_mgr::init_tenant_mgr(conf)?;
// Spawn a new thread for the http endpoint
// bind before launching separate thread so the error reported before startup exits
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 05485ef3b6..f1b482cf50 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -244,7 +244,7 @@ async fn timeline_attach_handler(request: Request
) -> Result) -> Result, A
crate::tenant_mgr::list_tenants()
})
.await
- .map_err(ApiError::from_err)??;
+ .map_err(ApiError::from_err)?;
json_response(StatusCode::OK, response_data)
}
@@ -377,7 +377,7 @@ async fn tenant_create_handler(mut request: Request) -> Result> = Mutex::new(HashMap::new());
+mod tenants_state { // Private module: the TENANTS static is reachable only through the accessors below.
+ use std::{
+ collections::HashMap,
+ sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
+ };
+
+ use utils::zid::ZTenantId;
+
+ use crate::tenant_mgr::Tenant;
+
+ lazy_static::lazy_static! {
+ static ref TENANTS: RwLock> = RwLock::new(HashMap::new()); // Global tenant map behind an RwLock; starts empty.
+ }
+
+ pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap> { // Returns a read guard on the tenant map; panics if the lock is poisoned.
+ TENANTS
+ .read()
+ .expect("Failed to read() tenants lock, it got poisoned")
+ }
+
+ pub(super) fn write_tenants() -> RwLockWriteGuard<'static, HashMap> { // Returns a write guard on the tenant map; panics if the lock is poisoned.
+ TENANTS
+ .write()
+ .expect("Failed to write() tenants lock, it got poisoned")
+ }
}
struct Tenant {
state: TenantState,
+ /// Contains in-memory state, including the timelines that might not yet be flushed to disk or loaded from disk.
repo: Arc,
-
- timelines: HashMap>,
+ /// Timelines located locally in the pageserver's datadir.
+ /// Whatever manipulations happen, local timelines are never removed; they only gain new files.
+ ///
+ /// Local timelines have additional metadata that is loaded into memory;
+ /// it is stored in the `repo.timelines` field, see [`crate::layered_repository::LayeredTimelineEntry`].
+ local_timelines: HashMap>,
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
@@ -60,43 +88,17 @@ impl fmt::Display for TenantState {
}
}
-fn access_tenants() -> MutexGuard<'static, HashMap> {
- TENANTS.lock().unwrap()
-}
-
-// Sets up wal redo manager and repository for tenant. Reduces code duplication.
-// Used during pageserver startup, or when new tenant is attached to pageserver.
-pub fn load_local_repo(
- conf: &'static PageServerConf,
- tenant_id: ZTenantId,
- remote_index: &RemoteIndex,
-) -> Result> {
- let mut m = access_tenants();
- let tenant = m.entry(tenant_id).or_insert_with(|| {
- // Set up a WAL redo manager, for applying WAL records.
- let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);
-
- // Set up an object repository, for actual data storage.
- let repo: Arc = Arc::new(LayeredRepository::new(
- conf,
- Default::default(),
- Arc::new(walredo_mgr),
- tenant_id,
- remote_index.clone(),
- conf.remote_storage_config.is_some(),
- ));
- Tenant {
- state: TenantState::Idle,
- repo,
- timelines: HashMap::new(),
- }
- });
-
- // Restore tenant config
- let tenant_conf = LayeredRepository::load_tenant_config(conf, tenant_id)?;
- tenant.repo.update_tenant_config(tenant_conf)?;
-
- Ok(Arc::clone(&tenant.repo))
+/// Starts the local timeline sync and initializes repositories with locally available timelines.
+/// Timelines that are only partially available locally (remote storage has more data than this pageserver)
+/// are scheduled for download and added to the repository once download is completed. Returns the remote index produced by the sync startup; an error from either step is propagated to the caller.
+pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result {
+ let SyncStartupData {
+ remote_index,
+ local_timeline_init_statuses,
+ } = remote_storage::start_local_timeline_sync(conf)
+ .context("Failed to set up local files sync with external storage")?;
+ init_local_repositories(conf, local_timeline_init_statuses, &remote_index)?; // borrows the index here so it can still be returned below
+ Ok(remote_index)
}
/// Updates tenants' repositories, changing their timelines state in memory.
@@ -113,32 +115,28 @@ pub fn apply_timeline_sync_status_updates(
"Applying sync status updates for {} timelines",
sync_status_updates.len()
);
- trace!("Sync status updates: {:?}", sync_status_updates);
+ debug!("Sync status updates: {sync_status_updates:?}");
- for (tenant_id, tenant_timelines_sync_status_updates) in sync_status_updates {
+ for (tenant_id, status_updates) in sync_status_updates {
let repo = match load_local_repo(conf, tenant_id, remote_index) {
Ok(repo) => repo,
Err(e) => {
- error!(
- "Failed to load repo for tenant {} Error: {:#}",
- tenant_id, e
- );
+ error!("Failed to load repo for tenant {tenant_id} Error: {e:?}",);
continue;
}
};
- for (timeline_id, timeline_sync_status_update) in tenant_timelines_sync_status_updates {
- match repo.apply_timeline_remote_sync_status_update(timeline_id, timeline_sync_status_update)
+ for (timeline_id, status_update) in status_updates {
+ match repo.apply_timeline_remote_sync_status_update(timeline_id, status_update)
{
- Ok(_) => debug!(
- "successfully applied timeline sync status update: {} -> {}",
- timeline_id, timeline_sync_status_update
- ),
+ Ok(()) => debug!("successfully applied timeline sync status update: {timeline_id} -> {status_update}"),
Err(e) => error!(
- "Failed to apply timeline sync status update for tenant {}. timeline {} update {} Error: {:#}",
- tenant_id, timeline_id, timeline_sync_status_update, e
+ "Failed to apply timeline sync status update for tenant {tenant_id}. timeline {timeline_id} update {status_update} Error: {e:?}"
),
}
+ match status_update {
+ TimelineSyncStatusUpdate::Downloaded => todo!("TODO kb "),
+ }
}
}
}
@@ -147,7 +145,7 @@ pub fn apply_timeline_sync_status_updates(
/// Shut down all tenants. This runs as part of pageserver shutdown.
///
pub fn shutdown_all_tenants() {
- let mut m = access_tenants();
+ let mut m = tenants_state::write_tenants();
let mut tenantids = Vec::new();
for (tenantid, tenant) in m.iter_mut() {
tenant.state = TenantState::Stopping;
@@ -167,22 +165,16 @@ pub fn shutdown_all_tenants() {
// should be no more activity in any of the repositories.
//
// On error, log it but continue with the shutdown for other tenants.
- for tenantid in tenantids {
- debug!("shutdown tenant {}", tenantid);
- match get_repository_for_tenant(tenantid) {
+ for tenant_id in tenantids {
+ debug!("shutdown tenant {tenant_id}");
+ match get_repository_for_tenant(tenant_id) {
Ok(repo) => {
if let Err(err) = repo.checkpoint() {
- error!(
- "Could not checkpoint tenant {} during shutdown: {:?}",
- tenantid, err
- );
+ error!("Could not checkpoint tenant {tenant_id} during shutdown: {err:?}");
}
}
Err(err) => {
- error!(
- "Could not get repository for tenant {} during shutdown: {:?}",
- tenantid, err
- );
+ error!("Could not get repository for tenant {tenant_id} during shutdown: {err:?}");
}
}
}
@@ -191,20 +183,20 @@ pub fn shutdown_all_tenants() {
pub fn create_tenant_repository(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
- tenantid: ZTenantId,
+ tenant_id: ZTenantId,
remote_index: RemoteIndex,
-) -> Result