Remove timeline files on detach

This commit is contained in:
Kirill Bulatov
2022-04-28 00:49:03 +03:00
committed by Kirill Bulatov
parent 6cca57f95a
commit 2911eb084a
5 changed files with 35 additions and 9 deletions

View File

@@ -347,7 +347,8 @@ async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body
let _enter =
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
.entered();
tenant_mgr::detach_timeline(tenant_id, timeline_id)
let state = get_state(&request);
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
})
.await
.map_err(ApiError::from_err)??;

View File

@@ -392,7 +392,11 @@ impl Repository for LayeredRepository {
}
fn detach_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> {
self.timelines.lock().unwrap().remove(&timeline_id);
let mut timelines = self.timelines.lock().unwrap();
ensure!(
timelines.remove(&timeline_id).is_some(),
"cannot detach timeline {timeline_id} that is not available locally"
);
Ok(())
}

View File

@@ -332,7 +332,7 @@ mod tests {
.await;
assert!(
matches!(
dbg!(already_downloading_remote_timeline_download),
already_downloading_remote_timeline_download,
DownloadedTimeline::Abort,
),
"Should not allow downloading for remote timeline that does not expect it"

View File

@@ -56,7 +56,7 @@ struct Tenant {
/// Contains in-memory state, including the timeline that might not yet flushed on disk or loaded form disk.
repo: Arc<RepositoryImpl>,
/// Timelines, located locally in the pageserver's datadir.
/// Whatever manipulations happen, local timelines are not removed, only incremented with files.
/// Timelines can entirely be removed entirely by the `detach` operation only.
///
/// Local timelines have more metadata that's loaded into memory,
/// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`].
@@ -126,8 +126,8 @@ pub fn apply_timeline_sync_status_updates(
continue;
}
};
match register_new_timelines(&repo, status_updates) {
Ok(()) => info!("successfully applied tenant {tenant_id} sync status updates"),
match apply_timeline_remote_sync_status_updates(&repo, status_updates) {
Ok(()) => info!("successfully applied sync status updates for tenant {tenant_id}"),
Err(e) => error!(
"Failed to apply timeline sync timeline status updates for tenant {tenant_id}: {e:?}"
),
@@ -305,7 +305,11 @@ pub fn get_local_timeline_with_load(
Ok(page_tline)
}
pub fn detach_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow::Result<()> {
pub fn detach_timeline(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<()> {
// shutdown the timeline threads (this shuts down the walreceiver)
thread_mgr::shutdown_threads(None, Some(tenant_id), Some(timeline_id));
@@ -320,6 +324,14 @@ pub fn detach_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow
None => bail!("Tenant {tenant_id} not found in local tenant state"),
}
let local_timeline_directory = conf.timeline_path(&timeline_id, &tenant_id);
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
format!(
"Failed to remove local timeline directory '{}'",
local_timeline_directory.display()
)
})?;
Ok(())
}
@@ -386,13 +398,13 @@ fn init_local_repositories(
// Lets fail here loudly to be on the safe side.
// XXX: It may be a better api to actually distinguish between repository startup
// and processing of newly downloaded timelines.
register_new_timelines(&repo, status_updates)
apply_timeline_remote_sync_status_updates(&repo, status_updates)
.with_context(|| format!("Failed to bootstrap timelines for tenant {tenant_id}"))?
}
Ok(())
}
fn register_new_timelines(
fn apply_timeline_remote_sync_status_updates(
repo: &LayeredRepository,
status_updates: HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
) -> anyhow::Result<()> {

View File

@@ -217,6 +217,13 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
tenant_pg.start()
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant.hex / 'timelines' / timeline.hex
files_before_detach = os.listdir(timeline_to_detach_local_path)
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
but got: {files_before_detach}'
assert len(files_before_detach) > 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
but got {files_before_detach}'
# detach tenant from old pageserver before we check
# that all the data is there to be sure that old pageserver
# is no longer involved, and if it is, we will see the errors
@@ -238,6 +245,8 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
load_thread.join(timeout=10)
log.info('load thread stopped')
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
# bring old pageserver back for clean shutdown via zenith cli
# new pageserver will be shut down by the context manager
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()