diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs
index 1ac07f6ebc..b3eab6c3cb 100644
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -27,7 +27,9 @@ use utils::{
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
use crate::tenant_config::{TenantConf, TenantConfOpt};
-use crate::{METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX};
+use crate::{
+ IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX,
+};
pub mod defaults {
use crate::tenant_config::defaults::*;
@@ -402,6 +404,10 @@ impl PageServerConf {
.join(TENANT_ATTACHING_MARKER_FILENAME)
}
+ pub fn tenant_ignore_mark_file_path(&self, tenant_id: TenantId) -> PathBuf {
+ self.tenant_path(&tenant_id).join(IGNORED_TENANT_FILE_NAME) // the ignore mark file sits directly inside the tenant's directory
+ }
+
/// Points to a place in pageserver's local directory,
/// where certain tenant's tenantconf file should be located.
pub fn tenant_config_path(&self, tenant_id: TenantId) -> PathBuf {
diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index b8f467cd02..932cda50b7 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -274,6 +274,7 @@ paths:
schema:
type: string
format: hex
+
post:
description: Schedules attach operation to happen in the background for given tenant
responses:
@@ -325,7 +326,9 @@ paths:
type: string
format: hex
post:
- description: Detach local tenant
+ description: |
+ Remove tenant data (including all corresponding timelines) from pageserver's memory and file system.
+ Files on the remote storage are not affected.
responses:
"200":
description: Tenant detached
@@ -354,6 +357,92 @@ paths:
schema:
$ref: "#/components/schemas/Error"
+ /v1/tenant/{tenant_id}/ignore:
+ parameters:
+ - name: tenant_id
+ in: path
+ required: true
+ schema:
+ type: string
+ format: hex
+ post:
+ description: |
+ Remove tenant data (including all corresponding timelines) from pageserver's memory.
+ Files on local disk and remote storage are not affected.
+
+ Future pageserver restarts won't load the data back until `load` is called for that tenant.
+ responses:
+ "200":
+ description: Tenant ignored
+ "400":
+ description: Error when no tenant id found in path parameters
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "401":
+ description: Unauthorized Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UnauthorizedError"
+ "403":
+ description: Forbidden Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForbiddenError"
+ "500":
+ description: Generic operation error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /v1/tenant/{tenant_id}/load:
+ parameters:
+ - name: tenant_id
+ in: path
+ required: true
+ schema:
+ type: string
+ format: hex
+ post:
+ description: |
+ Schedules an operation that attempts to load a tenant from the local disk and
+ synchronise it with the remote storage (if enabled), repeating pageserver's restart logic for tenant load.
+ If the tenant was ignored before, removes the ignore mark and continues with load scheduling.
+
+ Errors if the tenant is absent on disk, already present in memory or fails to schedule its load.
+ Scheduling a load does not guarantee that the tenant will load successfully; check the tenant status to confirm that the load succeeded.
+ responses:
+ "202":
+ description: Tenant scheduled to load successfully
+ "400":
+ description: Error when no tenant id found in path parameters
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "401":
+ description: Unauthorized Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UnauthorizedError"
+ "403":
+ description: Forbidden Error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForbiddenError"
+ "500":
+ description: Generic operation error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
/v1/tenant/{tenant_id}/size:
parameters:
- name: tenant_id
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index db262598d7..d1fdf26a5a 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -349,13 +349,13 @@ async fn tenant_attach_handler(request: Request
) -> Result,
if let Some(remote_storage) = &state.remote_storage {
// FIXME: distinguish between "Tenant already exists" and other errors
- tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage)
+ tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone())
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
.await
.map_err(ApiError::InternalServerError)?;
} else {
return Err(ApiError::BadRequest(anyhow!(
- "attach_tenant is possible because pageserver was configured without remote storage"
+ "attach_tenant is not possible because pageserver was configured without remote storage"
)));
}
@@ -394,6 +394,35 @@ async fn tenant_detach_handler(request: Request) -> Result,
json_response(StatusCode::OK, ())
}
+async fn tenant_load_handler(request: Request) -> Result, ApiError> { // serves POST /v1/tenant/:tenant_id/load
+ let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+ check_permission(&request, Some(tenant_id))?; // permission is checked against this specific tenant
+
+ let state = get_state(&request);
+ tenant_mgr::load_tenant(state.conf, tenant_id, state.remote_storage.clone()) // remote storage is forwarded as-is, without requiring it to be configured
+ .instrument(info_span!("load", tenant = %tenant_id))
+ .await
+ .map_err(ApiError::InternalServerError)?; // every load_tenant failure is reported as an internal server error
+
+ json_response(StatusCode::ACCEPTED, ()) // 202 with an empty body; the API spec describes this as "scheduled to load"
+}
+
+async fn tenant_ignore_handler(request: Request) -> Result, ApiError> { // serves POST /v1/tenant/:tenant_id/ignore
+ let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+ check_permission(&request, Some(tenant_id))?; // permission is checked against this specific tenant
+
+ let state = get_state(&request);
+ let conf = state.conf;
+ tenant_mgr::ignore_tenant(conf, tenant_id)
+ .instrument(info_span!("ignore_tenant", tenant = %tenant_id))
+ .await
+ // FIXME: Errors from `ignore_tenant` can be caused by both user and internal errors.
+ // Replace this with better handling once the error type permits it.
+ .map_err(ApiError::InternalServerError)?;
+
+ json_response(StatusCode::OK, ()) // 200 with an empty body once ignore_tenant has returned successfully
+}
+
async fn tenant_list_handler(request: Request) -> Result, ApiError> {
check_permission(&request, None)?;
@@ -833,6 +862,8 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
+ .post("/v1/tenant/:tenant_id/load", tenant_load_handler)
+ .post("/v1/tenant/:tenant_id/ignore", tenant_ignore_handler)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,
diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs
index 5147bd26bb..eafcaa88d9 100644
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -125,6 +125,13 @@ pub const TEMP_FILE_SUFFIX: &str = "___temp";
/// Full path: `tenants//timelines/___uninit`.
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
+/// A marker file to prevent pageserver from loading a certain tenant on restart.
+/// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to the semantics of the corresponding
+/// `ignore` management API command, which expects the ignored tenant to be properly loaded
+/// into pageserver's memory before being ignored.
+/// Full path: `tenants/<tenant_id>/___ignored_tenant`.
+pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant";
+
pub fn is_temporary(path: &Path) -> bool {
match path.file_name() {
Some(name) => name.to_string_lossy().ends_with(TEMP_FILE_SUFFIX),
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 981c049111..87f92402b1 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -571,7 +571,7 @@ impl Tenant {
pub fn spawn_attach(
conf: &'static PageServerConf,
tenant_id: TenantId,
- remote_storage: &GenericRemoteStorage,
+ remote_storage: GenericRemoteStorage,
) -> Arc {
// XXX: Attach should provide the config, especially during tenant migration.
// See https://github.com/neondatabase/neon/issues/1555
@@ -584,7 +584,7 @@ impl Tenant {
tenant_conf,
wal_redo_manager,
tenant_id,
- Some(remote_storage.clone()),
+ Some(remote_storage),
));
// Do all the hard work in the background
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 1bf967c4bf..4011156ec5 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1075,7 +1075,7 @@ impl Timeline {
continue;
}
- trace!("downloading image file: {}", path.display());
+ trace!("downloading image file: {path:?}");
let sz = remote_client
.download_layer_file(&RemotePath::new(path), &layer_metadata)
.await
@@ -1105,7 +1105,7 @@ impl Timeline {
continue;
}
- trace!("downloading delta file: {}", path.display());
+ trace!("downloading delta file: {path:?}");
let sz = remote_client
.download_layer_file(&RemotePath::new(path), &layer_metadata)
.await
diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs
index bd765dabf8..f4f1eba717 100644
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -13,11 +13,13 @@ use tokio::sync::RwLock;
use tracing::*;
use remote_storage::GenericRemoteStorage;
+use utils::crashsafe;
use crate::config::PageServerConf;
use crate::task_mgr::{self, TaskKind};
use crate::tenant::{Tenant, TenantState};
use crate::tenant_config::TenantConfOpt;
+use crate::IGNORED_TENANT_FILE_NAME;
use utils::fs_ext::PathExt;
use utils::id::{TenantId, TimelineId};
@@ -47,24 +49,52 @@ pub async fn init_tenant_mgr(
Ok(Some(dir_entry)) => {
let tenant_dir_path = dir_entry.path();
if crate::is_temporary(&tenant_dir_path) {
- info!("Found temporary tenant directory, removing: {tenant_dir_path:?}",);
+ info!(
+ "Found temporary tenant directory, removing: {}",
+ tenant_dir_path.display()
+ );
if let Err(e) = fs::remove_dir_all(&tenant_dir_path).await {
- error!("Failed to remove temporary directory {tenant_dir_path:?}: {e:?}");
+ error!(
+ "Failed to remove temporary directory '{}': {:?}",
+ tenant_dir_path.display(),
+ e
+ );
}
} else {
- match load_local_tenant(conf, &tenant_dir_path, remote_storage.clone()) {
- Ok(Some(tenant)) => {
- TENANTS.write().await.insert(tenant.tenant_id(), tenant);
- number_of_tenants += 1;
- }
- Ok(None) => {
- // This case happens if we crash during attach before creating the attach marker file
- if let Err(e) = fs::remove_dir(&tenant_dir_path).await {
- error!("Failed to remove empty tenant directory {tenant_dir_path:?}: {e:#}")
- }
- }
- Err(e) => error!("Failed to collect tenant files from dir {tenants_dir:?} for entry {dir_entry:?}, reason: {e:#}"),
+ // This case happens if we crash during attach before creating the attach marker file
+ let is_empty = tenant_dir_path.is_empty_dir().with_context(|| {
+ format!("Failed to check whether {tenant_dir_path:?} is an empty dir")
+ })?;
+ if is_empty {
+ info!("removing empty tenant directory {tenant_dir_path:?}");
+ if let Err(e) = fs::remove_dir(&tenant_dir_path).await {
+ error!(
+ "Failed to remove empty tenant directory '{}': {e:#}",
+ tenant_dir_path.display()
+ )
}
+ continue;
+ }
+
+ let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
+ if tenant_ignore_mark_file.exists() {
+ info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
+ continue;
+ }
+
+ match schedule_local_tenant_processing(
+ conf,
+ &tenant_dir_path,
+ remote_storage.clone(),
+ ) {
+ Ok(tenant) => {
+ TENANTS.write().await.insert(tenant.tenant_id(), tenant);
+ number_of_tenants += 1;
+ }
+ Err(e) => {
+ error!("Failed to collect tenant files from dir {tenants_dir:?} for entry {dir_entry:?}, reason: {e:#}");
+ }
+ }
}
}
Err(e) => {
@@ -82,34 +112,45 @@ pub async fn init_tenant_mgr(
Ok(())
}
-fn load_local_tenant(
+pub fn schedule_local_tenant_processing(
conf: &'static PageServerConf,
tenant_path: &Path,
remote_storage: Option,
-) -> anyhow::Result