diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 70db0b7344..3db75b7d0e 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -293,22 +293,6 @@ pub struct TenantCreateRequest { pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it } -#[derive(Deserialize, Debug)] -#[serde(deny_unknown_fields)] -pub struct TenantLoadRequest { - #[serde(default)] - #[serde(skip_serializing_if = "Option::is_none")] - pub generation: Option, -} - -impl std::ops::Deref for TenantCreateRequest { - type Target = TenantConfig; - - fn deref(&self) -> &Self::Target { - &self.config - } -} - /// An alternative representation of `pageserver::tenant::TenantConf` with /// simpler types. #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)] diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index b4a0d1ac02..badea48b98 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -39,8 +39,8 @@ use crate::tenant::{ use crate::{disk_usage_eviction_task::DiskUsageEvictionTaskConfig, virtual_file::io_engine}; use crate::{tenant::config::TenantConf, virtual_file}; use crate::{ - IGNORED_TENANT_FILE_NAME, TENANT_CONFIG_NAME, TENANT_HEATMAP_BASENAME, - TENANT_LOCATION_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX, + TENANT_CONFIG_NAME, TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, + TIMELINE_DELETE_MARK_SUFFIX, }; use self::defaults::DEFAULT_CONCURRENT_TENANT_WARMUP; @@ -811,11 +811,6 @@ impl PageServerConf { self.tenants_path().join(tenant_shard_id.to_string()) } - pub fn tenant_ignore_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf { - self.tenant_path(tenant_shard_id) - .join(IGNORED_TENANT_FILE_NAME) - } - /// Points to a place in pageserver's local directory, /// where certain tenant's tenantconf file should be located. /// diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 71b486a4d3..4b6fe56b89 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -389,48 +389,6 @@ paths: application/json: schema: $ref: "#/components/schemas/ConflictError" - /v1/tenant/{tenant_id}/ignore: - parameters: - - name: tenant_id - in: path - required: true - schema: - type: string - post: - description: | - Remove tenant data (including all corresponding timelines) from pageserver's memory. - Files on local disk and remote storage are not affected. - - Future pageserver restarts won't load the data back until `load` is called on such tenant. - responses: - "200": - description: Tenant ignored - - - /v1/tenant/{tenant_id}/load: - parameters: - - name: tenant_id - in: path - required: true - schema: - type: string - post: - description: | - Schedules an operation that attempts to load a tenant from the local disk and - synchronise it with the remote storage (if enabled), repeating pageserver's restart logic for tenant load. - If the tenant was ignored before, removes the ignore mark and continues with load scheduling. - - Errors if the tenant is absent on disk, already present in memory or fails to schedule its load. - Scheduling a load does not mean that the tenant would load successfully, check tenant status to ensure load correctness. - requestBody: - required: false - content: - application/json: - schema: - $ref: "#/components/schemas/TenantLoadRequest" - responses: - "202": - description: Tenant scheduled to load successfully /v1/tenant/{tenant_id}/{timeline_id}/preserve_initdb_archive: parameters: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 482879630a..eb74ca637f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -36,7 +36,7 @@ use pageserver_api::models::TopTenantShardsRequest; use pageserver_api::models::TopTenantShardsResponse; use pageserver_api::models::{ DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest, - TenantLoadRequest, TenantLocationConfigRequest, + TenantLocationConfigRequest, }; use pageserver_api::shard::ShardCount; use pageserver_api::shard::TenantShardId; @@ -205,7 +205,6 @@ impl From for ApiError { NotFound(tenant_id) => { ApiError::NotFound(anyhow::anyhow!("NotFound: tenant {tenant_id}").into()) } - e @ AlreadyExists(_, _) => ApiError::Conflict(format!("{e}")), InProgress => { ApiError::ResourceUnavailable("Tenant is being modified concurrently".into()) } @@ -891,8 +890,6 @@ async fn tenant_detach_handler( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; - let detach_ignored: Option = parse_query_param(&request, "detach_ignored")?; - // This is a legacy API (`/location_conf` is the replacement). It only supports unsharded tenants let tenant_shard_id = TenantShardId::unsharded(tenant_id); @@ -900,12 +897,7 @@ async fn tenant_detach_handler( let conf = state.conf; state .tenant_manager - .detach_tenant( - conf, - tenant_shard_id, - detach_ignored.unwrap_or(false), - &state.deletion_queue_client, - ) + .detach_tenant(conf, tenant_shard_id, &state.deletion_queue_client) .instrument(info_span!("tenant_detach", %tenant_id, shard_id=%tenant_shard_id.shard_slug())) .await?; @@ -932,54 +924,6 @@ async fn tenant_reset_handler( json_response(StatusCode::OK, ()) } -async fn tenant_load_handler( - mut request: Request, - _cancel: CancellationToken, -) -> Result, ApiError> { - let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; - check_permission(&request, Some(tenant_id))?; - - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn); - - let maybe_body: Option = json_request_or_empty_body(&mut request).await?; - - let state = get_state(&request); - - // The /load request is only usable when control_plane_api is not set. Once it is set, callers - // should always use /attach instead. - let generation = get_request_generation(state, maybe_body.as_ref().and_then(|r| r.generation))?; - - mgr::load_tenant( - state.conf, - tenant_id, - generation, - state.broker_client.clone(), - state.remote_storage.clone(), - state.deletion_queue_client.clone(), - &ctx, - ) - .instrument(info_span!("load", %tenant_id)) - .await?; - - json_response(StatusCode::ACCEPTED, ()) -} - -async fn tenant_ignore_handler( - request: Request, - _cancel: CancellationToken, -) -> Result, ApiError> { - let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; - check_permission(&request, Some(tenant_id))?; - - let state = get_state(&request); - let conf = state.conf; - mgr::ignore_tenant(conf, tenant_id) - .instrument(info_span!("ignore_tenant", %tenant_id)) - .await?; - - json_response(StatusCode::OK, ()) -} - async fn tenant_list_handler( request: Request, _cancel: CancellationToken, @@ -1507,7 +1451,7 @@ async fn put_tenant_location_config_handler( if let LocationConfigMode::Detached = request_data.config.mode { if let Err(e) = state .tenant_manager - .detach_tenant(conf, tenant_shard_id, true, &state.deletion_queue_client) + .detach_tenant(conf, tenant_shard_id, &state.deletion_queue_client) .instrument(info_span!("tenant_detach", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug() @@ -2764,12 +2708,6 @@ pub fn make_router( .post("/v1/tenant/:tenant_shard_id/reset", |r| { api_handler(r, tenant_reset_handler) }) - .post("/v1/tenant/:tenant_id/load", |r| { - api_handler(r, tenant_load_handler) - }) - .post("/v1/tenant/:tenant_id/ignore", |r| { - api_handler(r, tenant_ignore_handler) - }) .post( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive", |r| api_handler(r, timeline_preserve_initdb_handler), diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index c69fb8c83b..9e64eafffc 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -136,13 +136,6 @@ pub(crate) const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit"; pub(crate) const TIMELINE_DELETE_MARK_SUFFIX: &str = "___delete"; -/// A marker file to prevent pageserver from loading a certain tenant on restart. -/// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to semantics of the corresponding -/// `ignore` management API command, that expects the ignored tenant to be properly loaded -/// into pageserver's memory before being ignored. -/// Full path: `tenants//___ignored_tenant`. -pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant"; - pub fn is_temporary(path: &Utf8Path) -> bool { match path.file_name() { Some(name) => name.ends_with(TEMP_FILE_SUFFIX), diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 4520bb9295..f61526f8c2 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -27,7 +27,6 @@ use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; -use remote_storage::GenericRemoteStorage; use utils::{completion, crashsafe}; use crate::config::PageServerConf; @@ -47,7 +46,7 @@ use crate::tenant::span::debug_assert_current_span_has_tenant_id; use crate::tenant::storage_layer::inmemory_layer; use crate::tenant::timeline::ShutdownMode; use crate::tenant::{AttachedTenantConf, GcError, SpawnMode, Tenant, TenantState}; -use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX}; +use crate::{InitializationOrder, TEMP_FILE_SUFFIX}; use utils::crashsafe::path_with_suffix_extension; use utils::fs_ext::PathExt; @@ -422,12 +421,6 @@ fn load_tenant_config( } }; - let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME); - if tenant_ignore_mark_file.exists() { - info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant"); - return Ok(None); - } - Ok(Some(( tenant_shard_id, Tenant::load_tenant_config(conf, &tenant_shard_id), @@ -713,12 +706,6 @@ fn tenant_spawn( "Cannot load tenant from empty directory {tenant_path:?}" ); - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); - anyhow::ensure!( - !conf.tenant_ignore_mark_file_path(&tenant_shard_id).exists(), - "Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}" - ); - let remote_storage = resources.remote_storage.clone(); let tenant = match Tenant::spawn( conf, @@ -1067,7 +1054,7 @@ impl TenantManager { // not do significant I/O, and shutdowns should be prompt via cancellation tokens. let mut slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any) .map_err(|e| match e { - TenantSlotError::AlreadyExists(_, _) | TenantSlotError::NotFound(_) => { + TenantSlotError::NotFound(_) => { unreachable!("Called with mode Any") } TenantSlotError::InProgress => UpsertLocationError::InProgress, @@ -1901,17 +1888,10 @@ impl TenantManager { &self, conf: &'static PageServerConf, tenant_shard_id: TenantShardId, - detach_ignored: bool, deletion_queue_client: &DeletionQueueClient, ) -> Result<(), TenantStateError> { let tmp_path = self - .detach_tenant0( - conf, - &TENANTS, - tenant_shard_id, - detach_ignored, - deletion_queue_client, - ) + .detach_tenant0(conf, &TENANTS, tenant_shard_id, deletion_queue_client) .await?; spawn_background_purge(tmp_path); @@ -1923,7 +1903,6 @@ impl TenantManager { conf: &'static PageServerConf, tenants: &std::sync::RwLock, tenant_shard_id: TenantShardId, - detach_ignored: bool, deletion_queue_client: &DeletionQueueClient, ) -> Result { let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move { @@ -1946,26 +1925,6 @@ impl TenantManager { // before this tenant is potentially re-attached elsewhere. deletion_queue_client.flush_advisory(); - // Ignored tenants are not present in memory and will bail the removal from memory operation. - // Before returning the error, check for ignored tenant removal case — we only need to clean its local files then. - if detach_ignored - && matches!( - removal_result, - Err(TenantStateError::SlotError(TenantSlotError::NotFound(_))) - ) - { - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); - if tenant_ignore_mark.exists() { - info!("Detaching an ignored tenant"); - let tmp_path = tenant_dir_rename_operation(tenant_shard_id) - .await - .with_context(|| { - format!("Ignored tenant {tenant_shard_id} local directory rename") - })?; - return Ok(tmp_path); - } - } - removal_result } @@ -2222,97 +2181,6 @@ pub(crate) enum TenantStateError { Other(#[from] anyhow::Error), } -pub(crate) async fn load_tenant( - conf: &'static PageServerConf, - tenant_id: TenantId, - generation: Generation, - broker_client: storage_broker::BrokerClientChannel, - remote_storage: GenericRemoteStorage, - deletion_queue_client: DeletionQueueClient, - ctx: &RequestContext, -) -> Result<(), TenantMapInsertError> { - // This is a legacy API (replaced by `/location_conf`). It does not support sharding - let tenant_shard_id = TenantShardId::unsharded(tenant_id); - - let slot_guard = - tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustNotExist)?; - let tenant_path = conf.tenant_path(&tenant_shard_id); - - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); - if tenant_ignore_mark.exists() { - std::fs::remove_file(&tenant_ignore_mark).with_context(|| { - format!( - "Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading" - ) - })?; - } - - let resources = TenantSharedResources { - broker_client, - remote_storage, - deletion_queue_client, - }; - - let mut location_conf = - Tenant::load_tenant_config(conf, &tenant_shard_id).map_err(TenantMapInsertError::Other)?; - location_conf.attach_in_generation(AttachmentMode::Single, generation); - - Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?; - - let shard_identity = location_conf.shard; - let new_tenant = tenant_spawn( - conf, - tenant_shard_id, - &tenant_path, - resources, - AttachedTenantConf::try_from(location_conf)?, - shard_identity, - None, - &TENANTS, - SpawnMode::Eager, - ctx, - ) - .with_context(|| format!("Failed to schedule tenant processing in path {tenant_path:?}"))?; - - slot_guard.upsert(TenantSlot::Attached(new_tenant))?; - Ok(()) -} - -pub(crate) async fn ignore_tenant( - conf: &'static PageServerConf, - tenant_id: TenantId, -) -> Result<(), TenantStateError> { - ignore_tenant0(conf, &TENANTS, tenant_id).await -} - -#[instrument(skip_all, fields(shard_id))] -async fn ignore_tenant0( - conf: &'static PageServerConf, - tenants: &std::sync::RwLock, - tenant_id: TenantId, -) -> Result<(), TenantStateError> { - // This is a legacy API (replaced by `/location_conf`). It does not support sharding - let tenant_shard_id = TenantShardId::unsharded(tenant_id); - tracing::Span::current().record( - "shard_id", - tracing::field::display(tenant_shard_id.shard_slug()), - ); - - remove_tenant_from_memory(tenants, tenant_shard_id, async { - let ignore_mark_file = conf.tenant_ignore_mark_file_path(&tenant_shard_id); - fs::File::create(&ignore_mark_file) - .await - .context("Failed to create ignore mark file") - .and_then(|_| { - crashsafe::fsync_file_and_parent(&ignore_mark_file) - .context("Failed to fsync ignore mark file") - }) - .with_context(|| format!("Failed to crate ignore mark for tenant {tenant_shard_id}"))?; - Ok(()) - }) - .await -} - #[derive(Debug, thiserror::Error)] pub(crate) enum TenantMapListError { #[error("tenant map is still initiailizing")] @@ -2337,10 +2205,6 @@ pub(crate) enum TenantSlotError { #[error("Tenant {0} not found")] NotFound(TenantShardId), - /// When acquiring a slot with the expectation that the tenant does not already exist. - #[error("tenant {0} already exists, state: {1:?}")] - AlreadyExists(TenantShardId, TenantState), - // Tried to read a slot that is currently being mutated by another administrative // operation. #[error("tenant has a state change in progress, try again later")] @@ -2656,8 +2520,6 @@ enum TenantSlotAcquireMode { Any, /// Return an error if trying to acquire a slot and it doesn't already exist MustExist, - /// Return an error if trying to acquire a slot and it already exists - MustNotExist, } fn tenant_map_acquire_slot( @@ -2711,27 +2573,6 @@ fn tenant_map_acquire_slot_impl( tracing::debug!("Occupied, failing for InProgress"); Err(TenantSlotError::InProgress) } - (slot, MustNotExist) => match slot { - TenantSlot::Attached(tenant) => { - tracing::debug!("Attached && MustNotExist, return AlreadyExists"); - Err(TenantSlotError::AlreadyExists( - *tenant_shard_id, - tenant.current_state(), - )) - } - _ => { - // FIXME: the AlreadyExists error assumes that we have a Tenant - // to get the state from - tracing::debug!("Occupied & MustNotExist, return AlreadyExists"); - Err(TenantSlotError::AlreadyExists( - *tenant_shard_id, - TenantState::Broken { - reason: "Present but not attached".to_string(), - backtrace: "".to_string(), - }, - )) - } - }, _ => { // Happy case: the slot was not in any state that violated our mode let (completion, barrier) = utils::completion::channel(); diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index b5d9a69d55..b5e40f5a46 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2700,12 +2700,6 @@ class NeonPageserver(PgProtocol, LogUtils): client = self.http_client(auth_token=auth_token) return client.tenant_create(tenant_id, conf, generation=generation) - def tenant_load(self, tenant_id: TenantId): - client = self.http_client() - return client.tenant_load( - tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id) - ) - def list_layers( self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId ) -> list[Path]: diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d5441bd694..ecc83a9546 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -340,17 +340,6 @@ class PageserverHttpClient(requests.Session, MetricsGetter): self.verbose_error(res) return res - def tenant_load(self, tenant_id: TenantId, generation=None): - body = None - if generation is not None: - body = {"generation": generation} - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load", json=body) - self.verbose_error(res) - - def tenant_ignore(self, tenant_id: TenantId): - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/ignore") - self.verbose_error(res) - def tenant_status( self, tenant_id: Union[TenantId, TenantShardId], activate: bool = False ) -> Dict[Any, Any]: diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 871351b2d5..4c49e6fb85 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -344,56 +344,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): pageserver_http.timeline_gc(tenant_id, timeline_id, 0) -# Creates and ignores a tenant, then detaches it: first, with no parameters (should fail), -# then with parameters to force ignored tenant detach (should not fail). -def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv): - env = neon_simple_env - client = env.pageserver.http_client() - - # create a new tenant - tenant_id, _ = env.neon_cli.create_tenant() - - env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS) - - # assert tenant exists on disk - assert env.pageserver.tenant_dir(tenant_id).exists() - - endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - # we rely upon autocommit after each statement - endpoint.safe_psql_many( - queries=[ - "CREATE TABLE t(key int primary key, value text)", - "INSERT INTO t SELECT generate_series(1,100000), 'payload'", - ] - ) - - # ignore tenant - client.tenant_ignore(tenant_id) - env.pageserver.allowed_errors.append(".*NotFound: tenant .*") - # ensure tenant couldn't be detached without the special flag for ignored tenant - log.info("detaching ignored tenant WITHOUT required flag") - with pytest.raises( - expected_exception=PageserverApiException, match=f"NotFound: tenant {tenant_id}" - ): - client.tenant_detach(tenant_id) - - log.info("tenant detached failed as expected") - - # ensure tenant is detached with ignore state - log.info("detaching ignored tenant with required flag") - client.tenant_detach(tenant_id, True) - log.info("ignored tenant detached without error") - - # check that nothing is left on disk for deleted tenant - assert not env.pageserver.tenant_dir(tenant_id).exists() - - # assert the tenant does not exists in the Pageserver - tenants_after_detach = [tenant["id"] for tenant in client.tenant_list()] - assert ( - tenant_id not in tenants_after_detach - ), f"Ignored and then detached tenant {tenant_id} should not be present in pageserver's memory" - - # Creates a tenant, and detaches it with extra paremeter that forces ignored tenant detach. # Tenant should be detached without issues. def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): @@ -500,153 +450,6 @@ def test_detach_while_attaching( cur.execute("SELECT COUNT(*) FROM foo") -# Tests that `ignore` and `get` operations' combination is able to remove and restore the tenant in pageserver's memory. -# * writes some data into tenant's timeline -# * ensures it's synced with the remote storage -# * `ignore` the tenant -# * verify that ignored tenant files are generally unchanged, only an ignored mark had appeared -# * verify the ignored tenant is gone from pageserver's memory -# * restart the pageserver and verify that ignored tenant is still not loaded -# * `load` the same tenant -# * ensure that it's status is `Active` and it's present in pageserver's memory with all timelines -def test_ignored_tenant_reattach(neon_env_builder: NeonEnvBuilder): - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3) - env = neon_env_builder.init_start() - pageserver_http = env.pageserver.http_client() - - ignored_tenant_id, _ = env.neon_cli.create_tenant() - tenant_dir = env.pageserver.tenant_dir(ignored_tenant_id) - tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] - tenants_before_ignore.sort() - timelines_before_ignore = [ - timeline["timeline_id"] - for timeline in pageserver_http.timeline_list(tenant_id=ignored_tenant_id) - ] - files_before_ignore = [tenant_path for tenant_path in tenant_dir.glob("**/*")] - - # ignore the tenant and veirfy it's not present in pageserver replies, with its files still on disk - pageserver_http.tenant_ignore(ignored_tenant_id) - - files_after_ignore_with_retain = [tenant_path for tenant_path in tenant_dir.glob("**/*")] - new_files = set(files_after_ignore_with_retain) - set(files_before_ignore) - disappeared_files = set(files_before_ignore) - set(files_after_ignore_with_retain) - assert ( - len(disappeared_files) == 0 - ), f"Tenant ignore should not remove files from disk, missing: {disappeared_files}" - assert ( - len(new_files) == 1 - ), f"Only tenant ignore file should appear on disk but got: {new_files}" - - tenants_after_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] - assert ignored_tenant_id not in tenants_after_ignore, "Ignored tenant should be missing" - assert len(tenants_after_ignore) + 1 == len( - tenants_before_ignore - ), "Only ignored tenant should be missing" - - # restart the pageserver to ensure we don't load the ignore timeline - env.pageserver.stop() - env.pageserver.start() - tenants_after_restart = [tenant["id"] for tenant in pageserver_http.tenant_list()] - tenants_after_restart.sort() - assert ( - tenants_after_restart == tenants_after_ignore - ), "Ignored tenant should not be reloaded after pageserver restart" - - # now, load it from the local files and expect it works - env.pageserver.tenant_load(tenant_id=ignored_tenant_id) - wait_until_tenant_state(pageserver_http, ignored_tenant_id, "Active", 5) - - tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()] - tenants_after_attach.sort() - assert tenants_after_attach == tenants_before_ignore, "Should have all tenants back" - - timelines_after_ignore = [ - timeline["timeline_id"] - for timeline in pageserver_http.timeline_list(tenant_id=ignored_tenant_id) - ] - assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back" - - -# Tests that it's possible to `load` tenants with missing layers and get them restored: -# * writes some data into tenant's timeline -# * ensures it's synced with the remote storage -# * `ignore` the tenant -# * removes all timeline's local layers -# * `load` the same tenant -# * ensure that it's status is `Active` -# * check that timeline data is restored -def test_ignored_tenant_download_missing_layers(neon_env_builder: NeonEnvBuilder): - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start() - pageserver_http = env.pageserver.http_client() - endpoint = env.endpoints.create_start("main") - - tenant_id = env.initial_tenant - timeline_id = env.initial_timeline - - env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS) - - data_id = 1 - data_secret = "very secret secret" - insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) - - tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] - tenants_before_ignore.sort() - timelines_before_ignore = [ - timeline["timeline_id"] for timeline in pageserver_http.timeline_list(tenant_id=tenant_id) - ] - - # ignore the tenant and remove its layers - pageserver_http.tenant_ignore(tenant_id) - timeline_dir = env.pageserver.timeline_dir(tenant_id, timeline_id) - layers_removed = False - for dir_entry in timeline_dir.iterdir(): - if dir_entry.name.startswith("00000"): - # Looks like a layer file. Remove it - dir_entry.unlink() - layers_removed = True - assert layers_removed, f"Found no layers for tenant {timeline_dir}" - - # now, load it from the local files and expect it to work due to remote storage restoration - env.pageserver.tenant_load(tenant_id=tenant_id) - wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5) - - tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()] - tenants_after_attach.sort() - assert tenants_after_attach == tenants_before_ignore, "Should have all tenants back" - - timelines_after_ignore = [ - timeline["timeline_id"] for timeline in pageserver_http.timeline_list(tenant_id=tenant_id) - ] - assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back" - - endpoint.stop() - endpoint.start() - ensure_test_data(data_id, data_secret, endpoint) - - -# Tests that attach is never working on a tenant, ignored or not, as long as it's not absent locally -# Similarly, tests that it's not possible to schedule a `load` for tenat that's not ignored. -def test_load_negatives(neon_env_builder: NeonEnvBuilder): - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start() - pageserver_http = env.pageserver.http_client() - env.endpoints.create_start("main") - - tenant_id = env.initial_tenant - - env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS) - - env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") - with pytest.raises( - expected_exception=PageserverApiException, - match=f"tenant {tenant_id} already exists, state: Active", - ): - env.pageserver.tenant_load(tenant_id) - - pageserver_http.tenant_ignore(tenant_id) - - def test_detach_while_activating( neon_env_builder: NeonEnvBuilder, ): @@ -770,7 +573,7 @@ def test_metrics_while_ignoring_broken_tenant_and_reloading( wait_until(10, 0.5, found_broken) - client.tenant_ignore(env.initial_tenant) + client.tenant_detach(env.initial_tenant) def found_cleaned_up(): m = client.get_metrics() @@ -782,7 +585,7 @@ def test_metrics_while_ignoring_broken_tenant_and_reloading( wait_until(10, 0.5, found_cleaned_up) - env.pageserver.tenant_load(env.initial_tenant) + env.pageserver.tenant_attach(env.initial_tenant) def found_active(): m = client.get_metrics()