diff --git a/neon_local/src/main.rs b/neon_local/src/main.rs index 35e2d9c9e2..b29cc6978c 100644 --- a/neon_local/src/main.rs +++ b/neon_local/src/main.rs @@ -537,7 +537,13 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an match tenant_match.subcommand() { Some(("list", _)) => { for t in pageserver.tenant_list()? { - println!("{} {}", t.id, t.state); + println!( + "{} {}", + t.id, + t.state + .map(|s| s.to_string()) + .unwrap_or_else(|| String::from("")) + ); } } Some(("create", create_match)) => { diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 6cfedc0931..408d066fb4 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -22,6 +22,49 @@ paths: properties: id: type: integer + + /v1/tenant/{tenant_id}: + parameters: + - name: tenant_id + in: path + required: true + schema: + type: string + format: hex + get: + description: Get tenant status + responses: + "200": + description: Currently returns the flag whether the tenant has inprogress timeline downloads + content: + application/json: + schema: + $ref: "#/components/schemas/TenantInfo" + "400": + description: Error when no tenant id found in path or no timeline id + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "401": + description: Unauthorized Error + content: + application/json: + schema: + $ref: "#/components/schemas/UnauthorizedError" + "403": + description: Forbidden Error + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenError" + "500": + description: Generic operation error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/timeline: parameters: - name: tenant_id @@ -521,12 +564,13 @@ components: type: object required: - id - - state properties: id: type: string state: type: string + has_in_progress_downloads: + type: bool TenantCreateInfo: type: object properties: @@ -621,6 +665,7 @@ components: type: integer current_logical_size_non_incremental: type: integer + WalReceiverEntry: type: object required: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 64aaa68e44..2cf5e7a828 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -14,6 +14,7 @@ use crate::repository::Repository; use crate::storage_sync; use crate::storage_sync::index::{RemoteIndex, RemoteTimeline}; use crate::tenant_config::TenantConfOpt; +use crate::tenant_mgr::TenantInfo; use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo}; use crate::{config::PageServerConf, tenant_mgr, timelines}; use utils::{ @@ -403,9 +404,13 @@ async fn tenant_list_handler(request: Request) -> Result, A // check for management permission check_permission(&request, None)?; + let state = get_state(&request); + // clone to avoid holding the lock while awaiting for blocking task + let remote_index = state.remote_index.read().await.clone(); + let response_data = tokio::task::spawn_blocking(move || { let _enter = info_span!("tenant_list").entered(); - crate::tenant_mgr::list_tenants() + crate::tenant_mgr::list_tenants(&remote_index) }) .await .map_err(ApiError::from_err)?; @@ -413,6 +418,34 @@ async fn tenant_list_handler(request: Request) -> Result, A json_response(StatusCode::OK, response_data) } +async fn tenant_status(request: Request) -> Result, ApiError> { + let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; + check_permission(&request, Some(tenant_id))?; + + // if tenant is in progress of downloading it can be absent in global tenant map + let tenant_state = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant_state(tenant_id)) + .await + .map_err(ApiError::from_err)?; + + let state = get_state(&request); + let remote_index = &state.remote_index; + + let index_accessor = remote_index.read().await; + let has_in_progress_downloads = index_accessor + .tenant_entry(&tenant_id) + .ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))? + .has_in_progress_downloads(); + + json_response( + StatusCode::OK, + TenantInfo { + id: tenant_id, + state: tenant_state, + has_in_progress_downloads: Some(has_in_progress_downloads), + }, + ) +} + async fn tenant_create_handler(mut request: Request) -> Result, ApiError> { // check for management permission check_permission(&request, None)?; @@ -558,6 +591,7 @@ pub fn make_router( .get("/v1/status", status_handler) .get("/v1/tenant", tenant_list_handler) .post("/v1/tenant", tenant_create_handler) + .get("/v1/tenant/:tenant_id", tenant_status) .put("/v1/tenant/config", tenant_config_handler) .get("/v1/tenant/:tenant_id/timeline", timeline_list_handler) .post("/v1/tenant/:tenant_id/timeline", timeline_create_handler) diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index afdca9abbd..b3b8d2ce50 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -5,7 +5,7 @@ use crate::config::PageServerConf; use crate::layered_repository::{load_metadata, LayeredRepository}; use crate::pgdatadir_mapping::DatadirTimeline; use crate::repository::Repository; -use crate::storage_sync::index::RemoteIndex; +use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex}; use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData}; use crate::tenant_config::TenantConfOpt; use crate::thread_mgr::ThreadKind; @@ -509,15 +509,27 @@ fn load_local_timeline( pub struct TenantInfo { #[serde_as(as = "DisplayFromStr")] pub id: ZTenantId, - pub state: TenantState, + pub state: Option, + pub has_in_progress_downloads: Option, } -pub fn list_tenants() -> Vec { +pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec { tenants_state::read_tenants() .iter() - .map(|(id, tenant)| TenantInfo { - id: *id, - state: tenant.state, + .map(|(id, tenant)| { + let has_in_progress_downloads = remote_index + .tenant_entry(id) + .map(|entry| entry.has_in_progress_downloads()); + + if has_in_progress_downloads.is_none() { + error!("timeline is not found in remote index while it is present in the tenants registry") + } + + TenantInfo { + id: *id, + state: Some(tenant.state), + has_in_progress_downloads, + } }) .collect() } diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/batch_others/test_remote_storage.py index ac39c6290b..163912690c 100644 --- a/test_runner/batch_others/test_remote_storage.py +++ b/test_runner/batch_others/test_remote_storage.py @@ -6,7 +6,7 @@ from contextlib import closing from pathlib import Path import time from uuid import UUID -from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload +from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload from fixtures.log_helper import log from fixtures.utils import lsn_from_hex, lsn_to_hex import pytest @@ -114,7 +114,7 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto log.info("waiting for timeline redownload") wait_until(number_of_iterations=10, interval=1, - func=lambda: assert_local(client, UUID(tenant_id), UUID(timeline_id))) + func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id))) detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id)) assert detail['local'] is not None diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/batch_others/test_tenant_relocation.py index 0560469ca1..f6f2d8ca9d 100644 --- a/test_runner/batch_others/test_tenant_relocation.py +++ b/test_runner/batch_others/test_tenant_relocation.py @@ -1,15 +1,30 @@ -from contextlib import closing, contextmanager import os import pathlib +import signal import subprocess import threading -from uuid import UUID -from fixtures.log_helper import log +from contextlib import closing, contextmanager from typing import Any, Dict, Optional, Tuple -import signal -import pytest +from uuid import UUID -from fixtures.neon_fixtures import NeonEnv, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir, base_dir +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + Etcd, + NeonEnv, + NeonEnvBuilder, + NeonPageserverHttpClient, + PortDistributor, + Postgres, + assert_no_in_progress_downloads_for_tenant, + assert_timeline_local, + base_dir, + neon_binpath, + pg_distrib_dir, + wait_for_last_record_lsn, + wait_for_upload, + wait_until, +) from fixtures.utils import lsn_from_hex, subprocess_capture @@ -144,10 +159,7 @@ def check_timeline_attached( old_current_lsn: int, ): # new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint - new_timeline_detail = wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_local(new_pageserver_http_client, tenant_id, timeline_id)) + new_timeline_detail = assert_timeline_local(new_pageserver_http_client, tenant_id, timeline_id) # when load is active these checks can break because lsns are not static # so lets check with some margin @@ -250,10 +262,10 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # wait until pageserver receives that data wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_main, current_lsn_main) - timeline_detail_main = assert_local(pageserver_http, tenant_id, timeline_id_main) + timeline_detail_main = assert_timeline_local(pageserver_http, tenant_id, timeline_id_main) wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second) - timeline_detail_second = assert_local(pageserver_http, tenant_id, timeline_id_second) + timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second) if with_load == 'with_load': # create load table @@ -337,6 +349,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # call to attach timeline to new pageserver new_pageserver_http.tenant_attach(tenant_id) + # check that it shows that download is in progress + tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id) + assert tenant_status.get('has_in_progress_downloads'), tenant_status + + # wait until tenant is downloaded + wait_until(number_of_iterations=10, + interval=1, + func=lambda: assert_no_in_progress_downloads_for_tenant( + new_pageserver_http, tenant_id)) + check_timeline_attached( new_pageserver_http, tenant_id, diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index 5734091757..7b7b16bcbf 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -1,7 +1,7 @@ from contextlib import closing import psycopg2.extras import psycopg2.errors -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local from fixtures.log_helper import log import time @@ -11,7 +11,7 @@ def test_timeline_size(neon_simple_env: NeonEnv): new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') client = env.pageserver.http_client() - timeline_details = assert_local(client, env.initial_tenant, new_timeline_id) + timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ 'current_logical_size_non_incremental'] @@ -29,13 +29,13 @@ def test_timeline_size(neon_simple_env: NeonEnv): FROM generate_series(1, 10) g """) - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) local_details = res['local'] assert local_details["current_logical_size"] == local_details[ "current_logical_size_non_incremental"] cur.execute("TRUNCATE foo") - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) local_details = res['local'] assert local_details["current_logical_size"] == local_details[ "current_logical_size_non_incremental"] @@ -46,7 +46,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') client = env.pageserver.http_client() - timeline_details = assert_local(client, env.initial_tenant, new_timeline_id) + timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ 'current_logical_size_non_incremental'] @@ -57,7 +57,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): with conn.cursor() as cur: cur.execute("SHOW neon.timeline_id") - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) local_details = res['local'] assert local_details["current_logical_size"] == local_details[ "current_logical_size_non_incremental"] @@ -73,14 +73,14 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): FROM generate_series(1, 10) g """) - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) local_details = res['local'] assert local_details["current_logical_size"] == local_details[ "current_logical_size_non_incremental"] cur.execute('DROP DATABASE foodb') - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) local_details = res['local'] assert local_details["current_logical_size"] == local_details[ "current_logical_size_non_incremental"] @@ -117,7 +117,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota') client = env.pageserver.http_client() - res = assert_local(client, env.initial_tenant, new_timeline_id) + res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) assert res['local']["current_logical_size"] == res['local'][ "current_logical_size_non_incremental"] diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index aaccb00399..24b234ac25 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -29,7 +29,7 @@ from dataclasses import dataclass # Type-related stuff from psycopg2.extensions import connection as PgConnection from psycopg2.extensions import make_dsn, parse_dsn -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple +from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, cast, Union, Tuple from typing_extensions import Literal import requests @@ -824,7 +824,14 @@ class NeonPageserverHttpClient(requests.Session): res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/detach") self.verbose_error(res) - def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]: + def tenant_status(self, tenant_id: uuid.UUID) -> Dict[Any, Any]: + res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}") + self.verbose_error(res) + res_json = res.json() + assert isinstance(res_json, dict) + return res_json + + def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[str, Any]]: res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline") self.verbose_error(res) res_json = res.json() @@ -2183,14 +2190,22 @@ def wait_until(number_of_iterations: int, interval: float, func): raise Exception("timed out while waiting for %s" % func) from last_exception -def assert_local(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID): +def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID): timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline) assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail return timeline_detail +def assert_no_in_progress_downloads_for_tenant( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, +): + tenant_status = pageserver_http_client.tenant_status(tenant) + assert tenant_status['has_in_progress_downloads'] is False, tenant_status + + def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID) -> int: