use the tenant status endpoint to check whether timelines were downloaded

Dmitry Rodionov
2022-06-30 20:36:32 +03:00
committed by Dmitry Rodionov
parent d9d4ef12c3
commit 168214e0b6
8 changed files with 171 additions and 37 deletions
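
The endpoint added here, GET /v1/tenant/{tenant_id}, can be polled from any HTTP client. Below is a minimal sketch, assuming a pageserver HTTP API on a hypothetical http://localhost:9898 and a placeholder hex tenant id; the field names (`id`, `state`, `has_in_progress_downloads`) come from the `TenantInfo` schema introduced in this commit.

```python
import time

import requests


def wait_for_tenant_downloads(base_url: str, tenant_id: str,
                              attempts: int = 10, interval: float = 1.0) -> dict:
    """Poll GET /v1/tenant/{tenant_id} until has_in_progress_downloads is False."""
    last = None
    for _ in range(attempts):
        res = requests.get(f"{base_url}/v1/tenant/{tenant_id}")
        res.raise_for_status()
        last = res.json()
        if last.get("has_in_progress_downloads") is False:
            return last
        time.sleep(interval)
    raise TimeoutError(f"tenant {tenant_id} still has downloads in progress: {last}")


# Hypothetical usage; the port and tenant id below are placeholders.
# info = wait_for_tenant_downloads("http://localhost:9898", "de200bd42b49cc1814412c7e592dd6e9")
```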

View File

@@ -537,7 +537,13 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
match tenant_match.subcommand() {
Some(("list", _)) => {
for t in pageserver.tenant_list()? {
println!("{} {}", t.id, t.state);
println!(
"{} {}",
t.id,
t.state
.map(|s| s.to_string())
.unwrap_or_else(|| String::from(""))
);
}
}
Some(("create", create_match)) => {
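
The `tenant list` output now tolerates tenants whose state is unknown, falling back to an empty string. A rough Python equivalent of that formatting against the HTTP list endpoint (the base URL is a placeholder; the endpoint and fields come from the spec below):

```python
import requests


def print_tenants(base_url: str) -> None:
    # GET /v1/tenant returns a list of TenantInfo objects; `state` may be null.
    for tenant in requests.get(f"{base_url}/v1/tenant").json():
        print(f"{tenant['id']} {tenant.get('state') or ''}")
```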

View File

@@ -22,6 +22,49 @@ paths:
properties:
id:
type: integer
/v1/tenant/{tenant_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
get:
description: Get tenant status
responses:
"200":
description: Currently returns a flag indicating whether the tenant has in-progress timeline downloads
content:
application/json:
schema:
$ref: "#/components/schemas/TenantInfo"
"400":
description: Error when no tenant id is found in the path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline:
parameters:
- name: tenant_id
@@ -521,12 +564,13 @@ components:
type: object
required:
- id
- state
properties:
id:
type: string
state:
type: string
has_in_progress_downloads:
type: boolean
TenantCreateInfo:
type: object
properties:
@@ -621,6 +665,7 @@ components:
type: integer
current_logical_size_non_incremental:
type: integer
WalReceiverEntry:
type: object
required:
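
A client-side sanity check matching the `TenantInfo` schema above could look like the sketch below; it is only an illustration and assumes nothing beyond the documented fields (`id` required, `state` and `has_in_progress_downloads` optional).

```python
def validate_tenant_info(payload: dict) -> None:
    # `id` is required and serialized as a hex string.
    assert isinstance(payload["id"], str)
    # `state` is no longer required, so it may be missing or null.
    if payload.get("state") is not None:
        assert isinstance(payload["state"], str)
    # The download flag is a boolean when the tenant is known to the remote index.
    if payload.get("has_in_progress_downloads") is not None:
        assert isinstance(payload["has_in_progress_downloads"], bool)
```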

View File

@@ -14,6 +14,7 @@ use crate::repository::Repository;
use crate::storage_sync;
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
use crate::tenant_config::TenantConfOpt;
use crate::tenant_mgr::TenantInfo;
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
@@ -403,9 +404,13 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
// check for management permission
check_permission(&request, None)?;
let state = get_state(&request);
// clone to avoid holding the lock while awaiting the blocking task
let remote_index = state.remote_index.read().await.clone();
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_list").entered();
crate::tenant_mgr::list_tenants()
crate::tenant_mgr::list_tenants(&remote_index)
})
.await
.map_err(ApiError::from_err)?;
@@ -413,6 +418,34 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
json_response(StatusCode::OK, response_data)
}
async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
// if the tenant is still being downloaded, it can be absent from the global tenant map
let tenant_state = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant_state(tenant_id))
.await
.map_err(ApiError::from_err)?;
let state = get_state(&request);
let remote_index = &state.remote_index;
let index_accessor = remote_index.read().await;
let has_in_progress_downloads = index_accessor
.tenant_entry(&tenant_id)
.ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
.has_in_progress_downloads();
json_response(
StatusCode::OK,
TenantInfo {
id: tenant_id,
state: tenant_state,
has_in_progress_downloads: Some(has_in_progress_downloads),
},
)
}
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
// check for management permission
check_permission(&request, None)?;
@@ -558,6 +591,7 @@ pub fn make_router(
.get("/v1/status", status_handler)
.get("/v1/tenant", tenant_list_handler)
.post("/v1/tenant", tenant_create_handler)
.get("/v1/tenant/:tenant_id", tenant_status)
.put("/v1/tenant/config", tenant_config_handler)
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
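
When the tenant has no entry in the remote index, the handler answers with a not-found error instead of a `TenantInfo` body. A hedged client-side sketch that treats any non-200 answer as "status not available yet" (the exact error code for the missing-index case is not shown in this diff, so the sketch does not depend on it):

```python
from typing import Optional

import requests


def downloads_in_progress(base_url: str, tenant_id: str) -> Optional[bool]:
    """Return the has_in_progress_downloads flag, or None when the status is unavailable."""
    res = requests.get(f"{base_url}/v1/tenant/{tenant_id}")
    if res.status_code != 200:
        # e.g. the tenant is not yet present in the remote index
        return None
    return res.json().get("has_in_progress_downloads")
```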

View File

@@ -5,7 +5,7 @@ use crate::config::PageServerConf;
use crate::layered_repository::{load_metadata, LayeredRepository};
use crate::pgdatadir_mapping::DatadirTimeline;
use crate::repository::Repository;
use crate::storage_sync::index::RemoteIndex;
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
use crate::tenant_config::TenantConfOpt;
use crate::thread_mgr::ThreadKind;
@@ -509,15 +509,27 @@ fn load_local_timeline(
pub struct TenantInfo {
#[serde_as(as = "DisplayFromStr")]
pub id: ZTenantId,
pub state: TenantState,
pub state: Option<TenantState>,
pub has_in_progress_downloads: Option<bool>,
}
pub fn list_tenants() -> Vec<TenantInfo> {
pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
tenants_state::read_tenants()
.iter()
.map(|(id, tenant)| TenantInfo {
id: *id,
state: tenant.state,
.map(|(id, tenant)| {
let has_in_progress_downloads = remote_index
.tenant_entry(id)
.map(|entry| entry.has_in_progress_downloads());
if has_in_progress_downloads.is_none() {
error!("timeline is not found in remote index while it is present in the tenants registry")
}
TenantInfo {
id: *id,
state: Some(tenant.state),
has_in_progress_downloads,
}
})
.collect()
}
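
Because the flag is looked up in the remote index, a tenant that exists in the registry but not in the index is listed with a null `has_in_progress_downloads`. A small sketch of filtering a `/v1/tenant` listing defensively (the helper name is made up for illustration):

```python
from typing import Any, Dict, List


def tenants_still_downloading(tenants: List[Dict[str, Any]]) -> List[str]:
    # A null flag means the tenant was missing from the remote index; treat it
    # as "unknown" rather than "still downloading".
    return [t["id"] for t in tenants if t.get("has_in_progress_downloads") is True]
```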

View File

@@ -6,7 +6,7 @@ from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, lsn_to_hex
import pytest
@@ -114,7 +114,7 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
log.info("waiting for timeline redownload")
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_local(client, UUID(tenant_id), UUID(timeline_id)))
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
assert detail['local'] is not None
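
The tenant-level flag added by this commit could also gate the per-timeline check; below is a sketch of such a combined wait using the fixtures referenced in this test (this is not a change the commit makes to this particular test).

```python
from uuid import UUID

from fixtures.neon_fixtures import (NeonPageserverHttpClient,
                                    assert_no_in_progress_downloads_for_tenant,
                                    assert_timeline_local, wait_until)


def wait_for_redownload(client: NeonPageserverHttpClient, tenant_id: str, timeline_id: str):
    # First wait until the tenant reports no in-progress downloads...
    wait_until(number_of_iterations=10,
               interval=1,
               func=lambda: assert_no_in_progress_downloads_for_tenant(client, UUID(tenant_id)))
    # ...then confirm the timeline is available locally again.
    return assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id))
```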

View File

@@ -1,15 +1,30 @@
from contextlib import closing, contextmanager
import os
import pathlib
import signal
import subprocess
import threading
from uuid import UUID
from fixtures.log_helper import log
from contextlib import closing, contextmanager
from typing import Any, Dict, Optional, Tuple
import signal
import pytest
from uuid import UUID
from fixtures.neon_fixtures import NeonEnv, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir, base_dir
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Etcd,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
PortDistributor,
Postgres,
assert_no_in_progress_downloads_for_tenant,
assert_timeline_local,
base_dir,
neon_binpath,
pg_distrib_dir,
wait_for_last_record_lsn,
wait_for_upload,
wait_until,
)
from fixtures.utils import lsn_from_hex, subprocess_capture
@@ -144,10 +159,7 @@ def check_timeline_attached(
old_current_lsn: int,
):
# the new pageserver should be in sync (modulo WAL tail or vacuum activity) with the old one because there have been no new writes since the checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http_client, tenant_id, timeline_id))
new_timeline_detail = assert_timeline_local(new_pageserver_http_client, tenant_id, timeline_id)
# when load is active these checks can break because LSNs are not static
# so let's check with some margin
@@ -250,10 +262,10 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# wait until pageserver receives that data
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_main, current_lsn_main)
timeline_detail_main = assert_local(pageserver_http, tenant_id, timeline_id_main)
timeline_detail_main = assert_timeline_local(pageserver_http, tenant_id, timeline_id_main)
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second)
timeline_detail_second = assert_local(pageserver_http, tenant_id, timeline_id_second)
timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second)
if with_load == 'with_load':
# create load table
@@ -337,6 +349,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# attach the tenant to the new pageserver
new_pageserver_http.tenant_attach(tenant_id)
# check that the tenant status reports a download in progress
tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id)
assert tenant_status.get('has_in_progress_downloads'), tenant_status
# wait until tenant is downloaded
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_no_in_progress_downloads_for_tenant(
new_pageserver_http, tenant_id))
check_timeline_attached(
new_pageserver_http,
tenant_id,
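
The attach/poll sequence above could be factored into a small helper if other tests need it; a sketch under the same fixtures (the helper itself is hypothetical):

```python
from uuid import UUID

from fixtures.neon_fixtures import (NeonPageserverHttpClient,
                                    assert_no_in_progress_downloads_for_tenant, wait_until)


def attach_and_wait(pageserver_http: NeonPageserverHttpClient, tenant_id: UUID) -> None:
    # Kick off the attach; timeline downloads start in the background.
    pageserver_http.tenant_attach(tenant_id)
    # Immediately after attach the status should report downloads in progress.
    assert pageserver_http.tenant_status(tenant_id=tenant_id).get('has_in_progress_downloads')
    # Poll until the background downloads finish.
    wait_until(number_of_iterations=10,
               interval=1,
               func=lambda: assert_no_in_progress_downloads_for_tenant(pageserver_http, tenant_id))
```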

View File

@@ -1,7 +1,7 @@
from contextlib import closing
import psycopg2.extras
import psycopg2.errors
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local
from fixtures.log_helper import log
import time
@@ -11,7 +11,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
@@ -29,13 +29,13 @@ def test_timeline_size(neon_simple_env: NeonEnv):
FROM generate_series(1, 10) g
""")
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
cur.execute("TRUNCATE foo")
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -46,7 +46,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
@@ -57,7 +57,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
with conn.cursor() as cur:
cur.execute("SHOW neon.timeline_id")
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -73,14 +73,14 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
FROM generate_series(1, 10) g
""")
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
cur.execute('DROP DATABASE foodb')
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -117,7 +117,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
client = env.pageserver.http_client()
res = assert_local(client, env.initial_tenant, new_timeline_id)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert res['local']["current_logical_size"] == res['local'][
"current_logical_size_non_incremental"]

View File

@@ -29,7 +29,7 @@ from dataclasses import dataclass
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import make_dsn, parse_dsn
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, cast, Union, Tuple
from typing_extensions import Literal
import requests
@@ -824,7 +824,14 @@ class NeonPageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/detach")
self.verbose_error(res)
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
def tenant_status(self, tenant_id: uuid.UUID) -> Dict[Any, Any]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[str, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
@@ -2183,14 +2190,22 @@ def wait_until(number_of_iterations: int, interval: float, func):
raise Exception("timed out while waiting for %s" % func) from last_exception
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail
return timeline_detail
def assert_no_in_progress_downloads_for_tenant(
pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
):
tenant_status = pageserver_http_client.tenant_status(tenant)
assert tenant_status['has_in_progress_downloads'] is False, tenant_status
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int: