mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 11:00:38 +00:00
Merge branch 'main' into problame/2024-02-walredo-work/prespawn/split-code
This commit is contained in:
@@ -178,6 +178,64 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ServiceUnavailableError"
|
||||
|
||||
/v1/tenant/{tenant_id}/time_travel_remote_storage:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: travel_to
|
||||
in: query
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: date-time
|
||||
- name: done_if_after
|
||||
in: query
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: date-time
|
||||
put:
|
||||
description: Time travel the tenant's remote storage
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: string
|
||||
"400":
|
||||
description: Error when no tenant id found in path or invalid timestamp
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"503":
|
||||
description: Temporarily unavailable, please retry.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ServiceUnavailableError"
|
||||
|
||||
/v1/tenant/{tenant_id}/timeline:
|
||||
parameters:
|
||||
|
||||
@@ -26,6 +26,7 @@ use pageserver_api::models::{
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use remote_storage::TimeTravelError;
|
||||
use tenant_size_model::{SizeResult, StorageModel};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
@@ -47,6 +48,7 @@ use crate::tenant::mgr::{
|
||||
TenantSlotError, TenantSlotUpsertError, TenantStateError,
|
||||
};
|
||||
use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
|
||||
use crate::tenant::remote_timeline_client;
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
@@ -77,8 +79,14 @@ use utils::{
|
||||
// For APIs that require an Active tenant, how long should we block waiting for that state?
|
||||
// This is not functionally necessary (clients will retry), but avoids generating a lot of
|
||||
// failed API calls while tenants are activating.
|
||||
#[cfg(not(feature = "testing"))]
|
||||
const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);
|
||||
|
||||
// Tests run on slow/oversubscribed nodes, and may need to wait much longer for tenants to
|
||||
// finish attaching, if calls to remote storage are slow.
|
||||
#[cfg(feature = "testing")]
|
||||
const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
|
||||
|
||||
pub struct State {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
@@ -1424,6 +1432,79 @@ async fn list_location_config_handler(
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
|
||||
// Do a time travel recovery on the given tenant/tenant shard. Tenant needs to be detached
|
||||
// (from all pageservers) as it invalidates consistency assumptions.
|
||||
async fn tenant_time_travel_remote_storage_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let timestamp_raw = must_get_query_param(&request, "travel_to")?;
|
||||
let timestamp = humantime::parse_rfc3339(×tamp_raw)
|
||||
.with_context(|| format!("Invalid time for travel_to: {timestamp_raw:?}"))
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
|
||||
let done_if_after_raw = must_get_query_param(&request, "done_if_after")?;
|
||||
let done_if_after = humantime::parse_rfc3339(&done_if_after_raw)
|
||||
.with_context(|| format!("Invalid time for done_if_after: {done_if_after_raw:?}"))
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
|
||||
// This is just a sanity check to fend off naive wrong usages of the API:
|
||||
// the tenant needs to be detached *everywhere*
|
||||
let state = get_state(&request);
|
||||
let we_manage_tenant = state.tenant_manager.manages_tenant_shard(tenant_shard_id);
|
||||
if we_manage_tenant {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"Tenant {tenant_shard_id} is already attached at this pageserver"
|
||||
)));
|
||||
}
|
||||
|
||||
let Some(storage) = state.remote_storage.as_ref() else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"remote storage not configured, cannot run time travel"
|
||||
)));
|
||||
};
|
||||
|
||||
if timestamp > done_if_after {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"The done_if_after timestamp comes before the timestamp to recover to"
|
||||
)));
|
||||
}
|
||||
|
||||
tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}");
|
||||
|
||||
remote_timeline_client::upload::time_travel_recover_tenant(
|
||||
storage,
|
||||
&tenant_shard_id,
|
||||
timestamp,
|
||||
done_if_after,
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
TimeTravelError::BadInput(e) => {
|
||||
warn!("bad input error: {e}");
|
||||
ApiError::BadRequest(anyhow!("bad input error"))
|
||||
}
|
||||
TimeTravelError::Unimplemented => {
|
||||
ApiError::BadRequest(anyhow!("unimplemented for the configured remote storage"))
|
||||
}
|
||||
TimeTravelError::Cancelled => ApiError::InternalServerError(anyhow!("cancelled")),
|
||||
TimeTravelError::TooManyVersions => {
|
||||
ApiError::InternalServerError(anyhow!("too many versions in remote storage"))
|
||||
}
|
||||
TimeTravelError::Other(e) => {
|
||||
warn!("internal error: {e}");
|
||||
ApiError::InternalServerError(anyhow!("internal error"))
|
||||
}
|
||||
})?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
/// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`].
|
||||
async fn handle_tenant_break(
|
||||
r: Request<Body>,
|
||||
@@ -1969,6 +2050,10 @@ pub fn make_router(
|
||||
.get("/v1/location_config", |r| {
|
||||
api_handler(r, list_location_config_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/time_travel_remote_storage",
|
||||
|r| api_handler(r, tenant_time_travel_remote_storage_handler),
|
||||
)
|
||||
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
|
||||
api_handler(r, timeline_list_handler)
|
||||
})
|
||||
|
||||
@@ -898,6 +898,17 @@ impl TenantManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the `TenantManager` is responsible for the tenant shard
|
||||
pub(crate) fn manages_tenant_shard(&self, tenant_shard_id: TenantShardId) -> bool {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
|
||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)
|
||||
.ok()
|
||||
.flatten();
|
||||
|
||||
peek_slot.is_some()
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
|
||||
pub(crate) async fn upsert_location(
|
||||
&self,
|
||||
|
||||
@@ -1719,6 +1719,11 @@ pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
fn remote_timelines_path_unsharded(tenant_id: &TenantId) -> RemotePath {
|
||||
let path = format!("tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}");
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
pub fn remote_timeline_path(
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
|
||||
@@ -5,9 +5,11 @@ use camino::Utf8Path;
|
||||
use fail::fail_point;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::io::{ErrorKind, SeekFrom};
|
||||
use std::time::SystemTime;
|
||||
use tokio::fs::{self, File};
|
||||
use tokio::io::AsyncSeekExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::backoff;
|
||||
|
||||
use super::Generation;
|
||||
use crate::{
|
||||
@@ -17,7 +19,7 @@ use crate::{
|
||||
remote_initdb_preserved_archive_path, remote_path, upload_cancellable,
|
||||
},
|
||||
};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use remote_storage::{GenericRemoteStorage, TimeTravelError};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use super::index::LayerFileMetadata;
|
||||
@@ -157,3 +159,45 @@ pub(crate) async fn preserve_initdb_archive(
|
||||
.await
|
||||
.with_context(|| format!("backing up initdb archive for '{tenant_id} / {timeline_id}'"))
|
||||
}
|
||||
|
||||
pub(crate) async fn time_travel_recover_tenant(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timestamp: SystemTime,
|
||||
done_if_after: SystemTime,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), TimeTravelError> {
|
||||
let warn_after = 3;
|
||||
let max_attempts = 10;
|
||||
let mut prefixes = Vec::with_capacity(2);
|
||||
if tenant_shard_id.is_zero() {
|
||||
// Also recover the unsharded prefix for a shard of zero:
|
||||
// - if the tenant is totally unsharded, the unsharded prefix contains all the data
|
||||
// - if the tenant is sharded, we still want to recover the initdb data, but we only
|
||||
// want to do it once, so let's do it on the 0 shard
|
||||
let timelines_path_unsharded =
|
||||
super::remote_timelines_path_unsharded(&tenant_shard_id.tenant_id);
|
||||
prefixes.push(timelines_path_unsharded);
|
||||
}
|
||||
if !tenant_shard_id.is_unsharded() {
|
||||
// If the tenant is sharded, we need to recover the sharded prefix
|
||||
let timelines_path = super::remote_timelines_path(tenant_shard_id);
|
||||
prefixes.push(timelines_path);
|
||||
}
|
||||
for prefix in &prefixes {
|
||||
backoff::retry(
|
||||
|| async {
|
||||
storage
|
||||
.time_travel_recover(Some(prefix), timestamp, done_if_after, cancel.clone())
|
||||
.await
|
||||
},
|
||||
|e| !matches!(e, TimeTravelError::Other(_)),
|
||||
warn_after,
|
||||
max_attempts,
|
||||
"time travel recovery of tenant prefix",
|
||||
backoff::Cancel::new(cancel.clone(), || TimeTravelError::Cancelled),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user