Merge pull request #6121 from neondatabase/releases/2023-12-13

Release 2023-12-13
This commit is contained in:
Vadim Kharitonov
2023-12-13 12:39:43 +01:00
committed by GitHub
67 changed files with 1561 additions and 808 deletions

77
Cargo.lock generated
View File

@@ -184,7 +184,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
dependencies = [
"concurrent-queue",
"event-listener",
"event-listener 2.5.3",
"futures-core",
]
@@ -205,11 +205,13 @@ dependencies = [
[[package]]
name = "async-lock"
version = "2.8.0"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
dependencies = [
"event-listener",
"event-listener 4.0.0",
"event-listener-strategy",
"pin-project-lite",
]
[[package]]
@@ -692,9 +694,9 @@ dependencies = [
[[package]]
name = "azure_core"
version = "0.16.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e29286b9edfdd6f2c7e9d970bb5b015df8621258acab9ecfcea09b2d7692467"
checksum = "a6218987c374650fdad0b476bfc675729762c28dfb35f58608a38a2b1ea337dd"
dependencies = [
"async-trait",
"base64 0.21.1",
@@ -702,8 +704,10 @@ dependencies = [
"dyn-clone",
"futures",
"getrandom 0.2.11",
"hmac",
"http-types",
"log",
"once_cell",
"paste",
"pin-project",
"quick-xml",
@@ -712,6 +716,7 @@ dependencies = [
"rustc_version",
"serde",
"serde_json",
"sha2",
"time",
"url",
"uuid",
@@ -719,9 +724,9 @@ dependencies = [
[[package]]
name = "azure_identity"
version = "0.16.2"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b67b337346da8739e91ea1e9400a6ebc9bc54e0b2af1d23c9bcd565950588f9"
checksum = "9e1eacc4f7fb2a73d57c39139d0fc3aed78435606055779ddaef4b43cdf919a8"
dependencies = [
"async-lock",
"async-trait",
@@ -731,7 +736,6 @@ dependencies = [
"oauth2",
"pin-project",
"serde",
"serde_json",
"time",
"tz-rs",
"url",
@@ -740,21 +744,18 @@ dependencies = [
[[package]]
name = "azure_storage"
version = "0.16.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bed0ccefde57930b2886fd4aed1f70ac469c197b8c2e94828290d71bcbdb5d97"
checksum = "ade8f2653e408de88b9eafec9f48c3c26b94026375e88adbd34523a7dd9795a1"
dependencies = [
"RustyXML",
"async-lock",
"async-trait",
"azure_core",
"bytes",
"futures",
"hmac",
"log",
"serde",
"serde_derive",
"serde_json",
"sha2",
"time",
"url",
"uuid",
@@ -762,13 +763,14 @@ dependencies = [
[[package]]
name = "azure_storage_blobs"
version = "0.16.0"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91a52da2d192cfe43759f61e8bb31a5969f1722d5b85ac89627f356ad674ab4"
checksum = "025701c7cc5b523100f0f3b2b01723564ec5a86c03236521c06826337047e872"
dependencies = [
"RustyXML",
"azure_core",
"azure_storage",
"azure_svc_blobstorage",
"bytes",
"futures",
"log",
@@ -780,6 +782,22 @@ dependencies = [
"uuid",
]
[[package]]
name = "azure_svc_blobstorage"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76051e5bb67cea1055abe5e530a0878feac7e0ab4cbbcb4a6adc953a58993389"
dependencies = [
"azure_core",
"bytes",
"futures",
"log",
"once_cell",
"serde",
"serde_json",
"time",
]
[[package]]
name = "backtrace"
version = "0.3.67"
@@ -1686,6 +1704,27 @@ version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]]
name = "event-listener"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
dependencies = [
"concurrent-queue",
"parking",
"pin-project-lite",
]
[[package]]
name = "event-listener-strategy"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
dependencies = [
"event-listener 4.0.0",
"pin-project-lite",
]
[[package]]
name = "fail"
version = "0.5.1"
@@ -3678,9 +3717,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.30.0"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
dependencies = [
"memchr",
"serde",

View File

@@ -38,10 +38,10 @@ license = "Apache-2.0"
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
azure_core = "0.16"
azure_identity = "0.16"
azure_storage = "0.16"
azure_storage_blobs = "0.16"
azure_core = "0.18"
azure_identity = "0.18"
azure_storage = "0.18"
azure_storage_blobs = "0.18"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"

View File

@@ -168,7 +168,7 @@ fn print_timelines_tree(
info: t.clone(),
children: BTreeSet::new(),
name: timeline_name_mappings
.remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
.remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
},
)
})

View File

@@ -165,7 +165,7 @@ pub fn migrate_tenant(
let found = other_ps_tenants
.into_iter()
.map(|t| t.id)
.any(|i| i == tenant_id);
.any(|i| i.tenant_id == tenant_id);
if !found {
continue;
}

View File

@@ -357,7 +357,7 @@ pub enum TenantAttachmentStatus {
#[derive(Serialize, Deserialize, Clone)]
pub struct TenantInfo {
pub id: TenantId,
pub id: TenantShardId,
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
pub state: TenantState,
/// Sum of the size of all layer files.
@@ -369,7 +369,7 @@ pub struct TenantInfo {
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo {
pub tenant_id: TenantId,
pub tenant_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
@@ -385,6 +385,9 @@ pub struct TimelineInfo {
/// The LSN that we are advertizing to safekeepers
pub remote_consistent_lsn_visible: Lsn,
/// The LSN from the start of the root timeline (never changes)
pub initdb_lsn: Lsn,
pub current_logical_size: u64,
pub current_logical_size_is_accurate: bool,
@@ -823,7 +826,7 @@ mod tests {
fn test_tenantinfo_serde() {
// Test serialization/deserialization of TenantInfo
let original_active = TenantInfo {
id: TenantId::generate(),
id: TenantShardId::unsharded(TenantId::generate()),
state: TenantState::Active,
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
@@ -840,7 +843,7 @@ mod tests {
});
let original_broken = TenantInfo {
id: TenantId::generate(),
id: TenantShardId::unsharded(TenantId::generate()),
state: TenantState::Broken {
reason: "reason".into(),
backtrace: "backtrace info".into(),

View File

@@ -76,6 +76,11 @@ impl TenantShardId {
pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
ShardSlug(self)
}
/// Convenience for code that has special behavior on the 0th shard.
pub fn is_zero(&self) -> bool {
self.shard_number == ShardNumber(0)
}
}
/// Formatting helper

View File

@@ -271,17 +271,12 @@ impl RemoteStorage for AzureBlobStorage {
let mut builder = blob_client.get();
if let Some(end_exclusive) = end_exclusive {
builder = builder.range(Range::new(start_inclusive, end_exclusive));
let range: Range = if let Some(end_exclusive) = end_exclusive {
(start_inclusive..end_exclusive).into()
} else {
// Open ranges are not supported by the SDK so we work around
// by setting the upper limit extremely high (but high enough
// to still be representable by signed 64 bit integers).
// TODO remove workaround once the SDK adds open range support
// https://github.com/Azure/azure-sdk-for-rust/issues/1438
let end_exclusive = u64::MAX / 4;
builder = builder.range(Range::new(start_inclusive, end_exclusive));
}
(start_inclusive..).into()
};
builder = builder.range(range);
self.download_for_builder(builder).await
}

View File

@@ -30,18 +30,32 @@ async fn warn_if_stuck<Fut: std::future::Future>(
let mut fut = std::pin::pin!(fut);
loop {
let mut warned = false;
let ret = loop {
match tokio::time::timeout(warn_period, &mut fut).await {
Ok(ret) => return ret,
Ok(ret) => break ret,
Err(_) => {
tracing::warn!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"still waiting, taking longer than expected..."
);
warned = true;
}
}
};
// If we emitted a warning for slowness, also emit a message when we complete, so that
// someone debugging a shutdown can know for sure whether we have moved past this operation.
if warned {
tracing::info!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"completed, after taking longer than expected"
)
}
ret
}
#[derive(Debug)]

View File

@@ -269,12 +269,18 @@ async fn calculate_synthetic_size_worker(
}
};
for (tenant_id, tenant_state) in tenants {
for (tenant_shard_id, tenant_state) in tenants {
if tenant_state != TenantState::Active {
continue;
}
if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
if !tenant_shard_id.is_zero() {
// We only send consumption metrics from shard 0, so don't waste time calculating
// synthetic size on other shards.
continue;
}
if let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) {
// TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
// We can put in some prioritization for consumption metrics.
// Same for the loop that fetches computed metrics.
@@ -291,7 +297,9 @@ async fn calculate_synthetic_size_worker(
);
if !is_cancelled {
error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
error!(
"failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}"
);
}
}
}

View File

@@ -2,7 +2,6 @@ use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogi
use chrono::{DateTime, Utc};
use consumption_metrics::EventType;
use futures::stream::StreamExt;
use pageserver_api::shard::ShardNumber;
use std::{sync::Arc, time::SystemTime};
use utils::{
id::{TenantId, TimelineId},
@@ -198,12 +197,12 @@ pub(super) async fn collect_all_metrics(
};
let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
if state != TenantState::Active {
if state != TenantState::Active || !id.is_zero() {
None
} else {
crate::tenant::mgr::get_tenant(id, true)
.ok()
.map(|tenant| (id, tenant))
.map(|tenant| (id.tenant_id, tenant))
}
});
@@ -229,11 +228,6 @@ where
while let Some((tenant_id, tenant)) = tenants.next().await {
let mut tenant_resident_size = 0;
// Sharded tenants report all consumption metrics from shard zero
if tenant.tenant_shard_id().shard_number != ShardNumber(0) {
continue;
}
for timeline in tenant.list_timelines() {
let timeline_id = timeline.timeline_id;

View File

@@ -84,7 +84,6 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: Get tenant status
responses:
@@ -181,7 +180,6 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: Get timelines for tenant
responses:
@@ -232,7 +230,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -338,7 +335,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -401,7 +397,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -469,7 +464,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -523,7 +517,6 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Schedules attach operation to happen in the background for the given tenant.
@@ -631,7 +624,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: flush_ms
in: query
required: false
@@ -724,7 +716,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: detach_ignored
in: query
required: false
@@ -784,7 +775,6 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Remove tenant data (including all corresponding timelines) from pageserver's memory.
@@ -833,7 +823,6 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Schedules an operation that attempts to load a tenant from the local disk and
@@ -890,7 +879,6 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: |
Calculate tenant's synthetic size
@@ -933,7 +921,6 @@ paths:
required: true
schema:
type: string
format: hex
- name: inputs_only
in: query
required: false
@@ -1003,7 +990,6 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Create a timeline. Returns new timeline id on success.\
@@ -1137,7 +1123,6 @@ paths:
application/json:
schema:
type: string
format: hex
"400":
description: Malformed tenant create request
content:
@@ -1234,7 +1219,6 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: |
Returns tenant's config description: specific config overrides a tenant has
@@ -1340,7 +1324,6 @@ components:
properties:
new_tenant_id:
type: string
format: hex
generation:
type: integer
description: Attachment generation number.
@@ -1369,7 +1352,6 @@ components:
properties:
tenant_id:
type: string
format: hex
TenantLocationConfigRequest:
type: object
required:
@@ -1377,7 +1359,6 @@ components:
properties:
tenant_id:
type: string
format: hex
mode:
type: string
enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
@@ -1446,7 +1427,6 @@ components:
format: hex
tenant_id:
type: string
format: hex
last_record_lsn:
type: string
format: hex

View File

@@ -319,6 +319,7 @@ async fn build_timeline_info_common(
ctx: &RequestContext,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
let initdb_lsn = timeline.initdb_lsn;
let last_record_lsn = timeline.get_last_record_lsn();
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
let guard = timeline.last_received_wal.lock().unwrap();
@@ -352,14 +353,14 @@ async fn build_timeline_info_common(
let walreceiver_status = timeline.walreceiver_status();
let info = TimelineInfo {
// TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
tenant_id: timeline.tenant_shard_id.tenant_id,
tenant_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id,
ancestor_lsn,
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
remote_consistent_lsn: remote_consistent_lsn_projected,
remote_consistent_lsn_visible,
initdb_lsn,
last_record_lsn,
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
@@ -480,15 +481,15 @@ async fn timeline_list_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let response_data = async {
let tenant = mgr::get_tenant(tenant_id, true)?;
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let timelines = tenant.list_timelines();
let mut response_data = Vec::with_capacity(timelines.len());
@@ -507,7 +508,9 @@ async fn timeline_list_handler(
}
Ok::<Vec<TimelineInfo>, ApiError>(response_data)
}
.instrument(info_span!("timeline_list", %tenant_id))
.instrument(info_span!("timeline_list",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()))
.await?;
json_response(StatusCode::OK, response_data)
@@ -517,17 +520,17 @@ async fn timeline_detail_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
// Logical size calculation needs downloading.
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline_info = async {
let tenant = mgr::get_tenant(tenant_id, true)?;
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let timeline = tenant
.get_timeline(timeline_id, false)
@@ -544,7 +547,10 @@ async fn timeline_detail_handler(
Ok::<_, ApiError>(timeline_info)
}
.instrument(info_span!("timeline_detail", %tenant_id, %timeline_id))
.instrument(info_span!("timeline_detail",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug(),
%timeline_id))
.await?;
json_response(StatusCode::OK, timeline_info)
@@ -554,8 +560,15 @@ async fn get_lsn_by_timestamp_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
if !tenant_shard_id.is_zero() {
// Requires SLRU contents, which are only stored on shard zero
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
let version: Option<u8> = parse_query_param(&request, "version")?;
@@ -567,7 +580,7 @@ async fn get_lsn_by_timestamp_handler(
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let result = timeline
.find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
.await?;
@@ -602,8 +615,15 @@ async fn get_timestamp_of_lsn_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
if !tenant_shard_id.is_zero() {
// Requires SLRU contents, which are only stored on shard zero
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -613,7 +633,7 @@ async fn get_timestamp_of_lsn_handler(
.map_err(ApiError::BadRequest)?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;
match result {
@@ -805,11 +825,11 @@ async fn tenant_status(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_info = async {
let tenant = mgr::get_tenant(tenant_id, false)?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
// Calculate total physical size of all timelines
let mut current_physical_size = 0;
@@ -819,13 +839,15 @@ async fn tenant_status(
let state = tenant.current_state();
Result::<_, ApiError>::Ok(TenantInfo {
id: tenant_id,
id: tenant_shard_id,
state: state.clone(),
current_physical_size: Some(current_physical_size),
attachment_status: state.attachment_status(),
})
}
.instrument(info_span!("tenant_status_handler", %tenant_id))
.instrument(info_span!("tenant_status_handler",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()))
.await?;
json_response(StatusCode::OK, tenant_info)
@@ -868,14 +890,20 @@ async fn tenant_size_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
let headers = request.headers();
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let tenant = mgr::get_tenant(tenant_id, true)?;
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
if !tenant_shard_id.is_zero() {
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
// this can be long operation
let inputs = tenant
@@ -927,7 +955,7 @@ async fn tenant_size_handler(
json_response(
StatusCode::OK,
TenantHistorySize {
id: tenant_id,
id: tenant_shard_id.tenant_id,
size: sizes.as_ref().map(|x| x.total_size),
segment_sizes: sizes.map(|x| x.segments),
inputs,
@@ -939,14 +967,14 @@ async fn layer_map_info_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let reset: LayerAccessStatsReset =
parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let layer_map_info = timeline.layer_map_info(reset).await;
json_response(StatusCode::OK, layer_map_info)
@@ -956,13 +984,12 @@ async fn layer_download_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let downloaded = timeline
.download_layer(layer_file_name)
.await
@@ -973,7 +1000,7 @@ async fn layer_download_handler(
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
@@ -982,12 +1009,12 @@ async fn evict_timeline_layer_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let evicted = timeline
.evict_layer(layer_file_name)
.await
@@ -998,7 +1025,7 @@ async fn evict_timeline_layer_handler(
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
@@ -1130,10 +1157,10 @@ async fn get_tenant_config_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant = mgr::get_tenant(tenant_id, false)?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
let response = HashMap::from([
(
@@ -1227,9 +1254,9 @@ async fn handle_tenant_break(
r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
tenant.set_broken("broken from test".to_owned()).await;
@@ -1270,14 +1297,15 @@ async fn timeline_gc_handler(
mut request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
let wait_task_done =
mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
let gc_result = wait_task_done
.await
.context("wait for gc task")
@@ -1292,9 +1320,9 @@ async fn timeline_compact_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let mut flags = EnumSet::empty();
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1302,14 +1330,14 @@ async fn timeline_compact_handler(
}
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
timeline
.compact(&cancel, flags, &ctx)
.await
.map_err(|e| ApiError::InternalServerError(e.into()))?;
json_response(StatusCode::OK, ())
}
.instrument(info_span!("manual_compaction", %tenant_id, %timeline_id))
.instrument(info_span!("manual_compaction", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
@@ -1318,9 +1346,9 @@ async fn timeline_checkpoint_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let mut flags = EnumSet::empty();
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1328,7 +1356,7 @@ async fn timeline_checkpoint_handler(
}
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
timeline
.freeze_and_flush()
.await
@@ -1340,7 +1368,7 @@ async fn timeline_checkpoint_handler(
json_response(StatusCode::OK, ())
}
.instrument(info_span!("manual_checkpoint", %tenant_id, %timeline_id))
.instrument(info_span!("manual_checkpoint", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
@@ -1348,12 +1376,12 @@ async fn timeline_download_remote_layers_handler_post(
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
match timeline.spawn_download_all_remote_layers(body).await {
Ok(st) => json_response(StatusCode::ACCEPTED, st),
Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -1364,11 +1392,11 @@ async fn timeline_download_remote_layers_handler_get(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let info = timeline
.get_download_all_remote_layers_task_info()
.context("task never started since last pageserver process start")
@@ -1414,9 +1442,9 @@ async fn getpage_at_lsn_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
struct Key(crate::repository::Key);
@@ -1435,7 +1463,7 @@ async fn getpage_at_lsn_handler(
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let page = timeline.get(key.0, lsn, &ctx).await?;
@@ -1447,7 +1475,7 @@ async fn getpage_at_lsn_handler(
.unwrap(),
)
}
.instrument(info_span!("timeline_get", %tenant_id, %timeline_id))
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
@@ -1455,9 +1483,9 @@ async fn timeline_collect_keyspace(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
struct Partitioning {
keys: crate::keyspace::KeySpace,
@@ -1526,7 +1554,7 @@ async fn timeline_collect_keyspace(
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
let keys = timeline
.collect_keyspace(at_lsn, &ctx)
@@ -1535,15 +1563,15 @@ async fn timeline_collect_keyspace(
json_response(StatusCode::OK, Partitioning { keys, at_lsn })
}
.instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
.instrument(info_span!("timeline_collect_keyspace", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
async fn active_timeline_of_active_tenant(
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Result<Arc<Timeline>, ApiError> {
let tenant = mgr::get_tenant(tenant_id, true)?;
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
tenant
.get_timeline(timeline_id, true)
.map_err(|e| ApiError::NotFound(e.into()))
@@ -1820,23 +1848,25 @@ pub fn make_router(
})
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
.get("/v1/tenant/:tenant_shard_id", |r| {
api_handler(r, tenant_status)
})
.delete("/v1/tenant/:tenant_shard_id", |r| {
api_handler(r, tenant_delete_handler)
})
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
.get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
api_handler(r, tenant_size_handler)
})
.put("/v1/tenant/config", |r| {
api_handler(r, update_tenant_config_handler)
})
.get("/v1/tenant/:tenant_id/config", |r| {
.get("/v1/tenant/:tenant_shard_id/config", |r| {
api_handler(r, get_tenant_config_handler)
})
.put("/v1/tenant/:tenant_shard_id/location_config", |r| {
api_handler(r, put_tenant_location_config_handler)
})
.get("/v1/tenant/:tenant_id/timeline", |r| {
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
api_handler(r, timeline_list_handler)
})
.post("/v1/tenant/:tenant_shard_id/timeline", |r| {
@@ -1857,47 +1887,50 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/ignore", |r| {
api_handler(r, tenant_ignore_handler)
})
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
.get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_detail_handler)
})
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp",
|r| api_handler(r, get_lsn_by_timestamp_handler),
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_timestamp_of_lsn",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn",
|r| api_handler(r, get_timestamp_of_lsn_handler),
)
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
api_handler(r, timeline_gc_handler)
})
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
testing_api_handler("run timeline compaction", r, timeline_compact_handler)
})
.put(
"/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
|r| api_handler(r, timeline_gc_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
|r| api_handler(r, timeline_download_remote_layers_handler_post),
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
|r| api_handler(r, timeline_download_remote_layers_handler_get),
)
.delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_delete_handler)
})
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
api_handler(r, layer_map_info_handler)
})
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
|r| api_handler(r, layer_map_info_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, layer_download_handler),
)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, evict_timeline_layer_handler),
)
.put("/v1/disk_usage_eviction/run", |r| {
@@ -1906,18 +1939,19 @@ pub fn make_router(
.put("/v1/deletion_queue/flush", |r| {
api_handler(r, deletion_queue_flush)
})
.put("/v1/tenant/:tenant_id/break", |r| {
.put("/v1/tenant/:tenant_shard_id/break", |r| {
testing_api_handler("set tenant state to broken", r, handle_tenant_break)
})
.get("/v1/panic", |r| api_handler(r, always_panic_handler))
.post("/v1/tracing/event", |r| {
testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
})
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/getpage", |r| {
testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler)
})
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/keyspace",
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
|r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
)
.any(handler_404))

View File

@@ -650,7 +650,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
"pageserver_evictions_with_low_residence_duration",
"If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
Residence duration is determined using the `residence_duration_data_source`.",
&["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
&["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
)
.expect("failed to define a metric")
});
@@ -714,10 +714,16 @@ impl EvictionsWithLowResidenceDurationBuilder {
}
}
fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
fn build(
&self,
tenant_id: &str,
shard_id: &str,
timeline_id: &str,
) -> EvictionsWithLowResidenceDuration {
let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
.get_metric_with_label_values(&[
tenant_id,
shard_id,
timeline_id,
self.data_source,
&EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
@@ -748,21 +754,24 @@ impl EvictionsWithLowResidenceDuration {
pub fn change_threshold(
&mut self,
tenant_id: &str,
shard_id: &str,
timeline_id: &str,
new_threshold: Duration,
) {
if new_threshold == self.threshold {
return;
}
let mut with_new =
EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
.build(tenant_id, timeline_id);
let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
self.data_source,
new_threshold,
)
.build(tenant_id, shard_id, timeline_id);
std::mem::swap(self, &mut with_new);
with_new.remove(tenant_id, timeline_id);
with_new.remove(tenant_id, shard_id, timeline_id);
}
// This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
let Some(_counter) = self.counter.take() else {
return;
};
@@ -771,6 +780,7 @@ impl EvictionsWithLowResidenceDuration {
let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
tenant_id,
shard_id,
timeline_id,
self.data_source,
&threshold,
@@ -1603,6 +1613,7 @@ impl StorageTimeMetrics {
#[derive(Debug)]
pub struct TimelineMetrics {
tenant_id: String,
shard_id: String,
timeline_id: String,
pub flush_time_histo: StorageTimeMetrics,
pub compact_time_histo: StorageTimeMetrics,
@@ -1623,11 +1634,12 @@ pub struct TimelineMetrics {
impl TimelineMetrics {
pub fn new(
tenant_id: &TenantId,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
) -> Self {
let tenant_id = tenant_id.to_string();
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", tenant_shard_id.shard_slug());
let timeline_id = timeline_id.to_string();
let flush_time_histo =
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
@@ -1664,11 +1676,12 @@ impl TimelineMetrics {
let evictions = EVICTIONS
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let evictions_with_low_residence_duration =
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
.build(&tenant_id, &shard_id, &timeline_id);
TimelineMetrics {
tenant_id,
shard_id,
timeline_id,
flush_time_histo,
compact_time_histo,
@@ -1714,6 +1727,7 @@ impl Drop for TimelineMetrics {
fn drop(&mut self) {
let tenant_id = &self.tenant_id;
let timeline_id = &self.timeline_id;
let shard_id = &self.shard_id;
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
{
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
@@ -1727,7 +1741,7 @@ impl Drop for TimelineMetrics {
self.evictions_with_low_residence_duration
.write()
.unwrap()
.remove(tenant_id, timeline_id);
.remove(tenant_id, shard_id, timeline_id);
// The following metrics are born outside of the TimelineMetrics lifecycle but still
// removed at the end of it. The idea is to have the metrics outlive the

View File

@@ -28,7 +28,7 @@
//! Page cache maps from a cache key to a buffer slot.
//! The cache key uniquely identifies the piece of data that is being cached.
//!
//! The cache key for **materialized pages** is [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
//! The cache key for **materialized pages** is [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
//! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
//!
//! The cache key for **immutable file** pages is [`FileId`] and a block number.
@@ -83,10 +83,8 @@ use std::{
use anyhow::Context;
use once_cell::sync::OnceCell;
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use pageserver_api::shard::TenantShardId;
use utils::{id::TimelineId, lsn::Lsn};
use crate::{
context::RequestContext,
@@ -154,7 +152,13 @@ enum CacheKey {
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
struct MaterializedPageHashKey {
tenant_id: TenantId,
/// Why is this TenantShardId rather than TenantId?
///
/// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant. However, this
/// this not the case for certain internally-generated pages (e.g. relation sizes). In future, we may make this
/// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
/// special-cased in some other way.
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
key: Key,
}
@@ -378,7 +382,7 @@ impl PageCache {
/// returned page.
pub async fn lookup_materialized_page(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
key: &Key,
lsn: Lsn,
@@ -395,7 +399,7 @@ impl PageCache {
let mut cache_key = CacheKey::MaterializedPage {
hash_key: MaterializedPageHashKey {
tenant_id,
tenant_shard_id,
timeline_id,
key: *key,
},
@@ -436,7 +440,7 @@ impl PageCache {
///
pub async fn memorize_materialized_page(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
key: Key,
lsn: Lsn,
@@ -444,7 +448,7 @@ impl PageCache {
) -> anyhow::Result<()> {
let cache_key = CacheKey::MaterializedPage {
hash_key: MaterializedPageHashKey {
tenant_id,
tenant_shard_id,
timeline_id,
key,
},

View File

@@ -822,10 +822,7 @@ impl<'a> DatadirModification<'a> {
self.put(DBDIR_KEY, Value::Image(buf.into()));
// Create AuxFilesDirectory
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
self.init_aux_dir()?;
let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
xids: HashSet::new(),
@@ -933,10 +930,7 @@ impl<'a> DatadirModification<'a> {
self.put(DBDIR_KEY, Value::Image(buf.into()));
// Create AuxFilesDirectory as well
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
self.init_aux_dir()?;
}
if r.is_none() {
// Create RelDirectory
@@ -1261,6 +1255,14 @@ impl<'a> DatadirModification<'a> {
Ok(())
}
pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
Ok(())
}
pub async fn put_file(
&mut self,
path: &str,
@@ -1767,6 +1769,13 @@ const AUX_FILES_KEY: Key = Key {
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (

View File

@@ -42,6 +42,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use futures::FutureExt;
use pageserver_api::shard::TenantShardId;
use tokio::runtime::Runtime;
use tokio::task::JoinHandle;
use tokio::task_local;
@@ -51,7 +52,7 @@ use tracing::{debug, error, info, warn};
use once_cell::sync::Lazy;
use utils::id::{TenantId, TimelineId};
use utils::id::TimelineId;
use crate::shutdown_pageserver;
@@ -317,7 +318,7 @@ struct PageServerTask {
/// Tasks may optionally be launched for a particular tenant/timeline, enabling
/// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
tenant_id: Option<TenantId>,
tenant_shard_id: Option<TenantShardId>,
timeline_id: Option<TimelineId>,
mutable: Mutex<MutableTaskState>,
@@ -329,7 +330,7 @@ struct PageServerTask {
pub fn spawn<F>(
runtime: &tokio::runtime::Handle,
kind: TaskKind,
tenant_id: Option<TenantId>,
tenant_shard_id: Option<TenantShardId>,
timeline_id: Option<TimelineId>,
name: &str,
shutdown_process_on_error: bool,
@@ -345,7 +346,7 @@ where
kind,
name: name.to_string(),
cancel: cancel.clone(),
tenant_id,
tenant_shard_id,
timeline_id,
mutable: Mutex::new(MutableTaskState { join_handle: None }),
});
@@ -424,28 +425,28 @@ async fn task_finish(
Ok(Err(err)) => {
if shutdown_process_on_error {
error!(
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_id, task.timeline_id, err
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
);
shutdown_process = true;
} else {
error!(
"Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_id, task.timeline_id, err
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
);
}
}
Err(err) => {
if shutdown_process_on_error {
error!(
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_id, task.timeline_id, err
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
);
shutdown_process = true;
} else {
error!(
"Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_id, task.timeline_id, err
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
);
}
}
@@ -467,11 +468,11 @@ async fn task_finish(
///
/// Or to shut down all tasks for given timeline:
///
/// shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
/// shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
///
pub async fn shutdown_tasks(
kind: Option<TaskKind>,
tenant_id: Option<TenantId>,
tenant_shard_id: Option<TenantShardId>,
timeline_id: Option<TimelineId>,
) {
let mut victim_tasks = Vec::new();
@@ -480,35 +481,35 @@ pub async fn shutdown_tasks(
let tasks = TASKS.lock().unwrap();
for task in tasks.values() {
if (kind.is_none() || Some(task.kind) == kind)
&& (tenant_id.is_none() || task.tenant_id == tenant_id)
&& (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
&& (timeline_id.is_none() || task.timeline_id == timeline_id)
{
task.cancel.cancel();
victim_tasks.push((
Arc::clone(task),
task.kind,
task.tenant_id,
task.tenant_shard_id,
task.timeline_id,
));
}
}
}
let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();
let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
let join_handle = {
let mut task_mut = task.mutable.lock().unwrap();
task_mut.join_handle.take()
};
if let Some(mut join_handle) = join_handle {
if log_all {
if tenant_id.is_none() {
if tenant_shard_id.is_none() {
// there are quite few of these
info!(name = task.name, kind = ?task_kind, "stopping global task");
} else {
// warn to catch these in tests; there shouldn't be any
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
}
}
if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
@@ -517,12 +518,13 @@ pub async fn shutdown_tasks(
{
// allow some time to elapse before logging to cut down the number of log
// lines.
info!("waiting for {} to shut down", task.name);
info!("waiting for task {} to shut down", task.name);
// we never handled this return value, but:
// - we don't deschedule which would lead to is_cancelled
// - panics are already logged (is_panicked)
// - task errors are already logged in the wrapper
let _ = join_handle.await;
info!("task {} completed", task.name);
}
} else {
// Possibly one of:

View File

@@ -608,7 +608,7 @@ impl Tenant {
task_mgr::spawn(
&tokio::runtime::Handle::current(),
TaskKind::Attach,
Some(tenant_shard_id.tenant_id),
Some(tenant_shard_id),
None,
"attach tenant",
false,
@@ -1917,7 +1917,7 @@ impl Tenant {
//
// this will additionally shutdown and await all timeline tasks.
tracing::debug!("Waiting for tasks...");
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;
// Wait for any in-flight operations to complete
self.gate.close().await;

View File

@@ -463,7 +463,7 @@ impl DeleteTenantFlow {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::TimelineDeletionWorker,
Some(tenant_shard_id.tenant_id),
Some(tenant_shard_id),
None,
"tenant_delete",
false,
@@ -550,7 +550,7 @@ impl DeleteTenantFlow {
// we encounter an InProgress marker, yield the barrier it contains and wait on it.
let barrier = {
let mut locked = tenants.write().unwrap();
let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
let removed = locked.remove(tenant.tenant_shard_id);
// FIXME: we should not be modifying this from outside of mgr.rs.
// This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)

View File

@@ -98,33 +98,6 @@ pub(crate) enum TenantsMap {
ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
}
/// Helper for mapping shard-unaware functions to a sharding-aware map
/// TODO(sharding): all users of this must be made shard-aware.
fn exactly_one_or_none<'a>(
map: &'a BTreeMap<TenantShardId, TenantSlot>,
tenant_id: &TenantId,
) -> Option<(&'a TenantShardId, &'a TenantSlot)> {
let mut slots = map.range(TenantShardId::tenant_range(*tenant_id));
// Retrieve the first two slots in the range: if both are populated, we must panic because the caller
// needs a shard-naive view of the world in which only one slot can exist for a TenantId at a time.
let slot_a = slots.next();
let slot_b = slots.next();
match (slot_a, slot_b) {
(None, None) => None,
(Some(slot), None) => {
// Exactly one matching slot
Some(slot)
}
(Some(_slot_a), Some(_slot_b)) => {
// Multiple shards for this tenant: cannot handle this yet.
// TODO(sharding): callers of get() should be shard-aware.
todo!("Attaching multiple shards in teh same tenant to the same pageserver")
}
(None, Some(_)) => unreachable!(),
}
}
pub(crate) enum TenantsMapRemoveResult {
Occupied(TenantSlot),
Vacant,
@@ -147,12 +120,11 @@ impl TenantsMap {
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
/// None is returned.
pub(crate) fn get(&self, tenant_id: &TenantId) -> Option<&Arc<Tenant>> {
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
match self {
TenantsMap::Initializing => None,
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
// TODO(sharding): callers of get() should be shard-aware.
exactly_one_or_none(m, tenant_id).and_then(|(_, slot)| slot.get_attached())
m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
}
}
}
@@ -204,25 +176,19 @@ impl TenantsMap {
///
/// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
/// slot if the enclosed tenant is shutdown.
pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult {
pub(crate) fn remove(&mut self, tenant_shard_id: TenantShardId) -> TenantsMapRemoveResult {
use std::collections::btree_map::Entry;
match self {
TenantsMap::Initializing => TenantsMapRemoveResult::Vacant,
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k);
match key {
Some(key) => match m.entry(key) {
Entry::Occupied(entry) => match entry.get() {
TenantSlot::InProgress(barrier) => {
TenantsMapRemoveResult::InProgress(barrier.clone())
}
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
},
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
},
None => TenantsMapRemoveResult::Vacant,
}
}
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => match m.entry(tenant_shard_id) {
Entry::Occupied(entry) => match entry.get() {
TenantSlot::InProgress(barrier) => {
TenantsMapRemoveResult::InProgress(barrier.clone())
}
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
},
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
},
}
}
@@ -822,14 +788,16 @@ pub(crate) async fn set_new_tenant_config(
new_tenant_conf: TenantConfOpt,
tenant_id: TenantId,
) -> Result<(), SetNewTenantConfigError> {
// Legacy API: does not support sharding
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_id, true)?;
let tenant = get_tenant(tenant_shard_id, true)?;
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
.await
@@ -1143,14 +1111,11 @@ pub(crate) enum GetTenantError {
///
/// This method is cancel-safe.
pub(crate) fn get_tenant(
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
active_only: bool,
) -> Result<Arc<Tenant>, GetTenantError> {
let locked = TENANTS.read().unwrap();
// TODO(sharding): make all callers of get_tenant shard-aware
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
match peek_slot {
@@ -1162,14 +1127,18 @@ pub(crate) fn get_tenant(
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_id))
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)),
None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)),
Some(TenantSlot::InProgress(_)) => {
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
}
None | Some(TenantSlot::Secondary) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
}
}
}
@@ -1542,7 +1511,8 @@ pub(crate) enum TenantMapListError {
///
/// Get list of tenants, for the mgmt API
///
pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
{
let tenants = TENANTS.read().unwrap();
let m = match &*tenants {
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1550,12 +1520,10 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, Tenan
};
Ok(m.iter()
.filter_map(|(id, tenant)| match tenant {
TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
TenantSlot::Secondary => None,
TenantSlot::InProgress(_) => None,
})
// TODO(sharding): make callers of this function shard-aware
.map(|(k, v)| (k.tenant_id, v))
.collect())
}
@@ -2089,21 +2057,19 @@ use {
};
pub(crate) async fn immediate_gc(
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
gc_req: TimelineGcRequest,
cancel: CancellationToken,
ctx: &RequestContext,
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
let guard = TENANTS.read().unwrap();
let tenant = guard
.get(&tenant_id)
.map(Arc::clone)
.with_context(|| format!("tenant {tenant_id}"))
.map_err(|e| ApiError::NotFound(e.into()))?;
// TODO(sharding): make callers of this function shard-aware
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let tenant = guard
.get(&tenant_shard_id)
.map(Arc::clone)
.with_context(|| format!("tenant {tenant_shard_id}"))
.map_err(|e| ApiError::NotFound(e.into()))?;
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
// Use tenant's pitr setting
@@ -2116,9 +2082,9 @@ pub(crate) async fn immediate_gc(
task_mgr::spawn(
&tokio::runtime::Handle::current(),
TaskKind::GarbageCollector,
Some(tenant_id),
Some(tenant_shard_id),
Some(timeline_id),
&format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
&format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
false,
async move {
fail::fail_point!("immediate_gc_task_pre");

View File

@@ -1223,7 +1223,7 @@ impl RemoteTimelineClient {
task_mgr::spawn(
&self.runtime,
TaskKind::RemoteUploadTask,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
"remote upload",
false,

View File

@@ -837,7 +837,7 @@ impl LayerInner {
crate::task_mgr::spawn(
&tokio::runtime::Handle::current(),
crate::task_mgr::TaskKind::RemoteDownloadTask,
Some(self.desc.tenant_shard_id.tenant_id),
Some(self.desc.tenant_shard_id),
Some(self.desc.timeline_id),
&task_name,
false,

View File

@@ -87,13 +87,13 @@ pub fn start_background_loops(
tenant: &Arc<Tenant>,
background_jobs_can_start: Option<&completion::Barrier>,
) {
let tenant_id = tenant.tenant_shard_id.tenant_id;
let tenant_shard_id = tenant.tenant_shard_id;
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::Compaction,
Some(tenant_id),
Some(tenant_shard_id),
None,
&format!("compactor for tenant {tenant_id}"),
&format!("compactor for tenant {tenant_shard_id}"),
false,
{
let tenant = Arc::clone(tenant);
@@ -105,7 +105,7 @@ pub fn start_background_loops(
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
};
compaction_loop(tenant, cancel)
.instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
.instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
.await;
Ok(())
}
@@ -114,9 +114,9 @@ pub fn start_background_loops(
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::GarbageCollector,
Some(tenant_id),
Some(tenant_shard_id),
None,
&format!("garbage collector for tenant {tenant_id}"),
&format!("garbage collector for tenant {tenant_shard_id}"),
false,
{
let tenant = Arc::clone(tenant);
@@ -128,7 +128,7 @@ pub fn start_background_loops(
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
};
gc_loop(tenant, cancel)
.instrument(info_span!("gc_loop", tenant_id = %tenant_id))
.instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
.await;
Ok(())
}

View File

@@ -66,7 +66,7 @@ use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
use pageserver_api::reltag::RelTag;
@@ -77,7 +77,7 @@ use postgres_ffi::to_pg_timestamp;
use utils::{
completion,
generation::Generation,
id::{TenantId, TimelineId},
id::TimelineId,
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
simple_rcu::{Rcu, RcuReadGuard},
@@ -926,7 +926,7 @@ impl Timeline {
tracing::debug!("Waiting for WalReceiverManager...");
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
)
.await;
@@ -977,7 +977,7 @@ impl Timeline {
// Shut down the layer flush task before the remote client, as one depends on the other
task_mgr::shutdown_tasks(
Some(TaskKind::LayerFlushTask),
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
)
.await;
@@ -995,12 +995,7 @@ impl Timeline {
tracing::debug!("Waiting for tasks...");
task_mgr::shutdown_tasks(
None,
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
)
.await;
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;
// Finally wait until any gate-holders are complete
self.gate.close().await;
@@ -1314,16 +1309,20 @@ impl Timeline {
&self.conf.default_tenant_conf,
);
// TODO(sharding): make evictions state shard aware
// (https://github.com/neondatabase/neon/issues/5953)
let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
let timeline_id_str = self.timeline_id.to_string();
self.metrics
.evictions_with_low_residence_duration
.write()
.unwrap()
.change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
.change_threshold(
&tenant_id_str,
&shard_id_str,
&timeline_id_str,
new_threshold,
);
}
}
@@ -1395,7 +1394,7 @@ impl Timeline {
ancestor_lsn: metadata.ancestor_lsn(),
metrics: TimelineMetrics::new(
&tenant_shard_id.tenant_id,
&tenant_shard_id,
&timeline_id,
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
"mtime",
@@ -1496,7 +1495,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::LayerFlushTask,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
"layer flush task",
false,
@@ -1847,7 +1846,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::InitialLogicalSizeCalculation,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
"initial size calculation",
false,
@@ -2020,7 +2019,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::OndemandLogicalSizeCalculation,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
"ondemand logical size calculation",
false,
@@ -2279,7 +2278,7 @@ impl Timeline {
}
// Recurse into ancestor if needed
if Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
trace!(
"going into ancestor {}, cont_lsn is {}",
timeline.ancestor_lsn,
@@ -2461,13 +2460,7 @@ impl Timeline {
// FIXME: It's pointless to check the cache for things that are not 8kB pages.
// We should look at the key to determine if it's a cacheable object
let (lsn, read_guard) = cache
.lookup_materialized_page(
self.tenant_shard_id.tenant_id,
self.timeline_id,
key,
lsn,
ctx,
)
.lookup_materialized_page(self.tenant_shard_id, self.timeline_id, key, lsn, ctx)
.await?;
let img = Bytes::from(read_guard.to_vec());
Some((lsn, img))
@@ -3209,7 +3202,7 @@ impl DurationRecorder {
#[derive(Default)]
struct CompactLevel0Phase1StatsBuilder {
version: Option<u64>,
tenant_id: Option<TenantId>,
tenant_id: Option<TenantShardId>,
timeline_id: Option<TimelineId>,
read_lock_acquisition_micros: DurationRecorder,
read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
@@ -3226,7 +3219,7 @@ struct CompactLevel0Phase1StatsBuilder {
#[derive(serde::Serialize)]
struct CompactLevel0Phase1Stats {
version: u64,
tenant_id: TenantId,
tenant_id: TenantShardId,
timeline_id: TimelineId,
read_lock_acquisition_micros: RecordedDuration,
read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
@@ -3745,7 +3738,7 @@ impl Timeline {
let ctx = ctx.attached_child();
let mut stats = CompactLevel0Phase1StatsBuilder {
version: Some(2),
tenant_id: Some(self.tenant_shard_id.tenant_id),
tenant_id: Some(self.tenant_shard_id),
timeline_id: Some(self.timeline_id),
..Default::default()
};
@@ -4207,7 +4200,7 @@ impl Timeline {
let cache = page_cache::get();
if let Err(e) = cache
.memorize_materialized_page(
self.tenant_shard_id.tenant_id,
self.tenant_shard_id,
self.timeline_id,
key,
last_rec_lsn,
@@ -4251,7 +4244,7 @@ impl Timeline {
let task_id = task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::DownloadAllRemoteLayers,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
"download all remote layers task",
false,

View File

@@ -43,7 +43,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
// Shut down the layer flush task before the remote client, as one depends on the other
task_mgr::shutdown_tasks(
Some(TaskKind::LayerFlushTask),
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.tenant_shard_id),
Some(timeline.timeline_id),
)
.await;
@@ -71,7 +71,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
info!("waiting for timeline tasks to shutdown");
task_mgr::shutdown_tasks(
None,
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.tenant_shard_id),
Some(timeline.timeline_id),
)
.await;
@@ -528,7 +528,7 @@ impl DeleteTimelineFlow {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::TimelineDeletionWorker,
Some(tenant_shard_id.tenant_id),
Some(tenant_shard_id),
Some(timeline_id),
"timeline_delete",
false,

View File

@@ -60,7 +60,7 @@ impl Timeline {
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::Eviction,
Some(self.tenant_shard_id.tenant_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
&format!(
"layer eviction for {}/{}",
@@ -343,7 +343,7 @@ impl Timeline {
// Make one of the tenant's timelines draw the short straw and run the calculation.
// The others wait until the calculation is done so that they take into account the
// imitated accesses that the winner made.
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
Ok(t) => t,
Err(_) => {
return ControlFlow::Break(());

View File

@@ -30,6 +30,7 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
connection_manager_loop_step, ConnectionManagerState,
};
use pageserver_api::shard::TenantShardId;
use std::future::Future;
use std::num::NonZeroU64;
use std::ops::ControlFlow;
@@ -41,7 +42,7 @@ use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::id::TenantTimelineId;
use utils::id::TimelineId;
use self::connection_manager::ConnectionManagerStatus;
@@ -60,7 +61,8 @@ pub struct WalReceiverConf {
}
pub struct WalReceiver {
timeline: TenantTimelineId,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
}
@@ -71,7 +73,7 @@ impl WalReceiver {
mut broker_client: BrokerClientChannel,
ctx: &RequestContext,
) -> Self {
let tenant_id = timeline.tenant_shard_id.tenant_id;
let tenant_shard_id = timeline.tenant_shard_id;
let timeline_id = timeline.timeline_id;
let walreceiver_ctx =
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
@@ -81,9 +83,9 @@ impl WalReceiver {
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverManager,
Some(tenant_id),
Some(timeline.tenant_shard_id),
Some(timeline_id),
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
&format!("walreceiver for timeline {tenant_shard_id}/{timeline_id}"),
false,
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
@@ -117,11 +119,12 @@ impl WalReceiver {
*loop_status.write().unwrap() = None;
Ok(())
}
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id))
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
);
Self {
timeline: TenantTimelineId::new(tenant_id, timeline_id),
tenant_shard_id,
timeline_id,
manager_status,
}
}
@@ -129,8 +132,8 @@ impl WalReceiver {
pub async fn stop(self) {
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.timeline.tenant_id),
Some(self.timeline.timeline_id),
Some(self.tenant_shard_id),
Some(self.timeline_id),
)
.await;
}

View File

@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverConnectionPoller,
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.tenant_shard_id),
Some(timeline.timeline_id),
"walreceiver connection",
false,

View File

@@ -458,8 +458,10 @@ impl<'a> WalIngest<'a> {
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
&& (decoded.xl_info == pg_constants::XLOG_FPI
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
// compression of WAL is not yet supported: fall back to storing the original WAL record
// compression of WAL is not yet supported: fall back to storing the original WAL record
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, self.timeline.pg_version)?
// do not materialize null pages because them most likely be soon replaced with real data
&& blk.bimg_len != 0
{
// Extract page image from FPI record
let img_len = blk.bimg_len as usize;

View File

@@ -62,6 +62,9 @@ pub enum AuthErrorImpl {
Please add it to the allowed list in the Neon console."
)]
IpAddressNotAllowed,
#[error("Too many connections to this endpoint. Please try again later.")]
TooManyConnections,
}
#[derive(Debug, Error)]
@@ -80,6 +83,10 @@ impl AuthError {
pub fn ip_address_not_allowed() -> Self {
AuthErrorImpl::IpAddressNotAllowed.into()
}
pub fn too_many_connections() -> Self {
AuthErrorImpl::TooManyConnections.into()
}
}
impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
@@ -102,6 +109,7 @@ impl UserFacingError for AuthError {
MissingEndpointName => self.to_string(),
Io(_) => "Internal error".to_string(),
IpAddressNotAllowed => self.to_string(),
TooManyConnections => self.to_string(),
}
}
}

View File

@@ -166,7 +166,7 @@ impl TryFrom<ClientCredentials> for ComputeUserInfo {
/// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks(
api: &impl console::Api,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: ClientCredentials,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
@@ -235,7 +235,7 @@ async fn auth_quirks(
/// only if authentication was successfuly.
async fn auth_and_wake_compute(
api: &impl console::Api,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: ClientCredentials,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
@@ -314,7 +314,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
#[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
pub async fn authenticate(
self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
@@ -387,7 +387,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
impl BackendType<'_, ComputeUserInfo> {
pub async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
use BackendType::*;
match self {
@@ -404,7 +404,7 @@ impl BackendType<'_, ComputeUserInfo> {
/// The link auth flow doesn't support this, so we return [`None`] in that case.
pub async fn wake_compute(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
) -> Result<Option<CachedNodeInfo>, console::errors::WakeComputeError> {
use BackendType::*;

View File

@@ -112,6 +112,9 @@ struct ProxyCliArgs {
/// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
#[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
rate_limiter_timeout: tokio::time::Duration,
/// Endpoint rate limiter max number of requests per second.
#[clap(long, default_value_t = 300)]
endpoint_rps_limit: u32,
/// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
#[clap(long, default_value_t = 100)]
initial_limit: usize,
@@ -317,6 +320,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
authentication_config,
require_client_ip: args.require_client_ip,
disable_ip_check_for_http: args.disable_ip_check_for_http,
endpoint_rps_limit: args.endpoint_rps_limit,
}));
Ok(config)

View File

@@ -20,6 +20,7 @@ pub struct ProxyConfig {
pub authentication_config: AuthenticationConfig,
pub require_client_ip: bool,
pub disable_ip_check_for_http: bool,
pub endpoint_rps_limit: u32,
}
#[derive(Debug)]

View File

@@ -196,15 +196,15 @@ pub mod errors {
}
/// Extra query params we'd like to pass to the console.
pub struct ConsoleReqExtra<'a> {
pub struct ConsoleReqExtra {
/// A unique identifier for a connection.
pub session_id: uuid::Uuid,
/// Name of client application, if set.
pub application_name: Option<&'a str>,
pub application_name: String,
pub options: Vec<(String, String)>,
}
impl<'a> ConsoleReqExtra<'a> {
impl ConsoleReqExtra {
// https://swagger.io/docs/specification/serialization/ DeepObject format
// paramName[prop1]=value1&paramName[prop2]=value2&....
pub fn options_as_deep_object(&self) -> Vec<(String, String)> {
@@ -259,20 +259,20 @@ pub trait Api {
/// Get the client's auth secret for authentication.
async fn get_auth_info(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, errors::GetAuthInfoError>;
async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError>;
}

View File

@@ -144,7 +144,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
&self,
_extra: &ConsoleReqExtra<'_>,
_extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(creds).await
@@ -152,7 +152,7 @@ impl super::Api for Api {
async fn get_allowed_ips(
&self,
_extra: &ConsoleReqExtra<'_>,
_extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips))
@@ -161,7 +161,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn wake_compute(
&self,
_extra: &ConsoleReqExtra<'_>,
_extra: &ConsoleReqExtra,
_creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> {
self.do_wake_compute()

View File

@@ -48,7 +48,7 @@ impl Api {
async fn do_get_auth_info(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
let request_id = uuid::Uuid::new_v4().to_string();
@@ -60,9 +60,9 @@ impl Api {
.header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name),
("project", Some(&creds.endpoint)),
("role", Some(&creds.inner.user)),
("application_name", extra.application_name.as_str()),
("project", creds.endpoint.as_str()),
("role", creds.inner.user.as_str()),
])
.build()?;
@@ -101,7 +101,7 @@ impl Api {
async fn do_wake_compute(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<NodeInfo, WakeComputeError> {
let request_id = uuid::Uuid::new_v4().to_string();
@@ -113,8 +113,8 @@ impl Api {
.header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name),
("project", Some(&creds.endpoint)),
("application_name", extra.application_name.as_str()),
("project", creds.endpoint.as_str()),
]);
request_builder = if extra.options.is_empty() {
@@ -161,7 +161,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(extra, creds).await
@@ -169,7 +169,7 @@ impl super::Api for Api {
async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
let key: &str = &creds.endpoint;
@@ -192,7 +192,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn wake_compute(
&self,
extra: &ConsoleReqExtra<'_>,
extra: &ConsoleReqExtra,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> {
let key: &str = &creds.inner.cache_key;

View File

@@ -9,6 +9,7 @@ use crate::{
console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
http::StatusCode,
protocol2::WithClientIp,
rate_limiter::EndpointRateLimiter,
stream::{PqStream, Stream},
usage_metrics::{Ids, USAGE_METRICS},
};
@@ -307,6 +308,7 @@ pub async fn task_main(
let connections = tokio_util::task::task_tracker::TaskTracker::new();
let cancel_map = Arc::new(CancelMap::default());
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
while let Some(accept_result) =
run_until_cancelled(listener.accept(), &cancellation_token).await
@@ -315,6 +317,8 @@ pub async fn task_main(
let session_id = uuid::Uuid::new_v4();
let cancel_map = Arc::clone(&cancel_map);
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
connections.spawn(
async move {
info!("accepted postgres client connection");
@@ -340,6 +344,7 @@ pub async fn task_main(
socket,
ClientMode::Tcp,
peer_addr.ip(),
endpoint_rate_limiter,
)
.await
}
@@ -415,6 +420,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
mode: ClientMode,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
info!(
protocol = mode.protocol_label(),
@@ -463,6 +469,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
&params,
session_id,
mode.allow_self_signed_compute(config),
endpoint_rate_limiter,
);
cancel_map
.with_session(|session| client.connect_to_db(session, mode, &config.authentication_config))
@@ -671,7 +678,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
pub async fn connect_to_compute<M: ConnectMechanism>(
mechanism: &M,
mut node_info: console::CachedNodeInfo,
extra: &console::ConsoleReqExtra<'_>,
extra: &console::ConsoleReqExtra,
creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
mut latency_timer: LatencyTimer,
) -> Result<M::Connection, M::Error>
@@ -928,6 +935,8 @@ struct Client<'a, S> {
session_id: uuid::Uuid,
/// Allow self-signed certificates (for testing).
allow_self_signed_compute: bool,
/// Rate limiter for endpoints
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
}
impl<'a, S> Client<'a, S> {
@@ -938,6 +947,7 @@ impl<'a, S> Client<'a, S> {
params: &'a StartupMessageParams,
session_id: uuid::Uuid,
allow_self_signed_compute: bool,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Self {
Self {
stream,
@@ -945,6 +955,7 @@ impl<'a, S> Client<'a, S> {
params,
session_id,
allow_self_signed_compute,
endpoint_rate_limiter,
}
}
}
@@ -966,15 +977,29 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
params,
session_id,
allow_self_signed_compute,
endpoint_rate_limiter,
} = self;
// check rate limit
if let Some(ep) = creds.get_endpoint() {
if !endpoint_rate_limiter.check(ep) {
return stream
.throw_error(auth::AuthError::too_many_connections())
.await;
}
}
let proto = mode.protocol_label();
let extra = console::ConsoleReqExtra {
session_id, // aka this connection's id
application_name: params.get("application_name"),
application_name: format!(
"{}/{}",
params.get("application_name").unwrap_or_default(),
proto
),
options: neon_options(params),
};
let mut latency_timer = LatencyTimer::new(mode.protocol_label());
let mut latency_timer = LatencyTimer::new(proto);
let user = creds.get_user().to_owned();
let auth_result = match creds
@@ -1012,7 +1037,6 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
.or_else(|e| stream.throw_error(e))
.await?;
let proto = mode.protocol_label();
NUM_DB_CONNECTIONS_OPENED_COUNTER
.with_label_values(&[proto])
.inc();

View File

@@ -484,13 +484,13 @@ fn helper_create_connect_info(
mechanism: &TestConnectMechanism,
) -> (
CachedNodeInfo,
console::ConsoleReqExtra<'static>,
console::ConsoleReqExtra,
auth::BackendType<'_, ComputeUserInfo>,
) {
let cache = helper_create_cached_node_info();
let extra = console::ConsoleReqExtra {
session_id: uuid::Uuid::new_v4(),
application_name: Some("TEST"),
application_name: "TEST".into(),
options: vec![],
};
let creds = auth::BackendType::Test(mechanism);

View File

@@ -3,4 +3,5 @@ mod limit_algorithm;
mod limiter;
pub use aimd::Aimd;
pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
pub use limiter::EndpointRateLimiter;
pub use limiter::Limiter;

View File

@@ -6,6 +6,9 @@ use std::{
time::Duration,
};
use dashmap::DashMap;
use parking_lot::Mutex;
use smol_str::SmolStr;
use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
use tokio::time::{timeout, Instant};
use tracing::info;
@@ -15,6 +18,74 @@ use super::{
RateLimiterConfig,
};
// Simple per-endpoint rate limiter.
//
// Check that number of connections to the endpoint is below `max_rps` rps.
// Purposefully ignore user name and database name as clients can reconnect
// with different names, so we'll end up sending some http requests to
// the control plane.
//
// We also may save quite a lot of CPU (I think) by bailing out right after we
// saw SNI, before doing TLS handshake. User-side error messages in that case
// does not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
// I went with a more expensive way that yields user-friendlier error messages.
//
// TODO: add a better bucketing here, e.g. not more than 300 requests per second,
// and not more than 1000 requests per 10 seconds, etc. Short bursts of reconnects
// are noramal during redeployments, so we should not block them.
pub struct EndpointRateLimiter {
map: DashMap<SmolStr, Arc<Mutex<(chrono::NaiveTime, u32)>>>,
max_rps: u32,
access_count: AtomicUsize,
}
impl EndpointRateLimiter {
pub fn new(max_rps: u32) -> Self {
Self {
map: DashMap::new(),
max_rps,
access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
}
}
/// Check that number of connections to the endpoint is below `max_rps` rps.
pub fn check(&self, endpoint: SmolStr) -> bool {
// do GC every 100k requests (worst case memory usage is about 10MB)
if self.access_count.fetch_add(1, Ordering::AcqRel) % 100_000 == 0 {
self.do_gc();
}
let now = chrono::Utc::now().naive_utc().time();
let entry = self
.map
.entry(endpoint)
.or_insert_with(|| Arc::new(Mutex::new((now, 0))));
let mut entry = entry.lock();
let (last_time, count) = *entry;
if now - last_time < chrono::Duration::seconds(1) {
if count >= self.max_rps {
return false;
}
*entry = (last_time, count + 1);
} else {
*entry = (now, 1);
}
true
}
/// Clean the map. Simple strategy: remove all entries. At worst, we'll
/// double the effective max_rps during the cleanup. But that way deletion
/// does not aquire mutex on each entry access.
pub fn do_gc(&self) {
info!(
"cleaning up endpoint rate limiter, current size = {}",
self.map.len()
);
self.map.clear();
}
}
/// Limits the number of concurrent jobs.
///
/// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the

View File

@@ -14,6 +14,7 @@ use tokio_util::task::TaskTracker;
use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
use crate::rate_limiter::EndpointRateLimiter;
use crate::{cancellation::CancelMap, config::ProxyConfig};
use futures::StreamExt;
use hyper::{
@@ -43,6 +44,7 @@ pub async fn task_main(
}
let conn_pool = conn_pool::GlobalConnPool::new(config);
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
// shutdown the connection pool
tokio::spawn({
@@ -91,6 +93,7 @@ pub async fn task_main(
let sni_name = tls.server_name().map(|s| s.to_string());
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
async move {
let peer_addr = match client_addr {
@@ -103,6 +106,7 @@ pub async fn task_main(
let sni_name = sni_name.clone();
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
async move {
let cancel_map = Arc::new(CancelMap::default());
@@ -117,6 +121,7 @@ pub async fn task_main(
session_id,
sni_name,
peer_addr.ip(),
endpoint_rate_limiter,
)
.instrument(info_span!(
"serverless",
@@ -190,6 +195,7 @@ async fn request_handler(
session_id: uuid::Uuid,
sni_hostname: Option<String>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<Body>, ApiError> {
let host = request
.headers()
@@ -214,6 +220,7 @@ async fn request_handler(
session_id,
host,
peer_addr,
endpoint_rate_limiter,
)
.await
{

View File

@@ -37,7 +37,7 @@ use crate::proxy::ConnectMechanism;
use tracing::{error, warn, Span};
use tracing::{info, info_span, Instrument};
pub const APP_NAME: &str = "sql_over_http";
pub const APP_NAME: &str = "/sql_over_http";
const MAX_CONNS_PER_ENDPOINT: usize = 20;
#[derive(Debug, Clone)]
@@ -432,7 +432,7 @@ async fn connect_to_compute(
let extra = console::ConsoleReqExtra {
session_id: uuid::Uuid::new_v4(),
application_name: Some(APP_NAME),
application_name: APP_NAME.to_string(),
options: console_options,
};
// TODO(anna): this is a bit hacky way, consider using console notification listener.

View File

@@ -3,6 +3,7 @@ use crate::{
config::ProxyConfig,
error::io_error,
proxy::{handle_client, ClientMode},
rate_limiter::EndpointRateLimiter,
};
use bytes::{Buf, Bytes};
use futures::{Sink, Stream};
@@ -13,6 +14,7 @@ use pin_project_lite::pin_project;
use std::{
net::IpAddr,
pin::Pin,
sync::Arc,
task::{ready, Context, Poll},
};
use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
@@ -134,6 +136,7 @@ pub async fn serve_websocket(
session_id: uuid::Uuid,
hostname: Option<String>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
let websocket = websocket.await?;
handle_client(
@@ -143,6 +146,7 @@ pub async fn serve_websocket(
WebSocketRw::new(websocket),
ClientMode::Websockets { hostname },
peer_addr,
endpoint_rate_limiter,
)
.await?;
Ok(())

View File

@@ -142,7 +142,9 @@ pub(crate) async fn branch_cleanup_and_check_errors(
.collect();
if !orphan_layers.is_empty() {
result.errors.push(format!(
// An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
// these as a hint that there is something worth cleaning up here.
result.warnings.push(format!(
"index_part.json does not contain layers from S3: {:?}",
orphan_layers
.iter()
@@ -170,6 +172,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
));
}
}
BlobDataParseResult::Relic => {}
BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
parse_errors
.into_iter()
@@ -215,6 +218,8 @@ pub(crate) enum BlobDataParseResult {
index_part_generation: Generation,
s3_layers: HashSet<(LayerFileName, Generation)>,
},
/// The remains of a deleted Timeline (i.e. an initdb archive only)
Relic,
Incorrect(Vec<String>),
}
@@ -245,6 +250,7 @@ pub(crate) async fn list_timeline_blobs(
timeline_dir_target.delimiter = String::new();
let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
let mut initdb_archive: bool = false;
let stream = stream_listing(s3_client, &timeline_dir_target);
pin_mut!(stream);
@@ -258,6 +264,10 @@ pub(crate) async fn list_timeline_blobs(
tracing::info!("Index key {key}");
index_parts.push(obj)
}
Some("initdb.tar.zst") => {
tracing::info!("initdb archive {key}");
initdb_archive = true;
}
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
Ok((new_layer, gen)) => {
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +289,16 @@ pub(crate) async fn list_timeline_blobs(
}
}
if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
tracing::info!(
"Timeline is empty apart from initdb archive: expected post-deletion state."
);
return Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Relic,
keys_to_remove: Vec::new(),
});
}
// Choose the index_part with the highest generation
let (index_part_object, index_part_generation) = match index_parts
.iter()

View File

@@ -86,7 +86,9 @@ impl S3Target {
if new_self.prefix_in_bucket.is_empty() {
new_self.prefix_in_bucket = format!("/{}/", new_segment);
} else {
let _ = new_self.prefix_in_bucket.pop();
if new_self.prefix_in_bucket.ends_with('/') {
new_self.prefix_in_bucket.pop();
}
new_self.prefix_in_bucket =
[&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
}

View File

@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
));
match cli.command {
Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
Err(e) => {
tracing::error!("Failed: {e}");
Err(e)
@@ -70,6 +70,17 @@ async fn main() -> anyhow::Result<()> {
}
if summary.is_fatal() {
Err(anyhow::anyhow!("Fatal scrub errors detected"))
} else if summary.is_empty() {
// Strictly speaking an empty bucket is a valid bucket, but if someone ran the
// scrubber they were likely expecting to scan something, and if we see no timelines
// at all then it's likely due to some configuration issues like a bad prefix
Err(anyhow::anyhow!(
"No timelines found in bucket {} prefix {}",
bucket_config.bucket,
bucket_config
.prefix_in_bucket
.unwrap_or("<none>".to_string())
))
} else {
Ok(())
}

View File

@@ -174,6 +174,10 @@ Timeline layer count: {6}
pub fn is_fatal(&self) -> bool {
!self.with_errors.is_empty()
}
pub fn is_empty(&self) -> bool {
self.count == 0
}
}
/// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.

View File

@@ -28,6 +28,7 @@ import jwt
import psycopg2
import pytest
import requests
import toml
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
@@ -436,7 +437,7 @@ class NeonEnvBuilder:
# Pageserver remote storage
self.pageserver_remote_storage = pageserver_remote_storage
# Safekeepers remote storage
self.sk_remote_storage: Optional[RemoteStorage] = None
self.safekeepers_remote_storage: Optional[RemoteStorage] = None
self.broker = broker
self.run_id = run_id
@@ -506,6 +507,66 @@ class NeonEnvBuilder:
return env
def from_repo_dir(
self,
repo_dir: Path,
neon_binpath: Optional[Path] = None,
pg_distrib_dir: Optional[Path] = None,
) -> NeonEnv:
"""
A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
"""
# Setting custom `neon_binpath` and `pg_distrib_dir` is useful for compatibility tests
self.neon_binpath = neon_binpath or self.neon_binpath
self.pg_distrib_dir = pg_distrib_dir or self.pg_distrib_dir
# Get the initial tenant and timeline from the snapshot config
snapshot_config_toml = repo_dir / "config"
with snapshot_config_toml.open("r") as f:
snapshot_config = toml.load(f)
self.initial_tenant = TenantId(snapshot_config["default_tenant_id"])
self.initial_timeline = TimelineId(
dict(snapshot_config["branch_name_mappings"][DEFAULT_BRANCH_NAME])[
str(self.initial_tenant)
]
)
self.env = self.init_configs()
for ps_dir in repo_dir.glob("pageserver_*"):
tenants_from_dir = ps_dir / "tenants"
tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
shutil.copytree(tenants_from_dir, tenants_to_dir)
for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
log.info(f"Copying safekeeper directory {sk_from_dir} to {sk_to_dir}")
sk_to_dir.rmdir()
shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
shutil.copytree(
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
)
if (attachments_json := Path(repo_dir / "attachments.json")).exists():
shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
# Update the config with info about tenants and timelines
with (self.repo_dir / "config").open("r") as f:
config = toml.load(f)
config["default_tenant_id"] = snapshot_config["default_tenant_id"]
config["branch_name_mappings"] = snapshot_config["branch_name_mappings"]
with (self.repo_dir / "config").open("w") as f:
toml.dump(config, f)
return self.env
def enable_scrub_on_exit(self):
"""
Call this if you would like the fixture to automatically run
@@ -534,9 +595,11 @@ class NeonEnvBuilder:
self.pageserver_remote_storage = ret
def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
assert self.sk_remote_storage is None, "sk_remote_storage already configured"
assert (
self.safekeepers_remote_storage is None
), "safekeepers_remote_storage already configured"
self.sk_remote_storage = self._configure_and_create_remote_storage(
self.safekeepers_remote_storage = self._configure_and_create_remote_storage(
kind, RemoteStorageUser.SAFEKEEPER
)
@@ -589,7 +652,7 @@ class NeonEnvBuilder:
directory_to_clean.rmdir()
def cleanup_remote_storage(self):
for x in [self.pageserver_remote_storage, self.sk_remote_storage]:
for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
if isinstance(x, S3Storage):
x.do_cleanup()
@@ -693,7 +756,7 @@ class NeonEnv:
self.pageservers: List[NeonPageserver] = []
self.broker = config.broker
self.pageserver_remote_storage = config.pageserver_remote_storage
self.safekeepers_remote_storage = config.sk_remote_storage
self.safekeepers_remote_storage = config.safekeepers_remote_storage
self.pg_version = config.pg_version
# Binary path for pageserver, safekeeper, etc
self.neon_binpath = config.neon_binpath
@@ -718,25 +781,17 @@ class NeonEnv:
self.attachment_service = None
# Create a config file corresponding to the options
toml = textwrap.dedent(
f"""
default_tenant_id = '{config.initial_tenant}'
"""
)
cfg: Dict[str, Any] = {
"default_tenant_id": str(self.initial_tenant),
"broker": {
"listen_addr": self.broker.listen_addr(),
},
"pageservers": [],
"safekeepers": [],
}
if self.control_plane_api is not None:
toml += textwrap.dedent(
f"""
control_plane_api = '{self.control_plane_api}'
"""
)
toml += textwrap.dedent(
f"""
[broker]
listen_addr = '{self.broker.listen_addr()}'
"""
)
cfg["control_plane_api"] = self.control_plane_api
# Create config for pageserver
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -749,26 +804,24 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
toml += textwrap.dedent(
f"""
[[pageservers]]
id={ps_id}
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
pg_auth_type = '{pg_auth_type}'
http_auth_type = '{http_auth_type}'
"""
)
ps_cfg: Dict[str, Any] = {
"id": ps_id,
"listen_pg_addr": f"localhost:{pageserver_port.pg}",
"listen_http_addr": f"localhost:{pageserver_port.http}",
"pg_auth_type": pg_auth_type,
"http_auth_type": http_auth_type,
}
# Create a corresponding NeonPageserver object
self.pageservers.append(
NeonPageserver(
self,
ps_id,
port=pageserver_port,
config_override=config.pageserver_config_override,
config_override=self.pageserver_config_override,
)
)
cfg["pageservers"].append(ps_cfg)
# Create config and a Safekeeper object for each safekeeper
for i in range(1, config.num_safekeepers + 1):
port = SafekeeperPort(
@@ -777,32 +830,22 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
id = config.safekeepers_id_start + i # assign ids sequentially
toml += textwrap.dedent(
f"""
[[safekeepers]]
id = {id}
pg_port = {port.pg}
pg_tenant_only_port = {port.pg_tenant_only}
http_port = {port.http}
sync = {'true' if config.safekeepers_enable_fsync else 'false'}"""
)
sk_cfg: Dict[str, Any] = {
"id": id,
"pg_port": port.pg,
"pg_tenant_only_port": port.pg_tenant_only,
"http_port": port.http,
"sync": config.safekeepers_enable_fsync,
}
if config.auth_enabled:
toml += textwrap.dedent(
"""
auth_enabled = true
"""
)
if config.sk_remote_storage is not None:
toml += textwrap.dedent(
f"""
remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
"""
)
safekeeper = Safekeeper(env=self, id=id, port=port)
self.safekeepers.append(safekeeper)
sk_cfg["auth_enabled"] = True
if self.safekeepers_remote_storage is not None:
sk_cfg["remote_storage"] = self.safekeepers_remote_storage.to_toml_inline_table()
self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
cfg["safekeepers"].append(sk_cfg)
log.info(f"Config: {toml}")
self.neon_cli.init(toml)
log.info(f"Config: {cfg}")
self.neon_cli.init(cfg)
def start(self):
# Start up broker, pageserver and all safekeepers
@@ -1288,10 +1331,10 @@ class NeonCli(AbstractNeonCli):
def init(
self,
config_toml: str,
config: Dict[str, Any],
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as tmp:
tmp.write(config_toml)
tmp.write(toml.dumps(config))
tmp.flush()
cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
@@ -1729,11 +1772,16 @@ class NeonPageserver(PgProtocol):
@property
def workdir(self) -> Path:
return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
return self.env.repo_dir / f"pageserver_{self.id}"
def assert_no_errors(self):
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
logfile = self.workdir / "pageserver.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return
with logfile.open("r") as f:
errors = scan_pageserver_log_for_errors(f, self.allowed_errors)
for _lineno, error in errors:
log.info(f"not allowed error: {error.strip()}")
@@ -1757,7 +1805,10 @@ class NeonPageserver(PgProtocol):
def log_contains(self, pattern: str) -> Optional[str]:
"""Check that the pageserver log contains a line that matches the given regex"""
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
logfile = self.workdir / "pageserver.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return None
contains_re = re.compile(pattern)
@@ -1766,14 +1817,11 @@ class NeonPageserver(PgProtocol):
# no guarantee it is already present in the log file. This hasn't
# been a problem in practice, our python tests are not fast enough
# to hit that race condition.
while True:
line = logfile.readline()
if not line:
break
if contains_re.search(line):
# found it!
return line
with logfile.open("r") as f:
for line in f:
if contains_re.search(line):
# found it!
return line
return None
@@ -1796,6 +1844,27 @@ class NeonPageserver(PgProtocol):
client = self.http_client()
return client.tenant_detach(tenant_id)
def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
# This API is only for use when generations are enabled
assert self.env.attachment_service is not None
if config["mode"].startswith("Attached") and "generation" not in config:
config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
client = self.http_client()
return client.tenant_location_conf(tenant_id, config, **kwargs)
def read_tenant_location_conf(self, tenant_id: TenantId) -> dict[str, Any]:
path = self.tenant_dir(tenant_id) / "config-v1"
log.info(f"Reading location conf from {path}")
bytes = open(path, "r").read()
try:
decoded: dict[str, Any] = toml.loads(bytes)
return decoded
except:
log.error(f"Failed to decode LocationConf, raw content ({len(bytes)} bytes): {bytes}")
raise
def tenant_create(
self,
tenant_id: TenantId,
@@ -2729,6 +2798,7 @@ class EndpointFactory:
lsn: Optional[Lsn] = None,
hot_standby: bool = False,
config_lines: Optional[List[str]] = None,
pageserver_id: Optional[int] = None,
) -> Endpoint:
ep = Endpoint(
self.env,
@@ -2748,6 +2818,7 @@ class EndpointFactory:
lsn=lsn,
hot_standby=hot_standby,
config_lines=config_lines,
pageserver_id=pageserver_id,
)
def stop_all(self) -> "EndpointFactory":
@@ -3094,7 +3165,7 @@ def pytest_addoption(parser: Parser):
SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
r"config|metadata|.+\.(?:toml|pid|json|sql)"
r"config|config-v1|heatmap-v1|metadata|.+\.(?:toml|pid|json|sql)"
)
@@ -3355,8 +3426,6 @@ def parse_project_git_version_output(s: str) -> str:
The information is generated by utils::project_git_version!
"""
import re
res = re.search(r"git(-env)?:([0-9a-fA-F]{8,40})(-\S+)?", s)
if res and (commit := res.group(2)):
return commit

View File

@@ -150,7 +150,7 @@ class PageserverHttpClient(requests.Session):
# (this may change in future if we do fault injection of a kind that causes
# requests TCP flows to stick)
read=False,
backoff_factor=0,
backoff_factor=0.2,
status_forcelist=[503],
allowed_methods=None,
remove_headers_on_redirect=[],
@@ -277,6 +277,23 @@ class PageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
self.verbose_error(res)
def tenant_location_conf(
self, tenant_id: TenantId, location_conf=dict[str, Any], flush_ms=None
):
body = location_conf.copy()
body["tenant_id"] = str(tenant_id)
params = {}
if flush_ms is not None:
params["flush_ms"] = str(flush_ms)
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/location_config",
json=body,
params=params,
)
self.verbose_error(res)
def tenant_delete(self, tenant_id: TenantId):
res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
self.verbose_error(res)

View File

@@ -9,6 +9,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import boto3
import toml
from mypy_boto3_s3 import S3Client
from fixtures.log_helper import log
@@ -133,7 +134,10 @@ class LocalFsStorage:
return json.load(f)
def to_toml_inline_table(self) -> str:
return f"local_path='{self.root}'"
rv = {
"local_path": str(self.root),
}
return toml.TomlEncoder().dump_inline_table(rv)
def cleanup(self):
# no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
@@ -174,18 +178,18 @@ class S3Storage:
)
def to_toml_inline_table(self) -> str:
s = [
f"bucket_name='{self.bucket_name}'",
f"bucket_region='{self.bucket_region}'",
]
rv = {
"bucket_name": self.bucket_name,
"bucket_region": self.bucket_region,
}
if self.prefix_in_bucket is not None:
s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
rv["prefix_in_bucket"] = self.prefix_in_bucket
if self.endpoint is not None:
s.append(f"endpoint='{self.endpoint}'")
rv["endpoint"] = self.endpoint
return ",".join(s)
return toml.TomlEncoder().dump_inline_table(rv)
def do_cleanup(self):
if not self.cleanup:
@@ -384,4 +388,4 @@ def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
raise Exception("invalid remote storage type")
return f"{{{remote_storage.to_toml_inline_table()}}}"
return remote_storage.to_toml_inline_table()

View File

@@ -0,0 +1,148 @@
from typing import Optional
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.types import TenantId, TimelineId
class Workload:
"""
This is not a general purpose load generator: it exists for storage tests that need to inject some
high level types of storage work via the postgres interface:
- layer writes (`write_rows`)
- work for compaction (`churn_rows`)
- reads, checking we get the right data (`validate`)
"""
def __init__(self, env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
self.env = env
self.tenant_id = tenant_id
self.timeline_id = timeline_id
self.table = "foo"
self.expect_rows = 0
self.churn_cursor = 0
self._endpoint: Optional[Endpoint] = None
def endpoint(self, pageserver_id: int) -> Endpoint:
if self._endpoint is None:
self._endpoint = self.env.endpoints.create(
"main",
tenant_id=self.tenant_id,
pageserver_id=pageserver_id,
endpoint_id="ep-workload",
)
self._endpoint.start(pageserver_id=pageserver_id)
else:
self._endpoint.reconfigure(pageserver_id=pageserver_id)
connstring = self._endpoint.safe_psql(
"SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'"
)
log.info(f"Workload.endpoint: connstr={connstring}")
return self._endpoint
def __del__(self):
if self._endpoint is not None:
self._endpoint.stop()
def init(self, pageserver_id: int):
endpoint = self.endpoint(pageserver_id)
endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
last_flush_lsn_upload(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
def write_rows(self, n, pageserver_id):
endpoint = self.endpoint(pageserver_id)
start = self.expect_rows
end = start + n - 1
self.expect_rows += n
dummy_value = "blah"
endpoint.safe_psql(
f"""
INSERT INTO {self.table} (id, val)
SELECT g, '{dummy_value}'
FROM generate_series({start}, {end}) g
"""
)
return last_flush_lsn_upload(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
def churn_rows(self, n, pageserver_id, upload=True):
assert self.expect_rows >= n
max_iters = 10
endpoint = self.endpoint(pageserver_id)
todo = n
i = 0
while todo > 0:
i += 1
if i > max_iters:
raise RuntimeError("oops")
start = self.churn_cursor % self.expect_rows
n_iter = min((self.expect_rows - start), todo)
todo -= n_iter
end = start + n_iter - 1
log.info(
f"start,end = {start},{end}, cursor={self.churn_cursor}, expect_rows={self.expect_rows}"
)
assert end < self.expect_rows
self.churn_cursor += n_iter
dummy_value = "blah"
endpoint.safe_psql_many(
[
f"""
INSERT INTO {self.table} (id, val)
SELECT g, '{dummy_value}'
FROM generate_series({start}, {end}) g
ON CONFLICT (id) DO UPDATE
SET val = EXCLUDED.val
""",
f"VACUUM {self.table}",
]
)
last_flush_lsn = wait_for_last_flush_lsn(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
ps_http = self.env.get_pageserver(pageserver_id).http_client()
wait_for_last_record_lsn(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
if upload:
# force a checkpoint to trigger upload
ps_http.timeline_checkpoint(self.tenant_id, self.timeline_id)
wait_for_upload(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
else:
log.info(f"Churn: not waiting for upload, disk LSN {last_flush_lsn}")
def validate(self, pageserver_id):
endpoint = self.endpoint(pageserver_id)
result = endpoint.safe_psql_many(
[
"select clear_buffer_cache()",
f"""
SELECT COUNT(*) FROM {self.table}
""",
]
)
log.info(f"validate({self.expect_rows}): {result}")
assert result == [[("",)], [(self.expect_rows,)]]

View File

@@ -92,8 +92,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
env.pageserver.allowed_errors.extend(
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
)
pageserver_token_old = env.auth_keys.generate_pageserver_token()
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
@@ -145,9 +146,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
env.pageserver.allowed_errors.extend(
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
)
pageserver_token_old = env.auth_keys.generate_pageserver_token()
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)

View File

@@ -14,8 +14,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
env.pageserver.allowed_errors.extend(
[".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"]
)
# Branch at the point where only 100 rows were inserted
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")

View File

@@ -148,11 +148,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
env = neon_env_builder.init_configs()
env.start()
env.pageserver.allowed_errors.append(
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
)
env.pageserver.allowed_errors.append(
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading"
env.pageserver.allowed_errors.extend(
[
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading",
]
)
ps_http = env.pageserver.http_client()
@@ -247,11 +247,11 @@ def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: N
env = neon_env_builder.init_configs()
env.start()
env.pageserver.allowed_errors.append(
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
)
env.pageserver.allowed_errors.append(
".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory"
env.pageserver.allowed_errors.extend(
[
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory",
]
)
ps_http = env.pageserver.http_client()

View File

@@ -1,30 +1,25 @@
import copy
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Any, List, Optional
from typing import List, Optional
import pytest
import toml # TODO: replace with tomllib for Python >= 3.11
from fixtures.log_helper import log
import toml
from fixtures.neon_fixtures import (
NeonCli,
NeonEnv,
NeonEnvBuilder,
PgBin,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import Lsn
from pytest import FixtureRequest
#
# A test suite that help to prevent unintentionally breaking backward or forward compatibility between Neon releases.
@@ -37,8 +32,8 @@ from pytest import FixtureRequest
# If the breakage is intentional, the test can be xfaild with setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
#
# The file contains a couple of helper functions:
# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
# - check_neon_works performs the test itself, feel free to add more checks there.
# - dump_differs compares two SQL dumps and writes the diff to a file.
#
#
# How to run `test_backward_compatibility` locally:
@@ -46,6 +41,7 @@ from pytest import FixtureRequest
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
# export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
#
# # Build previous version of binaries and create a data snapshot:
# rm -rf pg_install target
@@ -59,8 +55,7 @@ from pytest import FixtureRequest
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
#
# # Run backward compatibility test
# COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
# ./scripts/pytest -k test_backward_compatibility
# ./scripts/pytest -k test_backward_compatibility
#
#
# How to run `test_forward_compatibility` locally:
@@ -68,6 +63,8 @@ from pytest import FixtureRequest
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
# export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
#
# # Build previous version of binaries and store them somewhere:
# rm -rf pg_install target
@@ -84,9 +81,7 @@ from pytest import FixtureRequest
# ./scripts/pytest -k test_create_snapshot
#
# # Run forward compatibility test
# COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
# COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
# ./scripts/pytest -k test_forward_compatibility
# ./scripts/pytest -k test_forward_compatibility
#
check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
@@ -155,13 +150,9 @@ def test_create_snapshot(
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
def test_backward_compatibility(
pg_bin: PgBin,
port_distributor: PortDistributor,
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
neon_binpath: Path,
pg_distrib_dir: Path,
pg_version: PgVersion,
request: FixtureRequest,
):
"""
Test that the new binaries can read old data
@@ -177,23 +168,15 @@ def test_backward_compatibility(
)
try:
# Copy the snapshot to current directory, and prepare for the test
prepare_snapshot(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
neon_env_builder.start()
check_neon_works(
test_output_dir / "compatibility_snapshot" / "repo",
neon_binpath,
neon_binpath,
pg_distrib_dir,
pg_version,
port_distributor,
test_output_dir,
pg_bin,
request,
env,
test_output_dir=test_output_dir,
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
repo_dir=env.repo_dir,
)
except Exception:
if breaking_changes_allowed:
@@ -212,12 +195,10 @@ def test_backward_compatibility(
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
def test_forward_compatibility(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
top_output_dir: Path,
port_distributor: PortDistributor,
pg_version: PgVersion,
request: FixtureRequest,
neon_binpath: Path,
):
"""
Test that the old binaries can read new data
@@ -244,24 +225,19 @@ def test_forward_compatibility(
)
try:
# Copy the snapshot to current directory, and prepare for the test
prepare_snapshot(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(
compatibility_snapshot_dir / "repo",
neon_binpath=compatibility_neon_bin,
pg_distrib_dir=compatibility_postgres_distrib_dir,
)
neon_env_builder.start()
check_neon_works(
test_output_dir / "compatibility_snapshot" / "repo",
compatibility_neon_bin,
neon_binpath,
compatibility_postgres_distrib_dir,
pg_version,
port_distributor,
test_output_dir,
PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
request,
env,
test_output_dir=test_output_dir,
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
repo_dir=env.repo_dir,
)
except Exception:
if breaking_changes_allowed:
@@ -276,189 +252,26 @@ def test_forward_compatibility(
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
def prepare_snapshot(
from_dir: Path,
to_dir: Path,
port_distributor: PortDistributor,
pg_distrib_dir: Optional[Path] = None,
):
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
ep = env.endpoints.create_start("main")
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
log.info(f"Copying snapshot from {from_dir} to {to_dir}")
shutil.copytree(from_dir, to_dir)
repo_dir = to_dir / "repo"
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
# Remove old logs to avoid confusion in test artifacts
for logfile in repo_dir.glob("**/*.log"):
logfile.unlink()
# Remove old computes in 'endpoints'. Old versions of the control plane used a directory
# called "pgdatadirs". Delete it, too.
if (repo_dir / "endpoints").exists():
shutil.rmtree(repo_dir / "endpoints")
if (repo_dir / "pgdatadirs").exists():
shutil.rmtree(repo_dir / "pgdatadirs")
os.mkdir(repo_dir / "endpoints")
# Update paths and ports in config files
legacy_pageserver_toml = repo_dir / "pageserver.toml"
legacy_bundle = os.path.exists(legacy_pageserver_toml)
path_to_config: dict[Path, dict[Any, Any]] = {}
if legacy_bundle:
os.mkdir(repo_dir / "pageserver_1")
path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
legacy_pageserver_toml
)
os.remove(legacy_pageserver_toml)
os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
else:
for ps_conf in snapshot_config["pageservers"]:
config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
path_to_config[config_path] = toml.load(config_path)
# For each pageserver config, edit it and rewrite
for config_path, pageserver_config in path_to_config.items():
pageserver_config["remote_storage"]["local_path"] = str(
LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
)
for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
pageserver_config[param] = port_distributor.replace_with_new_port(
pageserver_config[param]
)
# We don't use authentication in compatibility tests
# so just remove authentication related settings.
pageserver_config.pop("pg_auth_type", None)
pageserver_config.pop("http_auth_type", None)
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
with config_path.open("w") as f:
toml.dump(pageserver_config, f)
# neon_local config doesn't have to be backward compatible. If we're using a dump from before
# it supported multiple pageservers, fix it up.
if "pageservers" not in snapshot_config:
snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
del snapshot_config["pageserver"]
for param in ("listen_http_addr", "listen_pg_addr"):
for pageserver in snapshot_config["pageservers"]:
pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
snapshot_config["broker"]["listen_addr"]
)
for sk in snapshot_config["safekeepers"]:
for param in ("http_port", "pg_port", "pg_tenant_only_port"):
sk[param] = port_distributor.replace_with_new_port(sk[param])
if pg_distrib_dir:
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
with snapshot_config_toml.open("w") as f:
toml.dump(snapshot_config, f)
# Ensure that snapshot doesn't contain references to the original path
rv = subprocess.run(
[
"grep",
"--recursive",
"--binary-file=without-match",
"--files-with-matches",
"test_create_snapshot/repo",
str(repo_dir),
],
capture_output=True,
text=True,
)
assert (
rv.returncode != 0
), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}"
def check_neon_works(
repo_dir: Path,
neon_target_binpath: Path,
neon_current_binpath: Path,
pg_distrib_dir: Path,
pg_version: PgVersion,
port_distributor: PortDistributor,
test_output_dir: Path,
pg_bin: PgBin,
request: FixtureRequest,
):
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
# TODO: replace with NeonEnvBuilder / NeonEnv
config: Any = type("NeonEnvStub", (object,), {})
config.rust_log_override = None
config.repo_dir = repo_dir
config.pg_version = pg_version
config.initial_tenant = snapshot_config["default_tenant_id"]
config.pg_distrib_dir = pg_distrib_dir
config.remote_storage = None
config.sk_remote_storage = None
# Use the "target" binaries to launch the storage nodes
config_target = config
config_target.neon_binpath = neon_target_binpath
# We are using maybe-old binaries for neon services, but want to use current
# binaries for test utilities like neon_local
config_target.neon_local_binpath = neon_current_binpath
cli_target = NeonCli(config_target)
# And the current binaries to launch computes
snapshot_config["neon_distrib_dir"] = str(neon_current_binpath)
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
config_current = copy.copy(config)
config_current.neon_binpath = neon_current_binpath
cli_current = NeonCli(config_current)
cli_target.raw_cli(["start"])
request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
cli_current.endpoint_create(
branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main"
)
cli_current.endpoint_start("ep-main")
request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main"))
connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
connstr = ep.connstr()
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
)
initial_dump_differs = dump_differs(
repo_dir.parent / "dump.sql",
sql_dump_path,
test_output_dir / "dump.sql",
test_output_dir / "dump.filediff",
)
# Check that project can be recovered from WAL
# loosely based on https://www.notion.so/neondatabase/Storage-Recovery-from-WAL-d92c0aac0ebf40df892b938045d7d720
tenant_id = snapshot_config["default_tenant_id"]
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
pageserver_http = PageserverHttpClient(
port=pageserver_port,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
)
pageserver_http = env.pageserver.http_client()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
pg_version = env.pg_version
shutil.rmtree(repo_dir / "local_fs_remote_storage")
timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
@@ -494,6 +307,11 @@ def dump_differs(
Returns True if the dumps differ and produced diff is not allowed, False otherwise (in most cases we want it to return False).
"""
if not first.exists():
raise FileNotFoundError(f"{first} doesn't exist")
if not second.exists():
raise FileNotFoundError(f"{second} doesn't exist")
with output.open("w") as stdout:
res = subprocess.run(
[

0
test_runner/regress/test_config.py Executable file → Normal file
View File

View File

@@ -99,12 +99,13 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
]
)
# FIXME: we should clean up pageserver to not print this
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
env.pageserver.allowed_errors.extend(
[
# FIXME: we should clean up pageserver to not print this
".*exited with error: unexpected message type: CopyData.*",
# FIXME: Is this expected?
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
]
)
def import_tar(base, wal):

View File

@@ -236,3 +236,30 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
assert vanilla_pg.safe_psql(
"select sum(somedata) from replication_example"
) == endpoint.safe_psql("select sum(somedata) from replication_example")
#
# Check that slots are not inherited in brnach
#
def test_slots_and_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, timeline = env.neon_cli.create_tenant()
env.pageserver.http_client()
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
main_cur = main_branch.connect().cursor()
# Create table and insert some data
main_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
# Create branch ws.
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
log.info("postgres is running on 'ws' branch")
# Check that we can create slot with the same name
ws_cur = ws_branch.connect().cursor()
ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")

View File

@@ -23,14 +23,20 @@ from fixtures.neon_fixtures import (
PgBin,
S3Scrubber,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.utils import list_prefix
from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import (
assert_tenant_state,
list_prefix,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.remote_storage import (
RemoteStorageKind,
)
from fixtures.types import TenantId, TimelineId
from fixtures.utils import print_gc_result, wait_until
from fixtures.workload import Workload
# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.
@@ -93,7 +99,10 @@ def generate_uploads_and_deletions(
)
assert tenant_id is not None
assert timeline_id is not None
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
# We are waiting for uploads as well as local flush, in order to avoid leaving the system
# in a state where there are "future layers" in remote storage that will generate deletions
# after a restart.
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
ps_http.timeline_checkpoint(tenant_id, timeline_id)
# Compaction should generate some GC-elegible layers
@@ -560,3 +569,91 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
read_all(env, tenant_id, timeline_id)
evict_all_layers(env, tenant_id, timeline_id)
read_all(env, tenant_id, timeline_id)
def test_multi_attach(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 3
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
pageservers = env.pageservers
http_clients = list([p.http_client() for p in pageservers])
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# We will intentionally create situations where stale deletions happen from non-latest-generation
# nodes when the tenant is multiply-attached
for ps in env.pageservers:
ps.allowed_errors.extend(
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
)
# Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
http_clients[1].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
http_clients[2].timeline_detail(tenant_id, timeline_id)
workload = Workload(env, tenant_id, timeline_id)
workload.init(pageservers[0].id)
workload.write_rows(1000, pageservers[0].id)
# Attach the tenant to the other two pageservers
pageservers[1].tenant_attach(env.initial_tenant)
pageservers[2].tenant_attach(env.initial_tenant)
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active"))
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active"))
# Now they all have it attached
_details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients])
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
# The endpoint can use any pageserver to service its reads
for pageserver in pageservers:
workload.validate(pageserver.id)
# If we write some more data, all the nodes can see it, including stale ones
wrote_lsn = workload.write_rows(1000, pageservers[0].id)
for ps_http in http_clients:
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, wrote_lsn)
# ...and indeed endpoints can see it via any of the pageservers
for pageserver in pageservers:
workload.validate(pageserver.id)
# Prompt all the pageservers, including stale ones, to upload ingested layers to remote storage
for ps_http in http_clients:
ps_http.timeline_checkpoint(tenant_id, timeline_id)
wait_for_upload(ps_http, tenant_id, timeline_id, wrote_lsn)
# Now, the contents of remote storage will be a set of layers from each pageserver, but with unique
# generation numbers
# TODO: validate remote storage contents
# Stop all pageservers
for ps in pageservers:
ps.stop()
# Returning to a normal healthy state: all pageservers will start, but only the one most
# recently attached via the control plane will re-attach on startup
for ps in pageservers:
ps.start()
with pytest.raises(PageserverApiException):
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
# All data we wrote while multi-attached remains readable
workload.validate(pageservers[2].id)

View File

@@ -64,13 +64,13 @@ def test_metric_collection(
# spin up neon, after http server is ready
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
# httpserver is shut down before pageserver during passing run
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
# we have a fast rate of calculation, these can happen at shutdown
env.pageserver.allowed_errors.append(
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
)
env.pageserver.allowed_errors.append(
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
env.pageserver.allowed_errors.extend(
[
".*metrics endpoint refused the sent metrics*",
# we have a fast rate of calculation, these can happen at shutdown
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
]
)
tenant_id = env.initial_tenant
@@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile(
pageserver_http = env.pageserver.http_client()
# httpserver is shut down before pageserver during passing run
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
# we have a fast rate of calculation, these can happen at shutdown
env.pageserver.allowed_errors.append(
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
)
env.pageserver.allowed_errors.append(
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
env.pageserver.allowed_errors.extend(
[
".*metrics endpoint refused the sent metrics*",
# we have a fast rate of calculation, these can happen at shutdown
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
]
)
tenant_id = env.initial_tenant

View File

@@ -0,0 +1,332 @@
import random
from typing import Any, Dict, Optional
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until
from fixtures.workload import Workload
# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.
TENANT_CONF = {
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{128 * 1024}",
"compaction_target_size": f"{128 * 1024}",
"compaction_threshold": "1",
# no PITR horizon, we specify the horizon when we request on-demand GC
"pitr_interval": "0s",
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"compaction_period": "0s",
# create image layers eagerly, so that GC can remove some layers
"image_creation_threshold": "1",
}
def evict_random_layers(
rng: random.Random, pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
):
"""
Evict 50% of the layers on a pageserver
"""
timeline_path = pageserver.timeline_dir(tenant_id, timeline_id)
initial_local_layers = sorted(
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
)
client = pageserver.http_client()
for layer in initial_local_layers:
if "ephemeral" in layer.name or "temp_download" in layer.name:
continue
if rng.choice([True, False]):
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
@pytest.mark.parametrize("seed", [1, 2, 3])
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
"""
Issue many location configuration changes, ensure that tenants
remain readable & we don't get any unexpected errors. We should
have no ERROR in the log, and no 500s in the API.
The location_config API is intentionally designed so that all destination
states are valid, so that we may test it in this way: the API should always
work as long as the tenant exists.
"""
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 3
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
assert env.attachment_service is not None
pageservers = env.pageservers
list([p.http_client() for p in pageservers])
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# We will make no effort to avoid stale attachments
for ps in env.pageservers:
ps.allowed_errors.extend(
[
".*Dropped remote consistent LSN updates.*",
".*Dropping stale deletions.*",
# page_service_conn_main{peer_addr=[::1]:41176}: query handler for 'pagestream 3b19aec5038c796f64b430b30a555121 d07776761d44050b8aab511df1657d83' failed: Tenant 3b19aec5038c796f64b430b30a555121 not found
".*query handler.*Tenant.*not found.*",
# page_service_conn_main{peer_addr=[::1]:45552}: query handler for 'pagestream 414ede7ad50f775a8e7d9ba0e43b9efc a43884be16f44b3626482b6981b2c745' failed: Tenant 414ede7ad50f775a8e7d9ba0e43b9efc is not active
".*query handler.*Tenant.*not active.*",
]
)
# these can happen, if we shutdown at a good time. to be fixed as part of #5172.
message = ".*duplicated L1 layer layer=.*"
ps.allowed_errors.append(message)
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageservers[0].id)
workload.write_rows(256, env.pageservers[0].id)
# We use a fixed seed to make the test reproducible: we want a randomly
# chosen order, but not to change the order every time we run the test.
rng = random.Random(seed)
initial_generation = 1
last_state = {
env.pageservers[0].id: ("AttachedSingle", initial_generation),
env.pageservers[1].id: ("Detached", None),
env.pageservers[2].id: ("Detached", None),
}
latest_attached = env.pageservers[0].id
for _i in range(0, 64):
# Pick a pageserver
pageserver = rng.choice(env.pageservers)
# Pick a pseudorandom state
modes = [
"AttachedSingle",
"AttachedMulti",
"AttachedStale",
"Secondary",
"Detached",
"_Evictions",
"_Restart",
]
mode = rng.choice(modes)
last_state_ps = last_state[pageserver.id]
if mode == "_Evictions":
if last_state_ps[0].startswith("Attached"):
log.info(f"Action: evictions on pageserver {pageserver.id}")
evict_random_layers(rng, pageserver, tenant_id, timeline_id)
else:
log.info(
f"Action: skipping evictions on pageserver {pageserver.id}, is not attached"
)
elif mode == "_Restart":
log.info(f"Action: restarting pageserver {pageserver.id}")
pageserver.stop()
pageserver.start()
if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id:
log.info("Entering postgres...")
workload.churn_rows(rng.randint(128, 256), pageserver.id)
workload.validate(pageserver.id)
elif last_state_ps[0].startswith("Attached"):
# The `attachment_service` will only re-attach on startup when a pageserver was the
# holder of the latest generation: otherwise the pageserver will revert to detached
# state if it was running attached with a stale generation
last_state[pageserver.id] = ("Detached", None)
else:
secondary_conf: Optional[Dict[str, Any]] = None
if mode == "Secondary":
secondary_conf = {"warm": rng.choice([True, False])}
location_conf: Dict[str, Any] = {
"mode": mode,
"secondary_conf": secondary_conf,
"tenant_conf": {},
}
log.info(f"Action: Configuring pageserver {pageserver.id} to {location_conf}")
# Select a generation number
if mode.startswith("Attached"):
if last_state_ps[1] is not None:
if rng.choice([True, False]):
# Move between attached states, staying in the same generation
generation = last_state_ps[1]
else:
# Switch generations, while also jumping between attached states
generation = env.attachment_service.attach_hook_issue(
tenant_id, pageserver.id
)
latest_attached = pageserver.id
else:
generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
latest_attached = pageserver.id
else:
generation = None
location_conf["generation"] = generation
pageserver.tenant_location_configure(tenant_id, location_conf)
last_state[pageserver.id] = (mode, generation)
if mode.startswith("Attached"):
# This is a basic test: we are validating that he endpoint works properly _between_
# configuration changes. A stronger test would be to validate that clients see
# no errors while we are making the changes.
workload.churn_rows(
rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
)
workload.validate(pageserver.id)
# Attach all pageservers
for ps in env.pageservers:
location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
ps.tenant_location_configure(tenant_id, location_conf)
# Confirm that all are readable
for ps in env.pageservers:
workload.validate(ps.id)
# Detach all pageservers
for ps in env.pageservers:
location_conf = {"mode": "Detached", "secondary_conf": None, "tenant_conf": {}}
ps.tenant_location_configure(tenant_id, location_conf)
# Confirm that all local disk state was removed on detach
# TODO
def test_live_migration(neon_env_builder: NeonEnvBuilder):
"""
Test the sequence of location states that are used in a live migration.
"""
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 2
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
assert env.attachment_service is not None
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
pageserver_a = env.pageservers[0]
pageserver_b = env.pageservers[1]
initial_generation = 1
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageservers[0].id)
workload.write_rows(256, env.pageservers[0].id)
# Make the destination a secondary location
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "Secondary",
"secondary_conf": {"warm": True},
"tenant_conf": {},
},
)
workload.churn_rows(64, pageserver_a.id, upload=False)
# Set origin attachment to stale
log.info("Setting origin to AttachedStale")
pageserver_a.tenant_location_configure(
tenant_id,
{
"mode": "AttachedStale",
"secondary_conf": None,
"tenant_conf": {},
"generation": initial_generation,
},
flush_ms=5000,
)
migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
log.info(f"Acquired generation {migrated_generation} for destination pageserver")
assert migrated_generation == initial_generation + 1
# Writes and reads still work in AttachedStale.
workload.validate(pageserver_a.id)
# TODO: call into secondary mode API hooks to do an upload/download sync
# Generate some more dirty writes: we expect the origin to ingest WAL in
# in AttachedStale
workload.churn_rows(64, pageserver_a.id, upload=False)
workload.validate(pageserver_a.id)
# Attach the destination
log.info("Setting destination to AttachedMulti")
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "AttachedMulti",
"secondary_conf": None,
"tenant_conf": {},
"generation": migrated_generation,
},
)
# Wait for destination LSN to catch up with origin
origin_lsn = pageserver_a.http_client().timeline_detail(tenant_id, timeline_id)[
"last_record_lsn"
]
def caught_up():
destination_lsn = pageserver_b.http_client().timeline_detail(tenant_id, timeline_id)[
"last_record_lsn"
]
log.info(
f"Waiting for LSN to catch up: origin {origin_lsn} vs destination {destination_lsn}"
)
assert destination_lsn >= origin_lsn
wait_until(100, 0.1, caught_up)
# The destination should accept writes
workload.churn_rows(64, pageserver_b.id)
# Dual attached: both are readable.
workload.validate(pageserver_a.id)
workload.validate(pageserver_b.id)
# Revert the origin to secondary
log.info("Setting origin to Secondary")
pageserver_a.tenant_location_configure(
tenant_id,
{
"mode": "Secondary",
"secondary_conf": {"warm": True},
"tenant_conf": {},
},
)
workload.churn_rows(64, pageserver_b.id)
# Put the destination into final state
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": migrated_generation,
},
)
workload.churn_rows(64, pageserver_b.id)
workload.validate(pageserver_b.id)

View File

@@ -73,19 +73,20 @@ def test_remote_storage_backup_and_restore(
##### First start, insert data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
env.pageserver.allowed_errors.extend(
[
# FIXME: Is this expected?
".*marking .* as locally complete, while it doesnt exist in remote index.*",
".*No timelines to attach received.*",
".*Failed to get local tenant state.*",
# FIXME retry downloads without throwing errors
".*failed to load remote timeline.*",
# we have a bunch of pytest.raises for these below
".*tenant .*? already exists, state:.*",
".*tenant directory already exists.*",
".*simulated failure of remote operation.*",
]
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
# FIXME retry downloads without throwing errors
env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
# we have a bunch of pytest.raises for these below
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
pageserver_http = env.pageserver.http_client()
endpoint = env.endpoints.create_start("main")

View File

@@ -395,13 +395,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
env.start()
pageserver_http = env.pageserver.http_client()
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
env.pageserver.allowed_errors.append(
".*Timeline got dropped without initializing, cleaning its files"
)
# the response hit_pausable_failpoint_and_later_fail
env.pageserver.allowed_errors.append(
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
env.pageserver.allowed_errors.extend(
[
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
".*Timeline got dropped without initializing, cleaning its files",
# the response hit_pausable_failpoint_and_later_fail
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn",
]
)
env.pageserver.tenant_create(env.initial_tenant)

View File

@@ -307,10 +307,14 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
bogus_timeline_id = TimelineId.generate()
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
env.pageserver.allowed_errors.extend(
[
# the error will be printed to the log too
".*gc target timeline does not exist.*",
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
".*InternalServerError\\(timeline is Stopping.*",
]
)
# Detach while running manual GC.
# It should wait for manual GC to finish because it runs in a task associated with the tenant.

View File

@@ -216,16 +216,17 @@ def test_tenant_relocation(
tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209")
# FIXME: Is this expected?
env.pageservers[0].allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
env.pageservers[0].allowed_errors.extend(
[
# FIXME: Is this expected?
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
# Needed for detach polling on the original pageserver
f".*NotFound: tenant {tenant_id}.*",
# We will dual-attach in this test, so stale generations are expected
".*Dropped remote consistent LSN updates.*",
]
)
# Needed for detach polling on the original pageserver
env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
# We will dual-attach in this test, so stale generations are expected
env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*")
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
# we use two branches to check that they are both relocated

View File

@@ -117,10 +117,12 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder):
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
env.pageserver.allowed_errors.extend(
[
# FIXME: Are these expected?
".*No timelines to attach received.*",
".*marking .* as locally complete, while it doesnt exist in remote index.*",
]
)
pageserver_http = env.pageserver.http_client()
@@ -218,13 +220,14 @@ def test_tenant_redownloads_truncated_file_on_startup(
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
env.pageserver.allowed_errors.append(".*removing local file .* because .*")
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
env.pageserver.allowed_errors.extend(
[
".*removing local file .* because .*",
# FIXME: Are these expected?
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
".*No timelines to attach received.*",
]
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
pageserver_http = env.pageserver.http_client()
endpoint = env.endpoints.create_start("main")

View File

@@ -36,12 +36,13 @@ def test_threshold_based_eviction(
".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*"
)
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(metrics_refused_log_line)
# these can happen whenever we run consumption metrics collection
env.pageserver.allowed_errors.append(r".*failed to calculate logical size at \S+: cancelled")
env.pageserver.allowed_errors.append(
r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes"
env.pageserver.allowed_errors.extend(
[
metrics_refused_log_line,
# these can happen whenever we run consumption metrics collection
r".*failed to calculate logical size at \S+: cancelled",
r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes",
]
)
tenant_id, timeline_id = env.initial_tenant, env.initial_timeline

View File

@@ -39,10 +39,14 @@ from urllib3.util.retry import Retry
def test_timeline_delete(neon_simple_env: NeonEnv):
env = neon_simple_env
env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
env.pageserver.allowed_errors.append(".*timeline not found.*")
env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
env.pageserver.allowed_errors.append(".*Precondition failed: Requested tenant is missing.*")
env.pageserver.allowed_errors.extend(
[
".*Timeline .* was not found.*",
".*timeline not found.*",
".*Cannot delete timeline which has child timelines.*",
".*Precondition failed: Requested tenant is missing.*",
]
)
ps_http = env.pageserver.http_client()
@@ -198,22 +202,22 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
),
)
env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
# It appears when we stopped flush loop during deletion and then pageserver is stopped
env.pageserver.allowed_errors.append(
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
env.pageserver.allowed_errors.extend(
[
f".*{timeline_id}.*failpoint: {failpoint}",
# It appears when we stopped flush loop during deletion and then pageserver is stopped
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
# This happens when we fail before scheduling background operation.
# Timeline is left in stopping state and retry tries to stop it again.
".*Ignoring new state, equal to the existing one: Stopping",
# This happens when we retry delete requests for broken timelines
".*Ignoring state update Stopping for broken timeline",
# This happens when timeline remains are cleaned up during loading
".*Timeline dir entry become invalid.*",
# In one of the branches we poll for tenant to become active. Polls can generate this log message:
f".*Tenant {env.initial_tenant} is not active*",
]
)
# This happens when we fail before scheduling background operation.
# Timeline is left in stopping state and retry tries to stop it again.
env.pageserver.allowed_errors.append(
".*Ignoring new state, equal to the existing one: Stopping"
)
# This happens when we retry delete requests for broken timelines
env.pageserver.allowed_errors.append(".*Ignoring state update Stopping for broken timeline")
# This happens when timeline remains are cleaned up during loading
env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*")
# In one of the branches we poll for tenant to become active. Polls can generate this log message:
env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*")
ps_http.configure_failpoints((failpoint, "return"))
@@ -398,13 +402,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*failpoint: timeline-delete-before-rm")
env.pageserver.allowed_errors.append(
".*Ignoring new state, equal to the existing one: Stopping"
)
# this happens, because the stuck timeline is visible to shutdown
env.pageserver.allowed_errors.append(
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
env.pageserver.allowed_errors.extend(
[
".*failpoint: timeline-delete-before-rm",
".*Ignoring new state, equal to the existing one: Stopping",
# this happens, because the stuck timeline is visible to shutdown
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
]
)
ps_http = env.pageserver.http_client()
@@ -551,10 +555,12 @@ def test_concurrent_timeline_delete_stuck_on(
with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
assert second_call_err.value.status_code == 409
env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
# the second call will try to transition the timeline into Stopping state as well
env.pageserver.allowed_errors.append(
f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
env.pageserver.allowed_errors.extend(
[
f".*{child_timeline_id}.*{error_msg_re}.*",
# the second call will try to transition the timeline into Stopping state as well
f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping",
]
)
log.info("second call failed as expected")