mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-04 14:00:38 +00:00
Merge pull request #6121 from neondatabase/releases/2023-12-13
Release 2023-12-13
This commit is contained in:
77
Cargo.lock
generated
77
Cargo.lock
generated
@@ -184,7 +184,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"event-listener",
|
||||
"event-listener 2.5.3",
|
||||
"futures-core",
|
||||
]
|
||||
|
||||
@@ -205,11 +205,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "async-lock"
|
||||
version = "2.8.0"
|
||||
version = "3.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
|
||||
checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
|
||||
dependencies = [
|
||||
"event-listener",
|
||||
"event-listener 4.0.0",
|
||||
"event-listener-strategy",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -692,9 +694,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "azure_core"
|
||||
version = "0.16.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e29286b9edfdd6f2c7e9d970bb5b015df8621258acab9ecfcea09b2d7692467"
|
||||
checksum = "a6218987c374650fdad0b476bfc675729762c28dfb35f58608a38a2b1ea337dd"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.21.1",
|
||||
@@ -702,8 +704,10 @@ dependencies = [
|
||||
"dyn-clone",
|
||||
"futures",
|
||||
"getrandom 0.2.11",
|
||||
"hmac",
|
||||
"http-types",
|
||||
"log",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pin-project",
|
||||
"quick-xml",
|
||||
@@ -712,6 +716,7 @@ dependencies = [
|
||||
"rustc_version",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
@@ -719,9 +724,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "azure_identity"
|
||||
version = "0.16.2"
|
||||
version = "0.18.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b67b337346da8739e91ea1e9400a6ebc9bc54e0b2af1d23c9bcd565950588f9"
|
||||
checksum = "9e1eacc4f7fb2a73d57c39139d0fc3aed78435606055779ddaef4b43cdf919a8"
|
||||
dependencies = [
|
||||
"async-lock",
|
||||
"async-trait",
|
||||
@@ -731,7 +736,6 @@ dependencies = [
|
||||
"oauth2",
|
||||
"pin-project",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"time",
|
||||
"tz-rs",
|
||||
"url",
|
||||
@@ -740,21 +744,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "azure_storage"
|
||||
version = "0.16.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bed0ccefde57930b2886fd4aed1f70ac469c197b8c2e94828290d71bcbdb5d97"
|
||||
checksum = "ade8f2653e408de88b9eafec9f48c3c26b94026375e88adbd34523a7dd9795a1"
|
||||
dependencies = [
|
||||
"RustyXML",
|
||||
"async-lock",
|
||||
"async-trait",
|
||||
"azure_core",
|
||||
"bytes",
|
||||
"futures",
|
||||
"hmac",
|
||||
"log",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
@@ -762,13 +763,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "azure_storage_blobs"
|
||||
version = "0.16.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f91a52da2d192cfe43759f61e8bb31a5969f1722d5b85ac89627f356ad674ab4"
|
||||
checksum = "025701c7cc5b523100f0f3b2b01723564ec5a86c03236521c06826337047e872"
|
||||
dependencies = [
|
||||
"RustyXML",
|
||||
"azure_core",
|
||||
"azure_storage",
|
||||
"azure_svc_blobstorage",
|
||||
"bytes",
|
||||
"futures",
|
||||
"log",
|
||||
@@ -780,6 +782,22 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "azure_svc_blobstorage"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76051e5bb67cea1055abe5e530a0878feac7e0ab4cbbcb4a6adc953a58993389"
|
||||
dependencies = [
|
||||
"azure_core",
|
||||
"bytes",
|
||||
"futures",
|
||||
"log",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.67"
|
||||
@@ -1686,6 +1704,27 @@ version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "4.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
|
||||
dependencies = [
|
||||
"concurrent-queue",
|
||||
"parking",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener-strategy"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
|
||||
dependencies = [
|
||||
"event-listener 4.0.0",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fail"
|
||||
version = "0.5.1"
|
||||
@@ -3678,9 +3717,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.30.0"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
|
||||
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
|
||||
@@ -38,10 +38,10 @@ license = "Apache-2.0"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
azure_core = "0.16"
|
||||
azure_identity = "0.16"
|
||||
azure_storage = "0.16"
|
||||
azure_storage_blobs = "0.16"
|
||||
azure_core = "0.18"
|
||||
azure_identity = "0.18"
|
||||
azure_storage = "0.18"
|
||||
azure_storage_blobs = "0.18"
|
||||
flate2 = "1.0.26"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
|
||||
@@ -168,7 +168,7 @@ fn print_timelines_tree(
|
||||
info: t.clone(),
|
||||
children: BTreeSet::new(),
|
||||
name: timeline_name_mappings
|
||||
.remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
|
||||
.remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
@@ -165,7 +165,7 @@ pub fn migrate_tenant(
|
||||
let found = other_ps_tenants
|
||||
.into_iter()
|
||||
.map(|t| t.id)
|
||||
.any(|i| i == tenant_id);
|
||||
.any(|i| i.tenant_id == tenant_id);
|
||||
if !found {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -357,7 +357,7 @@ pub enum TenantAttachmentStatus {
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
pub id: TenantId,
|
||||
pub id: TenantShardId,
|
||||
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
|
||||
pub state: TenantState,
|
||||
/// Sum of the size of all layer files.
|
||||
@@ -369,7 +369,7 @@ pub struct TenantInfo {
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
pub tenant_id: TenantId,
|
||||
pub tenant_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
@@ -385,6 +385,9 @@ pub struct TimelineInfo {
|
||||
/// The LSN that we are advertizing to safekeepers
|
||||
pub remote_consistent_lsn_visible: Lsn,
|
||||
|
||||
/// The LSN from the start of the root timeline (never changes)
|
||||
pub initdb_lsn: Lsn,
|
||||
|
||||
pub current_logical_size: u64,
|
||||
pub current_logical_size_is_accurate: bool,
|
||||
|
||||
@@ -823,7 +826,7 @@ mod tests {
|
||||
fn test_tenantinfo_serde() {
|
||||
// Test serialization/deserialization of TenantInfo
|
||||
let original_active = TenantInfo {
|
||||
id: TenantId::generate(),
|
||||
id: TenantShardId::unsharded(TenantId::generate()),
|
||||
state: TenantState::Active,
|
||||
current_physical_size: Some(42),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
@@ -840,7 +843,7 @@ mod tests {
|
||||
});
|
||||
|
||||
let original_broken = TenantInfo {
|
||||
id: TenantId::generate(),
|
||||
id: TenantShardId::unsharded(TenantId::generate()),
|
||||
state: TenantState::Broken {
|
||||
reason: "reason".into(),
|
||||
backtrace: "backtrace info".into(),
|
||||
|
||||
@@ -76,6 +76,11 @@ impl TenantShardId {
|
||||
pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
|
||||
ShardSlug(self)
|
||||
}
|
||||
|
||||
/// Convenience for code that has special behavior on the 0th shard.
|
||||
pub fn is_zero(&self) -> bool {
|
||||
self.shard_number == ShardNumber(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Formatting helper
|
||||
|
||||
@@ -271,17 +271,12 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
|
||||
let mut builder = blob_client.get();
|
||||
|
||||
if let Some(end_exclusive) = end_exclusive {
|
||||
builder = builder.range(Range::new(start_inclusive, end_exclusive));
|
||||
let range: Range = if let Some(end_exclusive) = end_exclusive {
|
||||
(start_inclusive..end_exclusive).into()
|
||||
} else {
|
||||
// Open ranges are not supported by the SDK so we work around
|
||||
// by setting the upper limit extremely high (but high enough
|
||||
// to still be representable by signed 64 bit integers).
|
||||
// TODO remove workaround once the SDK adds open range support
|
||||
// https://github.com/Azure/azure-sdk-for-rust/issues/1438
|
||||
let end_exclusive = u64::MAX / 4;
|
||||
builder = builder.range(Range::new(start_inclusive, end_exclusive));
|
||||
}
|
||||
(start_inclusive..).into()
|
||||
};
|
||||
builder = builder.range(range);
|
||||
|
||||
self.download_for_builder(builder).await
|
||||
}
|
||||
|
||||
@@ -30,18 +30,32 @@ async fn warn_if_stuck<Fut: std::future::Future>(
|
||||
|
||||
let mut fut = std::pin::pin!(fut);
|
||||
|
||||
loop {
|
||||
let mut warned = false;
|
||||
let ret = loop {
|
||||
match tokio::time::timeout(warn_period, &mut fut).await {
|
||||
Ok(ret) => return ret,
|
||||
Ok(ret) => break ret,
|
||||
Err(_) => {
|
||||
tracing::warn!(
|
||||
gate = name,
|
||||
elapsed_ms = started.elapsed().as_millis(),
|
||||
"still waiting, taking longer than expected..."
|
||||
);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// If we emitted a warning for slowness, also emit a message when we complete, so that
|
||||
// someone debugging a shutdown can know for sure whether we have moved past this operation.
|
||||
if warned {
|
||||
tracing::info!(
|
||||
gate = name,
|
||||
elapsed_ms = started.elapsed().as_millis(),
|
||||
"completed, after taking longer than expected"
|
||||
)
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -269,12 +269,18 @@ async fn calculate_synthetic_size_worker(
|
||||
}
|
||||
};
|
||||
|
||||
for (tenant_id, tenant_state) in tenants {
|
||||
for (tenant_shard_id, tenant_state) in tenants {
|
||||
if tenant_state != TenantState::Active {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
|
||||
if !tenant_shard_id.is_zero() {
|
||||
// We only send consumption metrics from shard 0, so don't waste time calculating
|
||||
// synthetic size on other shards.
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) {
|
||||
// TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
|
||||
// We can put in some prioritization for consumption metrics.
|
||||
// Same for the loop that fetches computed metrics.
|
||||
@@ -291,7 +297,9 @@ async fn calculate_synthetic_size_worker(
|
||||
);
|
||||
|
||||
if !is_cancelled {
|
||||
error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
|
||||
error!(
|
||||
"failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogi
|
||||
use chrono::{DateTime, Utc};
|
||||
use consumption_metrics::EventType;
|
||||
use futures::stream::StreamExt;
|
||||
use pageserver_api::shard::ShardNumber;
|
||||
use std::{sync::Arc, time::SystemTime};
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
@@ -198,12 +197,12 @@ pub(super) async fn collect_all_metrics(
|
||||
};
|
||||
|
||||
let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
|
||||
if state != TenantState::Active {
|
||||
if state != TenantState::Active || !id.is_zero() {
|
||||
None
|
||||
} else {
|
||||
crate::tenant::mgr::get_tenant(id, true)
|
||||
.ok()
|
||||
.map(|tenant| (id, tenant))
|
||||
.map(|tenant| (id.tenant_id, tenant))
|
||||
}
|
||||
});
|
||||
|
||||
@@ -229,11 +228,6 @@ where
|
||||
while let Some((tenant_id, tenant)) = tenants.next().await {
|
||||
let mut tenant_resident_size = 0;
|
||||
|
||||
// Sharded tenants report all consumption metrics from shard zero
|
||||
if tenant.tenant_shard_id().shard_number != ShardNumber(0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for timeline in tenant.list_timelines() {
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
|
||||
@@ -84,7 +84,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: Get tenant status
|
||||
responses:
|
||||
@@ -181,7 +180,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: Get timelines for tenant
|
||||
responses:
|
||||
@@ -232,7 +230,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
@@ -338,7 +335,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
@@ -401,7 +397,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
@@ -469,7 +464,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
@@ -523,7 +517,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Schedules attach operation to happen in the background for the given tenant.
|
||||
@@ -631,7 +624,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: flush_ms
|
||||
in: query
|
||||
required: false
|
||||
@@ -724,7 +716,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: detach_ignored
|
||||
in: query
|
||||
required: false
|
||||
@@ -784,7 +775,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Remove tenant data (including all corresponding timelines) from pageserver's memory.
|
||||
@@ -833,7 +823,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Schedules an operation that attempts to load a tenant from the local disk and
|
||||
@@ -890,7 +879,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: |
|
||||
Calculate tenant's synthetic size
|
||||
@@ -933,7 +921,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: inputs_only
|
||||
in: query
|
||||
required: false
|
||||
@@ -1003,7 +990,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Create a timeline. Returns new timeline id on success.\
|
||||
@@ -1137,7 +1123,6 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
"400":
|
||||
description: Malformed tenant create request
|
||||
content:
|
||||
@@ -1234,7 +1219,6 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: |
|
||||
Returns tenant's config description: specific config overrides a tenant has
|
||||
@@ -1340,7 +1324,6 @@ components:
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
generation:
|
||||
type: integer
|
||||
description: Attachment generation number.
|
||||
@@ -1369,7 +1352,6 @@ components:
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
TenantLocationConfigRequest:
|
||||
type: object
|
||||
required:
|
||||
@@ -1377,7 +1359,6 @@ components:
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
mode:
|
||||
type: string
|
||||
enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
|
||||
@@ -1446,7 +1427,6 @@ components:
|
||||
format: hex
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
last_record_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
|
||||
@@ -319,6 +319,7 @@ async fn build_timeline_info_common(
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let initdb_lsn = timeline.initdb_lsn;
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
@@ -352,14 +353,14 @@ async fn build_timeline_info_common(
|
||||
let walreceiver_status = timeline.walreceiver_status();
|
||||
|
||||
let info = TimelineInfo {
|
||||
// TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
|
||||
tenant_id: timeline.tenant_shard_id.tenant_id,
|
||||
tenant_id: timeline.tenant_shard_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
remote_consistent_lsn: remote_consistent_lsn_projected,
|
||||
remote_consistent_lsn_visible,
|
||||
initdb_lsn,
|
||||
last_record_lsn,
|
||||
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
|
||||
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
@@ -480,15 +481,15 @@ async fn timeline_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let include_non_incremental_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let response_data = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
|
||||
let timelines = tenant.list_timelines();
|
||||
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
@@ -507,7 +508,9 @@ async fn timeline_list_handler(
|
||||
}
|
||||
Ok::<Vec<TimelineInfo>, ApiError>(response_data)
|
||||
}
|
||||
.instrument(info_span!("timeline_list", %tenant_id))
|
||||
.instrument(info_span!("timeline_list",
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
@@ -517,17 +520,17 @@ async fn timeline_detail_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let include_non_incremental_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
// Logical size calculation needs downloading.
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let timeline_info = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
|
||||
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, false)
|
||||
@@ -544,7 +547,10 @@ async fn timeline_detail_handler(
|
||||
|
||||
Ok::<_, ApiError>(timeline_info)
|
||||
}
|
||||
.instrument(info_span!("timeline_detail", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("timeline_detail",
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug(),
|
||||
%timeline_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, timeline_info)
|
||||
@@ -554,8 +560,15 @@ async fn get_lsn_by_timestamp_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
if !tenant_shard_id.is_zero() {
|
||||
// Requires SLRU contents, which are only stored on shard zero
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"Size calculations are only available on shard zero"
|
||||
)));
|
||||
}
|
||||
|
||||
let version: Option<u8> = parse_query_param(&request, "version")?;
|
||||
|
||||
@@ -567,7 +580,7 @@ async fn get_lsn_by_timestamp_handler(
|
||||
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let result = timeline
|
||||
.find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
|
||||
.await?;
|
||||
@@ -602,8 +615,15 @@ async fn get_timestamp_of_lsn_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
if !tenant_shard_id.is_zero() {
|
||||
// Requires SLRU contents, which are only stored on shard zero
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"Size calculations are only available on shard zero"
|
||||
)));
|
||||
}
|
||||
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
@@ -613,7 +633,7 @@ async fn get_timestamp_of_lsn_handler(
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;
|
||||
|
||||
match result {
|
||||
@@ -805,11 +825,11 @@ async fn tenant_status(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let tenant_info = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, false)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
|
||||
|
||||
// Calculate total physical size of all timelines
|
||||
let mut current_physical_size = 0;
|
||||
@@ -819,13 +839,15 @@ async fn tenant_status(
|
||||
|
||||
let state = tenant.current_state();
|
||||
Result::<_, ApiError>::Ok(TenantInfo {
|
||||
id: tenant_id,
|
||||
id: tenant_shard_id,
|
||||
state: state.clone(),
|
||||
current_physical_size: Some(current_physical_size),
|
||||
attachment_status: state.attachment_status(),
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler", %tenant_id))
|
||||
.instrument(info_span!("tenant_status_handler",
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, tenant_info)
|
||||
@@ -868,14 +890,20 @@ async fn tenant_size_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
|
||||
let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
|
||||
let headers = request.headers();
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
|
||||
|
||||
if !tenant_shard_id.is_zero() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"Size calculations are only available on shard zero"
|
||||
)));
|
||||
}
|
||||
|
||||
// this can be long operation
|
||||
let inputs = tenant
|
||||
@@ -927,7 +955,7 @@ async fn tenant_size_handler(
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
TenantHistorySize {
|
||||
id: tenant_id,
|
||||
id: tenant_shard_id.tenant_id,
|
||||
size: sizes.as_ref().map(|x| x.total_size),
|
||||
segment_sizes: sizes.map(|x| x.segments),
|
||||
inputs,
|
||||
@@ -939,14 +967,14 @@ async fn layer_map_info_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let reset: LayerAccessStatsReset =
|
||||
parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
|
||||
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let layer_map_info = timeline.layer_map_info(reset).await;
|
||||
|
||||
json_response(StatusCode::OK, layer_map_info)
|
||||
@@ -956,13 +984,12 @@ async fn layer_download_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let layer_file_name = get_request_param(&request, "layer_file_name")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let downloaded = timeline
|
||||
.download_layer(layer_file_name)
|
||||
.await
|
||||
@@ -973,7 +1000,7 @@ async fn layer_download_handler(
|
||||
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
|
||||
None => json_response(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
|
||||
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -982,12 +1009,12 @@ async fn evict_timeline_layer_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let layer_file_name = get_request_param(&request, "layer_file_name")?;
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let evicted = timeline
|
||||
.evict_layer(layer_file_name)
|
||||
.await
|
||||
@@ -998,7 +1025,7 @@ async fn evict_timeline_layer_handler(
|
||||
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
|
||||
None => json_response(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
|
||||
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -1130,10 +1157,10 @@ async fn get_tenant_config_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let tenant = mgr::get_tenant(tenant_id, false)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
|
||||
|
||||
let response = HashMap::from([
|
||||
(
|
||||
@@ -1227,9 +1254,9 @@ async fn handle_tenant_break(
|
||||
r: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
|
||||
|
||||
let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
|
||||
let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
|
||||
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
|
||||
|
||||
tenant.set_broken("broken from test".to_owned()).await;
|
||||
@@ -1270,14 +1297,15 @@ async fn timeline_gc_handler(
|
||||
mut request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
|
||||
let wait_task_done =
|
||||
mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
|
||||
let gc_result = wait_task_done
|
||||
.await
|
||||
.context("wait for gc task")
|
||||
@@ -1292,9 +1320,9 @@ async fn timeline_compact_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
|
||||
@@ -1302,14 +1330,14 @@ async fn timeline_compact_handler(
|
||||
}
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
timeline
|
||||
.compact(&cancel, flags, &ctx)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
.instrument(info_span!("manual_compaction", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("manual_compaction", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1318,9 +1346,9 @@ async fn timeline_checkpoint_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
|
||||
@@ -1328,7 +1356,7 @@ async fn timeline_checkpoint_handler(
|
||||
}
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
timeline
|
||||
.freeze_and_flush()
|
||||
.await
|
||||
@@ -1340,7 +1368,7 @@ async fn timeline_checkpoint_handler(
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
.instrument(info_span!("manual_checkpoint", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("manual_checkpoint", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1348,12 +1376,12 @@ async fn timeline_download_remote_layers_handler_post(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
match timeline.spawn_download_all_remote_layers(body).await {
|
||||
Ok(st) => json_response(StatusCode::ACCEPTED, st),
|
||||
Err(st) => json_response(StatusCode::CONFLICT, st),
|
||||
@@ -1364,11 +1392,11 @@ async fn timeline_download_remote_layers_handler_get(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let info = timeline
|
||||
.get_download_all_remote_layers_task_info()
|
||||
.context("task never started since last pageserver process start")
|
||||
@@ -1414,9 +1442,9 @@ async fn getpage_at_lsn_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
struct Key(crate::repository::Key);
|
||||
|
||||
@@ -1435,7 +1463,7 @@ async fn getpage_at_lsn_handler(
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
|
||||
let page = timeline.get(key.0, lsn, &ctx).await?;
|
||||
|
||||
@@ -1447,7 +1475,7 @@ async fn getpage_at_lsn_handler(
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
.instrument(info_span!("timeline_get", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1455,9 +1483,9 @@ async fn timeline_collect_keyspace(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
struct Partitioning {
|
||||
keys: crate::keyspace::KeySpace,
|
||||
@@ -1526,7 +1554,7 @@ async fn timeline_collect_keyspace(
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
|
||||
let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
|
||||
let keys = timeline
|
||||
.collect_keyspace(at_lsn, &ctx)
|
||||
@@ -1535,15 +1563,15 @@ async fn timeline_collect_keyspace(
|
||||
|
||||
json_response(StatusCode::OK, Partitioning { keys, at_lsn })
|
||||
}
|
||||
.instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
|
||||
.instrument(info_span!("timeline_collect_keyspace", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn active_timeline_of_active_tenant(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<Arc<Timeline>, ApiError> {
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
|
||||
tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(|e| ApiError::NotFound(e.into()))
|
||||
@@ -1820,23 +1848,25 @@ pub fn make_router(
|
||||
})
|
||||
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
|
||||
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
|
||||
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
|
||||
.get("/v1/tenant/:tenant_shard_id", |r| {
|
||||
api_handler(r, tenant_status)
|
||||
})
|
||||
.delete("/v1/tenant/:tenant_shard_id", |r| {
|
||||
api_handler(r, tenant_delete_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
|
||||
.get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
|
||||
api_handler(r, tenant_size_handler)
|
||||
})
|
||||
.put("/v1/tenant/config", |r| {
|
||||
api_handler(r, update_tenant_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/config", |r| {
|
||||
.get("/v1/tenant/:tenant_shard_id/config", |r| {
|
||||
api_handler(r, get_tenant_config_handler)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_shard_id/location_config", |r| {
|
||||
api_handler(r, put_tenant_location_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline", |r| {
|
||||
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
|
||||
api_handler(r, timeline_list_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_shard_id/timeline", |r| {
|
||||
@@ -1857,47 +1887,50 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
api_handler(r, tenant_ignore_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
.get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
|
||||
api_handler(r, timeline_detail_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp",
|
||||
|r| api_handler(r, get_lsn_by_timestamp_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_timestamp_of_lsn",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn",
|
||||
|r| api_handler(r, get_timestamp_of_lsn_handler),
|
||||
)
|
||||
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
|
||||
api_handler(r, timeline_gc_handler)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
|
||||
testing_api_handler("run timeline compaction", r, timeline_compact_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
|
||||
|r| api_handler(r, timeline_gc_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|
||||
|r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
|
||||
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| api_handler(r, timeline_download_remote_layers_handler_post),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| api_handler(r, timeline_download_remote_layers_handler_get),
|
||||
)
|
||||
.delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
|
||||
api_handler(r, timeline_delete_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
|
||||
api_handler(r, layer_map_info_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
|
||||
|r| api_handler(r, layer_map_info_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| api_handler(r, layer_download_handler),
|
||||
)
|
||||
.delete(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| api_handler(r, evict_timeline_layer_handler),
|
||||
)
|
||||
.put("/v1/disk_usage_eviction/run", |r| {
|
||||
@@ -1906,18 +1939,19 @@ pub fn make_router(
|
||||
.put("/v1/deletion_queue/flush", |r| {
|
||||
api_handler(r, deletion_queue_flush)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_id/break", |r| {
|
||||
.put("/v1/tenant/:tenant_shard_id/break", |r| {
|
||||
testing_api_handler("set tenant state to broken", r, handle_tenant_break)
|
||||
})
|
||||
.get("/v1/panic", |r| api_handler(r, always_panic_handler))
|
||||
.post("/v1/tracing/event", |r| {
|
||||
testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/getpage", |r| {
|
||||
testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/keyspace",
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
|
||||
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
|
||||
|r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
|
||||
)
|
||||
.any(handler_404))
|
||||
|
||||
@@ -650,7 +650,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
|
||||
"pageserver_evictions_with_low_residence_duration",
|
||||
"If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
|
||||
Residence duration is determined using the `residence_duration_data_source`.",
|
||||
&["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
|
||||
&["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
@@ -714,10 +714,16 @@ impl EvictionsWithLowResidenceDurationBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
|
||||
fn build(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
shard_id: &str,
|
||||
timeline_id: &str,
|
||||
) -> EvictionsWithLowResidenceDuration {
|
||||
let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
|
||||
.get_metric_with_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
self.data_source,
|
||||
&EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
|
||||
@@ -748,21 +754,24 @@ impl EvictionsWithLowResidenceDuration {
|
||||
pub fn change_threshold(
|
||||
&mut self,
|
||||
tenant_id: &str,
|
||||
shard_id: &str,
|
||||
timeline_id: &str,
|
||||
new_threshold: Duration,
|
||||
) {
|
||||
if new_threshold == self.threshold {
|
||||
return;
|
||||
}
|
||||
let mut with_new =
|
||||
EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
|
||||
.build(tenant_id, timeline_id);
|
||||
let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
|
||||
self.data_source,
|
||||
new_threshold,
|
||||
)
|
||||
.build(tenant_id, shard_id, timeline_id);
|
||||
std::mem::swap(self, &mut with_new);
|
||||
with_new.remove(tenant_id, timeline_id);
|
||||
with_new.remove(tenant_id, shard_id, timeline_id);
|
||||
}
|
||||
|
||||
// This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
|
||||
fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
|
||||
fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
|
||||
let Some(_counter) = self.counter.take() else {
|
||||
return;
|
||||
};
|
||||
@@ -771,6 +780,7 @@ impl EvictionsWithLowResidenceDuration {
|
||||
|
||||
let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
self.data_source,
|
||||
&threshold,
|
||||
@@ -1603,6 +1613,7 @@ impl StorageTimeMetrics {
|
||||
#[derive(Debug)]
|
||||
pub struct TimelineMetrics {
|
||||
tenant_id: String,
|
||||
shard_id: String,
|
||||
timeline_id: String,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
@@ -1623,11 +1634,12 @@ pub struct TimelineMetrics {
|
||||
|
||||
impl TimelineMetrics {
|
||||
pub fn new(
|
||||
tenant_id: &TenantId,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
|
||||
) -> Self {
|
||||
let tenant_id = tenant_id.to_string();
|
||||
let tenant_id = tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", tenant_shard_id.shard_slug());
|
||||
let timeline_id = timeline_id.to_string();
|
||||
let flush_time_histo =
|
||||
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
||||
@@ -1664,11 +1676,12 @@ impl TimelineMetrics {
|
||||
let evictions = EVICTIONS
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let evictions_with_low_residence_duration =
|
||||
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
||||
let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
|
||||
.build(&tenant_id, &shard_id, &timeline_id);
|
||||
|
||||
TimelineMetrics {
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
flush_time_histo,
|
||||
compact_time_histo,
|
||||
@@ -1714,6 +1727,7 @@ impl Drop for TimelineMetrics {
|
||||
fn drop(&mut self) {
|
||||
let tenant_id = &self.tenant_id;
|
||||
let timeline_id = &self.timeline_id;
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
||||
{
|
||||
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
|
||||
@@ -1727,7 +1741,7 @@ impl Drop for TimelineMetrics {
|
||||
self.evictions_with_low_residence_duration
|
||||
.write()
|
||||
.unwrap()
|
||||
.remove(tenant_id, timeline_id);
|
||||
.remove(tenant_id, shard_id, timeline_id);
|
||||
|
||||
// The following metrics are born outside of the TimelineMetrics lifecycle but still
|
||||
// removed at the end of it. The idea is to have the metrics outlive the
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
//! Page cache maps from a cache key to a buffer slot.
|
||||
//! The cache key uniquely identifies the piece of data that is being cached.
|
||||
//!
|
||||
//! The cache key for **materialized pages** is [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
|
||||
//! The cache key for **materialized pages** is [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
|
||||
//! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
|
||||
//!
|
||||
//! The cache key for **immutable file** pages is [`FileId`] and a block number.
|
||||
@@ -83,10 +83,8 @@ use std::{
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::OnceCell;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
@@ -154,7 +152,13 @@ enum CacheKey {
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
struct MaterializedPageHashKey {
|
||||
tenant_id: TenantId,
|
||||
/// Why is this TenantShardId rather than TenantId?
|
||||
///
|
||||
/// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant. However, this
|
||||
/// this not the case for certain internally-generated pages (e.g. relation sizes). In future, we may make this
|
||||
/// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
|
||||
/// special-cased in some other way.
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
key: Key,
|
||||
}
|
||||
@@ -378,7 +382,7 @@ impl PageCache {
|
||||
/// returned page.
|
||||
pub async fn lookup_materialized_page(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
key: &Key,
|
||||
lsn: Lsn,
|
||||
@@ -395,7 +399,7 @@ impl PageCache {
|
||||
|
||||
let mut cache_key = CacheKey::MaterializedPage {
|
||||
hash_key: MaterializedPageHashKey {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
key: *key,
|
||||
},
|
||||
@@ -436,7 +440,7 @@ impl PageCache {
|
||||
///
|
||||
pub async fn memorize_materialized_page(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
@@ -444,7 +448,7 @@ impl PageCache {
|
||||
) -> anyhow::Result<()> {
|
||||
let cache_key = CacheKey::MaterializedPage {
|
||||
hash_key: MaterializedPageHashKey {
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
key,
|
||||
},
|
||||
|
||||
@@ -822,10 +822,7 @@ impl<'a> DatadirModification<'a> {
|
||||
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
||||
|
||||
// Create AuxFilesDirectory
|
||||
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
|
||||
files: HashMap::new(),
|
||||
})?;
|
||||
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
|
||||
self.init_aux_dir()?;
|
||||
|
||||
let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
|
||||
xids: HashSet::new(),
|
||||
@@ -933,10 +930,7 @@ impl<'a> DatadirModification<'a> {
|
||||
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
||||
|
||||
// Create AuxFilesDirectory as well
|
||||
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
|
||||
files: HashMap::new(),
|
||||
})?;
|
||||
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
|
||||
self.init_aux_dir()?;
|
||||
}
|
||||
if r.is_none() {
|
||||
// Create RelDirectory
|
||||
@@ -1261,6 +1255,14 @@ impl<'a> DatadirModification<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
|
||||
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
|
||||
files: HashMap::new(),
|
||||
})?;
|
||||
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn put_file(
|
||||
&mut self,
|
||||
path: &str,
|
||||
@@ -1767,6 +1769,13 @@ const AUX_FILES_KEY: Key = Key {
|
||||
// Reverse mappings for a few Keys.
|
||||
// These are needed by WAL redo manager.
|
||||
|
||||
// AUX_FILES currently stores only data for logical replication (slots etc), and
|
||||
// we don't preserve these on a branch because safekeepers can't follow timeline
|
||||
// switch (and generally it likely should be optional), so ignore these.
|
||||
pub fn is_inherited_key(key: Key) -> bool {
|
||||
key != AUX_FILES_KEY
|
||||
}
|
||||
|
||||
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x00 => (
|
||||
|
||||
@@ -42,6 +42,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use futures::FutureExt;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::task_local;
|
||||
@@ -51,7 +52,7 @@ use tracing::{debug, error, info, warn};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use crate::shutdown_pageserver;
|
||||
|
||||
@@ -317,7 +318,7 @@ struct PageServerTask {
|
||||
|
||||
/// Tasks may optionally be launched for a particular tenant/timeline, enabling
|
||||
/// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
|
||||
tenant_id: Option<TenantId>,
|
||||
tenant_shard_id: Option<TenantShardId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
|
||||
mutable: Mutex<MutableTaskState>,
|
||||
@@ -329,7 +330,7 @@ struct PageServerTask {
|
||||
pub fn spawn<F>(
|
||||
runtime: &tokio::runtime::Handle,
|
||||
kind: TaskKind,
|
||||
tenant_id: Option<TenantId>,
|
||||
tenant_shard_id: Option<TenantShardId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
name: &str,
|
||||
shutdown_process_on_error: bool,
|
||||
@@ -345,7 +346,7 @@ where
|
||||
kind,
|
||||
name: name.to_string(),
|
||||
cancel: cancel.clone(),
|
||||
tenant_id,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
mutable: Mutex::new(MutableTaskState { join_handle: None }),
|
||||
});
|
||||
@@ -424,28 +425,28 @@ async fn task_finish(
|
||||
Ok(Err(err)) => {
|
||||
if shutdown_process_on_error {
|
||||
error!(
|
||||
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task.tenant_shard_id, task.timeline_id, err
|
||||
);
|
||||
shutdown_process = true;
|
||||
} else {
|
||||
error!(
|
||||
"Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task.tenant_shard_id, task.timeline_id, err
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
if shutdown_process_on_error {
|
||||
error!(
|
||||
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task.tenant_shard_id, task.timeline_id, err
|
||||
);
|
||||
shutdown_process = true;
|
||||
} else {
|
||||
error!(
|
||||
"Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task.tenant_shard_id, task.timeline_id, err
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -467,11 +468,11 @@ async fn task_finish(
|
||||
///
|
||||
/// Or to shut down all tasks for given timeline:
|
||||
///
|
||||
/// shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
|
||||
/// shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
|
||||
///
|
||||
pub async fn shutdown_tasks(
|
||||
kind: Option<TaskKind>,
|
||||
tenant_id: Option<TenantId>,
|
||||
tenant_shard_id: Option<TenantShardId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
) {
|
||||
let mut victim_tasks = Vec::new();
|
||||
@@ -480,35 +481,35 @@ pub async fn shutdown_tasks(
|
||||
let tasks = TASKS.lock().unwrap();
|
||||
for task in tasks.values() {
|
||||
if (kind.is_none() || Some(task.kind) == kind)
|
||||
&& (tenant_id.is_none() || task.tenant_id == tenant_id)
|
||||
&& (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
|
||||
&& (timeline_id.is_none() || task.timeline_id == timeline_id)
|
||||
{
|
||||
task.cancel.cancel();
|
||||
victim_tasks.push((
|
||||
Arc::clone(task),
|
||||
task.kind,
|
||||
task.tenant_id,
|
||||
task.tenant_shard_id,
|
||||
task.timeline_id,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();
|
||||
let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
|
||||
|
||||
for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
|
||||
for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
|
||||
let join_handle = {
|
||||
let mut task_mut = task.mutable.lock().unwrap();
|
||||
task_mut.join_handle.take()
|
||||
};
|
||||
if let Some(mut join_handle) = join_handle {
|
||||
if log_all {
|
||||
if tenant_id.is_none() {
|
||||
if tenant_shard_id.is_none() {
|
||||
// there are quite few of these
|
||||
info!(name = task.name, kind = ?task_kind, "stopping global task");
|
||||
} else {
|
||||
// warn to catch these in tests; there shouldn't be any
|
||||
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
||||
warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
||||
}
|
||||
}
|
||||
if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
|
||||
@@ -517,12 +518,13 @@ pub async fn shutdown_tasks(
|
||||
{
|
||||
// allow some time to elapse before logging to cut down the number of log
|
||||
// lines.
|
||||
info!("waiting for {} to shut down", task.name);
|
||||
info!("waiting for task {} to shut down", task.name);
|
||||
// we never handled this return value, but:
|
||||
// - we don't deschedule which would lead to is_cancelled
|
||||
// - panics are already logged (is_panicked)
|
||||
// - task errors are already logged in the wrapper
|
||||
let _ = join_handle.await;
|
||||
info!("task {} completed", task.name);
|
||||
}
|
||||
} else {
|
||||
// Possibly one of:
|
||||
|
||||
@@ -608,7 +608,7 @@ impl Tenant {
|
||||
task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
TaskKind::Attach,
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
None,
|
||||
"attach tenant",
|
||||
false,
|
||||
@@ -1917,7 +1917,7 @@ impl Tenant {
|
||||
//
|
||||
// this will additionally shutdown and await all timeline tasks.
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;
|
||||
|
||||
// Wait for any in-flight operations to complete
|
||||
self.gate.close().await;
|
||||
|
||||
@@ -463,7 +463,7 @@ impl DeleteTenantFlow {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::TimelineDeletionWorker,
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
None,
|
||||
"tenant_delete",
|
||||
false,
|
||||
@@ -550,7 +550,7 @@ impl DeleteTenantFlow {
|
||||
// we encounter an InProgress marker, yield the barrier it contains and wait on it.
|
||||
let barrier = {
|
||||
let mut locked = tenants.write().unwrap();
|
||||
let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
|
||||
let removed = locked.remove(tenant.tenant_shard_id);
|
||||
|
||||
// FIXME: we should not be modifying this from outside of mgr.rs.
|
||||
// This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
|
||||
|
||||
@@ -98,33 +98,6 @@ pub(crate) enum TenantsMap {
|
||||
ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
|
||||
}
|
||||
|
||||
/// Helper for mapping shard-unaware functions to a sharding-aware map
|
||||
/// TODO(sharding): all users of this must be made shard-aware.
|
||||
fn exactly_one_or_none<'a>(
|
||||
map: &'a BTreeMap<TenantShardId, TenantSlot>,
|
||||
tenant_id: &TenantId,
|
||||
) -> Option<(&'a TenantShardId, &'a TenantSlot)> {
|
||||
let mut slots = map.range(TenantShardId::tenant_range(*tenant_id));
|
||||
|
||||
// Retrieve the first two slots in the range: if both are populated, we must panic because the caller
|
||||
// needs a shard-naive view of the world in which only one slot can exist for a TenantId at a time.
|
||||
let slot_a = slots.next();
|
||||
let slot_b = slots.next();
|
||||
match (slot_a, slot_b) {
|
||||
(None, None) => None,
|
||||
(Some(slot), None) => {
|
||||
// Exactly one matching slot
|
||||
Some(slot)
|
||||
}
|
||||
(Some(_slot_a), Some(_slot_b)) => {
|
||||
// Multiple shards for this tenant: cannot handle this yet.
|
||||
// TODO(sharding): callers of get() should be shard-aware.
|
||||
todo!("Attaching multiple shards in teh same tenant to the same pageserver")
|
||||
}
|
||||
(None, Some(_)) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum TenantsMapRemoveResult {
|
||||
Occupied(TenantSlot),
|
||||
Vacant,
|
||||
@@ -147,12 +120,11 @@ impl TenantsMap {
|
||||
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
|
||||
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
||||
/// None is returned.
|
||||
pub(crate) fn get(&self, tenant_id: &TenantId) -> Option<&Arc<Tenant>> {
|
||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
|
||||
match self {
|
||||
TenantsMap::Initializing => None,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
// TODO(sharding): callers of get() should be shard-aware.
|
||||
exactly_one_or_none(m, tenant_id).and_then(|(_, slot)| slot.get_attached())
|
||||
m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -204,25 +176,19 @@ impl TenantsMap {
|
||||
///
|
||||
/// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
|
||||
/// slot if the enclosed tenant is shutdown.
|
||||
pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult {
|
||||
pub(crate) fn remove(&mut self, tenant_shard_id: TenantShardId) -> TenantsMapRemoveResult {
|
||||
use std::collections::btree_map::Entry;
|
||||
match self {
|
||||
TenantsMap::Initializing => TenantsMapRemoveResult::Vacant,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k);
|
||||
match key {
|
||||
Some(key) => match m.entry(key) {
|
||||
Entry::Occupied(entry) => match entry.get() {
|
||||
TenantSlot::InProgress(barrier) => {
|
||||
TenantsMapRemoveResult::InProgress(barrier.clone())
|
||||
}
|
||||
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
|
||||
},
|
||||
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
|
||||
},
|
||||
None => TenantsMapRemoveResult::Vacant,
|
||||
}
|
||||
}
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => match m.entry(tenant_shard_id) {
|
||||
Entry::Occupied(entry) => match entry.get() {
|
||||
TenantSlot::InProgress(barrier) => {
|
||||
TenantsMapRemoveResult::InProgress(barrier.clone())
|
||||
}
|
||||
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
|
||||
},
|
||||
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -822,14 +788,16 @@ pub(crate) async fn set_new_tenant_config(
|
||||
new_tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
) -> Result<(), SetNewTenantConfigError> {
|
||||
// Legacy API: does not support sharding
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
info!("configuring tenant {tenant_id}");
|
||||
let tenant = get_tenant(tenant_id, true)?;
|
||||
let tenant = get_tenant(tenant_shard_id, true)?;
|
||||
|
||||
// This is a legacy API that only operates on attached tenants: the preferred
|
||||
// API to use is the location_config/ endpoint, which lets the caller provide
|
||||
// the full LocationConf.
|
||||
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
@@ -1143,14 +1111,11 @@ pub(crate) enum GetTenantError {
|
||||
///
|
||||
/// This method is cancel-safe.
|
||||
pub(crate) fn get_tenant(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
active_only: bool,
|
||||
) -> Result<Arc<Tenant>, GetTenantError> {
|
||||
let locked = TENANTS.read().unwrap();
|
||||
|
||||
// TODO(sharding): make all callers of get_tenant shard-aware
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
|
||||
|
||||
match peek_slot {
|
||||
@@ -1162,14 +1127,18 @@ pub(crate) fn get_tenant(
|
||||
TenantState::Active => Ok(Arc::clone(tenant)),
|
||||
_ => {
|
||||
if active_only {
|
||||
Err(GetTenantError::NotActive(tenant_id))
|
||||
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
|
||||
} else {
|
||||
Ok(Arc::clone(tenant))
|
||||
}
|
||||
}
|
||||
},
|
||||
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)),
|
||||
None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)),
|
||||
Some(TenantSlot::InProgress(_)) => {
|
||||
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
|
||||
}
|
||||
None | Some(TenantSlot::Secondary) => {
|
||||
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1542,7 +1511,8 @@ pub(crate) enum TenantMapListError {
|
||||
///
|
||||
/// Get list of tenants, for the mgmt API
|
||||
///
|
||||
pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
|
||||
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
|
||||
{
|
||||
let tenants = TENANTS.read().unwrap();
|
||||
let m = match &*tenants {
|
||||
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
|
||||
@@ -1550,12 +1520,10 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, Tenan
|
||||
};
|
||||
Ok(m.iter()
|
||||
.filter_map(|(id, tenant)| match tenant {
|
||||
TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
|
||||
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
|
||||
TenantSlot::Secondary => None,
|
||||
TenantSlot::InProgress(_) => None,
|
||||
})
|
||||
// TODO(sharding): make callers of this function shard-aware
|
||||
.map(|(k, v)| (k.tenant_id, v))
|
||||
.collect())
|
||||
}
|
||||
|
||||
@@ -2089,21 +2057,19 @@ use {
|
||||
};
|
||||
|
||||
pub(crate) async fn immediate_gc(
|
||||
tenant_id: TenantId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
gc_req: TimelineGcRequest,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
|
||||
let guard = TENANTS.read().unwrap();
|
||||
let tenant = guard
|
||||
.get(&tenant_id)
|
||||
.map(Arc::clone)
|
||||
.with_context(|| format!("tenant {tenant_id}"))
|
||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||
|
||||
// TODO(sharding): make callers of this function shard-aware
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
let tenant = guard
|
||||
.get(&tenant_shard_id)
|
||||
.map(Arc::clone)
|
||||
.with_context(|| format!("tenant {tenant_shard_id}"))
|
||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||
// Use tenant's pitr setting
|
||||
@@ -2116,9 +2082,9 @@ pub(crate) async fn immediate_gc(
|
||||
task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
TaskKind::GarbageCollector,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
Some(timeline_id),
|
||||
&format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
|
||||
&format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
|
||||
false,
|
||||
async move {
|
||||
fail::fail_point!("immediate_gc_task_pre");
|
||||
|
||||
@@ -1223,7 +1223,7 @@ impl RemoteTimelineClient {
|
||||
task_mgr::spawn(
|
||||
&self.runtime,
|
||||
TaskKind::RemoteUploadTask,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
"remote upload",
|
||||
false,
|
||||
|
||||
@@ -837,7 +837,7 @@ impl LayerInner {
|
||||
crate::task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
crate::task_mgr::TaskKind::RemoteDownloadTask,
|
||||
Some(self.desc.tenant_shard_id.tenant_id),
|
||||
Some(self.desc.tenant_shard_id),
|
||||
Some(self.desc.timeline_id),
|
||||
&task_name,
|
||||
false,
|
||||
|
||||
@@ -87,13 +87,13 @@ pub fn start_background_loops(
|
||||
tenant: &Arc<Tenant>,
|
||||
background_jobs_can_start: Option<&completion::Barrier>,
|
||||
) {
|
||||
let tenant_id = tenant.tenant_shard_id.tenant_id;
|
||||
let tenant_shard_id = tenant.tenant_shard_id;
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Compaction,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
None,
|
||||
&format!("compactor for tenant {tenant_id}"),
|
||||
&format!("compactor for tenant {tenant_shard_id}"),
|
||||
false,
|
||||
{
|
||||
let tenant = Arc::clone(tenant);
|
||||
@@ -105,7 +105,7 @@ pub fn start_background_loops(
|
||||
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
|
||||
};
|
||||
compaction_loop(tenant, cancel)
|
||||
.instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
|
||||
.instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
@@ -114,9 +114,9 @@ pub fn start_background_loops(
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::GarbageCollector,
|
||||
Some(tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
None,
|
||||
&format!("garbage collector for tenant {tenant_id}"),
|
||||
&format!("garbage collector for tenant {tenant_shard_id}"),
|
||||
false,
|
||||
{
|
||||
let tenant = Arc::clone(tenant);
|
||||
@@ -128,7 +128,7 @@ pub fn start_background_loops(
|
||||
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
|
||||
};
|
||||
gc_loop(tenant, cancel)
|
||||
.instrument(info_span!("gc_loop", tenant_id = %tenant_id))
|
||||
.instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ use crate::metrics::{
|
||||
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
|
||||
};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
|
||||
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
|
||||
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
|
||||
use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
@@ -77,7 +77,7 @@ use postgres_ffi::to_pg_timestamp;
|
||||
use utils::{
|
||||
completion,
|
||||
generation::Generation,
|
||||
id::{TenantId, TimelineId},
|
||||
id::TimelineId,
|
||||
lsn::{AtomicLsn, Lsn, RecordLsn},
|
||||
seqwait::SeqWait,
|
||||
simple_rcu::{Rcu, RcuReadGuard},
|
||||
@@ -926,7 +926,7 @@ impl Timeline {
|
||||
tracing::debug!("Waiting for WalReceiverManager...");
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::WalReceiverManager),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -977,7 +977,7 @@ impl Timeline {
|
||||
// Shut down the layer flush task before the remote client, as one depends on the other
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::LayerFlushTask),
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -995,12 +995,7 @@ impl Timeline {
|
||||
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
|
||||
task_mgr::shutdown_tasks(
|
||||
None,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;
|
||||
|
||||
// Finally wait until any gate-holders are complete
|
||||
self.gate.close().await;
|
||||
@@ -1314,16 +1309,20 @@ impl Timeline {
|
||||
&self.conf.default_tenant_conf,
|
||||
);
|
||||
|
||||
// TODO(sharding): make evictions state shard aware
|
||||
// (https://github.com/neondatabase/neon/issues/5953)
|
||||
let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
|
||||
let timeline_id_str = self.timeline_id.to_string();
|
||||
self.metrics
|
||||
.evictions_with_low_residence_duration
|
||||
.write()
|
||||
.unwrap()
|
||||
.change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
|
||||
.change_threshold(
|
||||
&tenant_id_str,
|
||||
&shard_id_str,
|
||||
&timeline_id_str,
|
||||
new_threshold,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1395,7 +1394,7 @@ impl Timeline {
|
||||
ancestor_lsn: metadata.ancestor_lsn(),
|
||||
|
||||
metrics: TimelineMetrics::new(
|
||||
&tenant_shard_id.tenant_id,
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
|
||||
"mtime",
|
||||
@@ -1496,7 +1495,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::LayerFlushTask,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
"layer flush task",
|
||||
false,
|
||||
@@ -1847,7 +1846,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::InitialLogicalSizeCalculation,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
"initial size calculation",
|
||||
false,
|
||||
@@ -2020,7 +2019,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::OndemandLogicalSizeCalculation,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
"ondemand logical size calculation",
|
||||
false,
|
||||
@@ -2279,7 +2278,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// Recurse into ancestor if needed
|
||||
if Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
|
||||
if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
|
||||
trace!(
|
||||
"going into ancestor {}, cont_lsn is {}",
|
||||
timeline.ancestor_lsn,
|
||||
@@ -2461,13 +2460,7 @@ impl Timeline {
|
||||
// FIXME: It's pointless to check the cache for things that are not 8kB pages.
|
||||
// We should look at the key to determine if it's a cacheable object
|
||||
let (lsn, read_guard) = cache
|
||||
.lookup_materialized_page(
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.timeline_id,
|
||||
key,
|
||||
lsn,
|
||||
ctx,
|
||||
)
|
||||
.lookup_materialized_page(self.tenant_shard_id, self.timeline_id, key, lsn, ctx)
|
||||
.await?;
|
||||
let img = Bytes::from(read_guard.to_vec());
|
||||
Some((lsn, img))
|
||||
@@ -3209,7 +3202,7 @@ impl DurationRecorder {
|
||||
#[derive(Default)]
|
||||
struct CompactLevel0Phase1StatsBuilder {
|
||||
version: Option<u64>,
|
||||
tenant_id: Option<TenantId>,
|
||||
tenant_id: Option<TenantShardId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
read_lock_acquisition_micros: DurationRecorder,
|
||||
read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
|
||||
@@ -3226,7 +3219,7 @@ struct CompactLevel0Phase1StatsBuilder {
|
||||
#[derive(serde::Serialize)]
|
||||
struct CompactLevel0Phase1Stats {
|
||||
version: u64,
|
||||
tenant_id: TenantId,
|
||||
tenant_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
read_lock_acquisition_micros: RecordedDuration,
|
||||
read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
|
||||
@@ -3745,7 +3738,7 @@ impl Timeline {
|
||||
let ctx = ctx.attached_child();
|
||||
let mut stats = CompactLevel0Phase1StatsBuilder {
|
||||
version: Some(2),
|
||||
tenant_id: Some(self.tenant_shard_id.tenant_id),
|
||||
tenant_id: Some(self.tenant_shard_id),
|
||||
timeline_id: Some(self.timeline_id),
|
||||
..Default::default()
|
||||
};
|
||||
@@ -4207,7 +4200,7 @@ impl Timeline {
|
||||
let cache = page_cache::get();
|
||||
if let Err(e) = cache
|
||||
.memorize_materialized_page(
|
||||
self.tenant_shard_id.tenant_id,
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
key,
|
||||
last_rec_lsn,
|
||||
@@ -4251,7 +4244,7 @@ impl Timeline {
|
||||
let task_id = task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::DownloadAllRemoteLayers,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
"download all remote layers task",
|
||||
false,
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
// Shut down the layer flush task before the remote client, as one depends on the other
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::LayerFlushTask),
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.tenant_shard_id),
|
||||
Some(timeline.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -71,7 +71,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
info!("waiting for timeline tasks to shutdown");
|
||||
task_mgr::shutdown_tasks(
|
||||
None,
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.tenant_shard_id),
|
||||
Some(timeline.timeline_id),
|
||||
)
|
||||
.await;
|
||||
@@ -528,7 +528,7 @@ impl DeleteTimelineFlow {
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::TimelineDeletionWorker,
|
||||
Some(tenant_shard_id.tenant_id),
|
||||
Some(tenant_shard_id),
|
||||
Some(timeline_id),
|
||||
"timeline_delete",
|
||||
false,
|
||||
|
||||
@@ -60,7 +60,7 @@ impl Timeline {
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Eviction,
|
||||
Some(self.tenant_shard_id.tenant_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
&format!(
|
||||
"layer eviction for {}/{}",
|
||||
@@ -343,7 +343,7 @@ impl Timeline {
|
||||
// Make one of the tenant's timelines draw the short straw and run the calculation.
|
||||
// The others wait until the calculation is done so that they take into account the
|
||||
// imitated accesses that the winner made.
|
||||
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
|
||||
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
|
||||
Ok(t) => t,
|
||||
Err(_) => {
|
||||
return ControlFlow::Break(());
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
|
||||
connection_manager_loop_step, ConnectionManagerState,
|
||||
};
|
||||
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::future::Future;
|
||||
use std::num::NonZeroU64;
|
||||
use std::ops::ControlFlow;
|
||||
@@ -41,7 +42,7 @@ use tokio::sync::watch;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use self::connection_manager::ConnectionManagerStatus;
|
||||
|
||||
@@ -60,7 +61,8 @@ pub struct WalReceiverConf {
|
||||
}
|
||||
|
||||
pub struct WalReceiver {
|
||||
timeline: TenantTimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
|
||||
}
|
||||
|
||||
@@ -71,7 +73,7 @@ impl WalReceiver {
|
||||
mut broker_client: BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
) -> Self {
|
||||
let tenant_id = timeline.tenant_shard_id.tenant_id;
|
||||
let tenant_shard_id = timeline.tenant_shard_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
let walreceiver_ctx =
|
||||
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
|
||||
@@ -81,9 +83,9 @@ impl WalReceiver {
|
||||
task_mgr::spawn(
|
||||
WALRECEIVER_RUNTIME.handle(),
|
||||
TaskKind::WalReceiverManager,
|
||||
Some(tenant_id),
|
||||
Some(timeline.tenant_shard_id),
|
||||
Some(timeline_id),
|
||||
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
|
||||
&format!("walreceiver for timeline {tenant_shard_id}/{timeline_id}"),
|
||||
false,
|
||||
async move {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -117,11 +119,12 @@ impl WalReceiver {
|
||||
*loop_status.write().unwrap() = None;
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id))
|
||||
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
|
||||
);
|
||||
|
||||
Self {
|
||||
timeline: TenantTimelineId::new(tenant_id, timeline_id),
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
manager_status,
|
||||
}
|
||||
}
|
||||
@@ -129,8 +132,8 @@ impl WalReceiver {
|
||||
pub async fn stop(self) {
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::WalReceiverManager),
|
||||
Some(self.timeline.tenant_id),
|
||||
Some(self.timeline.timeline_id),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
task_mgr::spawn(
|
||||
WALRECEIVER_RUNTIME.handle(),
|
||||
TaskKind::WalReceiverConnectionPoller,
|
||||
Some(timeline.tenant_shard_id.tenant_id),
|
||||
Some(timeline.tenant_shard_id),
|
||||
Some(timeline.timeline_id),
|
||||
"walreceiver connection",
|
||||
false,
|
||||
|
||||
@@ -458,8 +458,10 @@ impl<'a> WalIngest<'a> {
|
||||
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
|
||||
&& (decoded.xl_info == pg_constants::XLOG_FPI
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
// compression of WAL is not yet supported: fall back to storing the original WAL record
|
||||
// compression of WAL is not yet supported: fall back to storing the original WAL record
|
||||
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, self.timeline.pg_version)?
|
||||
// do not materialize null pages because them most likely be soon replaced with real data
|
||||
&& blk.bimg_len != 0
|
||||
{
|
||||
// Extract page image from FPI record
|
||||
let img_len = blk.bimg_len as usize;
|
||||
|
||||
@@ -62,6 +62,9 @@ pub enum AuthErrorImpl {
|
||||
Please add it to the allowed list in the Neon console."
|
||||
)]
|
||||
IpAddressNotAllowed,
|
||||
|
||||
#[error("Too many connections to this endpoint. Please try again later.")]
|
||||
TooManyConnections,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
@@ -80,6 +83,10 @@ impl AuthError {
|
||||
pub fn ip_address_not_allowed() -> Self {
|
||||
AuthErrorImpl::IpAddressNotAllowed.into()
|
||||
}
|
||||
|
||||
pub fn too_many_connections() -> Self {
|
||||
AuthErrorImpl::TooManyConnections.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
|
||||
@@ -102,6 +109,7 @@ impl UserFacingError for AuthError {
|
||||
MissingEndpointName => self.to_string(),
|
||||
Io(_) => "Internal error".to_string(),
|
||||
IpAddressNotAllowed => self.to_string(),
|
||||
TooManyConnections => self.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ impl TryFrom<ClientCredentials> for ComputeUserInfo {
|
||||
/// All authentication flows will emit an AuthenticationOk message if successful.
|
||||
async fn auth_quirks(
|
||||
api: &impl console::Api,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: ClientCredentials,
|
||||
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
|
||||
allow_cleartext: bool,
|
||||
@@ -235,7 +235,7 @@ async fn auth_quirks(
|
||||
/// only if authentication was successfuly.
|
||||
async fn auth_and_wake_compute(
|
||||
api: &impl console::Api,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: ClientCredentials,
|
||||
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
|
||||
allow_cleartext: bool,
|
||||
@@ -314,7 +314,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
|
||||
#[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
|
||||
pub async fn authenticate(
|
||||
self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
|
||||
allow_cleartext: bool,
|
||||
config: &'static AuthenticationConfig,
|
||||
@@ -387,7 +387,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
|
||||
impl BackendType<'_, ComputeUserInfo> {
|
||||
pub async fn get_allowed_ips(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
|
||||
use BackendType::*;
|
||||
match self {
|
||||
@@ -404,7 +404,7 @@ impl BackendType<'_, ComputeUserInfo> {
|
||||
/// The link auth flow doesn't support this, so we return [`None`] in that case.
|
||||
pub async fn wake_compute(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
) -> Result<Option<CachedNodeInfo>, console::errors::WakeComputeError> {
|
||||
use BackendType::*;
|
||||
|
||||
|
||||
@@ -112,6 +112,9 @@ struct ProxyCliArgs {
|
||||
/// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
|
||||
#[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
|
||||
rate_limiter_timeout: tokio::time::Duration,
|
||||
/// Endpoint rate limiter max number of requests per second.
|
||||
#[clap(long, default_value_t = 300)]
|
||||
endpoint_rps_limit: u32,
|
||||
/// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
|
||||
#[clap(long, default_value_t = 100)]
|
||||
initial_limit: usize,
|
||||
@@ -317,6 +320,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
authentication_config,
|
||||
require_client_ip: args.require_client_ip,
|
||||
disable_ip_check_for_http: args.disable_ip_check_for_http,
|
||||
endpoint_rps_limit: args.endpoint_rps_limit,
|
||||
}));
|
||||
|
||||
Ok(config)
|
||||
|
||||
@@ -20,6 +20,7 @@ pub struct ProxyConfig {
|
||||
pub authentication_config: AuthenticationConfig,
|
||||
pub require_client_ip: bool,
|
||||
pub disable_ip_check_for_http: bool,
|
||||
pub endpoint_rps_limit: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -196,15 +196,15 @@ pub mod errors {
|
||||
}
|
||||
|
||||
/// Extra query params we'd like to pass to the console.
|
||||
pub struct ConsoleReqExtra<'a> {
|
||||
pub struct ConsoleReqExtra {
|
||||
/// A unique identifier for a connection.
|
||||
pub session_id: uuid::Uuid,
|
||||
/// Name of client application, if set.
|
||||
pub application_name: Option<&'a str>,
|
||||
pub application_name: String,
|
||||
pub options: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
impl<'a> ConsoleReqExtra<'a> {
|
||||
impl ConsoleReqExtra {
|
||||
// https://swagger.io/docs/specification/serialization/ DeepObject format
|
||||
// paramName[prop1]=value1¶mName[prop2]=value2&....
|
||||
pub fn options_as_deep_object(&self) -> Vec<(String, String)> {
|
||||
@@ -259,20 +259,20 @@ pub trait Api {
|
||||
/// Get the client's auth secret for authentication.
|
||||
async fn get_auth_info(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<AuthInfo, errors::GetAuthInfoError>;
|
||||
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<Arc<Vec<String>>, errors::GetAuthInfoError>;
|
||||
|
||||
/// Wake up the compute node and return the corresponding connection info.
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<CachedNodeInfo, errors::WakeComputeError>;
|
||||
}
|
||||
|
||||
@@ -144,7 +144,7 @@ impl super::Api for Api {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn get_auth_info(
|
||||
&self,
|
||||
_extra: &ConsoleReqExtra<'_>,
|
||||
_extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<AuthInfo, GetAuthInfoError> {
|
||||
self.do_get_auth_info(creds).await
|
||||
@@ -152,7 +152,7 @@ impl super::Api for Api {
|
||||
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
_extra: &ConsoleReqExtra<'_>,
|
||||
_extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
|
||||
Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips))
|
||||
@@ -161,7 +161,7 @@ impl super::Api for Api {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
_extra: &ConsoleReqExtra<'_>,
|
||||
_extra: &ConsoleReqExtra,
|
||||
_creds: &ComputeUserInfo,
|
||||
) -> Result<CachedNodeInfo, WakeComputeError> {
|
||||
self.do_wake_compute()
|
||||
|
||||
@@ -48,7 +48,7 @@ impl Api {
|
||||
|
||||
async fn do_get_auth_info(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<AuthInfo, GetAuthInfoError> {
|
||||
let request_id = uuid::Uuid::new_v4().to_string();
|
||||
@@ -60,9 +60,9 @@ impl Api {
|
||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
||||
.query(&[("session_id", extra.session_id)])
|
||||
.query(&[
|
||||
("application_name", extra.application_name),
|
||||
("project", Some(&creds.endpoint)),
|
||||
("role", Some(&creds.inner.user)),
|
||||
("application_name", extra.application_name.as_str()),
|
||||
("project", creds.endpoint.as_str()),
|
||||
("role", creds.inner.user.as_str()),
|
||||
])
|
||||
.build()?;
|
||||
|
||||
@@ -101,7 +101,7 @@ impl Api {
|
||||
|
||||
async fn do_wake_compute(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<NodeInfo, WakeComputeError> {
|
||||
let request_id = uuid::Uuid::new_v4().to_string();
|
||||
@@ -113,8 +113,8 @@ impl Api {
|
||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
||||
.query(&[("session_id", extra.session_id)])
|
||||
.query(&[
|
||||
("application_name", extra.application_name),
|
||||
("project", Some(&creds.endpoint)),
|
||||
("application_name", extra.application_name.as_str()),
|
||||
("project", creds.endpoint.as_str()),
|
||||
]);
|
||||
|
||||
request_builder = if extra.options.is_empty() {
|
||||
@@ -161,7 +161,7 @@ impl super::Api for Api {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn get_auth_info(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<AuthInfo, GetAuthInfoError> {
|
||||
self.do_get_auth_info(extra, creds).await
|
||||
@@ -169,7 +169,7 @@ impl super::Api for Api {
|
||||
|
||||
async fn get_allowed_ips(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
|
||||
let key: &str = &creds.endpoint;
|
||||
@@ -192,7 +192,7 @@ impl super::Api for Api {
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
extra: &ConsoleReqExtra,
|
||||
creds: &ComputeUserInfo,
|
||||
) -> Result<CachedNodeInfo, WakeComputeError> {
|
||||
let key: &str = &creds.inner.cache_key;
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::{
|
||||
console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
|
||||
http::StatusCode,
|
||||
protocol2::WithClientIp,
|
||||
rate_limiter::EndpointRateLimiter,
|
||||
stream::{PqStream, Stream},
|
||||
usage_metrics::{Ids, USAGE_METRICS},
|
||||
};
|
||||
@@ -307,6 +308,7 @@ pub async fn task_main(
|
||||
|
||||
let connections = tokio_util::task::task_tracker::TaskTracker::new();
|
||||
let cancel_map = Arc::new(CancelMap::default());
|
||||
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
|
||||
|
||||
while let Some(accept_result) =
|
||||
run_until_cancelled(listener.accept(), &cancellation_token).await
|
||||
@@ -315,6 +317,8 @@ pub async fn task_main(
|
||||
|
||||
let session_id = uuid::Uuid::new_v4();
|
||||
let cancel_map = Arc::clone(&cancel_map);
|
||||
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
|
||||
|
||||
connections.spawn(
|
||||
async move {
|
||||
info!("accepted postgres client connection");
|
||||
@@ -340,6 +344,7 @@ pub async fn task_main(
|
||||
socket,
|
||||
ClientMode::Tcp,
|
||||
peer_addr.ip(),
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -415,6 +420,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
stream: S,
|
||||
mode: ClientMode,
|
||||
peer_addr: IpAddr,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
protocol = mode.protocol_label(),
|
||||
@@ -463,6 +469,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
¶ms,
|
||||
session_id,
|
||||
mode.allow_self_signed_compute(config),
|
||||
endpoint_rate_limiter,
|
||||
);
|
||||
cancel_map
|
||||
.with_session(|session| client.connect_to_db(session, mode, &config.authentication_config))
|
||||
@@ -671,7 +678,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
|
||||
pub async fn connect_to_compute<M: ConnectMechanism>(
|
||||
mechanism: &M,
|
||||
mut node_info: console::CachedNodeInfo,
|
||||
extra: &console::ConsoleReqExtra<'_>,
|
||||
extra: &console::ConsoleReqExtra,
|
||||
creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
|
||||
mut latency_timer: LatencyTimer,
|
||||
) -> Result<M::Connection, M::Error>
|
||||
@@ -928,6 +935,8 @@ struct Client<'a, S> {
|
||||
session_id: uuid::Uuid,
|
||||
/// Allow self-signed certificates (for testing).
|
||||
allow_self_signed_compute: bool,
|
||||
/// Rate limiter for endpoints
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
}
|
||||
|
||||
impl<'a, S> Client<'a, S> {
|
||||
@@ -938,6 +947,7 @@ impl<'a, S> Client<'a, S> {
|
||||
params: &'a StartupMessageParams,
|
||||
session_id: uuid::Uuid,
|
||||
allow_self_signed_compute: bool,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> Self {
|
||||
Self {
|
||||
stream,
|
||||
@@ -945,6 +955,7 @@ impl<'a, S> Client<'a, S> {
|
||||
params,
|
||||
session_id,
|
||||
allow_self_signed_compute,
|
||||
endpoint_rate_limiter,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -966,15 +977,29 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
|
||||
params,
|
||||
session_id,
|
||||
allow_self_signed_compute,
|
||||
endpoint_rate_limiter,
|
||||
} = self;
|
||||
|
||||
// check rate limit
|
||||
if let Some(ep) = creds.get_endpoint() {
|
||||
if !endpoint_rate_limiter.check(ep) {
|
||||
return stream
|
||||
.throw_error(auth::AuthError::too_many_connections())
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
let proto = mode.protocol_label();
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id, // aka this connection's id
|
||||
application_name: params.get("application_name"),
|
||||
application_name: format!(
|
||||
"{}/{}",
|
||||
params.get("application_name").unwrap_or_default(),
|
||||
proto
|
||||
),
|
||||
options: neon_options(params),
|
||||
};
|
||||
|
||||
let mut latency_timer = LatencyTimer::new(mode.protocol_label());
|
||||
let mut latency_timer = LatencyTimer::new(proto);
|
||||
|
||||
let user = creds.get_user().to_owned();
|
||||
let auth_result = match creds
|
||||
@@ -1012,7 +1037,6 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.await?;
|
||||
|
||||
let proto = mode.protocol_label();
|
||||
NUM_DB_CONNECTIONS_OPENED_COUNTER
|
||||
.with_label_values(&[proto])
|
||||
.inc();
|
||||
|
||||
@@ -484,13 +484,13 @@ fn helper_create_connect_info(
|
||||
mechanism: &TestConnectMechanism,
|
||||
) -> (
|
||||
CachedNodeInfo,
|
||||
console::ConsoleReqExtra<'static>,
|
||||
console::ConsoleReqExtra,
|
||||
auth::BackendType<'_, ComputeUserInfo>,
|
||||
) {
|
||||
let cache = helper_create_cached_node_info();
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id: uuid::Uuid::new_v4(),
|
||||
application_name: Some("TEST"),
|
||||
application_name: "TEST".into(),
|
||||
options: vec![],
|
||||
};
|
||||
let creds = auth::BackendType::Test(mechanism);
|
||||
|
||||
@@ -3,4 +3,5 @@ mod limit_algorithm;
|
||||
mod limiter;
|
||||
pub use aimd::Aimd;
|
||||
pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
|
||||
pub use limiter::EndpointRateLimiter;
|
||||
pub use limiter::Limiter;
|
||||
|
||||
@@ -6,6 +6,9 @@ use std::{
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::Mutex;
|
||||
use smol_str::SmolStr;
|
||||
use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
|
||||
use tokio::time::{timeout, Instant};
|
||||
use tracing::info;
|
||||
@@ -15,6 +18,74 @@ use super::{
|
||||
RateLimiterConfig,
|
||||
};
|
||||
|
||||
// Simple per-endpoint rate limiter.
|
||||
//
|
||||
// Check that number of connections to the endpoint is below `max_rps` rps.
|
||||
// Purposefully ignore user name and database name as clients can reconnect
|
||||
// with different names, so we'll end up sending some http requests to
|
||||
// the control plane.
|
||||
//
|
||||
// We also may save quite a lot of CPU (I think) by bailing out right after we
|
||||
// saw SNI, before doing TLS handshake. User-side error messages in that case
|
||||
// does not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
|
||||
// I went with a more expensive way that yields user-friendlier error messages.
|
||||
//
|
||||
// TODO: add a better bucketing here, e.g. not more than 300 requests per second,
|
||||
// and not more than 1000 requests per 10 seconds, etc. Short bursts of reconnects
|
||||
// are noramal during redeployments, so we should not block them.
|
||||
pub struct EndpointRateLimiter {
|
||||
map: DashMap<SmolStr, Arc<Mutex<(chrono::NaiveTime, u32)>>>,
|
||||
max_rps: u32,
|
||||
access_count: AtomicUsize,
|
||||
}
|
||||
|
||||
impl EndpointRateLimiter {
|
||||
pub fn new(max_rps: u32) -> Self {
|
||||
Self {
|
||||
map: DashMap::new(),
|
||||
max_rps,
|
||||
access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that number of connections to the endpoint is below `max_rps` rps.
|
||||
pub fn check(&self, endpoint: SmolStr) -> bool {
|
||||
// do GC every 100k requests (worst case memory usage is about 10MB)
|
||||
if self.access_count.fetch_add(1, Ordering::AcqRel) % 100_000 == 0 {
|
||||
self.do_gc();
|
||||
}
|
||||
|
||||
let now = chrono::Utc::now().naive_utc().time();
|
||||
let entry = self
|
||||
.map
|
||||
.entry(endpoint)
|
||||
.or_insert_with(|| Arc::new(Mutex::new((now, 0))));
|
||||
let mut entry = entry.lock();
|
||||
let (last_time, count) = *entry;
|
||||
|
||||
if now - last_time < chrono::Duration::seconds(1) {
|
||||
if count >= self.max_rps {
|
||||
return false;
|
||||
}
|
||||
*entry = (last_time, count + 1);
|
||||
} else {
|
||||
*entry = (now, 1);
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Clean the map. Simple strategy: remove all entries. At worst, we'll
|
||||
/// double the effective max_rps during the cleanup. But that way deletion
|
||||
/// does not aquire mutex on each entry access.
|
||||
pub fn do_gc(&self) {
|
||||
info!(
|
||||
"cleaning up endpoint rate limiter, current size = {}",
|
||||
self.map.len()
|
||||
);
|
||||
self.map.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Limits the number of concurrent jobs.
|
||||
///
|
||||
/// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the
|
||||
|
||||
@@ -14,6 +14,7 @@ use tokio_util::task::TaskTracker;
|
||||
|
||||
use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
|
||||
use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
|
||||
use crate::rate_limiter::EndpointRateLimiter;
|
||||
use crate::{cancellation::CancelMap, config::ProxyConfig};
|
||||
use futures::StreamExt;
|
||||
use hyper::{
|
||||
@@ -43,6 +44,7 @@ pub async fn task_main(
|
||||
}
|
||||
|
||||
let conn_pool = conn_pool::GlobalConnPool::new(config);
|
||||
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
|
||||
|
||||
// shutdown the connection pool
|
||||
tokio::spawn({
|
||||
@@ -91,6 +93,7 @@ pub async fn task_main(
|
||||
let sni_name = tls.server_name().map(|s| s.to_string());
|
||||
let conn_pool = conn_pool.clone();
|
||||
let ws_connections = ws_connections.clone();
|
||||
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
|
||||
|
||||
async move {
|
||||
let peer_addr = match client_addr {
|
||||
@@ -103,6 +106,7 @@ pub async fn task_main(
|
||||
let sni_name = sni_name.clone();
|
||||
let conn_pool = conn_pool.clone();
|
||||
let ws_connections = ws_connections.clone();
|
||||
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
|
||||
|
||||
async move {
|
||||
let cancel_map = Arc::new(CancelMap::default());
|
||||
@@ -117,6 +121,7 @@ pub async fn task_main(
|
||||
session_id,
|
||||
sni_name,
|
||||
peer_addr.ip(),
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"serverless",
|
||||
@@ -190,6 +195,7 @@ async fn request_handler(
|
||||
session_id: uuid::Uuid,
|
||||
sni_hostname: Option<String>,
|
||||
peer_addr: IpAddr,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let host = request
|
||||
.headers()
|
||||
@@ -214,6 +220,7 @@ async fn request_handler(
|
||||
session_id,
|
||||
host,
|
||||
peer_addr,
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -37,7 +37,7 @@ use crate::proxy::ConnectMechanism;
|
||||
use tracing::{error, warn, Span};
|
||||
use tracing::{info, info_span, Instrument};
|
||||
|
||||
pub const APP_NAME: &str = "sql_over_http";
|
||||
pub const APP_NAME: &str = "/sql_over_http";
|
||||
const MAX_CONNS_PER_ENDPOINT: usize = 20;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -432,7 +432,7 @@ async fn connect_to_compute(
|
||||
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id: uuid::Uuid::new_v4(),
|
||||
application_name: Some(APP_NAME),
|
||||
application_name: APP_NAME.to_string(),
|
||||
options: console_options,
|
||||
};
|
||||
// TODO(anna): this is a bit hacky way, consider using console notification listener.
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::{
|
||||
config::ProxyConfig,
|
||||
error::io_error,
|
||||
proxy::{handle_client, ClientMode},
|
||||
rate_limiter::EndpointRateLimiter,
|
||||
};
|
||||
use bytes::{Buf, Bytes};
|
||||
use futures::{Sink, Stream};
|
||||
@@ -13,6 +14,7 @@ use pin_project_lite::pin_project;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
pin::Pin,
|
||||
sync::Arc,
|
||||
task::{ready, Context, Poll},
|
||||
};
|
||||
use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
|
||||
@@ -134,6 +136,7 @@ pub async fn serve_websocket(
|
||||
session_id: uuid::Uuid,
|
||||
hostname: Option<String>,
|
||||
peer_addr: IpAddr,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> anyhow::Result<()> {
|
||||
let websocket = websocket.await?;
|
||||
handle_client(
|
||||
@@ -143,6 +146,7 @@ pub async fn serve_websocket(
|
||||
WebSocketRw::new(websocket),
|
||||
ClientMode::Websockets { hostname },
|
||||
peer_addr,
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
|
||||
@@ -142,7 +142,9 @@ pub(crate) async fn branch_cleanup_and_check_errors(
|
||||
.collect();
|
||||
|
||||
if !orphan_layers.is_empty() {
|
||||
result.errors.push(format!(
|
||||
// An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
|
||||
// these as a hint that there is something worth cleaning up here.
|
||||
result.warnings.push(format!(
|
||||
"index_part.json does not contain layers from S3: {:?}",
|
||||
orphan_layers
|
||||
.iter()
|
||||
@@ -170,6 +172,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
|
||||
));
|
||||
}
|
||||
}
|
||||
BlobDataParseResult::Relic => {}
|
||||
BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
|
||||
parse_errors
|
||||
.into_iter()
|
||||
@@ -215,6 +218,8 @@ pub(crate) enum BlobDataParseResult {
|
||||
index_part_generation: Generation,
|
||||
s3_layers: HashSet<(LayerFileName, Generation)>,
|
||||
},
|
||||
/// The remains of a deleted Timeline (i.e. an initdb archive only)
|
||||
Relic,
|
||||
Incorrect(Vec<String>),
|
||||
}
|
||||
|
||||
@@ -245,6 +250,7 @@ pub(crate) async fn list_timeline_blobs(
|
||||
timeline_dir_target.delimiter = String::new();
|
||||
|
||||
let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
|
||||
let mut initdb_archive: bool = false;
|
||||
|
||||
let stream = stream_listing(s3_client, &timeline_dir_target);
|
||||
pin_mut!(stream);
|
||||
@@ -258,6 +264,10 @@ pub(crate) async fn list_timeline_blobs(
|
||||
tracing::info!("Index key {key}");
|
||||
index_parts.push(obj)
|
||||
}
|
||||
Some("initdb.tar.zst") => {
|
||||
tracing::info!("initdb archive {key}");
|
||||
initdb_archive = true;
|
||||
}
|
||||
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
|
||||
Ok((new_layer, gen)) => {
|
||||
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
|
||||
@@ -279,6 +289,16 @@ pub(crate) async fn list_timeline_blobs(
|
||||
}
|
||||
}
|
||||
|
||||
if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
|
||||
tracing::info!(
|
||||
"Timeline is empty apart from initdb archive: expected post-deletion state."
|
||||
);
|
||||
return Ok(S3TimelineBlobData {
|
||||
blob_data: BlobDataParseResult::Relic,
|
||||
keys_to_remove: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// Choose the index_part with the highest generation
|
||||
let (index_part_object, index_part_generation) = match index_parts
|
||||
.iter()
|
||||
|
||||
@@ -86,7 +86,9 @@ impl S3Target {
|
||||
if new_self.prefix_in_bucket.is_empty() {
|
||||
new_self.prefix_in_bucket = format!("/{}/", new_segment);
|
||||
} else {
|
||||
let _ = new_self.prefix_in_bucket.pop();
|
||||
if new_self.prefix_in_bucket.ends_with('/') {
|
||||
new_self.prefix_in_bucket.pop();
|
||||
}
|
||||
new_self.prefix_in_bucket =
|
||||
[&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
));
|
||||
|
||||
match cli.command {
|
||||
Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
|
||||
Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
|
||||
Err(e) => {
|
||||
tracing::error!("Failed: {e}");
|
||||
Err(e)
|
||||
@@ -70,6 +70,17 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
if summary.is_fatal() {
|
||||
Err(anyhow::anyhow!("Fatal scrub errors detected"))
|
||||
} else if summary.is_empty() {
|
||||
// Strictly speaking an empty bucket is a valid bucket, but if someone ran the
|
||||
// scrubber they were likely expecting to scan something, and if we see no timelines
|
||||
// at all then it's likely due to some configuration issues like a bad prefix
|
||||
Err(anyhow::anyhow!(
|
||||
"No timelines found in bucket {} prefix {}",
|
||||
bucket_config.bucket,
|
||||
bucket_config
|
||||
.prefix_in_bucket
|
||||
.unwrap_or("<none>".to_string())
|
||||
))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -174,6 +174,10 @@ Timeline layer count: {6}
|
||||
pub fn is_fatal(&self) -> bool {
|
||||
!self.with_errors.is_empty()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.count == 0
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
|
||||
|
||||
@@ -28,6 +28,7 @@ import jwt
|
||||
import psycopg2
|
||||
import pytest
|
||||
import requests
|
||||
import toml
|
||||
from _pytest.config import Config
|
||||
from _pytest.config.argparsing import Parser
|
||||
from _pytest.fixtures import FixtureRequest
|
||||
@@ -436,7 +437,7 @@ class NeonEnvBuilder:
|
||||
# Pageserver remote storage
|
||||
self.pageserver_remote_storage = pageserver_remote_storage
|
||||
# Safekeepers remote storage
|
||||
self.sk_remote_storage: Optional[RemoteStorage] = None
|
||||
self.safekeepers_remote_storage: Optional[RemoteStorage] = None
|
||||
|
||||
self.broker = broker
|
||||
self.run_id = run_id
|
||||
@@ -506,6 +507,66 @@ class NeonEnvBuilder:
|
||||
|
||||
return env
|
||||
|
||||
def from_repo_dir(
|
||||
self,
|
||||
repo_dir: Path,
|
||||
neon_binpath: Optional[Path] = None,
|
||||
pg_distrib_dir: Optional[Path] = None,
|
||||
) -> NeonEnv:
|
||||
"""
|
||||
A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
|
||||
"""
|
||||
|
||||
# Setting custom `neon_binpath` and `pg_distrib_dir` is useful for compatibility tests
|
||||
self.neon_binpath = neon_binpath or self.neon_binpath
|
||||
self.pg_distrib_dir = pg_distrib_dir or self.pg_distrib_dir
|
||||
|
||||
# Get the initial tenant and timeline from the snapshot config
|
||||
snapshot_config_toml = repo_dir / "config"
|
||||
with snapshot_config_toml.open("r") as f:
|
||||
snapshot_config = toml.load(f)
|
||||
|
||||
self.initial_tenant = TenantId(snapshot_config["default_tenant_id"])
|
||||
self.initial_timeline = TimelineId(
|
||||
dict(snapshot_config["branch_name_mappings"][DEFAULT_BRANCH_NAME])[
|
||||
str(self.initial_tenant)
|
||||
]
|
||||
)
|
||||
self.env = self.init_configs()
|
||||
|
||||
for ps_dir in repo_dir.glob("pageserver_*"):
|
||||
tenants_from_dir = ps_dir / "tenants"
|
||||
tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
|
||||
|
||||
log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
|
||||
shutil.copytree(tenants_from_dir, tenants_to_dir)
|
||||
|
||||
for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
|
||||
sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
|
||||
log.info(f"Copying safekeeper directory {sk_from_dir} to {sk_to_dir}")
|
||||
sk_to_dir.rmdir()
|
||||
shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
|
||||
|
||||
shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
|
||||
shutil.copytree(
|
||||
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
|
||||
)
|
||||
|
||||
if (attachments_json := Path(repo_dir / "attachments.json")).exists():
|
||||
shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
|
||||
|
||||
# Update the config with info about tenants and timelines
|
||||
with (self.repo_dir / "config").open("r") as f:
|
||||
config = toml.load(f)
|
||||
|
||||
config["default_tenant_id"] = snapshot_config["default_tenant_id"]
|
||||
config["branch_name_mappings"] = snapshot_config["branch_name_mappings"]
|
||||
|
||||
with (self.repo_dir / "config").open("w") as f:
|
||||
toml.dump(config, f)
|
||||
|
||||
return self.env
|
||||
|
||||
def enable_scrub_on_exit(self):
|
||||
"""
|
||||
Call this if you would like the fixture to automatically run
|
||||
@@ -534,9 +595,11 @@ class NeonEnvBuilder:
|
||||
self.pageserver_remote_storage = ret
|
||||
|
||||
def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
|
||||
assert self.sk_remote_storage is None, "sk_remote_storage already configured"
|
||||
assert (
|
||||
self.safekeepers_remote_storage is None
|
||||
), "safekeepers_remote_storage already configured"
|
||||
|
||||
self.sk_remote_storage = self._configure_and_create_remote_storage(
|
||||
self.safekeepers_remote_storage = self._configure_and_create_remote_storage(
|
||||
kind, RemoteStorageUser.SAFEKEEPER
|
||||
)
|
||||
|
||||
@@ -589,7 +652,7 @@ class NeonEnvBuilder:
|
||||
directory_to_clean.rmdir()
|
||||
|
||||
def cleanup_remote_storage(self):
|
||||
for x in [self.pageserver_remote_storage, self.sk_remote_storage]:
|
||||
for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
|
||||
if isinstance(x, S3Storage):
|
||||
x.do_cleanup()
|
||||
|
||||
@@ -693,7 +756,7 @@ class NeonEnv:
|
||||
self.pageservers: List[NeonPageserver] = []
|
||||
self.broker = config.broker
|
||||
self.pageserver_remote_storage = config.pageserver_remote_storage
|
||||
self.safekeepers_remote_storage = config.sk_remote_storage
|
||||
self.safekeepers_remote_storage = config.safekeepers_remote_storage
|
||||
self.pg_version = config.pg_version
|
||||
# Binary path for pageserver, safekeeper, etc
|
||||
self.neon_binpath = config.neon_binpath
|
||||
@@ -718,25 +781,17 @@ class NeonEnv:
|
||||
self.attachment_service = None
|
||||
|
||||
# Create a config file corresponding to the options
|
||||
toml = textwrap.dedent(
|
||||
f"""
|
||||
default_tenant_id = '{config.initial_tenant}'
|
||||
"""
|
||||
)
|
||||
cfg: Dict[str, Any] = {
|
||||
"default_tenant_id": str(self.initial_tenant),
|
||||
"broker": {
|
||||
"listen_addr": self.broker.listen_addr(),
|
||||
},
|
||||
"pageservers": [],
|
||||
"safekeepers": [],
|
||||
}
|
||||
|
||||
if self.control_plane_api is not None:
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
control_plane_api = '{self.control_plane_api}'
|
||||
"""
|
||||
)
|
||||
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
[broker]
|
||||
listen_addr = '{self.broker.listen_addr()}'
|
||||
"""
|
||||
)
|
||||
cfg["control_plane_api"] = self.control_plane_api
|
||||
|
||||
# Create config for pageserver
|
||||
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||
@@ -749,26 +804,24 @@ class NeonEnv:
|
||||
http=self.port_distributor.get_port(),
|
||||
)
|
||||
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
[[pageservers]]
|
||||
id={ps_id}
|
||||
listen_pg_addr = 'localhost:{pageserver_port.pg}'
|
||||
listen_http_addr = 'localhost:{pageserver_port.http}'
|
||||
pg_auth_type = '{pg_auth_type}'
|
||||
http_auth_type = '{http_auth_type}'
|
||||
"""
|
||||
)
|
||||
|
||||
ps_cfg: Dict[str, Any] = {
|
||||
"id": ps_id,
|
||||
"listen_pg_addr": f"localhost:{pageserver_port.pg}",
|
||||
"listen_http_addr": f"localhost:{pageserver_port.http}",
|
||||
"pg_auth_type": pg_auth_type,
|
||||
"http_auth_type": http_auth_type,
|
||||
}
|
||||
# Create a corresponding NeonPageserver object
|
||||
self.pageservers.append(
|
||||
NeonPageserver(
|
||||
self,
|
||||
ps_id,
|
||||
port=pageserver_port,
|
||||
config_override=config.pageserver_config_override,
|
||||
config_override=self.pageserver_config_override,
|
||||
)
|
||||
)
|
||||
cfg["pageservers"].append(ps_cfg)
|
||||
|
||||
# Create config and a Safekeeper object for each safekeeper
|
||||
for i in range(1, config.num_safekeepers + 1):
|
||||
port = SafekeeperPort(
|
||||
@@ -777,32 +830,22 @@ class NeonEnv:
|
||||
http=self.port_distributor.get_port(),
|
||||
)
|
||||
id = config.safekeepers_id_start + i # assign ids sequentially
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
[[safekeepers]]
|
||||
id = {id}
|
||||
pg_port = {port.pg}
|
||||
pg_tenant_only_port = {port.pg_tenant_only}
|
||||
http_port = {port.http}
|
||||
sync = {'true' if config.safekeepers_enable_fsync else 'false'}"""
|
||||
)
|
||||
sk_cfg: Dict[str, Any] = {
|
||||
"id": id,
|
||||
"pg_port": port.pg,
|
||||
"pg_tenant_only_port": port.pg_tenant_only,
|
||||
"http_port": port.http,
|
||||
"sync": config.safekeepers_enable_fsync,
|
||||
}
|
||||
if config.auth_enabled:
|
||||
toml += textwrap.dedent(
|
||||
"""
|
||||
auth_enabled = true
|
||||
"""
|
||||
)
|
||||
if config.sk_remote_storage is not None:
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
|
||||
"""
|
||||
)
|
||||
safekeeper = Safekeeper(env=self, id=id, port=port)
|
||||
self.safekeepers.append(safekeeper)
|
||||
sk_cfg["auth_enabled"] = True
|
||||
if self.safekeepers_remote_storage is not None:
|
||||
sk_cfg["remote_storage"] = self.safekeepers_remote_storage.to_toml_inline_table()
|
||||
self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
|
||||
cfg["safekeepers"].append(sk_cfg)
|
||||
|
||||
log.info(f"Config: {toml}")
|
||||
self.neon_cli.init(toml)
|
||||
log.info(f"Config: {cfg}")
|
||||
self.neon_cli.init(cfg)
|
||||
|
||||
def start(self):
|
||||
# Start up broker, pageserver and all safekeepers
|
||||
@@ -1288,10 +1331,10 @@ class NeonCli(AbstractNeonCli):
|
||||
|
||||
def init(
|
||||
self,
|
||||
config_toml: str,
|
||||
config: Dict[str, Any],
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
with tempfile.NamedTemporaryFile(mode="w+") as tmp:
|
||||
tmp.write(config_toml)
|
||||
tmp.write(toml.dumps(config))
|
||||
tmp.flush()
|
||||
|
||||
cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
|
||||
@@ -1729,11 +1772,16 @@ class NeonPageserver(PgProtocol):
|
||||
|
||||
@property
|
||||
def workdir(self) -> Path:
|
||||
return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
|
||||
return self.env.repo_dir / f"pageserver_{self.id}"
|
||||
|
||||
def assert_no_errors(self):
|
||||
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
|
||||
errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
|
||||
logfile = self.workdir / "pageserver.log"
|
||||
if not logfile.exists():
|
||||
log.warning(f"Skipping log check: {logfile} does not exist")
|
||||
return
|
||||
|
||||
with logfile.open("r") as f:
|
||||
errors = scan_pageserver_log_for_errors(f, self.allowed_errors)
|
||||
|
||||
for _lineno, error in errors:
|
||||
log.info(f"not allowed error: {error.strip()}")
|
||||
@@ -1757,7 +1805,10 @@ class NeonPageserver(PgProtocol):
|
||||
|
||||
def log_contains(self, pattern: str) -> Optional[str]:
|
||||
"""Check that the pageserver log contains a line that matches the given regex"""
|
||||
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
|
||||
logfile = self.workdir / "pageserver.log"
|
||||
if not logfile.exists():
|
||||
log.warning(f"Skipping log check: {logfile} does not exist")
|
||||
return None
|
||||
|
||||
contains_re = re.compile(pattern)
|
||||
|
||||
@@ -1766,14 +1817,11 @@ class NeonPageserver(PgProtocol):
|
||||
# no guarantee it is already present in the log file. This hasn't
|
||||
# been a problem in practice, our python tests are not fast enough
|
||||
# to hit that race condition.
|
||||
while True:
|
||||
line = logfile.readline()
|
||||
if not line:
|
||||
break
|
||||
|
||||
if contains_re.search(line):
|
||||
# found it!
|
||||
return line
|
||||
with logfile.open("r") as f:
|
||||
for line in f:
|
||||
if contains_re.search(line):
|
||||
# found it!
|
||||
return line
|
||||
|
||||
return None
|
||||
|
||||
@@ -1796,6 +1844,27 @@ class NeonPageserver(PgProtocol):
|
||||
client = self.http_client()
|
||||
return client.tenant_detach(tenant_id)
|
||||
|
||||
def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
|
||||
# This API is only for use when generations are enabled
|
||||
assert self.env.attachment_service is not None
|
||||
|
||||
if config["mode"].startswith("Attached") and "generation" not in config:
|
||||
config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
|
||||
|
||||
client = self.http_client()
|
||||
return client.tenant_location_conf(tenant_id, config, **kwargs)
|
||||
|
||||
def read_tenant_location_conf(self, tenant_id: TenantId) -> dict[str, Any]:
|
||||
path = self.tenant_dir(tenant_id) / "config-v1"
|
||||
log.info(f"Reading location conf from {path}")
|
||||
bytes = open(path, "r").read()
|
||||
try:
|
||||
decoded: dict[str, Any] = toml.loads(bytes)
|
||||
return decoded
|
||||
except:
|
||||
log.error(f"Failed to decode LocationConf, raw content ({len(bytes)} bytes): {bytes}")
|
||||
raise
|
||||
|
||||
def tenant_create(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
@@ -2729,6 +2798,7 @@ class EndpointFactory:
|
||||
lsn: Optional[Lsn] = None,
|
||||
hot_standby: bool = False,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
pageserver_id: Optional[int] = None,
|
||||
) -> Endpoint:
|
||||
ep = Endpoint(
|
||||
self.env,
|
||||
@@ -2748,6 +2818,7 @@ class EndpointFactory:
|
||||
lsn=lsn,
|
||||
hot_standby=hot_standby,
|
||||
config_lines=config_lines,
|
||||
pageserver_id=pageserver_id,
|
||||
)
|
||||
|
||||
def stop_all(self) -> "EndpointFactory":
|
||||
@@ -3094,7 +3165,7 @@ def pytest_addoption(parser: Parser):
|
||||
|
||||
|
||||
SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"config|metadata|.+\.(?:toml|pid|json|sql)"
|
||||
r"config|config-v1|heatmap-v1|metadata|.+\.(?:toml|pid|json|sql)"
|
||||
)
|
||||
|
||||
|
||||
@@ -3355,8 +3426,6 @@ def parse_project_git_version_output(s: str) -> str:
|
||||
|
||||
The information is generated by utils::project_git_version!
|
||||
"""
|
||||
import re
|
||||
|
||||
res = re.search(r"git(-env)?:([0-9a-fA-F]{8,40})(-\S+)?", s)
|
||||
if res and (commit := res.group(2)):
|
||||
return commit
|
||||
|
||||
@@ -150,7 +150,7 @@ class PageserverHttpClient(requests.Session):
|
||||
# (this may change in future if we do fault injection of a kind that causes
|
||||
# requests TCP flows to stick)
|
||||
read=False,
|
||||
backoff_factor=0,
|
||||
backoff_factor=0.2,
|
||||
status_forcelist=[503],
|
||||
allowed_methods=None,
|
||||
remove_headers_on_redirect=[],
|
||||
@@ -277,6 +277,23 @@ class PageserverHttpClient(requests.Session):
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
|
||||
self.verbose_error(res)
|
||||
|
||||
def tenant_location_conf(
|
||||
self, tenant_id: TenantId, location_conf=dict[str, Any], flush_ms=None
|
||||
):
|
||||
body = location_conf.copy()
|
||||
body["tenant_id"] = str(tenant_id)
|
||||
|
||||
params = {}
|
||||
if flush_ms is not None:
|
||||
params["flush_ms"] = str(flush_ms)
|
||||
|
||||
res = self.put(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/location_config",
|
||||
json=body,
|
||||
params=params,
|
||||
)
|
||||
self.verbose_error(res)
|
||||
|
||||
def tenant_delete(self, tenant_id: TenantId):
|
||||
res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
|
||||
self.verbose_error(res)
|
||||
|
||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import boto3
|
||||
import toml
|
||||
from mypy_boto3_s3 import S3Client
|
||||
|
||||
from fixtures.log_helper import log
|
||||
@@ -133,7 +134,10 @@ class LocalFsStorage:
|
||||
return json.load(f)
|
||||
|
||||
def to_toml_inline_table(self) -> str:
|
||||
return f"local_path='{self.root}'"
|
||||
rv = {
|
||||
"local_path": str(self.root),
|
||||
}
|
||||
return toml.TomlEncoder().dump_inline_table(rv)
|
||||
|
||||
def cleanup(self):
|
||||
# no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
|
||||
@@ -174,18 +178,18 @@ class S3Storage:
|
||||
)
|
||||
|
||||
def to_toml_inline_table(self) -> str:
|
||||
s = [
|
||||
f"bucket_name='{self.bucket_name}'",
|
||||
f"bucket_region='{self.bucket_region}'",
|
||||
]
|
||||
rv = {
|
||||
"bucket_name": self.bucket_name,
|
||||
"bucket_region": self.bucket_region,
|
||||
}
|
||||
|
||||
if self.prefix_in_bucket is not None:
|
||||
s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
|
||||
rv["prefix_in_bucket"] = self.prefix_in_bucket
|
||||
|
||||
if self.endpoint is not None:
|
||||
s.append(f"endpoint='{self.endpoint}'")
|
||||
rv["endpoint"] = self.endpoint
|
||||
|
||||
return ",".join(s)
|
||||
return toml.TomlEncoder().dump_inline_table(rv)
|
||||
|
||||
def do_cleanup(self):
|
||||
if not self.cleanup:
|
||||
@@ -384,4 +388,4 @@ def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
|
||||
if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
|
||||
raise Exception("invalid remote storage type")
|
||||
|
||||
return f"{{{remote_storage.to_toml_inline_table()}}}"
|
||||
return remote_storage.to_toml_inline_table()
|
||||
|
||||
148
test_runner/fixtures/workload.py
Normal file
148
test_runner/fixtures/workload.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from typing import Optional
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
last_flush_lsn_upload,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
|
||||
|
||||
class Workload:
|
||||
"""
|
||||
This is not a general purpose load generator: it exists for storage tests that need to inject some
|
||||
high level types of storage work via the postgres interface:
|
||||
- layer writes (`write_rows`)
|
||||
- work for compaction (`churn_rows`)
|
||||
- reads, checking we get the right data (`validate`)
|
||||
"""
|
||||
|
||||
def __init__(self, env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
|
||||
self.env = env
|
||||
self.tenant_id = tenant_id
|
||||
self.timeline_id = timeline_id
|
||||
self.table = "foo"
|
||||
|
||||
self.expect_rows = 0
|
||||
self.churn_cursor = 0
|
||||
|
||||
self._endpoint: Optional[Endpoint] = None
|
||||
|
||||
def endpoint(self, pageserver_id: int) -> Endpoint:
|
||||
if self._endpoint is None:
|
||||
self._endpoint = self.env.endpoints.create(
|
||||
"main",
|
||||
tenant_id=self.tenant_id,
|
||||
pageserver_id=pageserver_id,
|
||||
endpoint_id="ep-workload",
|
||||
)
|
||||
self._endpoint.start(pageserver_id=pageserver_id)
|
||||
else:
|
||||
self._endpoint.reconfigure(pageserver_id=pageserver_id)
|
||||
|
||||
connstring = self._endpoint.safe_psql(
|
||||
"SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'"
|
||||
)
|
||||
log.info(f"Workload.endpoint: connstr={connstring}")
|
||||
|
||||
return self._endpoint
|
||||
|
||||
def __del__(self):
|
||||
if self._endpoint is not None:
|
||||
self._endpoint.stop()
|
||||
|
||||
def init(self, pageserver_id: int):
|
||||
endpoint = self.endpoint(pageserver_id)
|
||||
|
||||
endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
|
||||
endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
|
||||
last_flush_lsn_upload(
|
||||
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
|
||||
)
|
||||
|
||||
def write_rows(self, n, pageserver_id):
|
||||
endpoint = self.endpoint(pageserver_id)
|
||||
start = self.expect_rows
|
||||
end = start + n - 1
|
||||
self.expect_rows += n
|
||||
dummy_value = "blah"
|
||||
endpoint.safe_psql(
|
||||
f"""
|
||||
INSERT INTO {self.table} (id, val)
|
||||
SELECT g, '{dummy_value}'
|
||||
FROM generate_series({start}, {end}) g
|
||||
"""
|
||||
)
|
||||
|
||||
return last_flush_lsn_upload(
|
||||
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
|
||||
)
|
||||
|
||||
def churn_rows(self, n, pageserver_id, upload=True):
|
||||
assert self.expect_rows >= n
|
||||
|
||||
max_iters = 10
|
||||
endpoint = self.endpoint(pageserver_id)
|
||||
todo = n
|
||||
i = 0
|
||||
while todo > 0:
|
||||
i += 1
|
||||
if i > max_iters:
|
||||
raise RuntimeError("oops")
|
||||
start = self.churn_cursor % self.expect_rows
|
||||
n_iter = min((self.expect_rows - start), todo)
|
||||
todo -= n_iter
|
||||
|
||||
end = start + n_iter - 1
|
||||
|
||||
log.info(
|
||||
f"start,end = {start},{end}, cursor={self.churn_cursor}, expect_rows={self.expect_rows}"
|
||||
)
|
||||
|
||||
assert end < self.expect_rows
|
||||
|
||||
self.churn_cursor += n_iter
|
||||
dummy_value = "blah"
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
f"""
|
||||
INSERT INTO {self.table} (id, val)
|
||||
SELECT g, '{dummy_value}'
|
||||
FROM generate_series({start}, {end}) g
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET val = EXCLUDED.val
|
||||
""",
|
||||
f"VACUUM {self.table}",
|
||||
]
|
||||
)
|
||||
|
||||
last_flush_lsn = wait_for_last_flush_lsn(
|
||||
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
|
||||
)
|
||||
ps_http = self.env.get_pageserver(pageserver_id).http_client()
|
||||
wait_for_last_record_lsn(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
|
||||
|
||||
if upload:
|
||||
# force a checkpoint to trigger upload
|
||||
ps_http.timeline_checkpoint(self.tenant_id, self.timeline_id)
|
||||
wait_for_upload(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
|
||||
log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
|
||||
else:
|
||||
log.info(f"Churn: not waiting for upload, disk LSN {last_flush_lsn}")
|
||||
|
||||
def validate(self, pageserver_id):
|
||||
endpoint = self.endpoint(pageserver_id)
|
||||
result = endpoint.safe_psql_many(
|
||||
[
|
||||
"select clear_buffer_cache()",
|
||||
f"""
|
||||
SELECT COUNT(*) FROM {self.table}
|
||||
""",
|
||||
]
|
||||
)
|
||||
|
||||
log.info(f"validate({self.expect_rows}): {result}")
|
||||
assert result == [[("",)], [(self.expect_rows,)]]
|
||||
@@ -92,8 +92,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
|
||||
def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.auth_enabled = True
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
|
||||
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
|
||||
)
|
||||
|
||||
pageserver_token_old = env.auth_keys.generate_pageserver_token()
|
||||
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
|
||||
@@ -145,9 +146,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
|
||||
def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.auth_enabled = True
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
|
||||
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
|
||||
)
|
||||
pageserver_token_old = env.auth_keys.generate_pageserver_token()
|
||||
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
|
||||
|
||||
|
||||
@@ -14,8 +14,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
|
||||
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"]
|
||||
)
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
|
||||
|
||||
@@ -148,11 +148,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
|
||||
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading",
|
||||
]
|
||||
)
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
@@ -247,11 +247,11 @@ def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: N
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
|
||||
".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory",
|
||||
]
|
||||
)
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
|
||||
@@ -1,30 +1,25 @@
|
||||
import copy
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
import pytest
|
||||
import toml # TODO: replace with tomllib for Python >= 3.11
|
||||
from fixtures.log_helper import log
|
||||
import toml
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonCli,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgBin,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import (
|
||||
timeline_delete_wait_completed,
|
||||
wait_for_last_record_lsn,
|
||||
wait_for_upload,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.port_distributor import PortDistributor
|
||||
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
|
||||
from fixtures.remote_storage import RemoteStorageKind
|
||||
from fixtures.types import Lsn
|
||||
from pytest import FixtureRequest
|
||||
|
||||
#
|
||||
# A test suite that help to prevent unintentionally breaking backward or forward compatibility between Neon releases.
|
||||
@@ -37,8 +32,8 @@ from pytest import FixtureRequest
|
||||
# If the breakage is intentional, the test can be xfaild with setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
|
||||
#
|
||||
# The file contains a couple of helper functions:
|
||||
# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
|
||||
# - check_neon_works performs the test itself, feel free to add more checks there.
|
||||
# - dump_differs compares two SQL dumps and writes the diff to a file.
|
||||
#
|
||||
#
|
||||
# How to run `test_backward_compatibility` locally:
|
||||
@@ -46,6 +41,7 @@ from pytest import FixtureRequest
|
||||
# export DEFAULT_PG_VERSION=15
|
||||
# export BUILD_TYPE=release
|
||||
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
|
||||
# export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
|
||||
#
|
||||
# # Build previous version of binaries and create a data snapshot:
|
||||
# rm -rf pg_install target
|
||||
@@ -59,8 +55,7 @@ from pytest import FixtureRequest
|
||||
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
|
||||
#
|
||||
# # Run backward compatibility test
|
||||
# COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
|
||||
# ./scripts/pytest -k test_backward_compatibility
|
||||
# ./scripts/pytest -k test_backward_compatibility
|
||||
#
|
||||
#
|
||||
# How to run `test_forward_compatibility` locally:
|
||||
@@ -68,6 +63,8 @@ from pytest import FixtureRequest
|
||||
# export DEFAULT_PG_VERSION=15
|
||||
# export BUILD_TYPE=release
|
||||
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
|
||||
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
|
||||
# export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
|
||||
#
|
||||
# # Build previous version of binaries and store them somewhere:
|
||||
# rm -rf pg_install target
|
||||
@@ -84,9 +81,7 @@ from pytest import FixtureRequest
|
||||
# ./scripts/pytest -k test_create_snapshot
|
||||
#
|
||||
# # Run forward compatibility test
|
||||
# COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
|
||||
# COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
|
||||
# ./scripts/pytest -k test_forward_compatibility
|
||||
# ./scripts/pytest -k test_forward_compatibility
|
||||
#
|
||||
|
||||
check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
|
||||
@@ -155,13 +150,9 @@ def test_create_snapshot(
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
def test_backward_compatibility(
|
||||
pg_bin: PgBin,
|
||||
port_distributor: PortDistributor,
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
neon_binpath: Path,
|
||||
pg_distrib_dir: Path,
|
||||
pg_version: PgVersion,
|
||||
request: FixtureRequest,
|
||||
):
|
||||
"""
|
||||
Test that the new binaries can read old data
|
||||
@@ -177,23 +168,15 @@ def test_backward_compatibility(
|
||||
)
|
||||
|
||||
try:
|
||||
# Copy the snapshot to current directory, and prepare for the test
|
||||
prepare_snapshot(
|
||||
from_dir=compatibility_snapshot_dir,
|
||||
to_dir=test_output_dir / "compatibility_snapshot",
|
||||
port_distributor=port_distributor,
|
||||
)
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
|
||||
neon_env_builder.start()
|
||||
|
||||
check_neon_works(
|
||||
test_output_dir / "compatibility_snapshot" / "repo",
|
||||
neon_binpath,
|
||||
neon_binpath,
|
||||
pg_distrib_dir,
|
||||
pg_version,
|
||||
port_distributor,
|
||||
test_output_dir,
|
||||
pg_bin,
|
||||
request,
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
except Exception:
|
||||
if breaking_changes_allowed:
|
||||
@@ -212,12 +195,10 @@ def test_backward_compatibility(
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
def test_forward_compatibility(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
top_output_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
pg_version: PgVersion,
|
||||
request: FixtureRequest,
|
||||
neon_binpath: Path,
|
||||
):
|
||||
"""
|
||||
Test that the old binaries can read new data
|
||||
@@ -244,24 +225,19 @@ def test_forward_compatibility(
|
||||
)
|
||||
|
||||
try:
|
||||
# Copy the snapshot to current directory, and prepare for the test
|
||||
prepare_snapshot(
|
||||
from_dir=compatibility_snapshot_dir,
|
||||
to_dir=test_output_dir / "compatibility_snapshot",
|
||||
port_distributor=port_distributor,
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(
|
||||
compatibility_snapshot_dir / "repo",
|
||||
neon_binpath=compatibility_neon_bin,
|
||||
pg_distrib_dir=compatibility_postgres_distrib_dir,
|
||||
)
|
||||
neon_env_builder.start()
|
||||
|
||||
check_neon_works(
|
||||
test_output_dir / "compatibility_snapshot" / "repo",
|
||||
compatibility_neon_bin,
|
||||
neon_binpath,
|
||||
compatibility_postgres_distrib_dir,
|
||||
pg_version,
|
||||
port_distributor,
|
||||
test_output_dir,
|
||||
PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
|
||||
request,
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
except Exception:
|
||||
if breaking_changes_allowed:
|
||||
@@ -276,189 +252,26 @@ def test_forward_compatibility(
|
||||
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
|
||||
|
||||
|
||||
def prepare_snapshot(
|
||||
from_dir: Path,
|
||||
to_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
pg_distrib_dir: Optional[Path] = None,
|
||||
):
|
||||
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
|
||||
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
|
||||
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
|
||||
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
|
||||
ep = env.endpoints.create_start("main")
|
||||
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
|
||||
|
||||
log.info(f"Copying snapshot from {from_dir} to {to_dir}")
|
||||
shutil.copytree(from_dir, to_dir)
|
||||
|
||||
repo_dir = to_dir / "repo"
|
||||
|
||||
snapshot_config_toml = repo_dir / "config"
|
||||
snapshot_config = toml.load(snapshot_config_toml)
|
||||
|
||||
# Remove old logs to avoid confusion in test artifacts
|
||||
for logfile in repo_dir.glob("**/*.log"):
|
||||
logfile.unlink()
|
||||
|
||||
# Remove old computes in 'endpoints'. Old versions of the control plane used a directory
|
||||
# called "pgdatadirs". Delete it, too.
|
||||
if (repo_dir / "endpoints").exists():
|
||||
shutil.rmtree(repo_dir / "endpoints")
|
||||
if (repo_dir / "pgdatadirs").exists():
|
||||
shutil.rmtree(repo_dir / "pgdatadirs")
|
||||
os.mkdir(repo_dir / "endpoints")
|
||||
|
||||
# Update paths and ports in config files
|
||||
legacy_pageserver_toml = repo_dir / "pageserver.toml"
|
||||
legacy_bundle = os.path.exists(legacy_pageserver_toml)
|
||||
|
||||
path_to_config: dict[Path, dict[Any, Any]] = {}
|
||||
if legacy_bundle:
|
||||
os.mkdir(repo_dir / "pageserver_1")
|
||||
path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
|
||||
legacy_pageserver_toml
|
||||
)
|
||||
os.remove(legacy_pageserver_toml)
|
||||
os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
|
||||
else:
|
||||
for ps_conf in snapshot_config["pageservers"]:
|
||||
config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
|
||||
path_to_config[config_path] = toml.load(config_path)
|
||||
|
||||
# For each pageserver config, edit it and rewrite
|
||||
for config_path, pageserver_config in path_to_config.items():
|
||||
pageserver_config["remote_storage"]["local_path"] = str(
|
||||
LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
|
||||
)
|
||||
|
||||
for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
|
||||
pageserver_config[param] = port_distributor.replace_with_new_port(
|
||||
pageserver_config[param]
|
||||
)
|
||||
|
||||
# We don't use authentication in compatibility tests
|
||||
# so just remove authentication related settings.
|
||||
pageserver_config.pop("pg_auth_type", None)
|
||||
pageserver_config.pop("http_auth_type", None)
|
||||
|
||||
if pg_distrib_dir:
|
||||
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
|
||||
|
||||
with config_path.open("w") as f:
|
||||
toml.dump(pageserver_config, f)
|
||||
|
||||
# neon_local config doesn't have to be backward compatible. If we're using a dump from before
|
||||
# it supported multiple pageservers, fix it up.
|
||||
if "pageservers" not in snapshot_config:
|
||||
snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
|
||||
del snapshot_config["pageserver"]
|
||||
|
||||
for param in ("listen_http_addr", "listen_pg_addr"):
|
||||
for pageserver in snapshot_config["pageservers"]:
|
||||
pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
|
||||
snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
|
||||
snapshot_config["broker"]["listen_addr"]
|
||||
)
|
||||
for sk in snapshot_config["safekeepers"]:
|
||||
for param in ("http_port", "pg_port", "pg_tenant_only_port"):
|
||||
sk[param] = port_distributor.replace_with_new_port(sk[param])
|
||||
|
||||
if pg_distrib_dir:
|
||||
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
|
||||
|
||||
with snapshot_config_toml.open("w") as f:
|
||||
toml.dump(snapshot_config, f)
|
||||
|
||||
# Ensure that snapshot doesn't contain references to the original path
|
||||
rv = subprocess.run(
|
||||
[
|
||||
"grep",
|
||||
"--recursive",
|
||||
"--binary-file=without-match",
|
||||
"--files-with-matches",
|
||||
"test_create_snapshot/repo",
|
||||
str(repo_dir),
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
assert (
|
||||
rv.returncode != 0
|
||||
), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}"
|
||||
|
||||
|
||||
def check_neon_works(
|
||||
repo_dir: Path,
|
||||
neon_target_binpath: Path,
|
||||
neon_current_binpath: Path,
|
||||
pg_distrib_dir: Path,
|
||||
pg_version: PgVersion,
|
||||
port_distributor: PortDistributor,
|
||||
test_output_dir: Path,
|
||||
pg_bin: PgBin,
|
||||
request: FixtureRequest,
|
||||
):
|
||||
snapshot_config_toml = repo_dir / "config"
|
||||
snapshot_config = toml.load(snapshot_config_toml)
|
||||
snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
|
||||
snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
|
||||
with (snapshot_config_toml).open("w") as f:
|
||||
toml.dump(snapshot_config, f)
|
||||
|
||||
# TODO: replace with NeonEnvBuilder / NeonEnv
|
||||
config: Any = type("NeonEnvStub", (object,), {})
|
||||
config.rust_log_override = None
|
||||
config.repo_dir = repo_dir
|
||||
config.pg_version = pg_version
|
||||
config.initial_tenant = snapshot_config["default_tenant_id"]
|
||||
config.pg_distrib_dir = pg_distrib_dir
|
||||
config.remote_storage = None
|
||||
config.sk_remote_storage = None
|
||||
|
||||
# Use the "target" binaries to launch the storage nodes
|
||||
config_target = config
|
||||
config_target.neon_binpath = neon_target_binpath
|
||||
# We are using maybe-old binaries for neon services, but want to use current
|
||||
# binaries for test utilities like neon_local
|
||||
config_target.neon_local_binpath = neon_current_binpath
|
||||
cli_target = NeonCli(config_target)
|
||||
|
||||
# And the current binaries to launch computes
|
||||
snapshot_config["neon_distrib_dir"] = str(neon_current_binpath)
|
||||
with (snapshot_config_toml).open("w") as f:
|
||||
toml.dump(snapshot_config, f)
|
||||
config_current = copy.copy(config)
|
||||
config_current.neon_binpath = neon_current_binpath
|
||||
cli_current = NeonCli(config_current)
|
||||
|
||||
cli_target.raw_cli(["start"])
|
||||
request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
|
||||
|
||||
pg_port = port_distributor.get_port()
|
||||
http_port = port_distributor.get_port()
|
||||
cli_current.endpoint_create(
|
||||
branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main"
|
||||
)
|
||||
cli_current.endpoint_start("ep-main")
|
||||
request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main"))
|
||||
|
||||
connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
|
||||
connstr = ep.connstr()
|
||||
pg_bin.run_capture(
|
||||
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
|
||||
)
|
||||
initial_dump_differs = dump_differs(
|
||||
repo_dir.parent / "dump.sql",
|
||||
sql_dump_path,
|
||||
test_output_dir / "dump.sql",
|
||||
test_output_dir / "dump.filediff",
|
||||
)
|
||||
|
||||
# Check that project can be recovered from WAL
|
||||
# loosely based on https://www.notion.so/neondatabase/Storage-Recovery-from-WAL-d92c0aac0ebf40df892b938045d7d720
|
||||
tenant_id = snapshot_config["default_tenant_id"]
|
||||
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
|
||||
pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
|
||||
pageserver_http = PageserverHttpClient(
|
||||
port=pageserver_port,
|
||||
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
|
||||
)
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
pg_version = env.pg_version
|
||||
|
||||
shutil.rmtree(repo_dir / "local_fs_remote_storage")
|
||||
timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
|
||||
@@ -494,6 +307,11 @@ def dump_differs(
|
||||
Returns True if the dumps differ and produced diff is not allowed, False otherwise (in most cases we want it to return False).
|
||||
"""
|
||||
|
||||
if not first.exists():
|
||||
raise FileNotFoundError(f"{first} doesn't exist")
|
||||
if not second.exists():
|
||||
raise FileNotFoundError(f"{second} doesn't exist")
|
||||
|
||||
with output.open("w") as stdout:
|
||||
res = subprocess.run(
|
||||
[
|
||||
|
||||
0
test_runner/regress/test_config.py
Executable file → Normal file
0
test_runner/regress/test_config.py
Executable file → Normal file
@@ -99,12 +99,13 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
]
|
||||
)
|
||||
|
||||
# FIXME: we should clean up pageserver to not print this
|
||||
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# FIXME: we should clean up pageserver to not print this
|
||||
".*exited with error: unexpected message type: CopyData.*",
|
||||
# FIXME: Is this expected?
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
]
|
||||
)
|
||||
|
||||
def import_tar(base, wal):
|
||||
|
||||
@@ -236,3 +236,30 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
assert vanilla_pg.safe_psql(
|
||||
"select sum(somedata) from replication_example"
|
||||
) == endpoint.safe_psql("select sum(somedata) from replication_example")
|
||||
|
||||
|
||||
#
|
||||
# Check that slots are not inherited in brnach
|
||||
#
|
||||
def test_slots_and_branching(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
tenant, timeline = env.neon_cli.create_tenant()
|
||||
env.pageserver.http_client()
|
||||
|
||||
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
|
||||
main_cur = main_branch.connect().cursor()
|
||||
|
||||
# Create table and insert some data
|
||||
main_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
|
||||
|
||||
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
|
||||
|
||||
# Create branch ws.
|
||||
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
|
||||
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
|
||||
log.info("postgres is running on 'ws' branch")
|
||||
|
||||
# Check that we can create slot with the same name
|
||||
ws_cur = ws_branch.connect().cursor()
|
||||
ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
|
||||
|
||||
@@ -23,14 +23,20 @@ from fixtures.neon_fixtures import (
|
||||
PgBin,
|
||||
S3Scrubber,
|
||||
last_flush_lsn_upload,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.pageserver.utils import list_prefix
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import (
|
||||
assert_tenant_state,
|
||||
list_prefix,
|
||||
wait_for_last_record_lsn,
|
||||
wait_for_upload,
|
||||
)
|
||||
from fixtures.remote_storage import (
|
||||
RemoteStorageKind,
|
||||
)
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
from fixtures.utils import print_gc_result, wait_until
|
||||
from fixtures.workload import Workload
|
||||
|
||||
# A tenant configuration that is convenient for generating uploads and deletions
|
||||
# without a large amount of postgres traffic.
|
||||
@@ -93,7 +99,10 @@ def generate_uploads_and_deletions(
|
||||
)
|
||||
assert tenant_id is not None
|
||||
assert timeline_id is not None
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
# We are waiting for uploads as well as local flush, in order to avoid leaving the system
|
||||
# in a state where there are "future layers" in remote storage that will generate deletions
|
||||
# after a restart.
|
||||
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
|
||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
# Compaction should generate some GC-elegible layers
|
||||
@@ -560,3 +569,91 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
evict_all_layers(env, tenant_id, timeline_id)
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
|
||||
|
||||
def test_multi_attach(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
pg_bin: PgBin,
|
||||
):
|
||||
neon_env_builder.enable_generations = True
|
||||
neon_env_builder.num_pageservers = 3
|
||||
neon_env_builder.enable_pageserver_remote_storage(
|
||||
remote_storage_kind=RemoteStorageKind.MOCK_S3,
|
||||
)
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
|
||||
|
||||
pageservers = env.pageservers
|
||||
http_clients = list([p.http_client() for p in pageservers])
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
# We will intentionally create situations where stale deletions happen from non-latest-generation
|
||||
# nodes when the tenant is multiply-attached
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
|
||||
)
|
||||
|
||||
# Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
|
||||
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
|
||||
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
|
||||
with pytest.raises(PageserverApiException):
|
||||
http_clients[1].timeline_detail(tenant_id, timeline_id)
|
||||
with pytest.raises(PageserverApiException):
|
||||
http_clients[2].timeline_detail(tenant_id, timeline_id)
|
||||
|
||||
workload = Workload(env, tenant_id, timeline_id)
|
||||
workload.init(pageservers[0].id)
|
||||
workload.write_rows(1000, pageservers[0].id)
|
||||
|
||||
# Attach the tenant to the other two pageservers
|
||||
pageservers[1].tenant_attach(env.initial_tenant)
|
||||
pageservers[2].tenant_attach(env.initial_tenant)
|
||||
|
||||
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active"))
|
||||
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active"))
|
||||
|
||||
# Now they all have it attached
|
||||
_details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients])
|
||||
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
|
||||
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
|
||||
|
||||
# The endpoint can use any pageserver to service its reads
|
||||
for pageserver in pageservers:
|
||||
workload.validate(pageserver.id)
|
||||
|
||||
# If we write some more data, all the nodes can see it, including stale ones
|
||||
wrote_lsn = workload.write_rows(1000, pageservers[0].id)
|
||||
for ps_http in http_clients:
|
||||
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, wrote_lsn)
|
||||
|
||||
# ...and indeed endpoints can see it via any of the pageservers
|
||||
for pageserver in pageservers:
|
||||
workload.validate(pageserver.id)
|
||||
|
||||
# Prompt all the pageservers, including stale ones, to upload ingested layers to remote storage
|
||||
for ps_http in http_clients:
|
||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
wait_for_upload(ps_http, tenant_id, timeline_id, wrote_lsn)
|
||||
|
||||
# Now, the contents of remote storage will be a set of layers from each pageserver, but with unique
|
||||
# generation numbers
|
||||
# TODO: validate remote storage contents
|
||||
|
||||
# Stop all pageservers
|
||||
for ps in pageservers:
|
||||
ps.stop()
|
||||
|
||||
# Returning to a normal healthy state: all pageservers will start, but only the one most
|
||||
# recently attached via the control plane will re-attach on startup
|
||||
for ps in pageservers:
|
||||
ps.start()
|
||||
|
||||
with pytest.raises(PageserverApiException):
|
||||
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
|
||||
with pytest.raises(PageserverApiException):
|
||||
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
|
||||
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
|
||||
|
||||
# All data we wrote while multi-attached remains readable
|
||||
workload.validate(pageservers[2].id)
|
||||
|
||||
@@ -64,13 +64,13 @@ def test_metric_collection(
|
||||
# spin up neon, after http server is ready
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
|
||||
# httpserver is shut down before pageserver during passing run
|
||||
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*metrics endpoint refused the sent metrics*",
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
@@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile(
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# httpserver is shut down before pageserver during passing run
|
||||
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*metrics endpoint refused the sent metrics*",
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
|
||||
332
test_runner/regress/test_pageserver_secondary.py
Normal file
332
test_runner/regress/test_pageserver_secondary.py
Normal file
@@ -0,0 +1,332 @@
|
||||
import random
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
|
||||
from fixtures.remote_storage import RemoteStorageKind
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.workload import Workload
|
||||
|
||||
# A tenant configuration that is convenient for generating uploads and deletions
|
||||
# without a large amount of postgres traffic.
|
||||
TENANT_CONF = {
|
||||
# small checkpointing and compaction targets to ensure we generate many upload operations
|
||||
"checkpoint_distance": f"{128 * 1024}",
|
||||
"compaction_target_size": f"{128 * 1024}",
|
||||
"compaction_threshold": "1",
|
||||
# no PITR horizon, we specify the horizon when we request on-demand GC
|
||||
"pitr_interval": "0s",
|
||||
# disable background compaction and GC. We invoke it manually when we want it to happen.
|
||||
"gc_period": "0s",
|
||||
"compaction_period": "0s",
|
||||
# create image layers eagerly, so that GC can remove some layers
|
||||
"image_creation_threshold": "1",
|
||||
}
|
||||
|
||||
|
||||
def evict_random_layers(
|
||||
rng: random.Random, pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
|
||||
):
|
||||
"""
|
||||
Evict 50% of the layers on a pageserver
|
||||
"""
|
||||
timeline_path = pageserver.timeline_dir(tenant_id, timeline_id)
|
||||
initial_local_layers = sorted(
|
||||
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
)
|
||||
client = pageserver.http_client()
|
||||
for layer in initial_local_layers:
|
||||
if "ephemeral" in layer.name or "temp_download" in layer.name:
|
||||
continue
|
||||
|
||||
if rng.choice([True, False]):
|
||||
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
|
||||
client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("seed", [1, 2, 3])
|
||||
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
|
||||
"""
|
||||
Issue many location configuration changes, ensure that tenants
|
||||
remain readable & we don't get any unexpected errors. We should
|
||||
have no ERROR in the log, and no 500s in the API.
|
||||
|
||||
The location_config API is intentionally designed so that all destination
|
||||
states are valid, so that we may test it in this way: the API should always
|
||||
work as long as the tenant exists.
|
||||
"""
|
||||
neon_env_builder.enable_generations = True
|
||||
neon_env_builder.num_pageservers = 3
|
||||
neon_env_builder.enable_pageserver_remote_storage(
|
||||
remote_storage_kind=RemoteStorageKind.MOCK_S3,
|
||||
)
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
|
||||
assert env.attachment_service is not None
|
||||
|
||||
pageservers = env.pageservers
|
||||
list([p.http_client() for p in pageservers])
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
# We will make no effort to avoid stale attachments
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.extend(
|
||||
[
|
||||
".*Dropped remote consistent LSN updates.*",
|
||||
".*Dropping stale deletions.*",
|
||||
# page_service_conn_main{peer_addr=[::1]:41176}: query handler for 'pagestream 3b19aec5038c796f64b430b30a555121 d07776761d44050b8aab511df1657d83' failed: Tenant 3b19aec5038c796f64b430b30a555121 not found
|
||||
".*query handler.*Tenant.*not found.*",
|
||||
# page_service_conn_main{peer_addr=[::1]:45552}: query handler for 'pagestream 414ede7ad50f775a8e7d9ba0e43b9efc a43884be16f44b3626482b6981b2c745' failed: Tenant 414ede7ad50f775a8e7d9ba0e43b9efc is not active
|
||||
".*query handler.*Tenant.*not active.*",
|
||||
]
|
||||
)
|
||||
|
||||
# these can happen, if we shutdown at a good time. to be fixed as part of #5172.
|
||||
message = ".*duplicated L1 layer layer=.*"
|
||||
ps.allowed_errors.append(message)
|
||||
|
||||
workload = Workload(env, tenant_id, timeline_id)
|
||||
workload.init(env.pageservers[0].id)
|
||||
workload.write_rows(256, env.pageservers[0].id)
|
||||
|
||||
# We use a fixed seed to make the test reproducible: we want a randomly
|
||||
# chosen order, but not to change the order every time we run the test.
|
||||
rng = random.Random(seed)
|
||||
|
||||
initial_generation = 1
|
||||
last_state = {
|
||||
env.pageservers[0].id: ("AttachedSingle", initial_generation),
|
||||
env.pageservers[1].id: ("Detached", None),
|
||||
env.pageservers[2].id: ("Detached", None),
|
||||
}
|
||||
|
||||
latest_attached = env.pageservers[0].id
|
||||
|
||||
for _i in range(0, 64):
|
||||
# Pick a pageserver
|
||||
pageserver = rng.choice(env.pageservers)
|
||||
|
||||
# Pick a pseudorandom state
|
||||
modes = [
|
||||
"AttachedSingle",
|
||||
"AttachedMulti",
|
||||
"AttachedStale",
|
||||
"Secondary",
|
||||
"Detached",
|
||||
"_Evictions",
|
||||
"_Restart",
|
||||
]
|
||||
|
||||
mode = rng.choice(modes)
|
||||
|
||||
last_state_ps = last_state[pageserver.id]
|
||||
if mode == "_Evictions":
|
||||
if last_state_ps[0].startswith("Attached"):
|
||||
log.info(f"Action: evictions on pageserver {pageserver.id}")
|
||||
evict_random_layers(rng, pageserver, tenant_id, timeline_id)
|
||||
else:
|
||||
log.info(
|
||||
f"Action: skipping evictions on pageserver {pageserver.id}, is not attached"
|
||||
)
|
||||
elif mode == "_Restart":
|
||||
log.info(f"Action: restarting pageserver {pageserver.id}")
|
||||
pageserver.stop()
|
||||
pageserver.start()
|
||||
if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id:
|
||||
log.info("Entering postgres...")
|
||||
workload.churn_rows(rng.randint(128, 256), pageserver.id)
|
||||
workload.validate(pageserver.id)
|
||||
elif last_state_ps[0].startswith("Attached"):
|
||||
# The `attachment_service` will only re-attach on startup when a pageserver was the
|
||||
# holder of the latest generation: otherwise the pageserver will revert to detached
|
||||
# state if it was running attached with a stale generation
|
||||
last_state[pageserver.id] = ("Detached", None)
|
||||
else:
|
||||
secondary_conf: Optional[Dict[str, Any]] = None
|
||||
if mode == "Secondary":
|
||||
secondary_conf = {"warm": rng.choice([True, False])}
|
||||
|
||||
location_conf: Dict[str, Any] = {
|
||||
"mode": mode,
|
||||
"secondary_conf": secondary_conf,
|
||||
"tenant_conf": {},
|
||||
}
|
||||
|
||||
log.info(f"Action: Configuring pageserver {pageserver.id} to {location_conf}")
|
||||
|
||||
# Select a generation number
|
||||
if mode.startswith("Attached"):
|
||||
if last_state_ps[1] is not None:
|
||||
if rng.choice([True, False]):
|
||||
# Move between attached states, staying in the same generation
|
||||
generation = last_state_ps[1]
|
||||
else:
|
||||
# Switch generations, while also jumping between attached states
|
||||
generation = env.attachment_service.attach_hook_issue(
|
||||
tenant_id, pageserver.id
|
||||
)
|
||||
latest_attached = pageserver.id
|
||||
else:
|
||||
generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
|
||||
latest_attached = pageserver.id
|
||||
else:
|
||||
generation = None
|
||||
|
||||
location_conf["generation"] = generation
|
||||
|
||||
pageserver.tenant_location_configure(tenant_id, location_conf)
|
||||
last_state[pageserver.id] = (mode, generation)
|
||||
|
||||
if mode.startswith("Attached"):
|
||||
# This is a basic test: we are validating that he endpoint works properly _between_
|
||||
# configuration changes. A stronger test would be to validate that clients see
|
||||
# no errors while we are making the changes.
|
||||
workload.churn_rows(
|
||||
rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
|
||||
)
|
||||
workload.validate(pageserver.id)
|
||||
|
||||
# Attach all pageservers
|
||||
for ps in env.pageservers:
|
||||
location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
|
||||
ps.tenant_location_configure(tenant_id, location_conf)
|
||||
|
||||
# Confirm that all are readable
|
||||
for ps in env.pageservers:
|
||||
workload.validate(ps.id)
|
||||
|
||||
# Detach all pageservers
|
||||
for ps in env.pageservers:
|
||||
location_conf = {"mode": "Detached", "secondary_conf": None, "tenant_conf": {}}
|
||||
ps.tenant_location_configure(tenant_id, location_conf)
|
||||
|
||||
# Confirm that all local disk state was removed on detach
|
||||
# TODO
|
||||
|
||||
|
||||
def test_live_migration(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test the sequence of location states that are used in a live migration.
|
||||
"""
|
||||
neon_env_builder.enable_generations = True
|
||||
neon_env_builder.num_pageservers = 2
|
||||
neon_env_builder.enable_pageserver_remote_storage(
|
||||
remote_storage_kind=RemoteStorageKind.MOCK_S3,
|
||||
)
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
|
||||
assert env.attachment_service is not None
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
pageserver_a = env.pageservers[0]
|
||||
pageserver_b = env.pageservers[1]
|
||||
|
||||
initial_generation = 1
|
||||
|
||||
workload = Workload(env, tenant_id, timeline_id)
|
||||
workload.init(env.pageservers[0].id)
|
||||
workload.write_rows(256, env.pageservers[0].id)
|
||||
|
||||
# Make the destination a secondary location
|
||||
pageserver_b.tenant_location_configure(
|
||||
tenant_id,
|
||||
{
|
||||
"mode": "Secondary",
|
||||
"secondary_conf": {"warm": True},
|
||||
"tenant_conf": {},
|
||||
},
|
||||
)
|
||||
|
||||
workload.churn_rows(64, pageserver_a.id, upload=False)
|
||||
|
||||
# Set origin attachment to stale
|
||||
log.info("Setting origin to AttachedStale")
|
||||
pageserver_a.tenant_location_configure(
|
||||
tenant_id,
|
||||
{
|
||||
"mode": "AttachedStale",
|
||||
"secondary_conf": None,
|
||||
"tenant_conf": {},
|
||||
"generation": initial_generation,
|
||||
},
|
||||
flush_ms=5000,
|
||||
)
|
||||
|
||||
migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
|
||||
log.info(f"Acquired generation {migrated_generation} for destination pageserver")
|
||||
assert migrated_generation == initial_generation + 1
|
||||
|
||||
# Writes and reads still work in AttachedStale.
|
||||
workload.validate(pageserver_a.id)
|
||||
|
||||
# TODO: call into secondary mode API hooks to do an upload/download sync
|
||||
|
||||
# Generate some more dirty writes: we expect the origin to ingest WAL in
|
||||
# in AttachedStale
|
||||
workload.churn_rows(64, pageserver_a.id, upload=False)
|
||||
workload.validate(pageserver_a.id)
|
||||
|
||||
# Attach the destination
|
||||
log.info("Setting destination to AttachedMulti")
|
||||
pageserver_b.tenant_location_configure(
|
||||
tenant_id,
|
||||
{
|
||||
"mode": "AttachedMulti",
|
||||
"secondary_conf": None,
|
||||
"tenant_conf": {},
|
||||
"generation": migrated_generation,
|
||||
},
|
||||
)
|
||||
|
||||
# Wait for destination LSN to catch up with origin
|
||||
origin_lsn = pageserver_a.http_client().timeline_detail(tenant_id, timeline_id)[
|
||||
"last_record_lsn"
|
||||
]
|
||||
|
||||
def caught_up():
|
||||
destination_lsn = pageserver_b.http_client().timeline_detail(tenant_id, timeline_id)[
|
||||
"last_record_lsn"
|
||||
]
|
||||
log.info(
|
||||
f"Waiting for LSN to catch up: origin {origin_lsn} vs destination {destination_lsn}"
|
||||
)
|
||||
assert destination_lsn >= origin_lsn
|
||||
|
||||
wait_until(100, 0.1, caught_up)
|
||||
|
||||
# The destination should accept writes
|
||||
workload.churn_rows(64, pageserver_b.id)
|
||||
|
||||
# Dual attached: both are readable.
|
||||
workload.validate(pageserver_a.id)
|
||||
workload.validate(pageserver_b.id)
|
||||
|
||||
# Revert the origin to secondary
|
||||
log.info("Setting origin to Secondary")
|
||||
pageserver_a.tenant_location_configure(
|
||||
tenant_id,
|
||||
{
|
||||
"mode": "Secondary",
|
||||
"secondary_conf": {"warm": True},
|
||||
"tenant_conf": {},
|
||||
},
|
||||
)
|
||||
|
||||
workload.churn_rows(64, pageserver_b.id)
|
||||
|
||||
# Put the destination into final state
|
||||
pageserver_b.tenant_location_configure(
|
||||
tenant_id,
|
||||
{
|
||||
"mode": "AttachedSingle",
|
||||
"secondary_conf": None,
|
||||
"tenant_conf": {},
|
||||
"generation": migrated_generation,
|
||||
},
|
||||
)
|
||||
|
||||
workload.churn_rows(64, pageserver_b.id)
|
||||
workload.validate(pageserver_b.id)
|
||||
@@ -73,19 +73,20 @@ def test_remote_storage_backup_and_restore(
|
||||
##### First start, insert data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# FIXME: Is this expected?
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
".*No timelines to attach received.*",
|
||||
".*Failed to get local tenant state.*",
|
||||
# FIXME retry downloads without throwing errors
|
||||
".*failed to load remote timeline.*",
|
||||
# we have a bunch of pytest.raises for these below
|
||||
".*tenant .*? already exists, state:.*",
|
||||
".*tenant directory already exists.*",
|
||||
".*simulated failure of remote operation.*",
|
||||
]
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
|
||||
# FIXME retry downloads without throwing errors
|
||||
env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
|
||||
# we have a bunch of pytest.raises for these below
|
||||
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
|
||||
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
|
||||
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
@@ -395,13 +395,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
|
||||
env.start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Timeline got dropped without initializing, cleaning its files"
|
||||
)
|
||||
# the response hit_pausable_failpoint_and_later_fail
|
||||
env.pageserver.allowed_errors.append(
|
||||
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
|
||||
".*Timeline got dropped without initializing, cleaning its files",
|
||||
# the response hit_pausable_failpoint_and_later_fail
|
||||
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn",
|
||||
]
|
||||
)
|
||||
|
||||
env.pageserver.tenant_create(env.initial_tenant)
|
||||
|
||||
@@ -307,10 +307,14 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
bogus_timeline_id = TimelineId.generate()
|
||||
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
|
||||
|
||||
# the error will be printed to the log too
|
||||
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
|
||||
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
|
||||
env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# the error will be printed to the log too
|
||||
".*gc target timeline does not exist.*",
|
||||
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
|
||||
".*InternalServerError\\(timeline is Stopping.*",
|
||||
]
|
||||
)
|
||||
|
||||
# Detach while running manual GC.
|
||||
# It should wait for manual GC to finish because it runs in a task associated with the tenant.
|
||||
|
||||
@@ -216,16 +216,17 @@ def test_tenant_relocation(
|
||||
|
||||
tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageservers[0].allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageservers[0].allowed_errors.extend(
|
||||
[
|
||||
# FIXME: Is this expected?
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
# Needed for detach polling on the original pageserver
|
||||
f".*NotFound: tenant {tenant_id}.*",
|
||||
# We will dual-attach in this test, so stale generations are expected
|
||||
".*Dropped remote consistent LSN updates.*",
|
||||
]
|
||||
)
|
||||
|
||||
# Needed for detach polling on the original pageserver
|
||||
env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
|
||||
# We will dual-attach in this test, so stale generations are expected
|
||||
env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*")
|
||||
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
# we use two branches to check that they are both relocated
|
||||
|
||||
@@ -117,10 +117,12 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder):
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# FIXME: Are these expected?
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
# FIXME: Are these expected?
|
||||
".*No timelines to attach received.*",
|
||||
".*marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
]
|
||||
)
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
@@ -218,13 +220,14 @@ def test_tenant_redownloads_truncated_file_on_startup(
|
||||
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
env.pageserver.allowed_errors.append(".*removing local file .* because .*")
|
||||
|
||||
# FIXME: Are these expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*removing local file .* because .*",
|
||||
# FIXME: Are these expected?
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
|
||||
".*No timelines to attach received.*",
|
||||
]
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
@@ -36,12 +36,13 @@ def test_threshold_based_eviction(
|
||||
".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*"
|
||||
)
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.allowed_errors.append(metrics_refused_log_line)
|
||||
|
||||
# these can happen whenever we run consumption metrics collection
|
||||
env.pageserver.allowed_errors.append(r".*failed to calculate logical size at \S+: cancelled")
|
||||
env.pageserver.allowed_errors.append(
|
||||
r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
metrics_refused_log_line,
|
||||
# these can happen whenever we run consumption metrics collection
|
||||
r".*failed to calculate logical size at \S+: cancelled",
|
||||
r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_id, timeline_id = env.initial_tenant, env.initial_timeline
|
||||
|
||||
@@ -39,10 +39,14 @@ from urllib3.util.retry import Retry
|
||||
def test_timeline_delete(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
|
||||
env.pageserver.allowed_errors.append(".*timeline not found.*")
|
||||
env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
|
||||
env.pageserver.allowed_errors.append(".*Precondition failed: Requested tenant is missing.*")
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*Timeline .* was not found.*",
|
||||
".*timeline not found.*",
|
||||
".*Cannot delete timeline which has child timelines.*",
|
||||
".*Precondition failed: Requested tenant is missing.*",
|
||||
]
|
||||
)
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
@@ -198,22 +202,22 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
|
||||
),
|
||||
)
|
||||
|
||||
env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
|
||||
# It appears when we stopped flush loop during deletion and then pageserver is stopped
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
f".*{timeline_id}.*failpoint: {failpoint}",
|
||||
# It appears when we stopped flush loop during deletion and then pageserver is stopped
|
||||
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
|
||||
# This happens when we fail before scheduling background operation.
|
||||
# Timeline is left in stopping state and retry tries to stop it again.
|
||||
".*Ignoring new state, equal to the existing one: Stopping",
|
||||
# This happens when we retry delete requests for broken timelines
|
||||
".*Ignoring state update Stopping for broken timeline",
|
||||
# This happens when timeline remains are cleaned up during loading
|
||||
".*Timeline dir entry become invalid.*",
|
||||
# In one of the branches we poll for tenant to become active. Polls can generate this log message:
|
||||
f".*Tenant {env.initial_tenant} is not active*",
|
||||
]
|
||||
)
|
||||
# This happens when we fail before scheduling background operation.
|
||||
# Timeline is left in stopping state and retry tries to stop it again.
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Ignoring new state, equal to the existing one: Stopping"
|
||||
)
|
||||
# This happens when we retry delete requests for broken timelines
|
||||
env.pageserver.allowed_errors.append(".*Ignoring state update Stopping for broken timeline")
|
||||
# This happens when timeline remains are cleaned up during loading
|
||||
env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*")
|
||||
# In one of the branches we poll for tenant to become active. Polls can generate this log message:
|
||||
env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*")
|
||||
|
||||
ps_http.configure_failpoints((failpoint, "return"))
|
||||
|
||||
@@ -398,13 +402,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.pageserver.allowed_errors.append(".*failpoint: timeline-delete-before-rm")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Ignoring new state, equal to the existing one: Stopping"
|
||||
)
|
||||
# this happens, because the stuck timeline is visible to shutdown
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*failpoint: timeline-delete-before-rm",
|
||||
".*Ignoring new state, equal to the existing one: Stopping",
|
||||
# this happens, because the stuck timeline is visible to shutdown
|
||||
".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
|
||||
]
|
||||
)
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
@@ -551,10 +555,12 @@ def test_concurrent_timeline_delete_stuck_on(
|
||||
with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
|
||||
ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
|
||||
assert second_call_err.value.status_code == 409
|
||||
env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
|
||||
# the second call will try to transition the timeline into Stopping state as well
|
||||
env.pageserver.allowed_errors.append(
|
||||
f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
f".*{child_timeline_id}.*{error_msg_re}.*",
|
||||
# the second call will try to transition the timeline into Stopping state as well
|
||||
f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping",
|
||||
]
|
||||
)
|
||||
log.info("second call failed as expected")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user