mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 16:10:37 +00:00
Compare commits
1 Commits
problame/r
...
alexk/tmp-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2394c9c16d |
2
.github/workflows/build_and_test.yml
vendored
2
.github/workflows/build_and_test.yml
vendored
@@ -824,7 +824,7 @@ jobs:
|
||||
- pg: v17
|
||||
debian: bookworm
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.46.0
|
||||
VM_BUILDER_VERSION: v0.42.2
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
|
||||
68
Cargo.lock
generated
68
Cargo.lock
generated
@@ -2491,18 +2491,6 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"r-efi",
|
||||
"wasi 0.14.2+wasi-0.2.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gettid"
|
||||
version = "0.1.3"
|
||||
@@ -5271,12 +5259,6 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "r-efi"
|
||||
version = "5.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
@@ -5301,16 +5283,6 @@ dependencies = [
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
|
||||
dependencies = [
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_core 0.9.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.2.2"
|
||||
@@ -5331,16 +5303,6 @@ dependencies = [
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.9.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.5.1"
|
||||
@@ -5359,15 +5321,6 @@ dependencies = [
|
||||
"getrandom 0.2.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
|
||||
dependencies = [
|
||||
"getrandom 0.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_distr"
|
||||
version = "0.4.3"
|
||||
@@ -7244,11 +7197,11 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-epoll-uring"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"nix 0.26.4",
|
||||
"once_cell",
|
||||
"rand 0.9.1",
|
||||
"scopeguard",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
@@ -7855,6 +7808,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "uring-common"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"io-uring",
|
||||
@@ -8096,15 +8050,6 @@ version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.14.2+wasi-0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
|
||||
dependencies = [
|
||||
"wit-bindgen-rt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasite"
|
||||
version = "0.1.0"
|
||||
@@ -8462,15 +8407,6 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rt"
|
||||
version = "0.39.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "workspace_hack"
|
||||
version = "0.1.0"
|
||||
|
||||
@@ -187,8 +187,7 @@ thiserror = "1.0"
|
||||
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
|
||||
tokio = { version = "1.43.1", features = ["macros"] }
|
||||
#tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
tokio-epoll-uring = { path = "../tokio-epoll-uring/tokio-epoll-uring" }
|
||||
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
tokio-io-timeout = "1.2.0"
|
||||
tokio-postgres-rustls = "0.12.0"
|
||||
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
|
||||
|
||||
@@ -1084,18 +1084,7 @@ RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions pgrx14"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "pg-onnx-build" and "pgrag-build"
|
||||
@@ -1111,11 +1100,11 @@ RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
echo "#nothing to test here" > neon-test.sh
|
||||
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.1.tar.gz -O pgrag.tar.gz && \
|
||||
echo "087b2ecd11ba307dc968042ef2e9e43dc04d9ba60e8306e882c407bbe1350a50 pgrag.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
|
||||
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM rust-extensions-build-pgrx14 AS pgrag-build
|
||||
FROM rust-extensions-build-pgrx12 AS pgrag-build
|
||||
COPY --from=pgrag-src /ext-src/ /ext-src/
|
||||
|
||||
# Install build-time dependencies
|
||||
@@ -1135,19 +1124,19 @@ RUN . venv/bin/activate && \
|
||||
|
||||
WORKDIR /ext-src/pgrag-src
|
||||
RUN cd exts/rag && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control
|
||||
|
||||
RUN cd exts/rag_bge_small_en_v15 && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control
|
||||
|
||||
RUN cd exts/rag_jina_reranker_v1_tiny_en && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
@@ -1316,8 +1305,8 @@ ARG PG_VERSION
|
||||
# Do not update without approve from proxy team
|
||||
# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs
|
||||
WORKDIR /ext-src
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.3.0.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "19be2dc0b3834d643706ed430af998bb4c2cdf24b3c45e7b102bb3a550e8660c pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/heads/alexk/tmp-disable-audit.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "b868227950973df5186af54dfa03617536d21e96dddcb7b25e7ce199b4956bdb pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/version = "0.12.6"/version = "0.12.9"/g' pgrx-tests/Cargo.toml && \
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
|
||||
import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
|
||||
import 'sql_exporter/getpage_sync_requests_total.libsonnet',
|
||||
import 'sql_exporter/compute_getpage_stuck_requests_total.libsonnet',
|
||||
import 'sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
metric_name: 'compute_getpage_max_inflight_stuck_time_ms',
|
||||
type: 'gauge',
|
||||
help: 'Max wait time for stuck requests among all backends. Includes only active stuck requests, terminated or disconnected ones are not accounted for',
|
||||
values: [
|
||||
'compute_getpage_max_inflight_stuck_time_ms',
|
||||
],
|
||||
query_ref: 'neon_perf_counters',
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
metric_name: 'compute_getpage_stuck_requests_total',
|
||||
type: 'counter',
|
||||
help: 'Total number of Getpage requests left without an answer for more than pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout',
|
||||
values: [
|
||||
'compute_getpage_stuck_requests_total',
|
||||
],
|
||||
query_ref: 'neon_perf_counters',
|
||||
}
|
||||
@@ -9,8 +9,6 @@ SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
|
||||
getpage_wait_seconds_sum numeric,
|
||||
getpage_prefetch_requests_total numeric,
|
||||
getpage_sync_requests_total numeric,
|
||||
compute_getpage_stuck_requests_total numeric,
|
||||
compute_getpage_max_inflight_stuck_time_ms numeric,
|
||||
getpage_prefetch_misses_total numeric,
|
||||
getpage_prefetch_discards_total numeric,
|
||||
getpage_prefetches_buffered numeric,
|
||||
|
||||
@@ -38,6 +38,11 @@ Currently, the following metrics are collected:
|
||||
Amount of WAL produced , by a timeline, i.e. last_record_lsn
|
||||
This is an absolute, per-timeline metric.
|
||||
|
||||
- `resident_size`
|
||||
|
||||
Size of all the layer files in the tenant's directory on disk on the pageserver.
|
||||
This is an absolute, per-tenant metric.
|
||||
|
||||
- `remote_storage_size`
|
||||
|
||||
Size of the remote storage (S3) directory.
|
||||
|
||||
@@ -841,10 +841,6 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
|
||||
let expected_end = match &end {
|
||||
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,
|
||||
// The timeline doesn't exist and we have been requested to not auto-create it.
|
||||
// Compute requests for timelines that haven't been created yet
|
||||
// might reach us before the storcon request to create those timelines.
|
||||
TimelineNoCreate => true,
|
||||
CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
@@ -1063,8 +1059,6 @@ pub enum CopyStreamHandlerEnd {
|
||||
Terminate,
|
||||
#[error("EOF on COPY stream")]
|
||||
EOF,
|
||||
#[error("timeline not found, and allow_timeline_creation is false")]
|
||||
TimelineNoCreate,
|
||||
/// The connection was lost
|
||||
#[error("connection error: {0}")]
|
||||
Disconnected(#[from] ConnectionError),
|
||||
|
||||
@@ -303,8 +303,7 @@ pub struct PullTimelineRequest {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PullTimelineResponse {
|
||||
/// Donor safekeeper host.
|
||||
/// None if no pull happened because the timeline already exists.
|
||||
pub safekeeper_host: Option<String>,
|
||||
// Donor safekeeper host
|
||||
pub safekeeper_host: String,
|
||||
// TODO: add more fields?
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) {
|
||||
let mut sigint = signal(SignalKind::interrupt()).unwrap();
|
||||
let mut sigterm = signal(SignalKind::terminate()).unwrap();
|
||||
let mut sigquit = signal(SignalKind::quit()).unwrap();
|
||||
let mut sigusr1 = signal(SignalKind::user_defined1()).unwrap();
|
||||
|
||||
loop {
|
||||
let signal = tokio::select! {
|
||||
@@ -56,17 +55,13 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) {
|
||||
}
|
||||
_ = sigint.recv() => "SIGINT",
|
||||
_ = sigterm.recv() => "SIGTERM",
|
||||
_ = sigusr1.recv() => {
|
||||
info!("Got signal SIGUSR1");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if !token.is_cancelled() {
|
||||
info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Terminating gracefully in fast shutdown mode.");
|
||||
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode.");
|
||||
token.cancel();
|
||||
} else {
|
||||
info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Already shutting down.");
|
||||
info!("Got signal {signal}. Already shutting down.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ pub(super) enum Name {
|
||||
/// Tenant remote size
|
||||
#[serde(rename = "remote_storage_size")]
|
||||
RemoteSize,
|
||||
/// Tenant resident size
|
||||
#[serde(rename = "resident_size")]
|
||||
ResidentSize,
|
||||
/// Tenant synthetic size
|
||||
#[serde(rename = "synthetic_storage_size")]
|
||||
SyntheticSize,
|
||||
@@ -184,6 +187,18 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// Sum of [`Timeline::resident_physical_size`] for each `Tenant`.
|
||||
///
|
||||
/// [`Timeline::resident_physical_size`]: crate::tenant::Timeline::resident_physical_size
|
||||
const fn resident_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
timeline_id: None,
|
||||
metric: Name::ResidentSize,
|
||||
}
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
///
|
||||
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||
@@ -246,7 +261,10 @@ where
|
||||
let mut tenants = std::pin::pin!(tenants);
|
||||
|
||||
while let Some((tenant_id, tenant)) = tenants.next().await {
|
||||
let mut tenant_resident_size = 0;
|
||||
|
||||
let timelines = tenant.list_timelines();
|
||||
let timelines_len = timelines.len();
|
||||
for timeline in timelines {
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
@@ -269,9 +287,16 @@ where
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
tenant_resident_size += timeline.resident_physical_size();
|
||||
}
|
||||
|
||||
let snap = TenantSnapshot::collect(&tenant);
|
||||
if timelines_len == 0 {
|
||||
// Force set it to 1 byte to avoid not being reported -- all timelines are offloaded.
|
||||
tenant_resident_size = 1;
|
||||
}
|
||||
|
||||
let snap = TenantSnapshot::collect(&tenant, tenant_resident_size);
|
||||
snap.to_metrics(tenant_id, Utc::now(), cache, &mut current_metrics);
|
||||
}
|
||||
|
||||
@@ -280,14 +305,19 @@ where
|
||||
|
||||
/// In-between abstraction to allow testing metrics without actual Tenants.
|
||||
struct TenantSnapshot {
|
||||
resident_size: u64,
|
||||
remote_size: u64,
|
||||
synthetic_size: u64,
|
||||
}
|
||||
|
||||
impl TenantSnapshot {
|
||||
/// Collect tenant status to have metrics created out of it.
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>) -> Self {
|
||||
///
|
||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||
/// cannot just list timelines here.
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||
TenantSnapshot {
|
||||
resident_size,
|
||||
remote_size: t.remote_size(),
|
||||
// Note that this metric is calculated in a separate bgworker
|
||||
// Here we only use cached value, which may lag behind the real latest one
|
||||
@@ -304,6 +334,8 @@ impl TenantSnapshot {
|
||||
) {
|
||||
let remote_size = MetricsKey::remote_storage_size(tenant_id).at(now, self.remote_size);
|
||||
|
||||
let resident_size = MetricsKey::resident_size(tenant_id).at(now, self.resident_size);
|
||||
|
||||
let synthetic_size = {
|
||||
let factory = MetricsKey::synthetic_size(tenant_id);
|
||||
let mut synthetic_size = self.synthetic_size;
|
||||
@@ -323,7 +355,11 @@ impl TenantSnapshot {
|
||||
}
|
||||
};
|
||||
|
||||
metrics.extend([Some(remote_size), synthetic_size].into_iter().flatten());
|
||||
metrics.extend(
|
||||
[Some(remote_size), Some(resident_size), synthetic_size]
|
||||
.into_iter()
|
||||
.flatten(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -224,6 +224,7 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
let ts = TenantSnapshot {
|
||||
resident_size: 1000,
|
||||
remote_size: 1000,
|
||||
// not yet calculated
|
||||
synthetic_size: 0,
|
||||
@@ -244,6 +245,7 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
|
||||
metrics,
|
||||
&[
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::synthetic_size(tenant_id).at(now, 1000),
|
||||
]
|
||||
);
|
||||
@@ -254,6 +256,7 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
let ts = TenantSnapshot {
|
||||
resident_size: 1000,
|
||||
remote_size: 1000,
|
||||
// not yet calculated
|
||||
synthetic_size: 0,
|
||||
@@ -271,6 +274,7 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
|
||||
metrics,
|
||||
&[
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 1000),
|
||||
// no synthetic size here
|
||||
]
|
||||
);
|
||||
@@ -291,13 +295,14 @@ pub(crate) const fn metric_examples_old(
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [RawMetric; 5] {
|
||||
) -> [RawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until_old_format(before, now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
|
||||
]
|
||||
}
|
||||
@@ -307,12 +312,13 @@ pub(crate) const fn metric_examples(
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [NewRawMetric; 5] {
|
||||
) -> [NewRawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 0),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at(now, 1),
|
||||
]
|
||||
}
|
||||
|
||||
@@ -521,6 +521,10 @@ mod tests {
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"remote_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"resident_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"synthetic_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":1,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
@@ -560,7 +564,7 @@ mod tests {
|
||||
assert_eq!(upgraded_samples, new_samples);
|
||||
}
|
||||
|
||||
fn metric_samples_old() -> [RawMetric; 5] {
|
||||
fn metric_samples_old() -> [RawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
@@ -572,7 +576,7 @@ mod tests {
|
||||
super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
|
||||
}
|
||||
|
||||
fn metric_samples() -> [NewRawMetric; 5] {
|
||||
fn metric_samples() -> [NewRawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
|
||||
@@ -497,24 +497,6 @@ pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::n
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static ONDEMAND_DOWNLOAD_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_ondemand_download_bytes_total",
|
||||
"Total bytes of layers on-demand downloaded",
|
||||
&["task_kind"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static ONDEMAND_DOWNLOAD_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_ondemand_download_count",
|
||||
"Total count of layers on-demand downloaded",
|
||||
&["task_kind"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) mod wait_ondemand_download_time {
|
||||
use super::*;
|
||||
const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
|
||||
@@ -2198,10 +2180,6 @@ impl BasebackupQueryTimeOngoingRecording<'_> {
|
||||
// If you want to change categorize of a specific error, also change it in `log_query_error`.
|
||||
let metric = match res {
|
||||
Ok(_) => &self.parent.ok,
|
||||
Err(QueryError::Shutdown) => {
|
||||
// Do not observe ok/err for shutdown
|
||||
return;
|
||||
}
|
||||
Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
|
||||
@@ -1035,25 +1035,10 @@ impl PageServerHandler {
|
||||
// avoid a somewhat costly Span::record() by constructing the entire span in one go.
|
||||
macro_rules! mkspan {
|
||||
(before shard routing) => {{
|
||||
tracing::info_span!(
|
||||
parent: &parent_span,
|
||||
"handle_get_page_request",
|
||||
rel = %req.rel,
|
||||
blkno = %req.blkno,
|
||||
req_lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since
|
||||
)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
|
||||
}};
|
||||
($shard_id:expr) => {{
|
||||
tracing::info_span!(
|
||||
parent: &parent_span,
|
||||
"handle_get_page_request",
|
||||
rel = %req.rel,
|
||||
blkno = %req.blkno,
|
||||
req_lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since,
|
||||
shard_id = %$shard_id
|
||||
)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -1117,7 +1102,6 @@ impl PageServerHandler {
|
||||
shard_id = %shard.get_shard_identity().shard_slug(),
|
||||
timeline_id = %timeline_id,
|
||||
lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since,
|
||||
request_id = %req.hdr.reqid,
|
||||
key = %key,
|
||||
)
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::sync::{Arc, Weak};
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use crate::metrics::{ONDEMAND_DOWNLOAD_BYTES, ONDEMAND_DOWNLOAD_COUNT};
|
||||
use anyhow::Context;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
@@ -1256,14 +1255,6 @@ impl LayerInner {
|
||||
|
||||
self.access_stats.record_residence_event();
|
||||
|
||||
let task_kind: &'static str = ctx.task_kind().into();
|
||||
ONDEMAND_DOWNLOAD_BYTES
|
||||
.with_label_values(&[task_kind])
|
||||
.inc_by(self.desc.file_size);
|
||||
ONDEMAND_DOWNLOAD_COUNT
|
||||
.with_label_values(&[task_kind])
|
||||
.inc();
|
||||
|
||||
Ok(self.initialize_after_layer_is_on_disk(permit))
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -76,13 +76,12 @@ pub async fn thread_local_system() -> Handle {
|
||||
)
|
||||
.await;
|
||||
let per_system_metrics = metrics::THREAD_LOCAL_METRICS_STORAGE.register_system(inner.thread_local_state_id);
|
||||
info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "launching system");
|
||||
let res = System::launch_with_metrics(per_system_metrics, usize::try_from(inner.thread_local_state_id).unwrap())
|
||||
let res = System::launch_with_metrics(per_system_metrics)
|
||||
// this might move us to another executor thread => loop outside the get_or_try_init, not inside it
|
||||
.await;
|
||||
match res {
|
||||
Ok(system) => {
|
||||
info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "successfully launched system");
|
||||
info!("successfully launched system");
|
||||
metrics::THREAD_LOCAL_LAUNCH_SUCCESSES.inc();
|
||||
Ok(system)
|
||||
}
|
||||
|
||||
@@ -687,14 +687,8 @@ prefetch_wait_for(uint64 ring_index)
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
}
|
||||
if (result)
|
||||
{
|
||||
/* Check that slot is actually received (srver can be disconnected in prefetch_pump_state called from CHECK_FOR_INTERRUPTS */
|
||||
PrefetchRequest *slot = GetPrfSlot(ring_index);
|
||||
return slot->status == PRFS_RECEIVED;
|
||||
}
|
||||
return false;
|
||||
;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -877,7 +877,6 @@ retry:
|
||||
int port;
|
||||
int sndbuf;
|
||||
int recvbuf;
|
||||
uint64* max_wait;
|
||||
|
||||
get_local_port(PQsocket(pageserver_conn), &port);
|
||||
get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf);
|
||||
@@ -888,10 +887,7 @@ retry:
|
||||
shard->nrequests_sent, shard->nresponses_received, port, sndbuf, recvbuf,
|
||||
pageserver_conn->inStart, pageserver_conn->inEnd);
|
||||
shard->receive_last_log_time = now;
|
||||
MyNeonCounters->compute_getpage_stuck_requests_total += !shard->receive_logged;
|
||||
shard->receive_logged = true;
|
||||
max_wait = &MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms;
|
||||
*max_wait = Max(*max_wait, INSTR_TIME_GET_MILLISEC(since_start));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -914,7 +910,6 @@ retry:
|
||||
get_local_port(PQsocket(pageserver_conn), &port);
|
||||
neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting (socket port=%d)",
|
||||
INSTR_TIME_GET_DOUBLE(since_start), port);
|
||||
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
|
||||
pageserver_disconnect(shard_no);
|
||||
return -1;
|
||||
}
|
||||
@@ -938,7 +933,6 @@ retry:
|
||||
INSTR_TIME_SET_ZERO(shard->receive_start_time);
|
||||
INSTR_TIME_SET_ZERO(shard->receive_last_log_time);
|
||||
shard->receive_logged = false;
|
||||
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ histogram_to_metrics(IOHistogram histogram,
|
||||
static metric_t *
|
||||
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||
{
|
||||
#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 12)
|
||||
#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 10)
|
||||
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
|
||||
int i = 0;
|
||||
|
||||
@@ -166,8 +166,6 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||
|
||||
APPEND_METRIC(getpage_prefetch_requests_total);
|
||||
APPEND_METRIC(getpage_sync_requests_total);
|
||||
APPEND_METRIC(compute_getpage_stuck_requests_total);
|
||||
APPEND_METRIC(compute_getpage_max_inflight_stuck_time_ms);
|
||||
APPEND_METRIC(getpage_prefetch_misses_total);
|
||||
APPEND_METRIC(getpage_prefetch_discards_total);
|
||||
APPEND_METRIC(pageserver_requests_sent_total);
|
||||
@@ -296,11 +294,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
|
||||
totals.file_cache_hits_total += counters->file_cache_hits_total;
|
||||
histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
|
||||
histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
|
||||
|
||||
totals.compute_getpage_stuck_requests_total += counters->compute_getpage_stuck_requests_total;
|
||||
totals.compute_getpage_max_inflight_stuck_time_ms = Max(
|
||||
totals.compute_getpage_max_inflight_stuck_time_ms,
|
||||
counters->compute_getpage_max_inflight_stuck_time_ms);
|
||||
}
|
||||
|
||||
metrics = neon_perf_counters_to_metrics(&totals);
|
||||
|
||||
@@ -57,18 +57,6 @@ typedef struct
|
||||
uint64 getpage_prefetch_requests_total;
|
||||
uint64 getpage_sync_requests_total;
|
||||
|
||||
/*
|
||||
* Total number of Getpage requests left without an answer for more than
|
||||
* pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout
|
||||
*/
|
||||
uint64 compute_getpage_stuck_requests_total;
|
||||
|
||||
/*
|
||||
* Longest waiting time for active stuck requests. If a stuck request gets a
|
||||
* response or disconnects, this metric is updated
|
||||
*/
|
||||
uint64 compute_getpage_max_inflight_stuck_time_ms;
|
||||
|
||||
/*
|
||||
* Total number of readahead misses; consisting of either prefetches that
|
||||
* don't satisfy the LSN bounds, or cases where no readahead was issued
|
||||
|
||||
@@ -836,7 +836,7 @@ TermsCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInf
|
||||
{
|
||||
uint32 n_greeted = 0;
|
||||
|
||||
for (uint32 i = 0; i < mset->len; i++)
|
||||
for (uint32 i = 0; i < wp->mconf.members.len; i++)
|
||||
{
|
||||
Safekeeper *sk = msk[i];
|
||||
|
||||
@@ -1106,7 +1106,7 @@ VotesCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInf
|
||||
{
|
||||
uint32 n_votes = 0;
|
||||
|
||||
for (uint32 i = 0; i < mset->len; i++)
|
||||
for (uint32 i = 0; i < wp->mconf.members.len; i++)
|
||||
{
|
||||
Safekeeper *sk = msk[i];
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
char *wal_acceptors_list = "";
|
||||
int wal_acceptor_reconnect_timeout = 1000;
|
||||
int wal_acceptor_connection_timeout = 10000;
|
||||
int safekeeper_proto_version = 3;
|
||||
int safekeeper_proto_version = 2;
|
||||
|
||||
/* Set to true in the walproposer bgw. */
|
||||
static bool am_walproposer;
|
||||
@@ -228,7 +228,7 @@ nwp_register_gucs(void)
|
||||
"Version of compute <-> safekeeper protocol.",
|
||||
"Used while migrating from 2 to 3.",
|
||||
&safekeeper_proto_version,
|
||||
3, 0, INT_MAX,
|
||||
2, 0, INT_MAX,
|
||||
PGC_POSTMASTER,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
@@ -32,7 +32,7 @@ To play with it locally one may start proxy over a local postgres installation
|
||||
(see end of this page on how to generate certs with openssl):
|
||||
|
||||
```
|
||||
LOGFMT=text ./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444
|
||||
./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444
|
||||
```
|
||||
|
||||
If both postgres and proxy are running you may send a SQL query:
|
||||
@@ -130,7 +130,7 @@ openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key
|
||||
|
||||
Then we need to build proxy with 'testing' feature and run, e.g.:
|
||||
```sh
|
||||
RUST_LOG=proxy LOGFMT=text cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
```
|
||||
|
||||
Now from client you can start a new session:
|
||||
|
||||
@@ -132,10 +132,11 @@ impl Drop for LoggingGuard {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: make JSON the default
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)]
|
||||
enum LogFormat {
|
||||
Text,
|
||||
#[default]
|
||||
Text = 1,
|
||||
Json,
|
||||
}
|
||||
|
||||
|
||||
@@ -401,10 +401,7 @@ pub async fn handle_request(
|
||||
request.timeline_id,
|
||||
));
|
||||
if existing_tli.is_ok() {
|
||||
info!("Timeline {} already exists", request.timeline_id);
|
||||
return Ok(PullTimelineResponse {
|
||||
safekeeper_host: None,
|
||||
});
|
||||
bail!("Timeline {} already exists", request.timeline_id);
|
||||
}
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
@@ -428,25 +425,8 @@ pub async fn handle_request(
|
||||
|
||||
let mut statuses = Vec::new();
|
||||
for (i, response) in responses.into_iter().enumerate() {
|
||||
match response {
|
||||
Ok(status) => {
|
||||
statuses.push((status, i));
|
||||
}
|
||||
Err(e) => {
|
||||
info!("error fetching status from {}: {e}", http_hosts[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allow missing responses from up to one safekeeper (say due to downtime)
|
||||
// e.g. if we created a timeline on PS A and B, with C being offline. Then B goes
|
||||
// offline and C comes online. Then we want a pull on C with A and B as hosts to work.
|
||||
let min_required_successful = (http_hosts.len() - 1).max(1);
|
||||
if statuses.len() < min_required_successful {
|
||||
bail!(
|
||||
"only got {} successful status responses. required: {min_required_successful}",
|
||||
statuses.len()
|
||||
)
|
||||
let status = response.context(format!("fetching status from {}", http_hosts[i]))?;
|
||||
statuses.push((status, i));
|
||||
}
|
||||
|
||||
// Find the most advanced safekeeper
|
||||
@@ -556,6 +536,6 @@ async fn pull_timeline(
|
||||
.await?;
|
||||
|
||||
Ok(PullTimelineResponse {
|
||||
safekeeper_host: Some(host),
|
||||
safekeeper_host: host,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ use crate::metrics::{
|
||||
WAL_RECEIVERS,
|
||||
};
|
||||
use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage};
|
||||
use crate::timeline::{TimelineError, WalResidentTimeline};
|
||||
use crate::timeline::WalResidentTimeline;
|
||||
|
||||
const DEFAULT_FEEDBACK_CAPACITY: usize = 8;
|
||||
|
||||
@@ -357,14 +357,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'_, IO> {
|
||||
.await
|
||||
.context("create timeline")?
|
||||
} else {
|
||||
let timeline_res = self.global_timelines.get(self.ttid);
|
||||
match timeline_res {
|
||||
Ok(tl) => tl,
|
||||
Err(TimelineError::NotFound(_)) => {
|
||||
return Err(CopyStreamHandlerEnd::TimelineNoCreate);
|
||||
}
|
||||
other => other.context("get_timeline")?,
|
||||
}
|
||||
self.global_timelines
|
||||
.get(self.ttid)
|
||||
.context("get timeline")?
|
||||
};
|
||||
tli.wal_residence_guard().await?
|
||||
}
|
||||
|
||||
@@ -19,8 +19,7 @@ use storage_controller::service::chaos_injector::ChaosInjector;
|
||||
use storage_controller::service::{
|
||||
Config, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT,
|
||||
MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT,
|
||||
PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
|
||||
SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT, Service,
|
||||
PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, Service,
|
||||
};
|
||||
use tokio::signal::unix::SignalKind;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -133,10 +132,6 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
priority_reconciler_concurrency: Option<usize>,
|
||||
|
||||
/// Maximum number of safekeeper reconciliations that may run in parallel (per safekeeper)
|
||||
#[arg(long)]
|
||||
safekeeper_reconciler_concurrency: Option<usize>,
|
||||
|
||||
/// Tenant API rate limit, as requests per second per tenant.
|
||||
#[arg(long, default_value = "10")]
|
||||
tenant_rate_limit: NonZeroU32,
|
||||
@@ -408,9 +403,6 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
priority_reconciler_concurrency: args
|
||||
.priority_reconciler_concurrency
|
||||
.unwrap_or(PRIORITY_RECONCILER_CONCURRENCY_DEFAULT),
|
||||
safekeeper_reconciler_concurrency: args
|
||||
.safekeeper_reconciler_concurrency
|
||||
.unwrap_or(SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT),
|
||||
tenant_rate_limit: args.tenant_rate_limit,
|
||||
split_threshold: args.split_threshold,
|
||||
max_split_shards: args.max_split_shards,
|
||||
|
||||
@@ -194,7 +194,6 @@ pub(crate) enum LeadershipStatus {
|
||||
|
||||
pub const RECONCILER_CONCURRENCY_DEFAULT: usize = 128;
|
||||
pub const PRIORITY_RECONCILER_CONCURRENCY_DEFAULT: usize = 256;
|
||||
pub const SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT: usize = 32;
|
||||
|
||||
// Depth of the channel used to enqueue shards for reconciliation when they can't do it immediately.
|
||||
// This channel is finite-size to avoid using excessive memory if we get into a state where reconciles are finishing more slowly
|
||||
@@ -383,9 +382,6 @@ pub struct Config {
|
||||
/// How many high-priority Reconcilers may be spawned concurrently
|
||||
pub priority_reconciler_concurrency: usize,
|
||||
|
||||
/// How many safekeeper reconciles may happen concurrently (per safekeeper)
|
||||
pub safekeeper_reconciler_concurrency: usize,
|
||||
|
||||
/// How many API requests per second to allow per tenant, across all
|
||||
/// tenant-scoped API endpoints. Further API requests queue until ready.
|
||||
pub tenant_rate_limit: NonZeroU32,
|
||||
@@ -3663,7 +3659,7 @@ impl Service {
|
||||
locations: ShardMutationLocations,
|
||||
http_client: reqwest::Client,
|
||||
jwt: Option<String>,
|
||||
mut create_req: TimelineCreateRequest,
|
||||
create_req: TimelineCreateRequest,
|
||||
) -> Result<TimelineInfo, ApiError> {
|
||||
let latest = locations.latest.node;
|
||||
|
||||
@@ -3682,15 +3678,6 @@ impl Service {
|
||||
.await
|
||||
.map_err(|e| passthrough_api_error(&latest, e))?;
|
||||
|
||||
// If we are going to create the timeline on some stale locations for shard 0, then ask them to re-use
|
||||
// the initdb generated by the latest location, rather than generating their own. This avoids racing uploads
|
||||
// of initdb to S3 which might not be binary-identical if different pageservers have different postgres binaries.
|
||||
if tenant_shard_id.is_shard_zero() {
|
||||
if let models::TimelineCreateRequestMode::Bootstrap { existing_initdb_timeline_id, .. } = &mut create_req.mode {
|
||||
*existing_initdb_timeline_id = Some(create_req.new_timeline_id);
|
||||
}
|
||||
}
|
||||
|
||||
// We propagate timeline creations to all attached locations such that a compute
|
||||
// for the new timeline is able to start regardless of the current state of the
|
||||
// tenant shard reconciliation.
|
||||
|
||||
@@ -3,10 +3,7 @@ use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration};
|
||||
use clashmap::{ClashMap, Entry};
|
||||
use safekeeper_api::models::PullTimelineRequest;
|
||||
use safekeeper_client::mgmt_api;
|
||||
use tokio::sync::{
|
||||
Semaphore,
|
||||
mpsc::{self, UnboundedReceiver, UnboundedSender},
|
||||
};
|
||||
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::Instrument;
|
||||
use utils::{
|
||||
@@ -209,27 +206,18 @@ impl ReconcilerHandle {
|
||||
}
|
||||
|
||||
pub(crate) struct SafekeeperReconciler {
|
||||
inner: SafekeeperReconcilerInner,
|
||||
concurrency_limiter: Arc<Semaphore>,
|
||||
service: Arc<Service>,
|
||||
rx: UnboundedReceiver<(ScheduleRequest, CancellationToken)>,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
/// Thin wrapper over `Service` to not clutter its inherent functions
|
||||
#[derive(Clone)]
|
||||
struct SafekeeperReconcilerInner {
|
||||
service: Arc<Service>,
|
||||
}
|
||||
|
||||
impl SafekeeperReconciler {
|
||||
fn spawn(cancel: CancellationToken, service: Arc<Service>) -> ReconcilerHandle {
|
||||
// We hold the ServiceInner lock so we don't want to make sending to the reconciler channel to be blocking.
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let concurrency = service.config.safekeeper_reconciler_concurrency;
|
||||
let mut reconciler = SafekeeperReconciler {
|
||||
inner: SafekeeperReconcilerInner { service },
|
||||
service,
|
||||
rx,
|
||||
concurrency_limiter: Arc::new(Semaphore::new(concurrency)),
|
||||
cancel: cancel.clone(),
|
||||
};
|
||||
let handle = ReconcilerHandle {
|
||||
@@ -242,44 +230,31 @@ impl SafekeeperReconciler {
|
||||
}
|
||||
async fn run(&mut self) {
|
||||
loop {
|
||||
// TODO add parallelism with semaphore here
|
||||
let req = tokio::select! {
|
||||
req = self.rx.recv() => req,
|
||||
_ = self.cancel.cancelled() => break,
|
||||
};
|
||||
let Some((req, req_cancel)) = req else { break };
|
||||
|
||||
let permit_res = tokio::select! {
|
||||
req = self.concurrency_limiter.clone().acquire_owned() => req,
|
||||
_ = self.cancel.cancelled() => break,
|
||||
};
|
||||
let Ok(_permit) = permit_res else { return };
|
||||
|
||||
let inner = self.inner.clone();
|
||||
if req_cancel.is_cancelled() {
|
||||
continue;
|
||||
}
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
let kind = req.kind;
|
||||
let tenant_id = req.tenant_id;
|
||||
let timeline_id = req.timeline_id;
|
||||
let node_id = req.safekeeper.skp.id;
|
||||
inner
|
||||
.reconcile_one(req, req_cancel)
|
||||
.instrument(tracing::info_span!(
|
||||
"reconcile_one",
|
||||
?kind,
|
||||
%tenant_id,
|
||||
?timeline_id,
|
||||
%node_id,
|
||||
))
|
||||
.await;
|
||||
});
|
||||
let kind = req.kind;
|
||||
let tenant_id = req.tenant_id;
|
||||
let timeline_id = req.timeline_id;
|
||||
let node_id = req.safekeeper.skp.id;
|
||||
self.reconcile_one(req, req_cancel)
|
||||
.instrument(tracing::info_span!(
|
||||
"reconcile_one",
|
||||
?kind,
|
||||
%tenant_id,
|
||||
?timeline_id,
|
||||
%node_id,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SafekeeperReconcilerInner {
|
||||
async fn reconcile_one(&self, req: ScheduleRequest, req_cancel: CancellationToken) {
|
||||
let req_host = req.safekeeper.skp.host.clone();
|
||||
match req.kind {
|
||||
@@ -306,11 +281,10 @@ impl SafekeeperReconcilerInner {
|
||||
req,
|
||||
async |client| client.pull_timeline(&pull_req).await,
|
||||
|resp| {
|
||||
if let Some(host) = resp.safekeeper_host {
|
||||
tracing::info!("pulled timeline from {host} onto {req_host}");
|
||||
} else {
|
||||
tracing::info!("timeline already present on safekeeper on {req_host}");
|
||||
}
|
||||
tracing::info!(
|
||||
"pulled timeline from {} onto {req_host}",
|
||||
resp.safekeeper_host,
|
||||
);
|
||||
},
|
||||
req_cancel,
|
||||
)
|
||||
|
||||
@@ -2,8 +2,6 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
@@ -12,7 +10,6 @@ if TYPE_CHECKING:
|
||||
# while initial compute node is down and pageserver is lagging behind safekeepers.
|
||||
# Ensure that basebackup after restart of all components is correct
|
||||
# and new compute node contains all data.
|
||||
@pytest.mark.repeat(1000)
|
||||
def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
@@ -506,6 +506,7 @@ class SyntheticSizeVerifier:
|
||||
|
||||
PER_METRIC_VERIFIERS = {
|
||||
"remote_storage_size": CannotVerifyAnything,
|
||||
"resident_size": CannotVerifyAnything,
|
||||
"written_size": WrittenDataVerifier,
|
||||
"written_data_bytes_delta": WrittenDataDeltaVerifier,
|
||||
"timeline_logical_size": CannotVerifyAnything,
|
||||
|
||||
Reference in New Issue
Block a user