mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 16:10:37 +00:00
Compare commits
47 Commits
sk-capacit
...
problame/b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ebdf2784f | ||
|
|
0fd45db92e | ||
|
|
962af0ad6c | ||
|
|
192bbd6c54 | ||
|
|
0035cf840e | ||
|
|
2b01d661bd | ||
|
|
f51e608193 | ||
|
|
78a28f787c | ||
|
|
001a0e4006 | ||
|
|
daa2ea7ebe | ||
|
|
37e8eba57f | ||
|
|
f45882ef3c | ||
|
|
d16d02d61d | ||
|
|
947f6d9491 | ||
|
|
40441f8ada | ||
|
|
a8a39cd464 | ||
|
|
b989ad1922 | ||
|
|
acef742a6e | ||
|
|
11d9d801b5 | ||
|
|
fc47af156f | ||
|
|
e310533ed3 | ||
|
|
1d68f52b57 | ||
|
|
4cd47b7d4b | ||
|
|
0141c95788 | ||
|
|
0ac4cf67a6 | ||
|
|
4be6bc7251 | ||
|
|
a394f49e0d | ||
|
|
c00651ff9b | ||
|
|
bea8efac24 | ||
|
|
ad5b02e175 | ||
|
|
b09a851705 | ||
|
|
85cd97af61 | ||
|
|
e6470ee92e | ||
|
|
dc72567288 | ||
|
|
6defa2b5d5 | ||
|
|
b3d3a2587d | ||
|
|
b85fc39bdb | ||
|
|
09b5954526 | ||
|
|
306c4f9967 | ||
|
|
5ceccdc7de | ||
|
|
cdcaa329bf | ||
|
|
27bdbf5e36 | ||
|
|
4c7fa12a2a | ||
|
|
367971a0e9 | ||
|
|
51570114ea | ||
|
|
098d3111a5 | ||
|
|
3737fe3a4b |
@@ -22,5 +22,11 @@ platforms = [
|
||||
# "x86_64-pc-windows-msvc",
|
||||
]
|
||||
|
||||
[final-excludes]
|
||||
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
|
||||
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
|
||||
# from depending on workspace-hack because most of the dependencies are not used.
|
||||
workspace-members = ["vm_monitor"]
|
||||
|
||||
# Write out exact versions rather than a semver range. (Defaults to false.)
|
||||
# exact-versions = true
|
||||
|
||||
1
.github/workflows/build_and_test.yml
vendored
1
.github/workflows/build_and_test.yml
vendored
@@ -723,6 +723,7 @@ jobs:
|
||||
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
|
||||
--context .
|
||||
--build-arg GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
--build-arg BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
|
||||
--destination neondatabase/neon:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
50
Cargo.lock
generated
50
Cargo.lock
generated
@@ -170,6 +170,12 @@ dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arc-swap"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
|
||||
|
||||
[[package]]
|
||||
name = "archery"
|
||||
version = "0.5.0"
|
||||
@@ -2921,6 +2927,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
@@ -3187,6 +3203,12 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "pagectl"
|
||||
version = "0.1.0"
|
||||
@@ -3272,10 +3294,12 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"utils",
|
||||
"walkdir",
|
||||
@@ -3550,7 +3574,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -3563,7 +3587,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-native-tls"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
@@ -3574,7 +3598,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -3592,7 +3616,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4419,6 +4443,7 @@ dependencies = [
|
||||
"itertools",
|
||||
"pageserver",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -4477,6 +4502,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"url",
|
||||
@@ -4679,6 +4705,16 @@ dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_assert"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eda563240c1288b044209be1f0d38bb4d15044fb3e00dc354fbc922ab4733e80"
|
||||
dependencies = [
|
||||
"hashbrown 0.13.2",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.183"
|
||||
@@ -5396,7 +5432,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#ef8559b5f60f5c1d2b0184a62f49035600824518"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -5753,6 +5789,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
@@ -5939,6 +5976,7 @@ name = "utils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arc-swap",
|
||||
"async-trait",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -5965,6 +6003,7 @@ dependencies = [
|
||||
"routerify",
|
||||
"sentry",
|
||||
"serde",
|
||||
"serde_assert",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
@@ -6035,7 +6074,6 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
14
Cargo.toml
14
Cargo.toml
@@ -36,6 +36,7 @@ license = "Apache-2.0"
|
||||
## All dependency versions, used in the project
|
||||
[workspace.dependencies]
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
|
||||
azure_core = "0.16"
|
||||
azure_identity = "0.16"
|
||||
@@ -124,6 +125,7 @@ sentry = { version = "0.31", default-features = false, features = ["backtrace",
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "2.0"
|
||||
serde_assert = "0.5.0"
|
||||
sha2 = "0.10.2"
|
||||
signal-hook = "0.3"
|
||||
smallvec = "1.11"
|
||||
@@ -161,11 +163,11 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
@@ -202,7 +204,7 @@ tonic-build = "0.9"
|
||||
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ RUN set -e \
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS build
|
||||
WORKDIR /home/nonroot
|
||||
ARG GIT_VERSION=local
|
||||
ARG BUILD_TAG
|
||||
|
||||
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
|
||||
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
|
||||
@@ -78,9 +79,9 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
|
||||
|
||||
4
Makefile
4
Makefile
@@ -72,6 +72,10 @@ neon: postgres-headers walproposer-lib
|
||||
#
|
||||
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
+@echo "Configuring Postgres $* build"
|
||||
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
|
||||
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
|
||||
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
||||
exit 1; }
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
|
||||
|
||||
@@ -283,7 +283,6 @@ fn main() -> Result<()> {
|
||||
.expect("--vm-monitor-addr should always be set because it has a default arg");
|
||||
let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
|
||||
let cgroup = matches.get_one::<String>("cgroup");
|
||||
let file_cache_on_disk = matches.get_flag("file-cache-on-disk");
|
||||
|
||||
// Only make a runtime if we need to.
|
||||
// Note: it seems like you can make a runtime in an inner scope and
|
||||
@@ -310,7 +309,6 @@ fn main() -> Result<()> {
|
||||
cgroup: cgroup.cloned(),
|
||||
pgconnstr: file_cache_connstr.cloned(),
|
||||
addr: vm_monitor_addr.clone(),
|
||||
file_cache_on_disk,
|
||||
})),
|
||||
token.clone(),
|
||||
))
|
||||
@@ -482,6 +480,8 @@ fn cli() -> clap::Command {
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
.arg(
|
||||
// DEPRECATED, NO LONGER DOES ANYTHING.
|
||||
// See https://github.com/neondatabase/cloud/issues/7516
|
||||
Arg::new("file-cache-on-disk")
|
||||
.long("file-cache-on-disk")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
|
||||
@@ -710,8 +710,12 @@ impl ComputeNode {
|
||||
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
||||
// have opened connection to Postgres and superuser access.
|
||||
#[instrument(skip_all)]
|
||||
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
|
||||
client.simple_query("SELECT pg_reload_conf()")?;
|
||||
fn pg_reload_conf(&self) -> Result<()> {
|
||||
let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
|
||||
Command::new(pgctl_bin)
|
||||
.args(["reload", "-D", &self.pgdata])
|
||||
.output()
|
||||
.expect("cannot run pg_ctl process");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -724,9 +728,9 @@ impl ComputeNode {
|
||||
// Write new config
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec, None)?;
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
|
||||
self.pg_reload_conf(&mut client)?;
|
||||
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//!
|
||||
//! Various tools and helpers to handle cluster / compute node (Postgres)
|
||||
//! configuration.
|
||||
//!
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
pub mod checker;
|
||||
pub mod config;
|
||||
pub mod configurator;
|
||||
|
||||
@@ -68,7 +68,7 @@ pub fn get_spec_from_control_plane(
|
||||
base_uri: &str,
|
||||
compute_id: &str,
|
||||
) -> Result<Option<ComputeSpec>> {
|
||||
let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
|
||||
let cp_uri = format!("{base_uri}/compute/api/v2/computes/{compute_id}/spec");
|
||||
let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
|
||||
Ok(v) => v,
|
||||
Err(_) => "".to_string(),
|
||||
|
||||
@@ -2,7 +2,6 @@ use crate::{background_process, local_env::LocalEnv};
|
||||
use anyhow::anyhow;
|
||||
use camino::Utf8PathBuf;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use std::{path::PathBuf, process::Child};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
|
||||
@@ -14,10 +13,8 @@ pub struct AttachmentService {
|
||||
|
||||
const COMMAND: &str = "attachment_service";
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct AttachHookRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
pub node_id: Option<NodeId>,
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ where
|
||||
P: Into<Utf8PathBuf>,
|
||||
{
|
||||
let path: Utf8PathBuf = path.into();
|
||||
// SAFETY
|
||||
// SAFETY:
|
||||
// pre_exec is marked unsafe because it runs between fork and exec.
|
||||
// Why is that dangerous in various ways?
|
||||
// Long answer: https://github.com/rust-lang/rust/issues/39575
|
||||
|
||||
@@ -46,7 +46,6 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
@@ -57,13 +56,10 @@ use compute_api::responses::{ComputeState, ComputeStatus};
|
||||
use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
|
||||
|
||||
// contents of a endpoint.json file
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct EndpointConf {
|
||||
endpoint_id: String,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
timeline_id: TimelineId,
|
||||
mode: ComputeMode,
|
||||
pg_port: u16,
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
//
|
||||
// Local control plane.
|
||||
//
|
||||
// Can start, configure and stop postgres instances running as a local processes.
|
||||
//
|
||||
// Intended to be used in integration tests and in CLI tools for
|
||||
// local installations.
|
||||
//
|
||||
//! Local control plane.
|
||||
//!
|
||||
//! Can start, configure and stop postgres instances running as a local processes.
|
||||
//!
|
||||
//! Intended to be used in integration tests and in CLI tools for
|
||||
//! local installations.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
pub mod attachment_service;
|
||||
mod background_process;
|
||||
|
||||
@@ -8,7 +8,6 @@ use anyhow::{bail, ensure, Context};
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
@@ -33,7 +32,6 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
@@ -59,7 +57,6 @@ pub struct LocalEnv {
|
||||
// Default tenant ID to use with the 'neon_local' command line utility, when
|
||||
// --tenant_id is not explicitly specified.
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
@@ -84,7 +81,6 @@ pub struct LocalEnv {
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
#[serde_as(as = "HashMap<_, Vec<(DisplayFromStr, DisplayFromStr)>>")]
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
pub mod requests;
|
||||
pub mod responses;
|
||||
pub mod spec;
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -19,7 +18,6 @@ pub type PgIdent = String;
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
pub struct ComputeSpec {
|
||||
pub format_version: f32,
|
||||
@@ -50,12 +48,12 @@ pub struct ComputeSpec {
|
||||
// these, and instead set the "neon.tenant_id", "neon.timeline_id",
|
||||
// etc. GUCs in cluster.settings. TODO: Once the control plane has been
|
||||
// updated to fill these fields, we can make these non optional.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub tenant_id: Option<TenantId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
|
||||
pub timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
|
||||
pub pageserver_connstring: Option<String>,
|
||||
|
||||
#[serde(default)]
|
||||
pub safekeeper_connstrings: Vec<String>,
|
||||
|
||||
@@ -140,14 +138,13 @@ impl RemoteExtSpec {
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
|
||||
pub enum ComputeMode {
|
||||
/// A read-write node
|
||||
#[default]
|
||||
Primary,
|
||||
/// A read-only node, pinned at a particular LSN
|
||||
Static(#[serde_as(as = "DisplayFromStr")] Lsn),
|
||||
Static(Lsn),
|
||||
/// A read-only node that follows the tip of the branch in hot standby mode
|
||||
///
|
||||
/// Future versions may want to distinguish between replicas with hot standby
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//!
|
||||
//! Shared code for consumption metics collection
|
||||
//!
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use chrono::{DateTime, Utc};
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
//! make sure that we use the same dep version everywhere.
|
||||
//! Otherwise, we might not see all metrics registered via
|
||||
//! a default registry.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use once_cell::sync::Lazy;
|
||||
use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
|
||||
pub use prometheus::opts;
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
//! See docs/rfcs/025-generation-numbers.md
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -12,10 +11,8 @@ pub struct ReAttachRequest {
|
||||
pub node_id: NodeId,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ReAttachResponseTenant {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
pub gen: u32,
|
||||
}
|
||||
@@ -25,10 +22,8 @@ pub struct ReAttachResponse {
|
||||
pub tenants: Vec<ReAttachResponseTenant>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ValidateRequestTenant {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
pub gen: u32,
|
||||
}
|
||||
@@ -43,10 +38,8 @@ pub struct ValidateResponse {
|
||||
pub tenants: Vec<ValidateResponseTenant>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ValidateResponseTenant {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
pub valid: bool,
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use const_format::formatcp;
|
||||
|
||||
/// Public API types
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::{
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use serde_with::serde_as;
|
||||
use strum_macros;
|
||||
use utils::{
|
||||
completion,
|
||||
@@ -18,7 +18,7 @@ use utils::{
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
use anyhow::bail;
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
///
|
||||
@@ -174,25 +174,19 @@ pub enum TimelineState {
|
||||
Broken { reason: String, backtrace: String },
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub new_timeline_id: TimelineId,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_start_lsn: Option<Lsn>,
|
||||
pub pg_version: Option<u32>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub new_tenant_id: TenantId,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
@@ -201,7 +195,6 @@ pub struct TenantCreateRequest {
|
||||
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantLoadRequest {
|
||||
@@ -278,31 +271,26 @@ pub struct LocationConfig {
|
||||
pub tenant_conf: TenantConfig,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub TenantId);
|
||||
pub struct TenantCreateResponse(pub TenantId);
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct StatusResponse {
|
||||
pub id: NodeId,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantLocationConfigRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(flatten)]
|
||||
pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantConfigRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(flatten)]
|
||||
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
|
||||
@@ -374,10 +362,8 @@ pub enum TenantAttachmentStatus {
|
||||
Failed { reason: String },
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
|
||||
pub state: TenantState,
|
||||
@@ -388,33 +374,22 @@ pub struct TenantInfo {
|
||||
}
|
||||
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub last_record_lsn: Lsn,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
|
||||
/// The LSN that we have succesfully uploaded to remote storage
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
|
||||
/// The LSN that we are advertizing to safekeepers
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub remote_consistent_lsn_visible: Lsn,
|
||||
|
||||
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
@@ -426,7 +401,6 @@ pub struct TimelineInfo {
|
||||
pub timeline_dir_layer_file_size_sum: Option<u64>,
|
||||
|
||||
pub wal_source_connstr: Option<String>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub last_received_msg_lsn: Option<Lsn>,
|
||||
/// the timestamp (in microseconds) of the last received message
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
@@ -523,23 +497,13 @@ pub struct LayerAccessStats {
|
||||
pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "kind")]
|
||||
pub enum InMemoryLayerInfo {
|
||||
Open {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_start: Lsn,
|
||||
},
|
||||
Frozen {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_start: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_end: Lsn,
|
||||
},
|
||||
Open { lsn_start: Lsn },
|
||||
Frozen { lsn_start: Lsn, lsn_end: Lsn },
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "kind")]
|
||||
pub enum HistoricLayerInfo {
|
||||
@@ -547,9 +511,7 @@ pub enum HistoricLayerInfo {
|
||||
layer_file_name: String,
|
||||
layer_file_size: u64,
|
||||
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_start: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_end: Lsn,
|
||||
remote: bool,
|
||||
access_stats: LayerAccessStats,
|
||||
@@ -558,7 +520,6 @@ pub enum HistoricLayerInfo {
|
||||
layer_file_name: String,
|
||||
layer_file_size: u64,
|
||||
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn_start: Lsn,
|
||||
remote: bool,
|
||||
access_stats: LayerAccessStats,
|
||||
@@ -611,15 +572,18 @@ pub enum PagestreamFeMessage {
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -718,6 +682,10 @@ impl PagestreamFeMessage {
|
||||
bytes.put_u64(req.lsn.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(4);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
@@ -768,6 +736,7 @@ impl PagestreamFeMessage {
|
||||
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::NoOp),
|
||||
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
@@ -802,10 +771,46 @@ impl PagestreamBeMessage {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(105);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
match msg_tag {
|
||||
100 => todo!(),
|
||||
101 => todo!(),
|
||||
102 => {
|
||||
let buf = buf.get_ref();
|
||||
/* TODO use constant */
|
||||
if buf.len() == 8192 {
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
page: buf.clone(),
|
||||
}))
|
||||
} else {
|
||||
anyhow::bail!("invalid page size: {}", buf.len());
|
||||
}
|
||||
}
|
||||
103 => {
|
||||
let buf = buf.get_ref();
|
||||
let cstr = std::ffi::CStr::from_bytes_until_nul(buf)?;
|
||||
let rust_str = cstr.to_str()?;
|
||||
Ok(PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: rust_str.to_owned(),
|
||||
}))
|
||||
}
|
||||
104 => todo!(),
|
||||
105 => {
|
||||
Ok(PagestreamBeMessage::NoOp)
|
||||
},
|
||||
_ => bail!("unknown tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
//! To use, create PostgresBackend and run() it, passing the Handler
|
||||
//! implementation determining how to process the queries. Currently its API
|
||||
//! is rather narrow, but we can extend it once required.
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use anyhow::Context;
|
||||
use bytes::Bytes;
|
||||
use futures::pin_mut;
|
||||
@@ -728,12 +730,17 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
|
||||
trace!("got query {query_string:?}");
|
||||
if let Err(e) = handler.process_query(self, query_string).await {
|
||||
log_query_error(query_string, &e);
|
||||
let short_error = short_error(&e);
|
||||
self.write_message_noflush(&BeMessage::ErrorResponse(
|
||||
&short_error,
|
||||
Some(e.pg_error_code()),
|
||||
))?;
|
||||
match e {
|
||||
QueryError::Shutdown => return Ok(ProcessMsgResult::Break),
|
||||
e => {
|
||||
log_query_error(query_string, &e);
|
||||
let short_error = short_error(&e);
|
||||
self.write_message_noflush(&BeMessage::ErrorResponse(
|
||||
&short_error,
|
||||
Some(e.pg_error_code()),
|
||||
))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.write_message_noflush(&BeMessage::ReadyForQuery)?;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use anyhow::{bail, Context};
|
||||
use itertools::Itertools;
|
||||
use std::borrow::Cow;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
// modules included with the postgres_ffi macro depend on the types of the specific version's
|
||||
// types, and trigger a too eager lint.
|
||||
#![allow(clippy::duplicate_mod)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use bytes::Bytes;
|
||||
use utils::bin_ser::SerializeError;
|
||||
@@ -20,6 +21,7 @@ macro_rules! postgres_ffi {
|
||||
pub mod bindings {
|
||||
// bindgen generates bindings for a lot of stuff we don't need
|
||||
#![allow(dead_code)]
|
||||
#![allow(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
include!(concat!(
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//! Postgres protocol messages serialization-deserialization. See
|
||||
//! <https://www.postgresql.org/docs/devel/protocol-message-formats.html>
|
||||
//! on message formats.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
pub mod framed;
|
||||
|
||||
|
||||
@@ -1,21 +1,18 @@
|
||||
//! Azure Blob Storage wrapper
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::num::NonZeroU32;
|
||||
use std::sync::Arc;
|
||||
use std::{borrow::Cow, collections::HashMap, io::Cursor};
|
||||
use std::{borrow::Cow, io::Cursor};
|
||||
|
||||
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
|
||||
use anyhow::Result;
|
||||
use azure_core::request_options::{MaxResults, Metadata, Range};
|
||||
use azure_core::Header;
|
||||
use azure_identity::DefaultAzureCredential;
|
||||
use azure_storage::StorageCredentials;
|
||||
use azure_storage_blobs::prelude::ClientBuilder;
|
||||
use azure_storage_blobs::{
|
||||
blob::operations::GetBlobBuilder,
|
||||
prelude::{BlobClient, ContainerClient},
|
||||
};
|
||||
use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
|
||||
use futures_util::StreamExt;
|
||||
use http_types::StatusCode;
|
||||
use tokio::io::AsyncRead;
|
||||
@@ -112,16 +109,19 @@ impl AzureBlobStorage {
|
||||
|
||||
async fn download_for_builder(
|
||||
&self,
|
||||
metadata: StorageMetadata,
|
||||
builder: GetBlobBuilder,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let mut response = builder.into_stream();
|
||||
|
||||
let mut metadata = HashMap::new();
|
||||
// TODO give proper streaming response instead of buffering into RAM
|
||||
// https://github.com/neondatabase/neon/issues/5563
|
||||
let mut buf = Vec::new();
|
||||
while let Some(part) = response.next().await {
|
||||
let part = part.map_err(to_download_error)?;
|
||||
if let Some(blob_meta) = part.blob.metadata {
|
||||
metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
|
||||
}
|
||||
let data = part
|
||||
.data
|
||||
.collect()
|
||||
@@ -131,28 +131,9 @@ impl AzureBlobStorage {
|
||||
}
|
||||
Ok(Download {
|
||||
download_stream: Box::pin(Cursor::new(buf)),
|
||||
metadata: Some(metadata),
|
||||
metadata: Some(StorageMetadata(metadata)),
|
||||
})
|
||||
}
|
||||
// TODO get rid of this function once we have metadata included in the response
|
||||
// https://github.com/Azure/azure-sdk-for-rust/issues/1439
|
||||
async fn get_metadata(
|
||||
&self,
|
||||
blob_client: &BlobClient,
|
||||
) -> Result<StorageMetadata, DownloadError> {
|
||||
let builder = blob_client.get_metadata();
|
||||
|
||||
let response = builder.into_future().await.map_err(to_download_error)?;
|
||||
let mut map = HashMap::new();
|
||||
|
||||
for md in response.metadata.iter() {
|
||||
map.insert(
|
||||
md.name().as_str().to_string(),
|
||||
md.value().as_str().to_string(),
|
||||
);
|
||||
}
|
||||
Ok(StorageMetadata(map))
|
||||
}
|
||||
|
||||
async fn permit(&self, kind: RequestKind) -> tokio::sync::SemaphorePermit<'_> {
|
||||
self.concurrency_limiter
|
||||
@@ -269,11 +250,9 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
let _permit = self.permit(RequestKind::Get).await;
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
|
||||
|
||||
let metadata = self.get_metadata(&blob_client).await?;
|
||||
|
||||
let builder = blob_client.get();
|
||||
|
||||
self.download_for_builder(metadata, builder).await
|
||||
self.download_for_builder(builder).await
|
||||
}
|
||||
|
||||
async fn download_byte_range(
|
||||
@@ -285,8 +264,6 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
let _permit = self.permit(RequestKind::Get).await;
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
|
||||
|
||||
let metadata = self.get_metadata(&blob_client).await?;
|
||||
|
||||
let mut builder = blob_client.get();
|
||||
|
||||
if let Some(end_exclusive) = end_exclusive {
|
||||
@@ -301,7 +278,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
builder = builder.range(Range::new(start_inclusive, end_exclusive));
|
||||
}
|
||||
|
||||
self.download_for_builder(metadata, builder).await
|
||||
self.download_for_builder(builder).await
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
//! * [`s3_bucket`] uses AWS S3 bucket as an external storage
|
||||
//! * [`azure_blob`] allows to use Azure Blob storage as an external storage
|
||||
//!
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
mod azure_blob;
|
||||
mod local_fs;
|
||||
@@ -112,7 +114,7 @@ impl RemotePath {
|
||||
self.0.file_name()
|
||||
}
|
||||
|
||||
pub fn join(&self, segment: &Utf8Path) -> Self {
|
||||
pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
|
||||
Self(self.0.join(segment))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use const_format::formatcp;
|
||||
|
||||
/// Public API types
|
||||
|
||||
@@ -1,23 +1,18 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
pub peer_ids: Option<Vec<NodeId>>,
|
||||
pub pg_version: u32,
|
||||
pub system_id: Option<u64>,
|
||||
pub wal_seg_size: Option<u32>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub commit_lsn: Lsn,
|
||||
// If not passed, it is assigned to the beginning of commit_lsn segment.
|
||||
pub local_start_lsn: Option<Lsn>,
|
||||
@@ -28,7 +23,6 @@ fn lsn_invalid() -> Lsn {
|
||||
}
|
||||
|
||||
/// Data about safekeeper's timeline, mirrors broker.proto.
|
||||
#[serde_as]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct SkTimelineInfo {
|
||||
/// Term.
|
||||
@@ -36,25 +30,19 @@ pub struct SkTimelineInfo {
|
||||
/// Term of the last entry.
|
||||
pub last_log_term: Option<u64>,
|
||||
/// LSN of the last record.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub flush_lsn: Lsn,
|
||||
/// Up to which LSN safekeeper regards its WAL as committed.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub commit_lsn: Lsn,
|
||||
/// LSN up to which safekeeper has backed WAL.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub backup_lsn: Lsn,
|
||||
/// LSN of last checkpoint uploaded by pageserver.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub peer_horizon_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
#[serde(default = "lsn_invalid")]
|
||||
pub local_start_lsn: Lsn,
|
||||
/// A connection string to use for WAL receiving.
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
//! Synthetic size calculation
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
mod calculation;
|
||||
pub mod svg;
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
//! .init();
|
||||
//! }
|
||||
//! ```
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use opentelemetry::sdk::Resource;
|
||||
use opentelemetry::KeyValue;
|
||||
|
||||
@@ -5,6 +5,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arc-swap.workspace = true
|
||||
sentry.workspace = true
|
||||
async-trait.workspace = true
|
||||
anyhow.workspace = true
|
||||
@@ -55,6 +56,7 @@ bytes.workspace = true
|
||||
criterion.workspace = true
|
||||
hex-literal.workspace = true
|
||||
camino-tempfile.workspace = true
|
||||
serde_assert.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
// For details about authentication see docs/authentication.md
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use serde;
|
||||
use std::fs;
|
||||
use std::{fs, sync::Arc};
|
||||
|
||||
use anyhow::Result;
|
||||
use camino::Utf8Path;
|
||||
@@ -9,7 +10,6 @@ use jsonwebtoken::{
|
||||
decode, encode, Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
|
||||
use crate::id::TenantId;
|
||||
|
||||
@@ -32,11 +32,9 @@ pub enum Scope {
|
||||
}
|
||||
|
||||
/// JWT payload. See docs/authentication.md for the format
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub struct Claims {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub tenant_id: Option<TenantId>,
|
||||
pub scope: Scope,
|
||||
}
|
||||
@@ -47,31 +45,88 @@ impl Claims {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SwappableJwtAuth(ArcSwap<JwtAuth>);
|
||||
|
||||
impl SwappableJwtAuth {
|
||||
pub fn new(jwt_auth: JwtAuth) -> Self {
|
||||
SwappableJwtAuth(ArcSwap::new(Arc::new(jwt_auth)))
|
||||
}
|
||||
pub fn swap(&self, jwt_auth: JwtAuth) {
|
||||
self.0.swap(Arc::new(jwt_auth));
|
||||
}
|
||||
pub fn decode(&self, token: &str) -> Result<TokenData<Claims>> {
|
||||
self.0.load().decode(token)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SwappableJwtAuth {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Swappable({:?})", self.0.load())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct JwtAuth {
|
||||
decoding_key: DecodingKey,
|
||||
decoding_keys: Vec<DecodingKey>,
|
||||
validation: Validation,
|
||||
}
|
||||
|
||||
impl JwtAuth {
|
||||
pub fn new(decoding_key: DecodingKey) -> Self {
|
||||
pub fn new(decoding_keys: Vec<DecodingKey>) -> Self {
|
||||
let mut validation = Validation::default();
|
||||
validation.algorithms = vec![STORAGE_TOKEN_ALGORITHM];
|
||||
// The default 'required_spec_claims' is 'exp'. But we don't want to require
|
||||
// expiration.
|
||||
validation.required_spec_claims = [].into();
|
||||
Self {
|
||||
decoding_key,
|
||||
decoding_keys,
|
||||
validation,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_key_path(key_path: &Utf8Path) -> Result<Self> {
|
||||
let public_key = fs::read(key_path)?;
|
||||
Ok(Self::new(DecodingKey::from_ed_pem(&public_key)?))
|
||||
let metadata = key_path.metadata()?;
|
||||
let decoding_keys = if metadata.is_dir() {
|
||||
let mut keys = Vec::new();
|
||||
for entry in fs::read_dir(key_path)? {
|
||||
let path = entry?.path();
|
||||
if !path.is_file() {
|
||||
// Ignore directories (don't recurse)
|
||||
continue;
|
||||
}
|
||||
let public_key = fs::read(path)?;
|
||||
keys.push(DecodingKey::from_ed_pem(&public_key)?);
|
||||
}
|
||||
keys
|
||||
} else if metadata.is_file() {
|
||||
let public_key = fs::read(key_path)?;
|
||||
vec![DecodingKey::from_ed_pem(&public_key)?]
|
||||
} else {
|
||||
anyhow::bail!("path is neither a directory or a file")
|
||||
};
|
||||
if decoding_keys.is_empty() {
|
||||
anyhow::bail!("Configured for JWT auth with zero decoding keys. All JWT gated requests would be rejected.");
|
||||
}
|
||||
Ok(Self::new(decoding_keys))
|
||||
}
|
||||
|
||||
/// Attempt to decode the token with the internal decoding keys.
|
||||
///
|
||||
/// The function tries the stored decoding keys in succession,
|
||||
/// and returns the first yielding a successful result.
|
||||
/// If there is no working decoding key, it returns the last error.
|
||||
pub fn decode(&self, token: &str) -> Result<TokenData<Claims>> {
|
||||
Ok(decode(token, &self.decoding_key, &self.validation)?)
|
||||
let mut res = None;
|
||||
for decoding_key in &self.decoding_keys {
|
||||
res = Some(decode(token, decoding_key, &self.validation));
|
||||
if let Some(Ok(res)) = res {
|
||||
return Ok(res);
|
||||
}
|
||||
}
|
||||
if let Some(res) = res {
|
||||
res.map_err(anyhow::Error::new)
|
||||
} else {
|
||||
anyhow::bail!("no JWT decoding keys configured")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -132,7 +187,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
let encoded_eddsa = "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6InRlbmFudCIsInRlbmFudF9pZCI6IjNkMWY3NTk1YjQ2ODIzMDMwNGUwYjczY2VjYmNiMDgxIiwiaXNzIjoibmVvbi5jb250cm9scGxhbmUiLCJleHAiOjE3MDkyMDA4NzksImlhdCI6MTY3ODQ0MjQ3OX0.U3eA8j-uU-JnhzeO3EDHRuXLwkAUFCPxtGHEgw6p7Ccc3YRbFs2tmCdbD9PZEXP-XsxSeBQi1FY0YPcT3NXADw";
|
||||
|
||||
// Check it can be validated with the public key
|
||||
let auth = JwtAuth::new(DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?);
|
||||
let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]);
|
||||
let claims_from_token = auth.decode(encoded_eddsa)?.claims;
|
||||
assert_eq!(claims_from_token, expected_claims);
|
||||
|
||||
@@ -149,7 +204,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519)?;
|
||||
|
||||
// decode it back
|
||||
let auth = JwtAuth::new(DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?);
|
||||
let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]);
|
||||
let decoded = auth.decode(&encoded)?;
|
||||
|
||||
assert_eq!(decoded.claims, claims);
|
||||
|
||||
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||
///
|
||||
/// See docs/rfcs/025-generation-numbers.md for detail on how generation
|
||||
/// numbers are used.
|
||||
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
|
||||
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||
pub enum Generation {
|
||||
// Generations with this magic value will not add a suffix to S3 keys, and will not
|
||||
// be included in persisted index_part.json. This value is only to be used
|
||||
|
||||
41
libs/utils/src/hex.rs
Normal file
41
libs/utils/src/hex.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
/// Useful type for asserting that expected bytes match reporting the bytes more readable
|
||||
/// array-syntax compatible hex bytes.
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// ```
|
||||
/// use utils::Hex;
|
||||
///
|
||||
/// let actual = serialize_something();
|
||||
/// let expected = [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64];
|
||||
///
|
||||
/// // the type implements PartialEq and on mismatch, both sides are printed in 16 wide multiline
|
||||
/// // output suffixed with an array style length for easier comparisons.
|
||||
/// assert_eq!(Hex(&actual), Hex(&expected));
|
||||
///
|
||||
/// // with `let expected = [0x68];` the error would had been:
|
||||
/// // assertion `left == right` failed
|
||||
/// // left: [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64; 11]
|
||||
/// // right: [0x68; 1]
|
||||
/// # fn serialize_something() -> Vec<u8> { "hello world".as_bytes().to_vec() }
|
||||
/// ```
|
||||
#[derive(PartialEq)]
|
||||
pub struct Hex<'a>(pub &'a [u8]);
|
||||
|
||||
impl std::fmt::Debug for Hex<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[")?;
|
||||
for (i, c) in self.0.chunks(16).enumerate() {
|
||||
if i > 0 && !c.is_empty() {
|
||||
writeln!(f, ", ")?;
|
||||
}
|
||||
for (j, b) in c.iter().enumerate() {
|
||||
if j > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "0x{b:02x}")?;
|
||||
}
|
||||
}
|
||||
write!(f, "; {}]", self.0.len())
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::auth::{Claims, JwtAuth};
|
||||
use crate::auth::{Claims, SwappableJwtAuth};
|
||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||
use anyhow::Context;
|
||||
use hyper::header::{HeaderName, AUTHORIZATION};
|
||||
@@ -14,6 +14,11 @@ use tracing::{self, debug, info, info_span, warn, Instrument};
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use std::io::Write as _;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"libmetrics_metric_handler_requests_total",
|
||||
@@ -146,94 +151,89 @@ impl Drop for RequestCancelled {
|
||||
}
|
||||
}
|
||||
|
||||
/// An [`std::io::Write`] implementation on top of a channel sending [`bytes::Bytes`] chunks.
|
||||
pub struct ChannelWriter {
|
||||
buffer: BytesMut,
|
||||
pub tx: mpsc::Sender<std::io::Result<Bytes>>,
|
||||
written: usize,
|
||||
}
|
||||
|
||||
impl ChannelWriter {
|
||||
pub fn new(buf_len: usize, tx: mpsc::Sender<std::io::Result<Bytes>>) -> Self {
|
||||
assert_ne!(buf_len, 0);
|
||||
ChannelWriter {
|
||||
// split about half off the buffer from the start, because we flush depending on
|
||||
// capacity. first flush will come sooner than without this, but now resizes will
|
||||
// have better chance of picking up the "other" half. not guaranteed of course.
|
||||
buffer: BytesMut::with_capacity(buf_len).split_off(buf_len / 2),
|
||||
tx,
|
||||
written: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn flush0(&mut self) -> std::io::Result<usize> {
|
||||
let n = self.buffer.len();
|
||||
if n == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
tracing::trace!(n, "flushing");
|
||||
let ready = self.buffer.split().freeze();
|
||||
|
||||
// not ideal to call from blocking code to block_on, but we are sure that this
|
||||
// operation does not spawn_blocking other tasks
|
||||
let res: Result<(), ()> = tokio::runtime::Handle::current().block_on(async {
|
||||
self.tx.send(Ok(ready)).await.map_err(|_| ())?;
|
||||
|
||||
// throttle sending to allow reuse of our buffer in `write`.
|
||||
self.tx.reserve().await.map_err(|_| ())?;
|
||||
|
||||
// now the response task has picked up the buffer and hopefully started
|
||||
// sending it to the client.
|
||||
Ok(())
|
||||
});
|
||||
if res.is_err() {
|
||||
return Err(std::io::ErrorKind::BrokenPipe.into());
|
||||
}
|
||||
self.written += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
pub fn flushed_bytes(&self) -> usize {
|
||||
self.written
|
||||
}
|
||||
}
|
||||
|
||||
impl std::io::Write for ChannelWriter {
|
||||
fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
|
||||
let remaining = self.buffer.capacity() - self.buffer.len();
|
||||
|
||||
let out_of_space = remaining < buf.len();
|
||||
|
||||
let original_len = buf.len();
|
||||
|
||||
if out_of_space {
|
||||
let can_still_fit = buf.len() - remaining;
|
||||
self.buffer.extend_from_slice(&buf[..can_still_fit]);
|
||||
buf = &buf[can_still_fit..];
|
||||
self.flush0()?;
|
||||
}
|
||||
|
||||
// assume that this will often under normal operation just move the pointer back to the
|
||||
// beginning of allocation, because previous split off parts are already sent and
|
||||
// dropped.
|
||||
self.buffer.extend_from_slice(buf);
|
||||
Ok(original_len)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
self.flush0().map(|_| ())
|
||||
}
|
||||
}
|
||||
|
||||
async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use std::io::Write as _;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
SERVE_METRICS_COUNT.inc();
|
||||
|
||||
/// An [`std::io::Write`] implementation on top of a channel sending [`bytes::Bytes`] chunks.
|
||||
struct ChannelWriter {
|
||||
buffer: BytesMut,
|
||||
tx: mpsc::Sender<std::io::Result<Bytes>>,
|
||||
written: usize,
|
||||
}
|
||||
|
||||
impl ChannelWriter {
|
||||
fn new(buf_len: usize, tx: mpsc::Sender<std::io::Result<Bytes>>) -> Self {
|
||||
assert_ne!(buf_len, 0);
|
||||
ChannelWriter {
|
||||
// split about half off the buffer from the start, because we flush depending on
|
||||
// capacity. first flush will come sooner than without this, but now resizes will
|
||||
// have better chance of picking up the "other" half. not guaranteed of course.
|
||||
buffer: BytesMut::with_capacity(buf_len).split_off(buf_len / 2),
|
||||
tx,
|
||||
written: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn flush0(&mut self) -> std::io::Result<usize> {
|
||||
let n = self.buffer.len();
|
||||
if n == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
tracing::trace!(n, "flushing");
|
||||
let ready = self.buffer.split().freeze();
|
||||
|
||||
// not ideal to call from blocking code to block_on, but we are sure that this
|
||||
// operation does not spawn_blocking other tasks
|
||||
let res: Result<(), ()> = tokio::runtime::Handle::current().block_on(async {
|
||||
self.tx.send(Ok(ready)).await.map_err(|_| ())?;
|
||||
|
||||
// throttle sending to allow reuse of our buffer in `write`.
|
||||
self.tx.reserve().await.map_err(|_| ())?;
|
||||
|
||||
// now the response task has picked up the buffer and hopefully started
|
||||
// sending it to the client.
|
||||
Ok(())
|
||||
});
|
||||
if res.is_err() {
|
||||
return Err(std::io::ErrorKind::BrokenPipe.into());
|
||||
}
|
||||
self.written += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
fn flushed_bytes(&self) -> usize {
|
||||
self.written
|
||||
}
|
||||
}
|
||||
|
||||
impl std::io::Write for ChannelWriter {
|
||||
fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
|
||||
let remaining = self.buffer.capacity() - self.buffer.len();
|
||||
|
||||
let out_of_space = remaining < buf.len();
|
||||
|
||||
let original_len = buf.len();
|
||||
|
||||
if out_of_space {
|
||||
let can_still_fit = buf.len() - remaining;
|
||||
self.buffer.extend_from_slice(&buf[..can_still_fit]);
|
||||
buf = &buf[can_still_fit..];
|
||||
self.flush0()?;
|
||||
}
|
||||
|
||||
// assume that this will often under normal operation just move the pointer back to the
|
||||
// beginning of allocation, because previous split off parts are already sent and
|
||||
// dropped.
|
||||
self.buffer.extend_from_slice(buf);
|
||||
Ok(original_len)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
self.flush0().map(|_| ())
|
||||
}
|
||||
}
|
||||
|
||||
let started_at = std::time::Instant::now();
|
||||
|
||||
let (tx, rx) = mpsc::channel(1);
|
||||
@@ -389,7 +389,7 @@ fn parse_token(header_value: &str) -> Result<&str, ApiError> {
|
||||
}
|
||||
|
||||
pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
||||
provide_auth: fn(&Request<Body>) -> Option<&JwtAuth>,
|
||||
provide_auth: fn(&Request<Body>) -> Option<&SwappableJwtAuth>,
|
||||
) -> Middleware<B, ApiError> {
|
||||
Middleware::pre(move |req| async move {
|
||||
if let Some(auth) = provide_auth(&req) {
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{fmt, str::FromStr};
|
||||
use anyhow::Context;
|
||||
use hex::FromHex;
|
||||
use rand::Rng;
|
||||
use serde::de::Visitor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
@@ -17,12 +18,74 @@ pub enum IdError {
|
||||
///
|
||||
/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look
|
||||
/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.
|
||||
///
|
||||
/// Use `#[serde_as(as = "DisplayFromStr")]` to (de)serialize it as hex string instead: `ad50847381e248feaac9876cc71ae418`.
|
||||
/// Check the `serde_with::serde_as` documentation for options for more complex types.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
struct Id([u8; 16]);
|
||||
|
||||
impl Serialize for Id {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
if serializer.is_human_readable() {
|
||||
serializer.collect_str(self)
|
||||
} else {
|
||||
self.0.serialize(serializer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Id {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
struct IdVisitor {
|
||||
is_human_readable_deserializer: bool,
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for IdVisitor {
|
||||
type Value = Id;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.is_human_readable_deserializer {
|
||||
formatter.write_str("value in form of hex string")
|
||||
} else {
|
||||
formatter.write_str("value in form of integer array([u8; 16])")
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
|
||||
where
|
||||
A: serde::de::SeqAccess<'de>,
|
||||
{
|
||||
let s = serde::de::value::SeqAccessDeserializer::new(seq);
|
||||
let id: [u8; 16] = Deserialize::deserialize(s)?;
|
||||
Ok(Id::from(id))
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
Id::from_str(v).map_err(E::custom)
|
||||
}
|
||||
}
|
||||
|
||||
if deserializer.is_human_readable() {
|
||||
deserializer.deserialize_str(IdVisitor {
|
||||
is_human_readable_deserializer: true,
|
||||
})
|
||||
} else {
|
||||
deserializer.deserialize_tuple(
|
||||
16,
|
||||
IdVisitor {
|
||||
is_human_readable_deserializer: false,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Id {
|
||||
pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
|
||||
let mut arr = [0u8; 16];
|
||||
@@ -57,6 +120,8 @@ impl Id {
|
||||
chunk[0] = HEX[((b >> 4) & 0xf) as usize];
|
||||
chunk[1] = HEX[(b & 0xf) as usize];
|
||||
}
|
||||
|
||||
// SAFETY: vec constructed out of `HEX`, it can only be ascii
|
||||
unsafe { String::from_utf8_unchecked(buf) }
|
||||
}
|
||||
}
|
||||
@@ -308,3 +373,112 @@ impl fmt::Display for NodeId {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_assert::{Deserializer, Serializer, Token, Tokens};
|
||||
|
||||
use crate::bin_ser::BeSer;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_id_serde_non_human_readable() {
|
||||
let original_id = Id([
|
||||
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
|
||||
]);
|
||||
let expected_tokens = Tokens(vec![
|
||||
Token::Tuple { len: 16 },
|
||||
Token::U8(173),
|
||||
Token::U8(80),
|
||||
Token::U8(132),
|
||||
Token::U8(115),
|
||||
Token::U8(129),
|
||||
Token::U8(226),
|
||||
Token::U8(72),
|
||||
Token::U8(254),
|
||||
Token::U8(170),
|
||||
Token::U8(201),
|
||||
Token::U8(135),
|
||||
Token::U8(108),
|
||||
Token::U8(199),
|
||||
Token::U8(26),
|
||||
Token::U8(228),
|
||||
Token::U8(24),
|
||||
Token::TupleEnd,
|
||||
]);
|
||||
|
||||
let serializer = Serializer::builder().is_human_readable(false).build();
|
||||
let serialized_tokens = original_id.serialize(&serializer).unwrap();
|
||||
assert_eq!(serialized_tokens, expected_tokens);
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(false)
|
||||
.tokens(serialized_tokens)
|
||||
.build();
|
||||
let deserialized_id = Id::deserialize(&mut deserializer).unwrap();
|
||||
assert_eq!(deserialized_id, original_id);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_id_serde_human_readable() {
|
||||
let original_id = Id([
|
||||
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
|
||||
]);
|
||||
let expected_tokens = Tokens(vec![Token::Str(String::from(
|
||||
"ad50847381e248feaac9876cc71ae418",
|
||||
))]);
|
||||
|
||||
let serializer = Serializer::builder().is_human_readable(true).build();
|
||||
let serialized_tokens = original_id.serialize(&serializer).unwrap();
|
||||
assert_eq!(serialized_tokens, expected_tokens);
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(true)
|
||||
.tokens(Tokens(vec![Token::Str(String::from(
|
||||
"ad50847381e248feaac9876cc71ae418",
|
||||
))]))
|
||||
.build();
|
||||
assert_eq!(Id::deserialize(&mut deserializer).unwrap(), original_id);
|
||||
}
|
||||
|
||||
macro_rules! roundtrip_type {
|
||||
($type:ty, $expected_bytes:expr) => {{
|
||||
let expected_bytes: [u8; 16] = $expected_bytes;
|
||||
let original_id = <$type>::from(expected_bytes);
|
||||
|
||||
let ser_bytes = original_id.ser().unwrap();
|
||||
assert_eq!(ser_bytes, expected_bytes);
|
||||
|
||||
let des_id = <$type>::des(&ser_bytes).unwrap();
|
||||
assert_eq!(des_id, original_id);
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_id_bincode_serde() {
|
||||
let expected_bytes = [
|
||||
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
|
||||
];
|
||||
|
||||
roundtrip_type!(Id, expected_bytes);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tenant_id_bincode_serde() {
|
||||
let expected_bytes = [
|
||||
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
|
||||
];
|
||||
|
||||
roundtrip_type!(TenantId, expected_bytes);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timeline_id_bincode_serde() {
|
||||
let expected_bytes = [
|
||||
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
|
||||
];
|
||||
|
||||
roundtrip_type!(TimelineId, expected_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
//! `utils` is intended to be a place to put code that is shared
|
||||
//! between other crates in this repository.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
pub mod backoff;
|
||||
|
||||
@@ -24,6 +25,10 @@ pub mod auth;
|
||||
|
||||
// utility functions and helper traits for unified unique id generation/serialization etc.
|
||||
pub mod id;
|
||||
|
||||
mod hex;
|
||||
pub use hex::Hex;
|
||||
|
||||
// http endpoint utils
|
||||
pub mod http;
|
||||
|
||||
@@ -73,6 +78,9 @@ pub mod completion;
|
||||
/// Reporting utilities
|
||||
pub mod error;
|
||||
|
||||
/// async timeout helper
|
||||
pub mod timeout;
|
||||
|
||||
pub mod sync;
|
||||
|
||||
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use camino::Utf8Path;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{de::Visitor, Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::ops::{Add, AddAssign};
|
||||
use std::str::FromStr;
|
||||
@@ -13,10 +13,114 @@ use crate::seqwait::MonotonicCounter;
|
||||
pub const XLOG_BLCKSZ: u32 = 8192;
|
||||
|
||||
/// A Postgres LSN (Log Sequence Number), also known as an XLogRecPtr
|
||||
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash)]
|
||||
pub struct Lsn(pub u64);
|
||||
|
||||
impl Serialize for Lsn {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
if serializer.is_human_readable() {
|
||||
serializer.collect_str(self)
|
||||
} else {
|
||||
self.0.serialize(serializer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Lsn {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
struct LsnVisitor {
|
||||
is_human_readable_deserializer: bool,
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for LsnVisitor {
|
||||
type Value = Lsn;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.is_human_readable_deserializer {
|
||||
formatter.write_str(
|
||||
"value in form of hex string({upper_u32_hex}/{lower_u32_hex}) representing u64 integer",
|
||||
)
|
||||
} else {
|
||||
formatter.write_str("value in form of integer(u64)")
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
Ok(Lsn(v))
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: serde::de::Error,
|
||||
{
|
||||
Lsn::from_str(v).map_err(|e| E::custom(e))
|
||||
}
|
||||
}
|
||||
|
||||
if deserializer.is_human_readable() {
|
||||
deserializer.deserialize_str(LsnVisitor {
|
||||
is_human_readable_deserializer: true,
|
||||
})
|
||||
} else {
|
||||
deserializer.deserialize_u64(LsnVisitor {
|
||||
is_human_readable_deserializer: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows (de)serialization of an `Lsn` always as `u64`.
|
||||
///
|
||||
/// ### Example
|
||||
///
|
||||
/// ```rust
|
||||
/// # use serde::{Serialize, Deserialize};
|
||||
/// use utils::lsn::Lsn;
|
||||
///
|
||||
/// #[derive(PartialEq, Serialize, Deserialize, Debug)]
|
||||
/// struct Foo {
|
||||
/// #[serde(with = "utils::lsn::serde_as_u64")]
|
||||
/// always_u64: Lsn,
|
||||
/// }
|
||||
///
|
||||
/// let orig = Foo { always_u64: Lsn(1234) };
|
||||
///
|
||||
/// let res = serde_json::to_string(&orig).unwrap();
|
||||
/// assert_eq!(res, r#"{"always_u64":1234}"#);
|
||||
///
|
||||
/// let foo = serde_json::from_str::<Foo>(&res).unwrap();
|
||||
/// assert_eq!(foo, orig);
|
||||
/// ```
|
||||
///
|
||||
pub mod serde_as_u64 {
|
||||
use super::Lsn;
|
||||
|
||||
/// Serializes the Lsn as u64 disregarding the human readability of the format.
|
||||
///
|
||||
/// Meant to be used via `#[serde(with = "...")]` or `#[serde(serialize_with = "...")]`.
|
||||
pub fn serialize<S: serde::Serializer>(lsn: &Lsn, serializer: S) -> Result<S::Ok, S::Error> {
|
||||
use serde::Serialize;
|
||||
lsn.0.serialize(serializer)
|
||||
}
|
||||
|
||||
/// Deserializes the Lsn as u64 disregarding the human readability of the format.
|
||||
///
|
||||
/// Meant to be used via `#[serde(with = "...")]` or `#[serde(deserialize_with = "...")]`.
|
||||
pub fn deserialize<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Lsn, D::Error> {
|
||||
use serde::Deserialize;
|
||||
u64::deserialize(deserializer).map(Lsn)
|
||||
}
|
||||
}
|
||||
|
||||
/// We tried to parse an LSN from a string, but failed
|
||||
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
|
||||
#[error("LsnParseError")]
|
||||
@@ -264,8 +368,13 @@ impl MonotonicCounter<Lsn> for RecordLsn {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::bin_ser::BeSer;
|
||||
|
||||
use super::*;
|
||||
|
||||
use serde::ser::Serialize;
|
||||
use serde_assert::{Deserializer, Serializer, Token, Tokens};
|
||||
|
||||
#[test]
|
||||
fn test_lsn_strings() {
|
||||
assert_eq!("12345678/AAAA5555".parse(), Ok(Lsn(0x12345678AAAA5555)));
|
||||
@@ -341,4 +450,95 @@ mod tests {
|
||||
assert_eq!(lsn.fetch_max(Lsn(6000)), Lsn(5678));
|
||||
assert_eq!(lsn.fetch_max(Lsn(5000)), Lsn(6000));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lsn_serde() {
|
||||
let original_lsn = Lsn(0x0123456789abcdef);
|
||||
let expected_readable_tokens = Tokens(vec![Token::U64(0x0123456789abcdef)]);
|
||||
let expected_non_readable_tokens =
|
||||
Tokens(vec![Token::Str(String::from("1234567/89ABCDEF"))]);
|
||||
|
||||
// Testing human_readable ser/de
|
||||
let serializer = Serializer::builder().is_human_readable(false).build();
|
||||
let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
|
||||
assert_eq!(readable_ser_tokens, expected_readable_tokens);
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(false)
|
||||
.tokens(readable_ser_tokens)
|
||||
.build();
|
||||
let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();
|
||||
assert_eq!(des_lsn, original_lsn);
|
||||
|
||||
// Testing NON human_readable ser/de
|
||||
let serializer = Serializer::builder().is_human_readable(true).build();
|
||||
let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
|
||||
assert_eq!(non_readable_ser_tokens, expected_non_readable_tokens);
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(true)
|
||||
.tokens(non_readable_ser_tokens)
|
||||
.build();
|
||||
let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();
|
||||
assert_eq!(des_lsn, original_lsn);
|
||||
|
||||
// Testing mismatching ser/de
|
||||
let serializer = Serializer::builder().is_human_readable(false).build();
|
||||
let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(true)
|
||||
.tokens(non_readable_ser_tokens)
|
||||
.build();
|
||||
Lsn::deserialize(&mut deserializer).unwrap_err();
|
||||
|
||||
let serializer = Serializer::builder().is_human_readable(true).build();
|
||||
let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(false)
|
||||
.tokens(readable_ser_tokens)
|
||||
.build();
|
||||
Lsn::deserialize(&mut deserializer).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lsn_ensure_roundtrip() {
|
||||
let original_lsn = Lsn(0xaaaabbbb);
|
||||
|
||||
let serializer = Serializer::builder().is_human_readable(false).build();
|
||||
let ser_tokens = original_lsn.serialize(&serializer).unwrap();
|
||||
|
||||
let mut deserializer = Deserializer::builder()
|
||||
.is_human_readable(false)
|
||||
.tokens(ser_tokens)
|
||||
.build();
|
||||
|
||||
let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();
|
||||
assert_eq!(des_lsn, original_lsn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lsn_bincode_serde() {
|
||||
let lsn = Lsn(0x0123456789abcdef);
|
||||
let expected_bytes = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef];
|
||||
|
||||
let ser_bytes = lsn.ser().unwrap();
|
||||
assert_eq!(ser_bytes, expected_bytes);
|
||||
|
||||
let des_lsn = Lsn::des(&ser_bytes).unwrap();
|
||||
assert_eq!(des_lsn, lsn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lsn_bincode_ensure_roundtrip() {
|
||||
let original_lsn = Lsn(0x01_02_03_04_05_06_07_08);
|
||||
let expected_bytes = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
|
||||
|
||||
let ser_bytes = original_lsn.ser().unwrap();
|
||||
assert_eq!(ser_bytes, expected_bytes);
|
||||
|
||||
let des_lsn = Lsn::des(&ser_bytes).unwrap();
|
||||
assert_eq!(des_lsn, original_lsn);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::time::{Duration, SystemTime};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use pq_proto::{read_cstr, PG_EPOCH};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use tracing::{trace, warn};
|
||||
|
||||
use crate::lsn::Lsn;
|
||||
@@ -15,21 +14,17 @@ use crate::lsn::Lsn;
|
||||
///
|
||||
/// serde Serialize is used only for human readable dump to json (e.g. in
|
||||
/// safekeepers debug_dump).
|
||||
#[serde_as]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct PageserverFeedback {
|
||||
/// Last known size of the timeline. Used to enforce timeline size limit.
|
||||
pub current_timeline_size: u64,
|
||||
/// LSN last received and ingested by the pageserver. Controls backpressure.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub last_received_lsn: Lsn,
|
||||
/// LSN up to which data is persisted by the pageserver to its local disc.
|
||||
/// Controls backpressure.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
/// LSN up to which data is persisted by the pageserver on s3; safekeepers
|
||||
/// consider WAL before it can be removed.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
// Serialize with RFC3339 format.
|
||||
#[serde(with = "serde_systemtime")]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/// Immediately terminate the calling process without calling
|
||||
/// atexit callbacks, C runtime destructors etc. We mainly use
|
||||
/// this to protect coverage data from concurrent writes.
|
||||
pub fn exit_now(code: u8) {
|
||||
pub fn exit_now(code: u8) -> ! {
|
||||
// SAFETY: exiting is safe, the ffi is not safe
|
||||
unsafe { nix::libc::_exit(code as _) };
|
||||
}
|
||||
|
||||
@@ -1 +1,3 @@
|
||||
pub mod heavier_once_cell;
|
||||
|
||||
pub mod gate;
|
||||
|
||||
158
libs/utils/src/sync/gate.rs
Normal file
158
libs/utils/src/sync/gate.rs
Normal file
@@ -0,0 +1,158 @@
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
/// Gates are a concurrency helper, primarily used for implementing safe shutdown.
|
||||
///
|
||||
/// Users of a resource call `enter()` to acquire a GateGuard, and the owner of
|
||||
/// the resource calls `close()` when they want to ensure that all holders of guards
|
||||
/// have released them, and that no future guards will be issued.
|
||||
pub struct Gate {
|
||||
/// Each caller of enter() takes one unit from the semaphore. In close(), we
|
||||
/// take all the units to ensure all GateGuards are destroyed.
|
||||
sem: Arc<tokio::sync::Semaphore>,
|
||||
|
||||
/// For observability only: a name that will be used to log warnings if a particular
|
||||
/// gate is holding up shutdown
|
||||
name: String,
|
||||
}
|
||||
|
||||
/// RAII guard for a [`Gate`]: as long as this exists, calls to [`Gate::close`] will
|
||||
/// not complete.
|
||||
#[derive(Debug)]
|
||||
pub struct GateGuard(tokio::sync::OwnedSemaphorePermit);
|
||||
|
||||
/// Observability helper: every `warn_period`, emit a log warning that we're still waiting on this gate
|
||||
async fn warn_if_stuck<Fut: std::future::Future>(
|
||||
fut: Fut,
|
||||
name: &str,
|
||||
warn_period: std::time::Duration,
|
||||
) -> <Fut as std::future::Future>::Output {
|
||||
let started = std::time::Instant::now();
|
||||
|
||||
let mut fut = std::pin::pin!(fut);
|
||||
|
||||
loop {
|
||||
match tokio::time::timeout(warn_period, &mut fut).await {
|
||||
Ok(ret) => return ret,
|
||||
Err(_) => {
|
||||
tracing::warn!(
|
||||
gate = name,
|
||||
elapsed_ms = started.elapsed().as_millis(),
|
||||
"still waiting, taking longer than expected..."
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum GateError {
|
||||
GateClosed,
|
||||
}
|
||||
|
||||
impl Gate {
|
||||
const MAX_UNITS: u32 = u32::MAX;
|
||||
|
||||
pub fn new(name: String) -> Self {
|
||||
Self {
|
||||
sem: Arc::new(tokio::sync::Semaphore::new(Self::MAX_UNITS as usize)),
|
||||
name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquire a guard that will prevent close() calls from completing. If close()
|
||||
/// was already called, this will return an error which should be interpreted
|
||||
/// as "shutting down".
|
||||
///
|
||||
/// This function would typically be used from e.g. request handlers. While holding
|
||||
/// the guard returned from this function, it is important to respect a CancellationToken
|
||||
/// to avoid blocking close() indefinitely: typically types that contain a Gate will
|
||||
/// also contain a CancellationToken.
|
||||
pub fn enter(&self) -> Result<GateGuard, GateError> {
|
||||
self.sem
|
||||
.clone()
|
||||
.try_acquire_owned()
|
||||
.map(GateGuard)
|
||||
.map_err(|_| GateError::GateClosed)
|
||||
}
|
||||
|
||||
/// Types with a shutdown() method and a gate should call this method at the
|
||||
/// end of shutdown, to ensure that all GateGuard holders are done.
|
||||
///
|
||||
/// This will wait for all guards to be destroyed. For this to complete promptly, it is
|
||||
/// important that the holders of such guards are respecting a CancellationToken which has
|
||||
/// been cancelled before entering this function.
|
||||
pub async fn close(&self) {
|
||||
warn_if_stuck(self.do_close(), &self.name, Duration::from_millis(1000)).await
|
||||
}
|
||||
|
||||
/// Check if [`Self::close()`] has finished waiting for all [`Self::enter()`] users to finish. This
|
||||
/// is usually analoguous for "Did shutdown finish?" for types that include a Gate, whereas checking
|
||||
/// the CancellationToken on such types is analogous to "Did shutdown start?"
|
||||
pub fn close_complete(&self) -> bool {
|
||||
self.sem.is_closed()
|
||||
}
|
||||
|
||||
async fn do_close(&self) {
|
||||
tracing::debug!(gate = self.name, "Closing Gate...");
|
||||
match self.sem.acquire_many(Self::MAX_UNITS).await {
|
||||
Ok(_units) => {
|
||||
// While holding all units, close the semaphore. All subsequent calls to enter() will fail.
|
||||
self.sem.close();
|
||||
}
|
||||
Err(_) => {
|
||||
// Semaphore closed: we are the only function that can do this, so it indicates a double-call.
|
||||
// This is legal. Timeline::shutdown for example is not protected from being called more than
|
||||
// once.
|
||||
tracing::debug!(gate = self.name, "Double close")
|
||||
}
|
||||
}
|
||||
tracing::debug!(gate = self.name, "Closed Gate.")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::FutureExt;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_idle_gate() {
|
||||
// Having taken no gates, we should not be blocked in close
|
||||
let gate = Gate::new("test".to_string());
|
||||
gate.close().await;
|
||||
|
||||
// If a guard is dropped before entering, close should not be blocked
|
||||
let gate = Gate::new("test".to_string());
|
||||
let guard = gate.enter().unwrap();
|
||||
drop(guard);
|
||||
gate.close().await;
|
||||
|
||||
// Entering a closed guard fails
|
||||
gate.enter().expect_err("enter should fail after close");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_busy_gate() {
|
||||
let gate = Gate::new("test".to_string());
|
||||
|
||||
let guard = gate.enter().unwrap();
|
||||
|
||||
let mut close_fut = std::pin::pin!(gate.close());
|
||||
|
||||
// Close should be blocked
|
||||
assert!(close_fut.as_mut().now_or_never().is_none());
|
||||
|
||||
// Attempting to enter() should fail, even though close isn't done yet.
|
||||
gate.enter()
|
||||
.expect_err("enter should fail after entering close");
|
||||
|
||||
drop(guard);
|
||||
|
||||
// Guard is gone, close should finish
|
||||
assert!(close_fut.as_mut().now_or_never().is_some());
|
||||
|
||||
// Attempting to enter() is still forbidden
|
||||
gate.enter().expect_err("enter should fail finishing close");
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,7 @@
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::sync::{
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
Arc, Mutex, MutexGuard,
|
||||
};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
/// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of
|
||||
@@ -10,6 +13,7 @@ use tokio::sync::Semaphore;
|
||||
/// [`OwnedSemaphorePermit`]: tokio::sync::OwnedSemaphorePermit
|
||||
pub struct OnceCell<T> {
|
||||
inner: Mutex<Inner<T>>,
|
||||
initializers: AtomicUsize,
|
||||
}
|
||||
|
||||
impl<T> Default for OnceCell<T> {
|
||||
@@ -17,6 +21,7 @@ impl<T> Default for OnceCell<T> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: Default::default(),
|
||||
initializers: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -49,6 +54,7 @@ impl<T> OnceCell<T> {
|
||||
init_semaphore: Arc::new(sem),
|
||||
value: Some(value),
|
||||
}),
|
||||
initializers: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +77,11 @@ impl<T> OnceCell<T> {
|
||||
guard.init_semaphore.clone()
|
||||
};
|
||||
|
||||
let permit = sem.acquire_owned().await;
|
||||
let permit = {
|
||||
// increment the count for the duration of queued
|
||||
let _guard = CountWaitingInitializers::start(self);
|
||||
sem.acquire_owned().await
|
||||
};
|
||||
|
||||
match permit {
|
||||
Ok(permit) => {
|
||||
@@ -100,12 +110,13 @@ impl<T> OnceCell<T> {
|
||||
///
|
||||
/// If the inner has already been initialized.
|
||||
pub fn set(&self, value: T, _permit: InitPermit) -> Guard<'_, T> {
|
||||
// cannot assert that this permit is for self.inner.semaphore
|
||||
let guard = self.inner.lock().unwrap();
|
||||
|
||||
// cannot assert that this permit is for self.inner.semaphore, but we can assert it cannot
|
||||
// give more permits right now.
|
||||
if guard.init_semaphore.try_acquire().is_ok() {
|
||||
drop(guard);
|
||||
panic!("semaphore is of wrong origin");
|
||||
panic!("permit is of wrong origin");
|
||||
}
|
||||
|
||||
Self::set0(value, guard)
|
||||
@@ -130,6 +141,28 @@ impl<T> OnceCell<T> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of [`Self::get_or_init`] calls waiting for initialization to complete.
|
||||
pub fn initializer_count(&self) -> usize {
|
||||
self.initializers.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
/// DropGuard counter for queued tasks waiting to initialize, mainly accessible for the
|
||||
/// initializing task for example at the end of initialization.
|
||||
struct CountWaitingInitializers<'a, T>(&'a OnceCell<T>);
|
||||
|
||||
impl<'a, T> CountWaitingInitializers<'a, T> {
|
||||
fn start(target: &'a OnceCell<T>) -> Self {
|
||||
target.initializers.fetch_add(1, Ordering::Relaxed);
|
||||
CountWaitingInitializers(target)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Drop for CountWaitingInitializers<'a, T> {
|
||||
fn drop(&mut self) {
|
||||
self.0.initializers.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Uninteresting guard object to allow short-lived access to inspect or clone the held,
|
||||
|
||||
37
libs/utils/src/timeout.rs
Normal file
37
libs/utils/src/timeout.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
pub enum TimeoutCancellableError {
|
||||
Timeout,
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
/// Wrap [`tokio::time::timeout`] with a CancellationToken.
|
||||
///
|
||||
/// This wrapper is appropriate for any long running operation in a task
|
||||
/// that ought to respect a CancellationToken (which means most tasks).
|
||||
///
|
||||
/// The only time you should use a bare tokio::timeout is when the future `F`
|
||||
/// itself respects a CancellationToken: otherwise, always use this wrapper
|
||||
/// with your CancellationToken to ensure that your task does not hold up
|
||||
/// graceful shutdown.
|
||||
pub async fn timeout_cancellable<F>(
|
||||
duration: Duration,
|
||||
cancel: &CancellationToken,
|
||||
future: F,
|
||||
) -> Result<F::Output, TimeoutCancellableError>
|
||||
where
|
||||
F: std::future::Future,
|
||||
{
|
||||
tokio::select!(
|
||||
r = tokio::time::timeout(duration, future) => {
|
||||
r.map_err(|_| TimeoutCancellableError::Timeout)
|
||||
|
||||
},
|
||||
_ = cancel.cancelled() => {
|
||||
Err(TimeoutCancellableError::Cancelled)
|
||||
|
||||
}
|
||||
)
|
||||
}
|
||||
@@ -19,13 +19,12 @@ inotify.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
sysinfo.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio = { workspace = true, features = ["rt-multi-thread"] }
|
||||
tokio-postgres.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
cgroups-rs = "0.3.3"
|
||||
|
||||
@@ -21,11 +21,6 @@ pub struct FileCacheState {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FileCacheConfig {
|
||||
/// Whether the file cache is *actually* stored in memory (e.g. by writing to
|
||||
/// a tmpfs or shmem file). If true, the size of the file cache will be counted against the
|
||||
/// memory available for the cgroup.
|
||||
pub(crate) in_memory: bool,
|
||||
|
||||
/// The size of the file cache, in terms of the size of the resource it consumes
|
||||
/// (currently: only memory)
|
||||
///
|
||||
@@ -59,22 +54,9 @@ pub struct FileCacheConfig {
|
||||
spread_factor: f64,
|
||||
}
|
||||
|
||||
impl FileCacheConfig {
|
||||
pub fn default_in_memory() -> Self {
|
||||
impl Default for FileCacheConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
in_memory: true,
|
||||
// 75 %
|
||||
resource_multiplier: 0.75,
|
||||
// 640 MiB; (512 + 128)
|
||||
min_remaining_after_cache: NonZeroU64::new(640 * MiB).unwrap(),
|
||||
// ensure any increase in file cache size is split 90-10 with 10% to other memory
|
||||
spread_factor: 0.1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_on_disk() -> Self {
|
||||
Self {
|
||||
in_memory: false,
|
||||
resource_multiplier: 0.75,
|
||||
// 256 MiB - lower than when in memory because overcommitting is safe; if we don't have
|
||||
// memory, the kernel will just evict from its page cache, rather than e.g. killing
|
||||
@@ -83,7 +65,9 @@ impl FileCacheConfig {
|
||||
spread_factor: 0.1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FileCacheConfig {
|
||||
/// Make sure fields of the config are consistent.
|
||||
pub fn validate(&self) -> anyhow::Result<()> {
|
||||
// Single field validity
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
#![cfg(target_os = "linux")]
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -39,16 +41,6 @@ pub struct Args {
|
||||
#[arg(short, long)]
|
||||
pub pgconnstr: Option<String>,
|
||||
|
||||
/// Flag to signal that the Postgres file cache is on disk (i.e. not in memory aside from the
|
||||
/// kernel's page cache), and therefore should not count against available memory.
|
||||
//
|
||||
// NB: Ideally this flag would directly refer to whether the file cache is in memory (rather
|
||||
// than a roundabout way, via whether it's on disk), but in order to be backwards compatible
|
||||
// during the switch away from an in-memory file cache, we had to default to the previous
|
||||
// behavior.
|
||||
#[arg(long)]
|
||||
pub file_cache_on_disk: bool,
|
||||
|
||||
/// The address we should listen on for connection requests. For the
|
||||
/// agent, this is 0.0.0.0:10301. For the informant, this is 127.0.0.1:10369.
|
||||
#[arg(short, long)]
|
||||
|
||||
@@ -156,10 +156,7 @@ impl Runner {
|
||||
// memory limits.
|
||||
if let Some(connstr) = &args.pgconnstr {
|
||||
info!("initializing file cache");
|
||||
let config = match args.file_cache_on_disk {
|
||||
true => FileCacheConfig::default_on_disk(),
|
||||
false => FileCacheConfig::default_in_memory(),
|
||||
};
|
||||
let config = FileCacheConfig::default();
|
||||
|
||||
let mut file_cache = FileCacheState::new(connstr, config, token.clone())
|
||||
.await
|
||||
@@ -187,10 +184,7 @@ impl Runner {
|
||||
info!("file cache size actually got set to {actual_size}")
|
||||
}
|
||||
|
||||
if args.file_cache_on_disk {
|
||||
file_cache_disk_size = actual_size;
|
||||
}
|
||||
|
||||
file_cache_disk_size = actual_size;
|
||||
state.filecache = Some(file_cache);
|
||||
}
|
||||
|
||||
@@ -239,17 +233,11 @@ impl Runner {
|
||||
|
||||
let requested_mem = target.mem;
|
||||
let usable_system_memory = requested_mem.saturating_sub(self.config.sys_buffer_bytes);
|
||||
let (expected_file_cache_size, expected_file_cache_disk_size) = self
|
||||
let expected_file_cache_size = self
|
||||
.filecache
|
||||
.as_ref()
|
||||
.map(|file_cache| {
|
||||
let size = file_cache.config.calculate_cache_size(usable_system_memory);
|
||||
match file_cache.config.in_memory {
|
||||
true => (size, 0),
|
||||
false => (size, size),
|
||||
}
|
||||
})
|
||||
.unwrap_or((0, 0));
|
||||
.map(|file_cache| file_cache.config.calculate_cache_size(usable_system_memory))
|
||||
.unwrap_or(0);
|
||||
if let Some(cgroup) = &self.cgroup {
|
||||
let (last_time, last_history) = *cgroup.watcher.borrow();
|
||||
|
||||
@@ -273,7 +261,7 @@ impl Runner {
|
||||
|
||||
let new_threshold = self
|
||||
.config
|
||||
.cgroup_threshold(usable_system_memory, expected_file_cache_disk_size);
|
||||
.cgroup_threshold(usable_system_memory, expected_file_cache_size);
|
||||
|
||||
let current = last_history.avg_non_reclaimable;
|
||||
|
||||
@@ -300,13 +288,10 @@ impl Runner {
|
||||
.set_file_cache_size(expected_file_cache_size)
|
||||
.await
|
||||
.context("failed to set file cache size")?;
|
||||
if !file_cache.config.in_memory {
|
||||
file_cache_disk_size = actual_usage;
|
||||
}
|
||||
file_cache_disk_size = actual_usage;
|
||||
let message = format!(
|
||||
"set file cache size to {} MiB (in memory = {})",
|
||||
"set file cache size to {} MiB",
|
||||
bytes_to_mebibytes(actual_usage),
|
||||
file_cache.config.in_memory,
|
||||
);
|
||||
info!("downscale: {message}");
|
||||
status.push(message);
|
||||
@@ -357,9 +342,7 @@ impl Runner {
|
||||
.set_file_cache_size(expected_usage)
|
||||
.await
|
||||
.context("failed to set file cache size")?;
|
||||
if !file_cache.config.in_memory {
|
||||
file_cache_disk_size = actual_usage;
|
||||
}
|
||||
file_cache_disk_size = actual_usage;
|
||||
|
||||
if actual_usage != expected_usage {
|
||||
warn!(
|
||||
|
||||
@@ -82,6 +82,8 @@ enum-map.workspace = true
|
||||
enumset.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion.workspace = true
|
||||
|
||||
245
pageserver/src/bin/getpage_bench_http.rs
Normal file
245
pageserver/src/bin/getpage_bench_http.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
use clap::Parser;
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Body, Client, Uri};
|
||||
use pageserver::{repository, tenant};
|
||||
use rand::prelude::*;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: Key,
|
||||
end: Key,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end.0.to_i128() - self.start.0.to_i128()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
ps_endpoint: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.ps_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
Arc::clone(&stats),
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
client: Client<HttpConnector, Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> {
|
||||
async move {
|
||||
let mut resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.ps_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
|
||||
let lsn = Arc::new(keyspace["at_lsn"].as_str().unwrap().to_owned());
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
KeyRange { start, end }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let (tx, mut rx) = channel::<i32>(1000);
|
||||
let tx = Arc::new(AsyncMutex::new(tx));
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let lsn = lsn.clone();
|
||||
let client = client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let stats = Arc::clone(&stats);
|
||||
let task = tokio::spawn(async move {
|
||||
for i in 0..args.num_requests {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key = rng.gen_range((r.start.0.to_i128()..r.end.0.to_i128()));
|
||||
key
|
||||
};
|
||||
let url = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/getpage?key={:036x}&lsn={}",
|
||||
args.ps_endpoint, tenant_id, timeline_id, key, lsn
|
||||
);
|
||||
let uri = url.parse::<Uri>().unwrap();
|
||||
let resp = client.get(uri).await.unwrap();
|
||||
stats.inc();
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
drop(tx);
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
(args.num_requests * args.num_tasks) as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
411
pageserver/src/bin/getpage_bench_libpq.rs
Normal file
411
pageserver/src/bin/getpage_bench_libpq.rs
Normal file
@@ -0,0 +1,411 @@
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use futures::{SinkExt, TryStreamExt};
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Client, Uri};
|
||||
use pageserver::page_cache::PAGE_SZ;
|
||||
use pageserver::pgdatadir_mapping::{is_rel_block_key, key_to_rel_block};
|
||||
use pageserver::{repository, tenant};
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
use rand::prelude::*;
|
||||
use scopeguard::defer;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::{Stream, StreamExt};
|
||||
use utils::completion;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: i128,
|
||||
end: i128,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end - self.start
|
||||
}
|
||||
}
|
||||
|
||||
struct RelTagBlockNo {
|
||||
rel_tag: RelTag,
|
||||
block_no: u32,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
mgmt_api_endpoint: String,
|
||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||
page_service_connstring: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
#[clap(subcommand)]
|
||||
mode: Mode,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser, Clone)]
|
||||
enum Mode {
|
||||
GetPage,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
// std::env::set_var("RUST_LOG", "info,tokio_postgres=trace");
|
||||
// tracing_subscriber::fmt::init();
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.mgmt_api_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.mgmt_api_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let stats = Arc::clone(&stats);
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
stats,
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
http_client: Client<HttpConnector, hyper::Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> + Send + Sync {
|
||||
async move {
|
||||
let mut resp = http_client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let lsn: Lsn = keyspace["at_lsn"].as_str().unwrap().parse().unwrap();
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter_map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
// filter out non-relblock keys
|
||||
match (is_rel_block_key(start.0), is_rel_block_key(end.0)) {
|
||||
(true, true) => Some(KeyRange {
|
||||
start: start.0.to_i128(),
|
||||
end: end.0.to_i128(),
|
||||
}),
|
||||
(true, false) | (false, true) => {
|
||||
unimplemented!("split up range")
|
||||
}
|
||||
(false, false) => None,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let client = http_client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let task = tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
let mut client = getpage_client::Client::new(
|
||||
args.page_service_connstring.clone(),
|
||||
tenant_id.clone(),
|
||||
timeline_id.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
for i in 0..args.num_requests {
|
||||
match args.mode {
|
||||
Mode::GetPage => {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key: i128 = rng.gen_range((r.start..r.end));
|
||||
let key = repository::Key::from_i128(key);
|
||||
// XXX filter these out when we iterate the keyspace
|
||||
assert!(
|
||||
is_rel_block_key(key),
|
||||
"we filter non-relblock keys out above"
|
||||
);
|
||||
let (rel_tag, block_no) =
|
||||
key_to_rel_block(key).expect("we just checked");
|
||||
RelTagBlockNo { rel_tag, block_no }
|
||||
};
|
||||
client
|
||||
.getpage(key, lsn)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"getpage for tenant {} timeline {}",
|
||||
tenant_id, timeline_id
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
Mode::NoOp => {
|
||||
client.noop().await.unwrap();
|
||||
}
|
||||
}
|
||||
stats.inc();
|
||||
}
|
||||
client.shutdown().await;
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod getpage_client {
|
||||
use std::pin::Pin;
|
||||
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
|
||||
PagestreamGetPageResponse,
|
||||
};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::RelTagBlockNo;
|
||||
|
||||
pub(crate) struct Client {
|
||||
copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
|
||||
conn_task: JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(
|
||||
connstring: String,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
) -> impl std::future::Future<Output = anyhow::Result<Self>> + Send {
|
||||
async move {
|
||||
let (client, connection) =
|
||||
tokio_postgres::connect(&connstring, postgres::NoTls).await?;
|
||||
|
||||
let conn_task_cancel = CancellationToken::new();
|
||||
let conn_task = tokio::spawn({
|
||||
let conn_task_cancel = conn_task_cancel.clone();
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = conn_task_cancel.cancelled() => {
|
||||
return;
|
||||
}
|
||||
res = connection => {
|
||||
res.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = client
|
||||
.copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
copy_both: Box::pin(copy_both),
|
||||
conn_task,
|
||||
cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn shutdown(mut self) {
|
||||
let _ = self.cancel_on_client_drop.take();
|
||||
self.conn_task.await.unwrap();
|
||||
}
|
||||
|
||||
pub async fn getpage(
|
||||
&mut self,
|
||||
key: RelTagBlockNo,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
let req = PagestreamGetPageRequest {
|
||||
latest: false,
|
||||
rel: key.rel_tag,
|
||||
blkno: key.block_no,
|
||||
lsn,
|
||||
};
|
||||
let req = PagestreamFeMessage::GetPage(req);
|
||||
match self.do_request(req).await? {
|
||||
PagestreamBeMessage::GetPage(p) => Ok(p),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn noop(&mut self) -> anyhow::Result<()> {
|
||||
match self.do_request(PagestreamFeMessage::NoOp).await? {
|
||||
PagestreamBeMessage::NoOp => Ok(()),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_request(
|
||||
&mut self,
|
||||
req: PagestreamFeMessage,
|
||||
) -> Result<PagestreamBeMessage, anyhow::Error> {
|
||||
let req: bytes::Bytes = req.serialize();
|
||||
// let mut req = tokio_util::io::ReaderStream::new(&req);
|
||||
let mut req = tokio_stream::once(Ok(req));
|
||||
|
||||
self.copy_both.send_all(&mut req).await?;
|
||||
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
|
||||
let next = next.unwrap().unwrap();
|
||||
|
||||
match PagestreamBeMessage::deserialize(next)? {
|
||||
PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
|
||||
x => Ok(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
109
pageserver/src/bin/noop_server.rs
Normal file
109
pageserver/src/bin/noop_server.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use anyhow::Context;
|
||||
use bytes::Buf;
|
||||
use clap::Parser;
|
||||
use pageserver_api::models::{PagestreamBeMessage, PagestreamErrorResponse, PagestreamFeMessage};
|
||||
use postgres_backend::{AuthType, PostgresBackend, QueryError};
|
||||
use pq_proto::{BeMessage, FeMessage};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
bind: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&args.bind).await.unwrap();
|
||||
loop {
|
||||
let (socket, _) = listener.accept().await.unwrap();
|
||||
tokio::spawn(async move {
|
||||
handle_connection(socket).await.unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_connection(socket: tokio::net::TcpStream) -> anyhow::Result<()> {
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("could not set TCP_NODELAY")?;
|
||||
|
||||
let peer_addr = socket.peer_addr().context("get peer address")?;
|
||||
let socket = tokio_io_timeout::TimeoutReader::new(socket);
|
||||
tokio::pin!(socket);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, AuthType::Trust, None)?;
|
||||
let mut conn_handler = NoOpHandler;
|
||||
let cancel = CancellationToken::new();
|
||||
pgbackend
|
||||
.run(&mut conn_handler, || {
|
||||
let cancel = cancel.clone();
|
||||
async move { cancel.cancelled().await }
|
||||
})
|
||||
.await?;
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
struct NoOpHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<IO> postgres_backend::Handler<IO> for NoOpHandler
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
fn startup(
|
||||
&mut self,
|
||||
_pgb: &mut PostgresBackend<IO>,
|
||||
_sm: &pq_proto::FeStartupPacket,
|
||||
) -> Result<(), QueryError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
query_string: &str,
|
||||
) -> Result<(), QueryError> {
|
||||
if !query_string.starts_with("pagestream ") {
|
||||
return Err(QueryError::Other(anyhow::anyhow!("not a pagestream query")));
|
||||
}
|
||||
|
||||
// switch client to COPYBOTH
|
||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||
pgb.flush().await?;
|
||||
|
||||
loop {
|
||||
let msg = pgb.read_message().await?;
|
||||
|
||||
let copy_data_bytes = match msg {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => return Ok(()),
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => return Ok(()), // client disconnected
|
||||
};
|
||||
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
let response = match neon_fe_msg {
|
||||
PagestreamFeMessage::NoOp => Ok(PagestreamBeMessage::NoOp),
|
||||
x => Err(QueryError::Other(anyhow::anyhow!(
|
||||
"this server only supports no-op: {x:?}"
|
||||
))),
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
|
||||
pgb.flush().await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -34,8 +34,11 @@ use postgres_backend::AuthType;
|
||||
use utils::logging::TracingErrorLayerEnablement;
|
||||
use utils::signals::ShutdownSignals;
|
||||
use utils::{
|
||||
auth::JwtAuth, logging, project_build_tag, project_git_version, sentry_init::init_sentry,
|
||||
signals::Signal, tcp_listener,
|
||||
auth::{JwtAuth, SwappableJwtAuth},
|
||||
logging, project_build_tag, project_git_version,
|
||||
sentry_init::init_sentry,
|
||||
signals::Signal,
|
||||
tcp_listener,
|
||||
};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
@@ -321,13 +324,12 @@ fn start_pageserver(
|
||||
let http_auth;
|
||||
let pg_auth;
|
||||
if conf.http_auth_type == AuthType::NeonJWT || conf.pg_auth_type == AuthType::NeonJWT {
|
||||
// unwrap is ok because check is performed when creating config, so path is set and file exists
|
||||
// unwrap is ok because check is performed when creating config, so path is set and exists
|
||||
let key_path = conf.auth_validation_public_key_path.as_ref().unwrap();
|
||||
info!(
|
||||
"Loading public key for verifying JWT tokens from {:#?}",
|
||||
key_path
|
||||
);
|
||||
let auth: Arc<JwtAuth> = Arc::new(JwtAuth::from_key_path(key_path)?);
|
||||
info!("Loading public key(s) for verifying JWT tokens from {key_path:?}");
|
||||
|
||||
let jwt_auth = JwtAuth::from_key_path(key_path)?;
|
||||
let auth: Arc<SwappableJwtAuth> = Arc::new(SwappableJwtAuth::new(jwt_auth));
|
||||
|
||||
http_auth = match &conf.http_auth_type {
|
||||
AuthType::Trust => None,
|
||||
@@ -410,7 +412,7 @@ fn start_pageserver(
|
||||
|
||||
// Scan the local 'tenants/' directory and start loading the tenants
|
||||
let deletion_queue_client = deletion_queue.new_client();
|
||||
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
|
||||
let tenant_manager = BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
|
||||
conf,
|
||||
TenantSharedResources {
|
||||
broker_client: broker_client.clone(),
|
||||
@@ -420,6 +422,7 @@ fn start_pageserver(
|
||||
order,
|
||||
shutdown_pageserver.clone(),
|
||||
))?;
|
||||
let tenant_manager = Arc::new(tenant_manager);
|
||||
|
||||
BACKGROUND_RUNTIME.spawn({
|
||||
let init_done_rx = init_done_rx;
|
||||
@@ -548,6 +551,7 @@ fn start_pageserver(
|
||||
let router_state = Arc::new(
|
||||
http::routes::State::new(
|
||||
conf,
|
||||
tenant_manager,
|
||||
http_auth.clone(),
|
||||
remote_storage.clone(),
|
||||
broker_client.clone(),
|
||||
|
||||
130
pageserver/src/bin/tokio_tcp_bench.rs
Normal file
130
pageserver/src/bin/tokio_tcp_bench.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use std::env::args;
|
||||
|
||||
use clap::Parser;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(subcommand)]
|
||||
mode: Mode,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
enum Mode {
|
||||
Client(Client),
|
||||
Server(Server),
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Client {
|
||||
num_tasks: usize,
|
||||
}
|
||||
#[derive(clap::Parser)]
|
||||
struct Server {}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static _ = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
match &args.mode {
|
||||
Mode::Client(x) => client::client(x).await,
|
||||
Mode::Server(x) => server::server(x).await,
|
||||
}
|
||||
}
|
||||
|
||||
mod client {
|
||||
use std::sync::{atomic::{Ordering, AtomicU64}, Arc};
|
||||
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
|
||||
use super::Client;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub(crate) async fn client(args: &'static Client) {
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0} RPS/client: {:.2}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64(),
|
||||
completed_requests as f64 / elapsed.as_secs_f64() / args.num_tasks as f64,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for _ in 0..args.num_tasks {
|
||||
let stats = Arc::clone(&stats);
|
||||
let t = tokio::spawn(client_task(args, stats));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
async fn client_task(args: &'static Client, stats: Arc<Stats>) -> anyhow::Result<()> {
|
||||
let mut conn = tokio::net::TcpStream::connect("localhost:65000").await?;
|
||||
conn.set_nodelay(true)?;
|
||||
|
||||
loop {
|
||||
let mut buf = [0u8; 1];
|
||||
conn.write_all(&buf).await?;
|
||||
conn.read_exact(&mut buf).await?;
|
||||
stats.inc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod server {
|
||||
|
||||
use anyhow::Context;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
|
||||
use super::Server;
|
||||
|
||||
pub(crate) async fn server(args: &'static Server) {
|
||||
let listener = tokio::net::TcpListener::bind("localhost:65000").await.unwrap();
|
||||
loop {
|
||||
let (socket, _) = listener.accept().await.unwrap();
|
||||
tokio::spawn(async move {
|
||||
server_handle_connection(args, socket).await.unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn server_handle_connection(
|
||||
args: &'static Server,
|
||||
socket: tokio::net::TcpStream,
|
||||
) -> anyhow::Result<()> {
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("could not set TCP_NODELAY")?;
|
||||
// let socket = tokio_io_timeout::TimeoutReader::new(socket);
|
||||
tokio::pin!(socket);
|
||||
|
||||
loop {
|
||||
let mut buf = [0u8; 4096];
|
||||
socket.read_exact(&mut buf).await?;
|
||||
socket.write_all(&buf).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -161,7 +161,7 @@ pub struct PageServerConf {
|
||||
pub http_auth_type: AuthType,
|
||||
/// authentication method for libpq connections from compute
|
||||
pub pg_auth_type: AuthType,
|
||||
/// Path to a file containing public key for verifying JWT tokens.
|
||||
/// Path to a file or directory containing public key(s) for verifying JWT tokens.
|
||||
/// Used for both mgmt and compute auth, if enabled.
|
||||
pub auth_validation_public_key_path: Option<Utf8PathBuf>,
|
||||
|
||||
|
||||
@@ -266,7 +266,7 @@ async fn calculate_synthetic_size_worker(
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(tenant) = mgr::get_tenant(tenant_id, true).await {
|
||||
if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
|
||||
// TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
|
||||
// We can put in some prioritization for consumption metrics.
|
||||
// Same for the loop that fetches computed metrics.
|
||||
|
||||
@@ -3,7 +3,6 @@ use anyhow::Context;
|
||||
use chrono::{DateTime, Utc};
|
||||
use consumption_metrics::EventType;
|
||||
use futures::stream::StreamExt;
|
||||
use serde_with::serde_as;
|
||||
use std::{sync::Arc, time::SystemTime};
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
@@ -42,13 +41,10 @@ pub(super) enum Name {
|
||||
///
|
||||
/// This is a denormalization done at the MetricsKey const methods; these should not be constructed
|
||||
/// elsewhere.
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub(crate) struct MetricsKey {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub(super) tenant_id: TenantId,
|
||||
|
||||
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) timeline_id: Option<TimelineId>,
|
||||
|
||||
@@ -206,7 +202,6 @@ pub(super) async fn collect_all_metrics(
|
||||
None
|
||||
} else {
|
||||
crate::tenant::mgr::get_tenant(id, true)
|
||||
.await
|
||||
.ok()
|
||||
.map(|tenant| (id, tenant))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE};
|
||||
use serde_with::serde_as;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::Instrument;
|
||||
|
||||
@@ -7,12 +6,9 @@ use super::{metrics::Name, Cache, MetricsKey, RawMetric};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
/// How the metrics from pageserver are identified.
|
||||
#[serde_with::serde_as]
|
||||
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]
|
||||
struct Ids {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub(super) tenant_id: TenantId,
|
||||
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) timeline_id: Option<TimelineId>,
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ use hex::FromHex;
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_with::serde_as;
|
||||
use thiserror::Error;
|
||||
use tokio;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -215,7 +214,6 @@ where
|
||||
/// during recovery as startup.
|
||||
const TEMP_SUFFIX: &str = "tmp";
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct DeletionList {
|
||||
/// Serialization version, for future use
|
||||
@@ -244,7 +242,6 @@ struct DeletionList {
|
||||
validated: bool,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct DeletionHeader {
|
||||
/// Serialization version, for future use
|
||||
@@ -348,7 +345,7 @@ impl DeletionList {
|
||||
result.extend(
|
||||
timeline_layers
|
||||
.into_iter()
|
||||
.map(|l| timeline_remote_path.join(&Utf8PathBuf::from(l))),
|
||||
.map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,21 +55,24 @@ impl Deleter {
|
||||
|
||||
/// Wrap the remote `delete_objects` with a failpoint
|
||||
async fn remote_delete(&self) -> Result<(), anyhow::Error> {
|
||||
fail::fail_point!("deletion-queue-before-execute", |_| {
|
||||
info!("Skipping execution, failpoint set");
|
||||
metrics::DELETION_QUEUE
|
||||
.remote_errors
|
||||
.with_label_values(&["failpoint"])
|
||||
.inc();
|
||||
Err(anyhow::anyhow!("failpoint hit"))
|
||||
});
|
||||
|
||||
// A backoff::retry is used here for two reasons:
|
||||
// - To provide a backoff rather than busy-polling the API on errors
|
||||
// - To absorb transient 429/503 conditions without hitting our error
|
||||
// logging path for issues deleting objects.
|
||||
backoff::retry(
|
||||
|| async { self.remote_storage.delete_objects(&self.accumulator).await },
|
||||
|| async {
|
||||
fail::fail_point!("deletion-queue-before-execute", |_| {
|
||||
info!("Skipping execution, failpoint set");
|
||||
|
||||
metrics::DELETION_QUEUE
|
||||
.remote_errors
|
||||
.with_label_values(&["failpoint"])
|
||||
.inc();
|
||||
Err(anyhow::anyhow!("failpoint: deletion-queue-before-execute"))
|
||||
});
|
||||
|
||||
self.remote_storage.delete_objects(&self.accumulator).await
|
||||
},
|
||||
|_| false,
|
||||
3,
|
||||
10,
|
||||
|
||||
@@ -403,7 +403,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
return (evicted_bytes, evictions_failed);
|
||||
};
|
||||
|
||||
let results = timeline.evict_layers(&batch, &cancel).await;
|
||||
let results = timeline.evict_layers(&batch).await;
|
||||
|
||||
match results {
|
||||
Ok(results) => {
|
||||
@@ -545,7 +545,7 @@ async fn collect_eviction_candidates(
|
||||
if cancel.is_cancelled() {
|
||||
return Ok(EvictionCandidates::Cancelled);
|
||||
}
|
||||
let tenant = match tenant::mgr::get_tenant(*tenant_id, true).await {
|
||||
let tenant = match tenant::mgr::get_tenant(*tenant_id, true) {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => {
|
||||
// this can happen if tenant has lifecycle transition after we fetched it
|
||||
@@ -554,6 +554,11 @@ async fn collect_eviction_candidates(
|
||||
}
|
||||
};
|
||||
|
||||
if tenant.cancel.is_cancelled() {
|
||||
info!(%tenant_id, "Skipping tenant for eviction, it is shutting down");
|
||||
continue;
|
||||
}
|
||||
|
||||
// collect layers from all timelines in this tenant
|
||||
//
|
||||
// If one of the timelines becomes `!is_active()` during the iteration,
|
||||
|
||||
@@ -52,6 +52,31 @@ paths:
|
||||
schema:
|
||||
type: object
|
||||
|
||||
/v1/reload_auth_validation_keys:
|
||||
post:
|
||||
description: Reloads the JWT public keys from their pre-configured location on disk.
|
||||
responses:
|
||||
"200":
|
||||
description: The reload completed successfully.
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error (also hits if no keys were found)
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
|
||||
@@ -17,10 +17,10 @@ use pageserver_api::models::{
|
||||
TenantLoadRequest, TenantLocationConfigRequest,
|
||||
};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use tenant_size_model::{SizeResult, StorageModel};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::auth::JwtAuth;
|
||||
use utils::http::endpoint::request_span;
|
||||
use utils::http::json::json_request_or_empty_body;
|
||||
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
|
||||
@@ -36,7 +36,8 @@ use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::config::{LocationConf, TenantConfOpt};
|
||||
use crate::tenant::mgr::{
|
||||
GetTenantError, SetNewTenantConfigError, TenantMapInsertError, TenantStateError,
|
||||
GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError,
|
||||
TenantSlotError, TenantSlotUpsertError, TenantStateError,
|
||||
};
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
@@ -45,7 +46,7 @@ use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, TenantSha
|
||||
use crate::{config::PageServerConf, tenant::mgr};
|
||||
use crate::{disk_usage_eviction_task, tenant};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
auth::SwappableJwtAuth,
|
||||
generation::Generation,
|
||||
http::{
|
||||
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with},
|
||||
@@ -63,7 +64,8 @@ use super::models::ConfigureFailpointsRequest;
|
||||
|
||||
pub struct State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
@@ -74,7 +76,8 @@ pub struct State {
|
||||
impl State {
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
@@ -86,6 +89,7 @@ impl State {
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Self {
|
||||
conf,
|
||||
tenant_manager,
|
||||
auth,
|
||||
allowlist_routes,
|
||||
remote_storage,
|
||||
@@ -147,28 +151,59 @@ impl From<PageReconstructError> for ApiError {
|
||||
impl From<TenantMapInsertError> for ApiError {
|
||||
fn from(tmie: TenantMapInsertError) -> ApiError {
|
||||
match tmie {
|
||||
TenantMapInsertError::StillInitializing | TenantMapInsertError::ShuttingDown => {
|
||||
ApiError::ResourceUnavailable(format!("{tmie}").into())
|
||||
}
|
||||
TenantMapInsertError::TenantAlreadyExists(id, state) => {
|
||||
ApiError::Conflict(format!("tenant {id} already exists, state: {state:?}"))
|
||||
}
|
||||
TenantMapInsertError::TenantExistsSecondary(id) => {
|
||||
ApiError::Conflict(format!("tenant {id} already exists as secondary"))
|
||||
}
|
||||
TenantMapInsertError::SlotError(e) => e.into(),
|
||||
TenantMapInsertError::SlotUpsertError(e) => e.into(),
|
||||
TenantMapInsertError::Other(e) => ApiError::InternalServerError(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TenantSlotError> for ApiError {
|
||||
fn from(e: TenantSlotError) -> ApiError {
|
||||
use TenantSlotError::*;
|
||||
match e {
|
||||
NotFound(tenant_id) => {
|
||||
ApiError::NotFound(anyhow::anyhow!("NotFound: tenant {tenant_id}").into())
|
||||
}
|
||||
e @ (AlreadyExists(_, _) | Conflict(_)) => ApiError::Conflict(format!("{e}")),
|
||||
InProgress => {
|
||||
ApiError::ResourceUnavailable("Tenant is being modified concurrently".into())
|
||||
}
|
||||
MapState(e) => e.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TenantSlotUpsertError> for ApiError {
|
||||
fn from(e: TenantSlotUpsertError) -> ApiError {
|
||||
use TenantSlotUpsertError::*;
|
||||
match e {
|
||||
InternalError(e) => ApiError::InternalServerError(anyhow::anyhow!("{e}")),
|
||||
MapState(e) => e.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TenantMapError> for ApiError {
|
||||
fn from(e: TenantMapError) -> ApiError {
|
||||
use TenantMapError::*;
|
||||
match e {
|
||||
StillInitializing | ShuttingDown => {
|
||||
ApiError::ResourceUnavailable(format!("{e}").into())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TenantStateError> for ApiError {
|
||||
fn from(tse: TenantStateError) -> ApiError {
|
||||
match tse {
|
||||
TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
||||
TenantStateError::IsStopping(_) => {
|
||||
ApiError::ResourceUnavailable("Tenant is stopping".into())
|
||||
}
|
||||
_ => ApiError::InternalServerError(anyhow::Error::new(tse)),
|
||||
TenantStateError::SlotError(e) => e.into(),
|
||||
TenantStateError::SlotUpsertError(e) => e.into(),
|
||||
TenantStateError::Other(e) => ApiError::InternalServerError(anyhow!(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -189,6 +224,7 @@ impl From<GetTenantError> for ApiError {
|
||||
// (We can produce this variant only in `mgr::get_tenant(..., active=true)` calls).
|
||||
ApiError::ResourceUnavailable("Tenant not yet active".into())
|
||||
}
|
||||
GetTenantError::MapState(e) => ApiError::ResourceUnavailable(format!("{e}").into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -243,6 +279,9 @@ impl From<crate::tenant::delete::DeleteTenantError> for ApiError {
|
||||
Get(g) => ApiError::from(g),
|
||||
e @ AlreadyInProgress => ApiError::Conflict(e.to_string()),
|
||||
Timeline(t) => ApiError::from(t),
|
||||
NotAttached => ApiError::NotFound(anyhow::anyhow!("Tenant is not attached").into()),
|
||||
SlotError(e) => e.into(),
|
||||
SlotUpsertError(e) => e.into(),
|
||||
Other(o) => ApiError::InternalServerError(o),
|
||||
e @ InvalidState(_) => ApiError::PreconditionFailed(e.to_string().into_boxed_str()),
|
||||
}
|
||||
@@ -354,6 +393,32 @@ async fn status_handler(
|
||||
json_response(StatusCode::OK, StatusResponse { id: config.id })
|
||||
}
|
||||
|
||||
async fn reload_auth_validation_keys_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
let config = get_config(&request);
|
||||
let state = get_state(&request);
|
||||
let Some(shared_auth) = &state.auth else {
|
||||
return json_response(StatusCode::BAD_REQUEST, ());
|
||||
};
|
||||
// unwrap is ok because check is performed when creating config, so path is set and exists
|
||||
let key_path = config.auth_validation_public_key_path.as_ref().unwrap();
|
||||
info!("Reloading public key(s) for verifying JWT tokens from {key_path:?}");
|
||||
|
||||
match JwtAuth::from_key_path(key_path) {
|
||||
Ok(new_auth) => {
|
||||
shared_auth.swap(new_auth);
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Error reloading public keys from {key_path:?}: {e:}");
|
||||
json_response(StatusCode::INTERNAL_SERVER_ERROR, ())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn timeline_create_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -369,7 +434,7 @@ async fn timeline_create_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
match tenant.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
@@ -397,6 +462,9 @@ async fn timeline_create_handler(
|
||||
Err(e @ tenant::CreateTimelineError::AncestorNotActive) => {
|
||||
json_response(StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg(e.to_string()))
|
||||
}
|
||||
Err(tenant::CreateTimelineError::ShuttingDown) => {
|
||||
json_response(StatusCode::SERVICE_UNAVAILABLE,HttpErrorBody::from_msg("tenant shutting down".to_string()))
|
||||
}
|
||||
Err(tenant::CreateTimelineError::Other(err)) => Err(ApiError::InternalServerError(err)),
|
||||
}
|
||||
}
|
||||
@@ -416,7 +484,7 @@ async fn timeline_list_handler(
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let response_data = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
let timelines = tenant.list_timelines();
|
||||
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
@@ -455,7 +523,7 @@ async fn timeline_detail_handler(
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let timeline_info = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, false)
|
||||
@@ -499,10 +567,8 @@ async fn get_lsn_by_timestamp_handler(
|
||||
let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;
|
||||
|
||||
if version.unwrap_or(0) > 1 {
|
||||
#[serde_as]
|
||||
#[derive(serde::Serialize)]
|
||||
struct Result {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn: Lsn,
|
||||
kind: &'static str,
|
||||
}
|
||||
@@ -681,6 +747,46 @@ async fn tenant_ignore_handler(
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_duplicate_handler(
|
||||
mut request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let src_tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let new_tenant_id = request_data.new_tenant_id;
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let _timer = STORAGE_TIME_GLOBAL
|
||||
.get_metric_with_label_values(&[StorageTimeOperation::DuplicateTenant.into()])
|
||||
.expect("bug")
|
||||
.start_timer();
|
||||
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let generation = get_request_generation(state, request_data.generation)?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
mgr::duplicate_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
src_tenant_id,
|
||||
new_tenant_id,
|
||||
generation,
|
||||
state.tenant_resources(),
|
||||
&ctx,
|
||||
&cancel,
|
||||
)
|
||||
.instrument(info_span!("tenant_duplicate", %src_tenant_id, tenant_id = %new_tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::CREATED, TenantCreateResponse(new_tenant_id))
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -713,7 +819,7 @@ async fn tenant_status(
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant_info = async {
|
||||
let tenant = mgr::get_tenant(tenant_id, false).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, false)?;
|
||||
|
||||
// Calculate total physical size of all timelines
|
||||
let mut current_physical_size = 0;
|
||||
@@ -776,7 +882,7 @@ async fn tenant_size_handler(
|
||||
let headers = request.headers();
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
|
||||
// this can be long operation
|
||||
let inputs = tenant
|
||||
@@ -811,10 +917,8 @@ async fn tenant_size_handler(
|
||||
}
|
||||
|
||||
/// The type resides in the pageserver not to expose `ModelInputs`.
|
||||
#[serde_with::serde_as]
|
||||
#[derive(serde::Serialize)]
|
||||
struct TenantHistorySize {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
id: TenantId,
|
||||
/// Size is a mixture of WAL and logical size, so the unit is bytes.
|
||||
///
|
||||
@@ -1035,7 +1139,7 @@ async fn get_tenant_config_handler(
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = mgr::get_tenant(tenant_id, false).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, false)?;
|
||||
|
||||
let response = HashMap::from([
|
||||
(
|
||||
@@ -1094,7 +1198,7 @@ async fn put_tenant_location_config_handler(
|
||||
.await
|
||||
{
|
||||
match e {
|
||||
TenantStateError::NotFound(_) => {
|
||||
TenantStateError::SlotError(TenantSlotError::NotFound(_)) => {
|
||||
// This API is idempotent: a NotFound on a detach is fine.
|
||||
}
|
||||
_ => return Err(e.into()),
|
||||
@@ -1106,20 +1210,14 @@ async fn put_tenant_location_config_handler(
|
||||
let location_conf =
|
||||
LocationConf::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
mgr::upsert_location(
|
||||
state.conf,
|
||||
tenant_id,
|
||||
location_conf,
|
||||
state.broker_client.clone(),
|
||||
state.remote_storage.clone(),
|
||||
state.deletion_queue_client.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
// TODO: badrequest assumes the caller was asking for something unreasonable, but in
|
||||
// principle we might have hit something like concurrent API calls to the same tenant,
|
||||
// which is not a 400 but a 409.
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
state
|
||||
.tenant_manager
|
||||
.upsert_location(tenant_id, location_conf, &ctx)
|
||||
.await
|
||||
// TODO: badrequest assumes the caller was asking for something unreasonable, but in
|
||||
// principle we might have hit something like concurrent API calls to the same tenant,
|
||||
// which is not a 400 but a 409.
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -1132,7 +1230,6 @@ async fn handle_tenant_break(
|
||||
let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
|
||||
|
||||
let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
|
||||
|
||||
tenant.set_broken("broken from test".to_owned()).await;
|
||||
@@ -1437,7 +1534,7 @@ async fn active_timeline_of_active_tenant(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<Arc<Timeline>, ApiError> {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
let tenant = mgr::get_tenant(tenant_id, true)?;
|
||||
tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(|e| ApiError::NotFound(e.into()))
|
||||
@@ -1662,7 +1759,7 @@ where
|
||||
pub fn make_router(
|
||||
state: Arc<State>,
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
|
||||
let spec = include_bytes!("openapi_spec.yml");
|
||||
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
|
||||
@@ -1691,6 +1788,9 @@ pub fn make_router(
|
||||
.put("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
})
|
||||
.post("/v1/reload_auth_validation_keys", |r| {
|
||||
api_handler(r, reload_auth_validation_keys_handler)
|
||||
})
|
||||
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
|
||||
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
|
||||
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
|
||||
@@ -1727,6 +1827,9 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
api_handler(r, tenant_ignore_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/duplicate", |r| {
|
||||
api_handler(r, tenant_duplicate_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
api_handler(r, timeline_detail_handler)
|
||||
})
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
mod auth;
|
||||
pub mod basebackup;
|
||||
pub mod config;
|
||||
@@ -61,14 +63,6 @@ pub async fn shutdown_pageserver(deletion_queue: Option<DeletionQueue>, exit_cod
|
||||
)
|
||||
.await;
|
||||
|
||||
// Shut down any page service tasks.
|
||||
timed(
|
||||
task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None),
|
||||
"shutdown PageRequestHandlers",
|
||||
Duration::from_secs(1),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Shut down all the tenants. This flushes everything to disk and kills
|
||||
// the checkpoint and GC tasks.
|
||||
timed(
|
||||
@@ -78,6 +72,15 @@ pub async fn shutdown_pageserver(deletion_queue: Option<DeletionQueue>, exit_cod
|
||||
)
|
||||
.await;
|
||||
|
||||
// Shut down any page service tasks: any in-progress work for particular timelines or tenants
|
||||
// should already have been canclled via mgr::shutdown_all_tenants
|
||||
timed(
|
||||
task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None),
|
||||
"shutdown PageRequestHandlers",
|
||||
Duration::from_secs(1),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Best effort to persist any outstanding deletions, to avoid leaking objects
|
||||
if let Some(mut deletion_queue) = deletion_queue {
|
||||
deletion_queue.shutdown(Duration::from_secs(5)).await;
|
||||
|
||||
@@ -51,6 +51,9 @@ pub enum StorageTimeOperation {
|
||||
|
||||
#[strum(serialize = "create tenant")]
|
||||
CreateTenant,
|
||||
|
||||
#[strum(serialize = "duplicate tenant")]
|
||||
DuplicateTenant,
|
||||
}
|
||||
|
||||
pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
|
||||
@@ -757,6 +760,7 @@ pub enum SmgrQueryType {
|
||||
GetRelSize,
|
||||
GetPageAtLsn,
|
||||
GetDbSize,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -962,6 +966,32 @@ static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) struct TenantManagerMetrics {
|
||||
pub(crate) tenant_slots: UIntGauge,
|
||||
pub(crate) tenant_slot_writes: IntCounter,
|
||||
pub(crate) unexpected_errors: IntCounter,
|
||||
}
|
||||
|
||||
pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
|
||||
TenantManagerMetrics {
|
||||
tenant_slots: register_uint_gauge!(
|
||||
"pageserver_tenant_manager_slots",
|
||||
"How many slots currently exist, including all attached, secondary and in-progress operations",
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
tenant_slot_writes: register_int_counter!(
|
||||
"pageserver_tenant_manager_slot_writes",
|
||||
"Writes to a tenant slot, including all of create/attach/detach/delete"
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
unexpected_errors: register_int_counter!(
|
||||
"pageserver_tenant_manager_unexpected_errors_total",
|
||||
"Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
}
|
||||
});
|
||||
|
||||
pub(crate) struct DeletionQueueMetrics {
|
||||
pub(crate) keys_submitted: IntCounter,
|
||||
pub(crate) keys_dropped: IntCounter,
|
||||
@@ -1884,6 +1914,9 @@ pub fn preinitialize_metrics() {
|
||||
// Deletion queue stats
|
||||
Lazy::force(&DELETION_QUEUE);
|
||||
|
||||
// Tenant manager stats
|
||||
Lazy::force(&TENANT_MANAGER);
|
||||
|
||||
// countervecs
|
||||
[&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
|
||||
.into_iter()
|
||||
|
||||
@@ -40,7 +40,7 @@ use tracing::field;
|
||||
use tracing::*;
|
||||
use utils::id::ConnectionId;
|
||||
use utils::{
|
||||
auth::{Claims, JwtAuth, Scope},
|
||||
auth::{Claims, Scope, SwappableJwtAuth},
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
simple_rcu::RcuReadGuard,
|
||||
@@ -55,16 +55,20 @@ use crate::metrics;
|
||||
use crate::metrics::LIVE_CONNECTIONS_COUNT;
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant;
|
||||
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::mgr;
|
||||
use crate::tenant::mgr::GetTenantError;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::tenant::mgr::get_active_tenant_with_timeout;
|
||||
use crate::tenant::mgr::GetActiveTenantError;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::trace::Tracer;
|
||||
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
// How long we may block waiting for a [`TenantSlot::InProgress`]` and/or a [`Tenant`] which
|
||||
// is not yet in state [`TenantState::Active`].
|
||||
const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);
|
||||
|
||||
/// Read the end of a tar archive.
|
||||
///
|
||||
/// A tar archive normally ends with two consecutive blocks of zeros, 512 bytes each.
|
||||
@@ -118,7 +122,7 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()
|
||||
pub async fn libpq_listener_main(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
listener: TcpListener,
|
||||
auth_type: AuthType,
|
||||
listener_ctx: RequestContext,
|
||||
@@ -186,7 +190,7 @@ pub async fn libpq_listener_main(
|
||||
async fn page_service_conn_main(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
socket: tokio::net::TcpStream,
|
||||
auth_type: AuthType,
|
||||
connection_ctx: RequestContext,
|
||||
@@ -223,13 +227,7 @@ async fn page_service_conn_main(
|
||||
// and create a child per-query context when it invokes process_query.
|
||||
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
|
||||
// and create the per-query context in process_query ourselves.
|
||||
let mut conn_handler = PageServerHandler::new(
|
||||
conf,
|
||||
broker_client,
|
||||
auth,
|
||||
connection_ctx,
|
||||
task_mgr::shutdown_token(),
|
||||
);
|
||||
let mut conn_handler = PageServerHandler::new(conf, broker_client, auth, connection_ctx);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
|
||||
|
||||
match pgbackend
|
||||
@@ -255,7 +253,7 @@ async fn page_service_conn_main(
|
||||
struct PageServerHandler {
|
||||
_conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
claims: Option<Claims>,
|
||||
|
||||
/// The context created for the lifetime of the connection
|
||||
@@ -263,19 +261,14 @@ struct PageServerHandler {
|
||||
/// For each query received over the connection,
|
||||
/// `process_query` creates a child context from this one.
|
||||
connection_ctx: RequestContext,
|
||||
|
||||
/// A token that should fire when the tenant transitions from
|
||||
/// attached state, or when the pageserver is shutting down.
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
connection_ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> Self {
|
||||
PageServerHandler {
|
||||
_conf: conf,
|
||||
@@ -283,7 +276,6 @@ impl PageServerHandler {
|
||||
auth,
|
||||
claims: None,
|
||||
connection_ctx,
|
||||
cancel,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -291,7 +283,11 @@ impl PageServerHandler {
|
||||
/// this rather than naked flush() in order to shut down promptly. Without this, we would
|
||||
/// block shutdown of a tenant if a postgres client was failing to consume bytes we send
|
||||
/// in the flush.
|
||||
async fn flush_cancellable<IO>(&self, pgb: &mut PostgresBackend<IO>) -> Result<(), QueryError>
|
||||
async fn flush_cancellable<IO>(
|
||||
&self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), QueryError>
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
@@ -299,7 +295,7 @@ impl PageServerHandler {
|
||||
flush_r = pgb.flush() => {
|
||||
Ok(flush_r?)
|
||||
},
|
||||
_ = self.cancel.cancelled() => {
|
||||
_ = cancel.cancelled() => {
|
||||
Err(QueryError::Shutdown)
|
||||
}
|
||||
)
|
||||
@@ -308,6 +304,7 @@ impl PageServerHandler {
|
||||
fn copyin_stream<'a, IO>(
|
||||
&'a self,
|
||||
pgb: &'a mut PostgresBackend<IO>,
|
||||
cancel: &'a CancellationToken,
|
||||
) -> impl Stream<Item = io::Result<Bytes>> + 'a
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
@@ -317,7 +314,7 @@ impl PageServerHandler {
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
|
||||
_ = self.cancel.cancelled() => {
|
||||
_ = cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
let msg = "pageserver is shutting down";
|
||||
let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, None));
|
||||
@@ -357,7 +354,7 @@ impl PageServerHandler {
|
||||
let query_error = QueryError::Disconnected(ConnectionError::Io(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
|
||||
// error can't happen here, ErrorResponse serialization should be always ok
|
||||
pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, Some(query_error.pg_error_code()))).map_err(|e| e.into_io_error())?;
|
||||
self.flush_cancellable(pgb).await.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
||||
self.flush_cancellable(pgb, cancel).await.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
|
||||
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
|
||||
}
|
||||
Err(QueryError::Disconnected(ConnectionError::Io(io_error))) => {
|
||||
@@ -384,12 +381,13 @@ impl PageServerHandler {
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// NOTE: pagerequests handler exits when connection is closed,
|
||||
// so there is no need to reset the association
|
||||
task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
|
||||
|
||||
// Make request tracer if needed
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
|
||||
let tenant = mgr::get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
let mut tracer = if tenant.get_trace_read_requests() {
|
||||
let connection_id = ConnectionId::generate();
|
||||
let path = tenant
|
||||
@@ -405,9 +403,14 @@ impl PageServerHandler {
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(|e| anyhow::anyhow!(e))?;
|
||||
|
||||
// Avoid starting new requests if the timeline has already started shutting down,
|
||||
// and block timeline shutdown until this request is complete, or drops out due
|
||||
// to cancellation.
|
||||
let _timeline_guard = timeline.gate.enter().map_err(|_| QueryError::Shutdown)?;
|
||||
|
||||
// switch client to COPYBOTH
|
||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
self.flush_cancellable(pgb, &timeline.cancel).await?;
|
||||
|
||||
let metrics = metrics::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
|
||||
|
||||
@@ -415,7 +418,7 @@ impl PageServerHandler {
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
|
||||
_ = self.cancel.cancelled() => {
|
||||
_ = timeline.cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown)
|
||||
@@ -488,11 +491,27 @@ impl PageServerHandler {
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::NoOp => {
|
||||
let _timer = metrics.start_timer(metrics::SmgrQueryType::NoOp);
|
||||
let span = tracing::info_span!("no_op");
|
||||
(Ok(PagestreamBeMessage::NoOp), span)
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = &response {
|
||||
if timeline.cancel.is_cancelled() {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
// connection.
|
||||
span.in_scope(|| info!("dropped response during shutdown: {e:#}"));
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
}
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
span.in_scope(|| error!("error reading relation or page version: {:#}", e));
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
@@ -500,7 +519,7 @@ impl PageServerHandler {
|
||||
});
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
self.flush_cancellable(pgb, &timeline.cancel).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -522,10 +541,14 @@ impl PageServerHandler {
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
|
||||
// Create empty timeline
|
||||
info!("creating new timeline");
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
let timeline = tenant
|
||||
.create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)
|
||||
.await?;
|
||||
@@ -543,9 +566,9 @@ impl PageServerHandler {
|
||||
// Import basebackup provided via CopyData
|
||||
info!("importing basebackup");
|
||||
pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
self.flush_cancellable(pgb, &tenant.cancel).await?;
|
||||
|
||||
let mut copyin_reader = pin!(StreamReader::new(self.copyin_stream(pgb)));
|
||||
let mut copyin_reader = pin!(StreamReader::new(self.copyin_stream(pgb, &tenant.cancel)));
|
||||
timeline
|
||||
.import_basebackup_from_tar(
|
||||
&mut copyin_reader,
|
||||
@@ -582,9 +605,10 @@ impl PageServerHandler {
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
|
||||
|
||||
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
|
||||
let timeline = self
|
||||
.get_active_tenant_timeline(tenant_id, timeline_id)
|
||||
.await?;
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
if last_record_lsn != start_lsn {
|
||||
return Err(QueryError::Other(
|
||||
@@ -598,8 +622,8 @@ impl PageServerHandler {
|
||||
// Import wal provided via CopyData
|
||||
info!("importing wal");
|
||||
pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
let mut copyin_reader = pin!(StreamReader::new(self.copyin_stream(pgb)));
|
||||
self.flush_cancellable(pgb, &timeline.cancel).await?;
|
||||
let mut copyin_reader = pin!(StreamReader::new(self.copyin_stream(pgb, &timeline.cancel)));
|
||||
import_wal_from_tar(&timeline, &mut copyin_reader, start_lsn, end_lsn, &ctx).await?;
|
||||
info!("wal import complete");
|
||||
|
||||
@@ -792,7 +816,9 @@ impl PageServerHandler {
|
||||
let started = std::time::Instant::now();
|
||||
|
||||
// check that the timeline exists
|
||||
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
|
||||
let timeline = self
|
||||
.get_active_tenant_timeline(tenant_id, timeline_id)
|
||||
.await?;
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
if let Some(lsn) = lsn {
|
||||
// Backup was requested at a particular LSN. Wait for it to arrive.
|
||||
@@ -807,7 +833,7 @@ impl PageServerHandler {
|
||||
|
||||
// switch client to COPYOUT
|
||||
pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
self.flush_cancellable(pgb, &timeline.cancel).await?;
|
||||
|
||||
// Send a tarball of the latest layer on the timeline. Compress if not
|
||||
// fullbackup. TODO Compress in that case too (tests need to be updated)
|
||||
@@ -859,7 +885,7 @@ impl PageServerHandler {
|
||||
}
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CopyDone)?;
|
||||
self.flush_cancellable(pgb).await?;
|
||||
self.flush_cancellable(pgb, &timeline.cancel).await?;
|
||||
|
||||
let basebackup_after = started
|
||||
.elapsed()
|
||||
@@ -891,6 +917,25 @@ impl PageServerHandler {
|
||||
.expect("claims presence already checked");
|
||||
check_permission(claims, tenant_id)
|
||||
}
|
||||
|
||||
/// Shorthand for getting a reference to a Timeline of an Active tenant.
|
||||
async fn get_active_tenant_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<Arc<Timeline>, GetActiveTimelineError> {
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await
|
||||
.map_err(GetActiveTimelineError::Tenant)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?;
|
||||
Ok(timeline)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -1048,7 +1093,9 @@ where
|
||||
.record("timeline_id", field::display(timeline_id));
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
|
||||
let timeline = self
|
||||
.get_active_tenant_timeline(tenant_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let end_of_timeline = timeline.get_last_record_rlsn();
|
||||
|
||||
@@ -1232,7 +1279,12 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
||||
RowDescriptor::int8_col(b"checkpoint_distance"),
|
||||
RowDescriptor::int8_col(b"checkpoint_timeout"),
|
||||
@@ -1278,67 +1330,16 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
enum GetActiveTenantError {
|
||||
#[error(
|
||||
"Timed out waiting {wait_time:?} for tenant active state. Latest state: {latest_state:?}"
|
||||
)]
|
||||
WaitForActiveTimeout {
|
||||
latest_state: TenantState,
|
||||
wait_time: Duration,
|
||||
},
|
||||
#[error(transparent)]
|
||||
NotFound(GetTenantError),
|
||||
#[error(transparent)]
|
||||
WaitTenantActive(tenant::WaitToBecomeActiveError),
|
||||
}
|
||||
|
||||
impl From<GetActiveTenantError> for QueryError {
|
||||
fn from(e: GetActiveTenantError) -> Self {
|
||||
match e {
|
||||
GetActiveTenantError::WaitForActiveTimeout { .. } => QueryError::Disconnected(
|
||||
ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),
|
||||
),
|
||||
GetActiveTenantError::WaitTenantActive(e) => QueryError::Other(anyhow::Error::new(e)),
|
||||
GetActiveTenantError::NotFound(e) => QueryError::Other(anyhow::Error::new(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get active tenant.
|
||||
///
|
||||
/// If the tenant is Loading, waits for it to become Active, for up to 30 s. That
|
||||
/// ensures that queries don't fail immediately after pageserver startup, because
|
||||
/// all tenants are still loading.
|
||||
async fn get_active_tenant_with_timeout(
|
||||
tenant_id: TenantId,
|
||||
_ctx: &RequestContext, /* require get a context to support cancellation in the future */
|
||||
) -> Result<Arc<Tenant>, GetActiveTenantError> {
|
||||
let tenant = match mgr::get_tenant(tenant_id, false).await {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e @ GetTenantError::NotFound(_)) => return Err(GetActiveTenantError::NotFound(e)),
|
||||
Err(GetTenantError::NotActive(_)) => {
|
||||
unreachable!("we're calling get_tenant with active_only=false")
|
||||
}
|
||||
Err(GetTenantError::Broken(_)) => {
|
||||
unreachable!("we're calling get_tenant with active_only=false")
|
||||
}
|
||||
};
|
||||
let wait_time = Duration::from_secs(30);
|
||||
match tokio::time::timeout(wait_time, tenant.wait_to_become_active()).await {
|
||||
Ok(Ok(())) => Ok(tenant),
|
||||
// no .context(), the error message is good enough and some tests depend on it
|
||||
Ok(Err(e)) => Err(GetActiveTenantError::WaitTenantActive(e)),
|
||||
Err(_) => {
|
||||
let latest_state = tenant.current_state();
|
||||
if latest_state == TenantState::Active {
|
||||
Ok(tenant)
|
||||
} else {
|
||||
Err(GetActiveTenantError::WaitForActiveTimeout {
|
||||
latest_state,
|
||||
wait_time,
|
||||
})
|
||||
GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {
|
||||
QueryError::Shutdown
|
||||
}
|
||||
e => QueryError::Other(anyhow::anyhow!(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1359,18 +1360,3 @@ impl From<GetActiveTimelineError> for QueryError {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorthand for getting a reference to a Timeline of an Active tenant.
|
||||
async fn get_active_tenant_timeline(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Timeline>, GetActiveTimelineError> {
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, ctx)
|
||||
.await
|
||||
.map_err(GetActiveTimelineError::Tenant)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?;
|
||||
Ok(timeline)
|
||||
}
|
||||
|
||||
@@ -44,6 +44,17 @@ pub enum CalculateLogicalSizeError {
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
impl From<PageReconstructError> for CalculateLogicalSizeError {
|
||||
fn from(pre: PageReconstructError) -> Self {
|
||||
match pre {
|
||||
PageReconstructError::AncestorStopping(_) | PageReconstructError::Cancelled => {
|
||||
Self::Cancelled
|
||||
}
|
||||
_ => Self::Other(pre.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum RelationError {
|
||||
#[error("Relation Already Exists")]
|
||||
@@ -573,24 +584,17 @@ impl Timeline {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// Fetch list of database dirs and iterate them
|
||||
let buf = self.get(DBDIR_KEY, lsn, ctx).await.context("read dbdir")?;
|
||||
let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
|
||||
let dbdir = DbDirectory::des(&buf).context("deserialize db directory")?;
|
||||
|
||||
let mut total_size: u64 = 0;
|
||||
for (spcnode, dbnode) in dbdir.dbdirs.keys() {
|
||||
for rel in self
|
||||
.list_rels(*spcnode, *dbnode, lsn, ctx)
|
||||
.await
|
||||
.context("list rels")?
|
||||
{
|
||||
for rel in self.list_rels(*spcnode, *dbnode, lsn, ctx).await? {
|
||||
if cancel.is_cancelled() {
|
||||
return Err(CalculateLogicalSizeError::Cancelled);
|
||||
}
|
||||
let relsize_key = rel_size_to_key(rel);
|
||||
let mut buf = self
|
||||
.get(relsize_key, lsn, ctx)
|
||||
.await
|
||||
.with_context(|| format!("read relation size of {rel:?}"))?;
|
||||
let mut buf = self.get(relsize_key, lsn, ctx).await?;
|
||||
let relsize = buf.get_u32_le();
|
||||
|
||||
total_size += relsize as u64;
|
||||
@@ -1696,6 +1700,7 @@ const AUX_FILES_KEY: Key = Key {
|
||||
// Reverse mappings for a few Keys.
|
||||
// These are needed by WAL redo manager.
|
||||
|
||||
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
|
||||
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x00 => (
|
||||
@@ -1711,7 +1716,8 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
})
|
||||
}
|
||||
|
||||
fn is_rel_block_key(key: Key) -> bool {
|
||||
/// See [[key_to_rel_block]].
|
||||
pub fn is_rel_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0
|
||||
}
|
||||
|
||||
|
||||
@@ -299,10 +299,6 @@ pub enum TaskKind {
|
||||
|
||||
#[derive(Default)]
|
||||
struct MutableTaskState {
|
||||
/// Tenant and timeline that this task is associated with.
|
||||
tenant_id: Option<TenantId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
|
||||
/// Handle for waiting for the task to exit. It can be None, if the
|
||||
/// the task has already exited.
|
||||
join_handle: Option<JoinHandle<()>>,
|
||||
@@ -319,6 +315,11 @@ struct PageServerTask {
|
||||
// To request task shutdown, just cancel this token.
|
||||
cancel: CancellationToken,
|
||||
|
||||
/// Tasks may optionally be launched for a particular tenant/timeline, enabling
|
||||
/// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
|
||||
tenant_id: Option<TenantId>,
|
||||
timeline_id: Option<TimelineId>,
|
||||
|
||||
mutable: Mutex<MutableTaskState>,
|
||||
}
|
||||
|
||||
@@ -344,11 +345,9 @@ where
|
||||
kind,
|
||||
name: name.to_string(),
|
||||
cancel: cancel.clone(),
|
||||
mutable: Mutex::new(MutableTaskState {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
join_handle: None,
|
||||
}),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
mutable: Mutex::new(MutableTaskState { join_handle: None }),
|
||||
});
|
||||
|
||||
TASKS.lock().unwrap().insert(task_id, Arc::clone(&task));
|
||||
@@ -418,8 +417,6 @@ async fn task_finish(
|
||||
|
||||
let mut shutdown_process = false;
|
||||
{
|
||||
let task_mut = task.mutable.lock().unwrap();
|
||||
|
||||
match result {
|
||||
Ok(Ok(())) => {
|
||||
debug!("Task '{}' exited normally", task_name);
|
||||
@@ -428,13 +425,13 @@ async fn task_finish(
|
||||
if shutdown_process_on_error {
|
||||
error!(
|
||||
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task_mut.tenant_id, task_mut.timeline_id, err
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
);
|
||||
shutdown_process = true;
|
||||
} else {
|
||||
error!(
|
||||
"Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
|
||||
task_name, task_mut.tenant_id, task_mut.timeline_id, err
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -442,13 +439,13 @@ async fn task_finish(
|
||||
if shutdown_process_on_error {
|
||||
error!(
|
||||
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task_mut.tenant_id, task_mut.timeline_id, err
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
);
|
||||
shutdown_process = true;
|
||||
} else {
|
||||
error!(
|
||||
"Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
|
||||
task_name, task_mut.tenant_id, task_mut.timeline_id, err
|
||||
task_name, task.tenant_id, task.timeline_id, err
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -460,17 +457,6 @@ async fn task_finish(
|
||||
}
|
||||
}
|
||||
|
||||
// expected to be called from the task of the given id.
|
||||
pub fn associate_with(tenant_id: Option<TenantId>, timeline_id: Option<TimelineId>) {
|
||||
CURRENT_TASK.with(|ct| {
|
||||
let mut task_mut = ct.mutable.lock().unwrap();
|
||||
task_mut.tenant_id = tenant_id;
|
||||
task_mut.timeline_id = timeline_id;
|
||||
});
|
||||
}
|
||||
|
||||
/// Is there a task running that matches the criteria
|
||||
|
||||
/// Signal and wait for tasks to shut down.
|
||||
///
|
||||
///
|
||||
@@ -493,17 +479,16 @@ pub async fn shutdown_tasks(
|
||||
{
|
||||
let tasks = TASKS.lock().unwrap();
|
||||
for task in tasks.values() {
|
||||
let task_mut = task.mutable.lock().unwrap();
|
||||
if (kind.is_none() || Some(task.kind) == kind)
|
||||
&& (tenant_id.is_none() || task_mut.tenant_id == tenant_id)
|
||||
&& (timeline_id.is_none() || task_mut.timeline_id == timeline_id)
|
||||
&& (tenant_id.is_none() || task.tenant_id == tenant_id)
|
||||
&& (timeline_id.is_none() || task.timeline_id == timeline_id)
|
||||
{
|
||||
task.cancel.cancel();
|
||||
victim_tasks.push((
|
||||
Arc::clone(task),
|
||||
task.kind,
|
||||
task_mut.tenant_id,
|
||||
task_mut.timeline_id,
|
||||
task.tenant_id,
|
||||
task.timeline_id,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ use tracing::*;
|
||||
use utils::completion;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::fs_ext;
|
||||
use utils::sync::gate::Gate;
|
||||
|
||||
use std::cmp::min;
|
||||
use std::collections::hash_map::Entry;
|
||||
@@ -54,6 +55,8 @@ use self::config::TenantConf;
|
||||
use self::delete::DeleteTenantFlow;
|
||||
use self::metadata::LoadMetadataError;
|
||||
use self::metadata::TimelineMetadata;
|
||||
use self::mgr::GetActiveTenantError;
|
||||
use self::mgr::GetTenantError;
|
||||
use self::mgr::TenantsMap;
|
||||
use self::remote_timeline_client::RemoteTimelineClient;
|
||||
use self::timeline::uninit::TimelineUninitMark;
|
||||
@@ -252,6 +255,20 @@ pub struct Tenant {
|
||||
eviction_task_tenant_state: tokio::sync::Mutex<EvictionTaskTenantState>,
|
||||
|
||||
pub(crate) delete_progress: Arc<tokio::sync::Mutex<DeleteTenantFlow>>,
|
||||
|
||||
// Cancellation token fires when we have entered shutdown(). This is a parent of
|
||||
// Timelines' cancellation token.
|
||||
pub(crate) cancel: CancellationToken,
|
||||
|
||||
// Users of the Tenant such as the page service must take this Gate to avoid
|
||||
// trying to use a Tenant which is shutting down.
|
||||
pub(crate) gate: Gate,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Tenant {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.tenant_id, self.current_state())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum WalRedoManager {
|
||||
@@ -359,34 +376,6 @@ impl Debug for SetStoppingError {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum WaitToBecomeActiveError {
|
||||
WillNotBecomeActive {
|
||||
tenant_id: TenantId,
|
||||
state: TenantState,
|
||||
},
|
||||
TenantDropped {
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
}
|
||||
|
||||
impl std::fmt::Display for WaitToBecomeActiveError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
WaitToBecomeActiveError::WillNotBecomeActive { tenant_id, state } => {
|
||||
write!(
|
||||
f,
|
||||
"Tenant {} will not become active. Current state: {:?}",
|
||||
tenant_id, state
|
||||
)
|
||||
}
|
||||
WaitToBecomeActiveError::TenantDropped { tenant_id } => {
|
||||
write!(f, "Tenant {tenant_id} will not become active (dropped)")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum CreateTimelineError {
|
||||
#[error("a timeline with the given ID already exists")]
|
||||
@@ -395,6 +384,8 @@ pub enum CreateTimelineError {
|
||||
AncestorLsn(anyhow::Error),
|
||||
#[error("ancestor timeline is not active")]
|
||||
AncestorNotActive,
|
||||
#[error("tenant shutting down")]
|
||||
ShuttingDown,
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
@@ -526,7 +517,7 @@ impl Tenant {
|
||||
resources: TenantSharedResources,
|
||||
attached_conf: AttachedTenantConf,
|
||||
init_order: Option<InitializationOrder>,
|
||||
tenants: &'static tokio::sync::RwLock<TenantsMap>,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
@@ -1524,6 +1515,11 @@ impl Tenant {
|
||||
)));
|
||||
}
|
||||
|
||||
let _gate = self
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|_| CreateTimelineError::ShuttingDown)?;
|
||||
|
||||
if let Ok(existing) = self.get_timeline(new_timeline_id, false) {
|
||||
debug!("timeline {new_timeline_id} already exists");
|
||||
|
||||
@@ -1808,6 +1804,7 @@ impl Tenant {
|
||||
freeze_and_flush: bool,
|
||||
) -> Result<(), completion::Barrier> {
|
||||
span::debug_assert_current_span_has_tenant_id();
|
||||
|
||||
// Set tenant (and its timlines) to Stoppping state.
|
||||
//
|
||||
// Since we can only transition into Stopping state after activation is complete,
|
||||
@@ -1833,6 +1830,7 @@ impl Tenant {
|
||||
}
|
||||
Err(SetStoppingError::AlreadyStopping(other)) => {
|
||||
// give caller the option to wait for this this shutdown
|
||||
info!("Tenant::shutdown: AlreadyStopping");
|
||||
return Err(other);
|
||||
}
|
||||
};
|
||||
@@ -1846,6 +1844,7 @@ impl Tenant {
|
||||
js.spawn(async move { timeline.shutdown(freeze_and_flush).instrument(span).await });
|
||||
})
|
||||
};
|
||||
tracing::info!("Waiting for timelines...");
|
||||
while let Some(res) = js.join_next().await {
|
||||
match res {
|
||||
Ok(()) => {}
|
||||
@@ -1855,12 +1854,21 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
// We cancel the Tenant's cancellation token _after_ the timelines have all shut down. This permits
|
||||
// them to continue to do work during their shutdown methods, e.g. flushing data.
|
||||
tracing::debug!("Cancelling CancellationToken");
|
||||
self.cancel.cancel();
|
||||
|
||||
// shutdown all tenant and timeline tasks: gc, compaction, page service
|
||||
// No new tasks will be started for this tenant because it's in `Stopping` state.
|
||||
//
|
||||
// this will additionally shutdown and await all timeline tasks.
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_id), None).await;
|
||||
|
||||
// Wait for any in-flight operations to complete
|
||||
self.gate.close().await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2021,7 +2029,7 @@ impl Tenant {
|
||||
self.state.subscribe()
|
||||
}
|
||||
|
||||
pub(crate) async fn wait_to_become_active(&self) -> Result<(), WaitToBecomeActiveError> {
|
||||
pub(crate) async fn wait_to_become_active(&self) -> Result<(), GetActiveTenantError> {
|
||||
let mut receiver = self.state.subscribe();
|
||||
loop {
|
||||
let current_state = receiver.borrow_and_update().clone();
|
||||
@@ -2029,11 +2037,9 @@ impl Tenant {
|
||||
TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => {
|
||||
// in these states, there's a chance that we can reach ::Active
|
||||
receiver.changed().await.map_err(
|
||||
|_e: tokio::sync::watch::error::RecvError| {
|
||||
WaitToBecomeActiveError::TenantDropped {
|
||||
tenant_id: self.tenant_id,
|
||||
}
|
||||
},
|
||||
|_e: tokio::sync::watch::error::RecvError|
|
||||
// Tenant existed but was dropped: report it as non-existent
|
||||
GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_id))
|
||||
)?;
|
||||
}
|
||||
TenantState::Active { .. } => {
|
||||
@@ -2041,10 +2047,7 @@ impl Tenant {
|
||||
}
|
||||
TenantState::Broken { .. } | TenantState::Stopping { .. } => {
|
||||
// There's no chance the tenant can transition back into ::Active
|
||||
return Err(WaitToBecomeActiveError::WillNotBecomeActive {
|
||||
tenant_id: self.tenant_id,
|
||||
state: current_state,
|
||||
});
|
||||
return Err(GetActiveTenantError::WillNotBecomeActive(current_state));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2110,6 +2113,9 @@ where
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
pub fn get_tenant_id(&self) -> TenantId {
|
||||
self.tenant_id
|
||||
}
|
||||
pub fn tenant_specific_overrides(&self) -> TenantConfOpt {
|
||||
self.tenant_conf.read().unwrap().tenant_conf
|
||||
}
|
||||
@@ -2267,6 +2273,7 @@ impl Tenant {
|
||||
initial_logical_size_can_start.cloned(),
|
||||
initial_logical_size_attempt.cloned().flatten(),
|
||||
state,
|
||||
self.cancel.child_token(),
|
||||
);
|
||||
|
||||
Ok(timeline)
|
||||
@@ -2356,6 +2363,8 @@ impl Tenant {
|
||||
cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)),
|
||||
eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()),
|
||||
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())),
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::new(format!("Tenant<{tenant_id}>")),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3692,7 +3701,7 @@ mod tests {
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
static TEST_KEY: Lazy<Key> =
|
||||
Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001")));
|
||||
Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_basic() -> anyhow::Result<()> {
|
||||
@@ -3788,9 +3797,9 @@ mod tests {
|
||||
let writer = tline.writer().await;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
let TEST_KEY_A: Key = Key::from_hex("112222222233333333444444445500000001").unwrap();
|
||||
let TEST_KEY_A: Key = Key::from_hex("110000000033333333444444445500000001").unwrap();
|
||||
#[allow(non_snake_case)]
|
||||
let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();
|
||||
let TEST_KEY_B: Key = Key::from_hex("110000000033333333444444445500000002").unwrap();
|
||||
|
||||
// Insert a value on the timeline
|
||||
writer
|
||||
@@ -4236,11 +4245,7 @@ mod tests {
|
||||
metadata_bytes[8] ^= 1;
|
||||
std::fs::write(metadata_path, metadata_bytes)?;
|
||||
|
||||
let err = harness
|
||||
.try_load_local(&ctx)
|
||||
.await
|
||||
.err()
|
||||
.expect("should fail");
|
||||
let err = harness.try_load_local(&ctx).await.expect_err("should fail");
|
||||
// get all the stack with all .context, not only the last one
|
||||
let message = format!("{err:#}");
|
||||
let expected = "failed to load metadata";
|
||||
@@ -4374,7 +4379,7 @@ mod tests {
|
||||
|
||||
let mut keyspace = KeySpaceAccum::new();
|
||||
|
||||
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
|
||||
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
let mut blknum = 0;
|
||||
for _ in 0..50 {
|
||||
for _ in 0..10000 {
|
||||
@@ -4420,7 +4425,7 @@ mod tests {
|
||||
|
||||
const NUM_KEYS: usize = 1000;
|
||||
|
||||
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
|
||||
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
|
||||
let mut keyspace = KeySpaceAccum::new();
|
||||
|
||||
@@ -4501,7 +4506,7 @@ mod tests {
|
||||
|
||||
const NUM_KEYS: usize = 1000;
|
||||
|
||||
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
|
||||
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
|
||||
let mut keyspace = KeySpaceAccum::new();
|
||||
|
||||
@@ -4592,7 +4597,7 @@ mod tests {
|
||||
const NUM_KEYS: usize = 100;
|
||||
const NUM_TLINES: usize = 50;
|
||||
|
||||
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
|
||||
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
|
||||
// Track page mutation lsns across different timelines.
|
||||
let mut updated = [[Lsn(0); NUM_KEYS]; NUM_TLINES];
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
mgr::{GetTenantError, TenantsMap},
|
||||
mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
|
||||
remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
|
||||
span,
|
||||
timeline::delete::DeleteTimelineFlow,
|
||||
@@ -33,12 +33,21 @@ pub(crate) enum DeleteTenantError {
|
||||
#[error("GetTenant {0}")]
|
||||
Get(#[from] GetTenantError),
|
||||
|
||||
#[error("Tenant not attached")]
|
||||
NotAttached,
|
||||
|
||||
#[error("Invalid state {0}. Expected Active or Broken")]
|
||||
InvalidState(TenantState),
|
||||
|
||||
#[error("Tenant deletion is already in progress")]
|
||||
AlreadyInProgress,
|
||||
|
||||
#[error("Tenant map slot error {0}")]
|
||||
SlotError(#[from] TenantSlotError),
|
||||
|
||||
#[error("Tenant map slot upsert error {0}")]
|
||||
SlotUpsertError(#[from] TenantSlotUpsertError),
|
||||
|
||||
#[error("Timeline {0}")]
|
||||
Timeline(#[from] DeleteTimelineError),
|
||||
|
||||
@@ -273,12 +282,12 @@ impl DeleteTenantFlow {
|
||||
pub(crate) async fn run(
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
tenants: &'static tokio::sync::RwLock<TenantsMap>,
|
||||
tenant_id: TenantId,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: Arc<Tenant>,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
span::debug_assert_current_span_has_tenant_id();
|
||||
|
||||
let (tenant, mut guard) = Self::prepare(tenants, tenant_id).await?;
|
||||
let mut guard = Self::prepare(&tenant).await?;
|
||||
|
||||
if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await {
|
||||
tenant.set_broken(format!("{e:#}")).await;
|
||||
@@ -378,7 +387,7 @@ impl DeleteTenantFlow {
|
||||
guard: DeletionGuard,
|
||||
tenant: &Arc<Tenant>,
|
||||
preload: Option<TenantPreload>,
|
||||
tenants: &'static tokio::sync::RwLock<TenantsMap>,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
init_order: Option<InitializationOrder>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
@@ -405,15 +414,8 @@ impl DeleteTenantFlow {
|
||||
}
|
||||
|
||||
async fn prepare(
|
||||
tenants: &tokio::sync::RwLock<TenantsMap>,
|
||||
tenant_id: TenantId,
|
||||
) -> Result<(Arc<Tenant>, tokio::sync::OwnedMutexGuard<Self>), DeleteTenantError> {
|
||||
let m = tenants.read().await;
|
||||
|
||||
let tenant = m
|
||||
.get(&tenant_id)
|
||||
.ok_or(GetTenantError::NotFound(tenant_id))?;
|
||||
|
||||
tenant: &Arc<Tenant>,
|
||||
) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
|
||||
// FIXME: unsure about active only. Our init jobs may not be cancellable properly,
|
||||
// so at least for now allow deletions only for active tenants. TODO recheck
|
||||
// Broken and Stopping is needed for retries.
|
||||
@@ -447,14 +449,14 @@ impl DeleteTenantFlow {
|
||||
)));
|
||||
}
|
||||
|
||||
Ok((Arc::clone(tenant), guard))
|
||||
Ok(guard)
|
||||
}
|
||||
|
||||
fn schedule_background(
|
||||
guard: OwnedMutexGuard<Self>,
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
tenants: &'static tokio::sync::RwLock<TenantsMap>,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: Arc<Tenant>,
|
||||
) {
|
||||
let tenant_id = tenant.tenant_id;
|
||||
@@ -487,7 +489,7 @@ impl DeleteTenantFlow {
|
||||
mut guard: OwnedMutexGuard<Self>,
|
||||
conf: &PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
tenants: &'static tokio::sync::RwLock<TenantsMap>,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: &Arc<Tenant>,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
// Tree sort timelines, schedule delete for them. Mention retries from the console side.
|
||||
@@ -535,10 +537,18 @@ impl DeleteTenantFlow {
|
||||
.await
|
||||
.context("cleanup_remaining_fs_traces")?;
|
||||
|
||||
let mut locked = tenants.write().await;
|
||||
if locked.remove(&tenant.tenant_id).is_none() {
|
||||
warn!("Tenant got removed from tenants map during deletion");
|
||||
};
|
||||
{
|
||||
let mut locked = tenants.write().unwrap();
|
||||
if locked.remove(&tenant.tenant_id).is_none() {
|
||||
warn!("Tenant got removed from tenants map during deletion");
|
||||
};
|
||||
|
||||
// FIXME: we should not be modifying this from outside of mgr.rs.
|
||||
// This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
|
||||
crate::metrics::TENANT_MANAGER
|
||||
.tenant_slots
|
||||
.set(locked.len() as u64);
|
||||
}
|
||||
|
||||
*guard = Self::Finished;
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -406,4 +406,123 @@ mod tests {
|
||||
METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_metadata_bincode_serde() {
|
||||
let original_metadata = TimelineMetadata::new(
|
||||
Lsn(0x200),
|
||||
Some(Lsn(0x100)),
|
||||
Some(TIMELINE_ID),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
// Any version will do here, so use the default
|
||||
crate::DEFAULT_PG_VERSION,
|
||||
);
|
||||
let metadata_bytes = original_metadata
|
||||
.to_bytes()
|
||||
.expect("Cannot create bytes array from metadata");
|
||||
|
||||
let metadata_bincode_be_bytes = original_metadata
|
||||
.ser()
|
||||
.expect("Cannot serialize the metadata");
|
||||
|
||||
// 8 bytes for the length of the vector
|
||||
assert_eq!(metadata_bincode_be_bytes.len(), 8 + metadata_bytes.len());
|
||||
|
||||
let expected_bincode_bytes = {
|
||||
let mut temp = vec![];
|
||||
let len_bytes = metadata_bytes.len().to_be_bytes();
|
||||
temp.extend_from_slice(&len_bytes);
|
||||
temp.extend_from_slice(&metadata_bytes);
|
||||
temp
|
||||
};
|
||||
assert_eq!(metadata_bincode_be_bytes, expected_bincode_bytes);
|
||||
|
||||
let deserialized_metadata = TimelineMetadata::des(&metadata_bincode_be_bytes).unwrap();
|
||||
// Deserialized metadata has the metadata header, which is different from the serialized one.
|
||||
// Reference: TimelineMetaData::to_bytes()
|
||||
let expected_metadata = {
|
||||
let mut temp_metadata = original_metadata;
|
||||
let body_bytes = temp_metadata
|
||||
.body
|
||||
.ser()
|
||||
.expect("Cannot serialize the metadata body");
|
||||
let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
|
||||
let hdr = TimelineMetadataHeader {
|
||||
size: metadata_size as u16,
|
||||
format_version: METADATA_FORMAT_VERSION,
|
||||
checksum: crc32c::crc32c(&body_bytes),
|
||||
};
|
||||
temp_metadata.hdr = hdr;
|
||||
temp_metadata
|
||||
};
|
||||
assert_eq!(deserialized_metadata, expected_metadata);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_metadata_bincode_serde_ensure_roundtrip() {
|
||||
let original_metadata = TimelineMetadata::new(
|
||||
Lsn(0x200),
|
||||
Some(Lsn(0x100)),
|
||||
Some(TIMELINE_ID),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
// Any version will do here, so use the default
|
||||
crate::DEFAULT_PG_VERSION,
|
||||
);
|
||||
let expected_bytes = vec![
|
||||
/* bincode length encoding bytes */
|
||||
0, 0, 0, 0, 0, 0, 2, 0, // 8 bytes for the length of the serialized vector
|
||||
/* TimelineMetadataHeader */
|
||||
4, 37, 101, 34, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)
|
||||
/* TimelineMetadataBodyV2 */
|
||||
0, 0, 0, 0, 0, 0, 2, 0, // disk_consistent_lsn (8 bytes)
|
||||
1, 0, 0, 0, 0, 0, 0, 1, 0, // prev_record_lsn (9 bytes)
|
||||
1, 17, 34, 51, 68, 85, 102, 119, 136, 17, 34, 51, 68, 85, 102, 119,
|
||||
136, // ancestor_timeline (17 bytes)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // ancestor_lsn (8 bytes)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // latest_gc_cutoff_lsn (8 bytes)
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // initdb_lsn (8 bytes)
|
||||
0, 0, 0, 15, // pg_version (4 bytes)
|
||||
/* padding bytes */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
];
|
||||
let metadata_ser_bytes = original_metadata.ser().unwrap();
|
||||
assert_eq!(metadata_ser_bytes, expected_bytes);
|
||||
|
||||
let expected_metadata = {
|
||||
let mut temp_metadata = original_metadata;
|
||||
let body_bytes = temp_metadata
|
||||
.body
|
||||
.ser()
|
||||
.expect("Cannot serialize the metadata body");
|
||||
let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
|
||||
let hdr = TimelineMetadataHeader {
|
||||
size: metadata_size as u16,
|
||||
format_version: METADATA_FORMAT_VERSION,
|
||||
checksum: crc32c::crc32c(&body_bytes),
|
||||
};
|
||||
temp_metadata.hdr = hdr;
|
||||
temp_metadata
|
||||
};
|
||||
let des_metadata = TimelineMetadata::des(&metadata_ser_bytes).unwrap();
|
||||
assert_eq!(des_metadata, expected_metadata);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1542,7 +1542,7 @@ pub fn remote_index_path(
|
||||
}
|
||||
|
||||
/// Given the key of an index, parse out the generation part of the name
|
||||
pub(crate) fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
|
||||
pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
|
||||
let file_name = match path.get_path().file_name() {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
|
||||
@@ -6,7 +6,6 @@ use std::collections::HashMap;
|
||||
|
||||
use chrono::NaiveDateTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::bin_ser::SerializeError;
|
||||
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
@@ -58,7 +57,6 @@ impl LayerFileMetadata {
|
||||
///
|
||||
/// This type needs to be backwards and forwards compatible. When changing the fields,
|
||||
/// remember to add a test case for the changed version.
|
||||
#[serde_as]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
pub struct IndexPart {
|
||||
/// Debugging aid describing the version of this type.
|
||||
@@ -78,7 +76,6 @@ pub struct IndexPart {
|
||||
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
|
||||
// It's duplicated for convenience when reading the serialized structure, but is
|
||||
// private because internally we would read from metadata instead.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
disk_consistent_lsn: Lsn,
|
||||
|
||||
#[serde(rename = "metadata_bytes")]
|
||||
@@ -155,7 +152,7 @@ pub struct IndexLayerMetadata {
|
||||
|
||||
#[serde(default = "Generation::none")]
|
||||
#[serde(skip_serializing_if = "Generation::is_none")]
|
||||
pub(super) generation: Generation,
|
||||
pub generation: Generation,
|
||||
}
|
||||
|
||||
impl From<LayerFileMetadata> for IndexLayerMetadata {
|
||||
|
||||
@@ -29,7 +29,6 @@ use tenant_size_model::{Segment, StorageModel};
|
||||
/// needs. We will convert this into a StorageModel when it's time to perform
|
||||
/// the calculation.
|
||||
///
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct ModelInputs {
|
||||
pub segments: Vec<SegmentMeta>,
|
||||
@@ -37,11 +36,9 @@ pub struct ModelInputs {
|
||||
}
|
||||
|
||||
/// A [`Segment`], with some extra information for display purposes
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct SegmentMeta {
|
||||
pub segment: Segment,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
pub kind: LsnKind,
|
||||
}
|
||||
@@ -77,32 +74,22 @@ pub enum LsnKind {
|
||||
|
||||
/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as
|
||||
/// part of [`ModelInputs`] from the HTTP api, explaining the inputs.
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct TimelineInputs {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
|
||||
pub ancestor_id: Option<TimelineId>,
|
||||
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
ancestor_lsn: Lsn,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
last_record: Lsn,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
latest_gc_cutoff: Lsn,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
horizon_cutoff: Lsn,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pitr_cutoff: Lsn,
|
||||
|
||||
/// Cutoff point based on GC settings
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
next_gc_cutoff: Lsn,
|
||||
|
||||
/// Cutoff point calculated from the user-supplied 'max_retention_period'
|
||||
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
|
||||
retention_param_cutoff: Option<Lsn>,
|
||||
}
|
||||
|
||||
@@ -419,10 +406,12 @@ async fn fill_logical_sizes(
|
||||
have_any_error = true;
|
||||
}
|
||||
Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Err(error)))) => {
|
||||
warn!(
|
||||
timeline_id=%timeline.timeline_id,
|
||||
"failed to calculate logical size at {lsn}: {error:#}"
|
||||
);
|
||||
if !matches!(error, CalculateLogicalSizeError::Cancelled) {
|
||||
warn!(
|
||||
timeline_id=%timeline.timeline_id,
|
||||
"failed to calculate logical size at {lsn}: {error:#}"
|
||||
);
|
||||
}
|
||||
have_any_error = true;
|
||||
}
|
||||
Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Ok(size)))) => {
|
||||
|
||||
@@ -609,6 +609,49 @@ impl Drop for DeltaLayerWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != DELTA_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -294,6 +294,49 @@ impl ImageLayer {
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != IMAGE_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -345,14 +345,19 @@ impl InMemoryLayer {
|
||||
|
||||
let cursor = inner.file.block_cursor();
|
||||
|
||||
let mut keys: Vec<(&Key, &VecMap<Lsn, u64>)> = inner.index.iter().collect();
|
||||
keys.sort_by_key(|k| k.0);
|
||||
// Sort the keys because delta layer writer expects them sorted.
|
||||
//
|
||||
// NOTE: this sort can take up significant time if the layer has millions of
|
||||
// keys. To speed up all the comparisons we convert the key to i128 and
|
||||
// keep the value as a reference.
|
||||
let mut keys: Vec<_> = inner.index.iter().map(|(k, m)| (k.to_i128(), m)).collect();
|
||||
keys.sort_unstable_by_key(|k| k.0);
|
||||
|
||||
let ctx = RequestContextBuilder::extend(ctx)
|
||||
.page_content_kind(PageContentKind::InMemoryLayer)
|
||||
.build();
|
||||
for (key, vec_map) in keys.iter() {
|
||||
let key = **key;
|
||||
let key = Key::from_i128(*key);
|
||||
// Write all page versions
|
||||
for (lsn, pos) in vec_map.as_slice() {
|
||||
cursor.read_blob_into_buf(*pos, &mut buf, &ctx).await?;
|
||||
|
||||
@@ -337,18 +337,16 @@ enum ResidentOrWantedEvicted {
|
||||
}
|
||||
|
||||
impl ResidentOrWantedEvicted {
|
||||
/// If `Some` is returned, the ResidentOrWantedEvicted has been upgraded back from
|
||||
/// `ResidentOrWantedEvicted::WantedEvicted` to `ResidentOrWantedEvicted::Resident`.
|
||||
fn get_and_upgrade(&mut self) -> Option<Arc<DownloadedLayer>> {
|
||||
fn get_and_upgrade(&mut self) -> Option<(Arc<DownloadedLayer>, bool)> {
|
||||
match self {
|
||||
ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()),
|
||||
ResidentOrWantedEvicted::Resident(strong) => Some((strong.clone(), false)),
|
||||
ResidentOrWantedEvicted::WantedEvicted(weak, _) => match weak.upgrade() {
|
||||
Some(strong) => {
|
||||
LAYER_IMPL_METRICS.inc_raced_wanted_evicted_accesses();
|
||||
|
||||
*self = ResidentOrWantedEvicted::Resident(strong.clone());
|
||||
|
||||
Some(strong)
|
||||
Some((strong, true))
|
||||
}
|
||||
None => None,
|
||||
},
|
||||
@@ -413,6 +411,10 @@ struct LayerInner {
|
||||
version: AtomicUsize,
|
||||
|
||||
/// Allow subscribing to when the layer actually gets evicted.
|
||||
///
|
||||
/// If in future we need to implement "wait until layer instances are gone and done", carrying
|
||||
/// this over to the gc spawn_blocking from LayerInner::drop will do the trick, and adding a
|
||||
/// method for "wait_gc" which will wait to this being closed.
|
||||
status: tokio::sync::broadcast::Sender<Status>,
|
||||
|
||||
/// Counter for exponential backoff with the download
|
||||
@@ -563,6 +565,8 @@ impl LayerInner {
|
||||
}
|
||||
}
|
||||
|
||||
/// Cancellation safe, however dropping the future and calling this method again might result
|
||||
/// in a new attempt to evict OR join the previously started attempt.
|
||||
pub(crate) async fn evict_and_wait(
|
||||
&self,
|
||||
_: &RemoteTimelineClient,
|
||||
@@ -611,8 +615,8 @@ impl LayerInner {
|
||||
}
|
||||
}
|
||||
|
||||
/// Should be cancellation safe, but cancellation is troublesome together with the spawned
|
||||
/// download.
|
||||
/// Cancellation safe.
|
||||
#[tracing::instrument(skip_all, fields(layer=%self))]
|
||||
async fn get_or_maybe_download(
|
||||
self: &Arc<Self>,
|
||||
allow_download: bool,
|
||||
@@ -637,14 +641,16 @@ impl LayerInner {
|
||||
|
||||
// check if we really need to be downloaded; could have been already downloaded by a
|
||||
// cancelled previous attempt.
|
||||
//
|
||||
// FIXME: what if it's a directory? that is currently needs_download == true
|
||||
let needs_download = self
|
||||
.needs_download()
|
||||
.await
|
||||
.map_err(DownloadError::PreStatFailed)?;
|
||||
|
||||
let permit = if let Some(reason) = needs_download {
|
||||
if let NeedsDownload::NotFile(ft) = reason {
|
||||
return Err(DownloadError::NotFile(ft));
|
||||
}
|
||||
|
||||
// only reset this after we've decided we really need to download. otherwise it'd
|
||||
// be impossible to mark cancelled downloads for eviction, like one could imagine
|
||||
// we would like to do for prefetching which was not needed.
|
||||
@@ -654,8 +660,6 @@ impl LayerInner {
|
||||
return Err(DownloadError::NoRemoteStorage);
|
||||
}
|
||||
|
||||
tracing::debug!(%reason, "downloading layer");
|
||||
|
||||
if let Some(ctx) = ctx {
|
||||
self.check_expected_download(ctx)?;
|
||||
}
|
||||
@@ -666,6 +670,8 @@ impl LayerInner {
|
||||
return Err(DownloadError::DownloadRequired);
|
||||
}
|
||||
|
||||
tracing::info!(%reason, "downloading on-demand");
|
||||
|
||||
self.spawn_download_and_wait(timeline, permit).await?
|
||||
} else {
|
||||
// the file is present locally, probably by a previous but cancelled call to
|
||||
@@ -686,6 +692,11 @@ impl LayerInner {
|
||||
LayerResidenceEventReason::ResidenceChange,
|
||||
);
|
||||
|
||||
let waiters = self.inner.initializer_count();
|
||||
if waiters > 0 {
|
||||
tracing::info!(waiters, "completing the on-demand download for other tasks");
|
||||
}
|
||||
|
||||
Ok((ResidentOrWantedEvicted::Resident(res), permit))
|
||||
};
|
||||
|
||||
@@ -694,7 +705,7 @@ impl LayerInner {
|
||||
// below paths anymore essentially limiting the max loop iterations to 2.
|
||||
let (value, init_permit) = download(init_permit).await?;
|
||||
let mut guard = self.inner.set(value, init_permit);
|
||||
let strong = guard
|
||||
let (strong, _upgraded) = guard
|
||||
.get_and_upgrade()
|
||||
.expect("init creates strong reference, we held the init permit");
|
||||
return Ok(strong);
|
||||
@@ -703,11 +714,17 @@ impl LayerInner {
|
||||
let (weak, permit) = {
|
||||
let mut locked = self.inner.get_or_init(download).await?;
|
||||
|
||||
if let Some(strong) = locked.get_and_upgrade() {
|
||||
self.wanted_evicted.store(false, Ordering::Relaxed);
|
||||
if let Some((strong, upgraded)) = locked.get_and_upgrade() {
|
||||
if upgraded {
|
||||
// when upgraded back, the Arc<DownloadedLayer> is still available, but
|
||||
// previously a `evict_and_wait` was received.
|
||||
self.wanted_evicted.store(false, Ordering::Relaxed);
|
||||
|
||||
// error out any `evict_and_wait`
|
||||
drop(self.status.send(Status::Downloaded));
|
||||
// error out any `evict_and_wait`
|
||||
drop(self.status.send(Status::Downloaded));
|
||||
LAYER_IMPL_METRICS
|
||||
.inc_eviction_cancelled(EvictionCancelled::UpgradedBackOnAccess);
|
||||
}
|
||||
|
||||
return Ok(strong);
|
||||
} else {
|
||||
@@ -740,8 +757,8 @@ impl LayerInner {
|
||||
match b {
|
||||
Download => Ok(()),
|
||||
Warn | Error => {
|
||||
tracing::warn!(
|
||||
"unexpectedly on-demand downloading remote layer {self} for task kind {:?}",
|
||||
tracing::info!(
|
||||
"unexpectedly on-demand downloading for task kind {:?}",
|
||||
ctx.task_kind()
|
||||
);
|
||||
crate::metrics::UNEXPECTED_ONDEMAND_DOWNLOADS.inc();
|
||||
@@ -773,6 +790,7 @@ impl LayerInner {
|
||||
// block tenant::mgr::remove_tenant_from_memory.
|
||||
|
||||
let this: Arc<Self> = self.clone();
|
||||
|
||||
crate::task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
crate::task_mgr::TaskKind::RemoteDownloadTask,
|
||||
@@ -781,6 +799,7 @@ impl LayerInner {
|
||||
&task_name,
|
||||
false,
|
||||
async move {
|
||||
|
||||
let client = timeline
|
||||
.remote_client
|
||||
.as_ref()
|
||||
@@ -842,6 +861,7 @@ impl LayerInner {
|
||||
}
|
||||
|
||||
self.consecutive_failures.store(0, Ordering::Relaxed);
|
||||
tracing::info!("on-demand download successful");
|
||||
|
||||
Ok(permit)
|
||||
}
|
||||
@@ -883,7 +903,7 @@ impl LayerInner {
|
||||
fn is_file_present_and_good_size(&self, m: &std::fs::Metadata) -> Result<(), NeedsDownload> {
|
||||
// in future, this should include sha2-256 validation of the file.
|
||||
if !m.is_file() {
|
||||
Err(NeedsDownload::NotFile)
|
||||
Err(NeedsDownload::NotFile(m.file_type()))
|
||||
} else if m.len() != self.desc.file_size {
|
||||
Err(NeedsDownload::WrongSize {
|
||||
actual: m.len(),
|
||||
@@ -1034,11 +1054,14 @@ impl LayerInner {
|
||||
Ok(())
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
tracing::info!("failed to evict file from disk, it was already gone");
|
||||
tracing::error!(
|
||||
layer_size = %self.desc.file_size,
|
||||
"failed to evict layer from disk, it was already gone (metrics will be inaccurate)"
|
||||
);
|
||||
Err(EvictionCancelled::FileNotFound)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to evict file from disk: {e:#}");
|
||||
tracing::error!("failed to evict file from disk: {e:#}");
|
||||
Err(EvictionCancelled::RemoveFailed)
|
||||
}
|
||||
};
|
||||
@@ -1082,6 +1105,8 @@ enum DownloadError {
|
||||
ContextAndConfigReallyDeniesDownloads,
|
||||
#[error("downloading is really required but not allowed by this method")]
|
||||
DownloadRequired,
|
||||
#[error("layer path exists, but it is not a file: {0:?}")]
|
||||
NotFile(std::fs::FileType),
|
||||
/// Why no error here? Because it will be reported by page_service. We should had also done
|
||||
/// retries already.
|
||||
#[error("downloading evicted layer file failed")]
|
||||
@@ -1097,7 +1122,7 @@ enum DownloadError {
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) enum NeedsDownload {
|
||||
NotFound,
|
||||
NotFile,
|
||||
NotFile(std::fs::FileType),
|
||||
WrongSize { actual: u64, expected: u64 },
|
||||
}
|
||||
|
||||
@@ -1105,7 +1130,7 @@ impl std::fmt::Display for NeedsDownload {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
NeedsDownload::NotFound => write!(f, "file was not found"),
|
||||
NeedsDownload::NotFile => write!(f, "path is not a file"),
|
||||
NeedsDownload::NotFile(ft) => write!(f, "path is not a file; {ft:?}"),
|
||||
NeedsDownload::WrongSize { actual, expected } => {
|
||||
write!(f, "file size mismatch {actual} vs. {expected}")
|
||||
}
|
||||
@@ -1116,6 +1141,8 @@ impl std::fmt::Display for NeedsDownload {
|
||||
/// Existence of `DownloadedLayer` means that we have the file locally, and can later evict it.
|
||||
pub(crate) struct DownloadedLayer {
|
||||
owner: Weak<LayerInner>,
|
||||
// Use tokio OnceCell as we do not need to deinitialize this, it'll just get dropped with the
|
||||
// DownloadedLayer
|
||||
kind: tokio::sync::OnceCell<anyhow::Result<LayerKind>>,
|
||||
version: usize,
|
||||
}
|
||||
@@ -1159,7 +1186,6 @@ impl DownloadedLayer {
|
||||
"these are the same, just avoiding the upgrade"
|
||||
);
|
||||
|
||||
// there is nothing async here, but it should be async
|
||||
let res = if owner.desc.is_delta {
|
||||
let summary = Some(delta_layer::Summary::expected(
|
||||
owner.desc.tenant_id,
|
||||
@@ -1258,6 +1284,8 @@ impl std::fmt::Debug for ResidentLayer {
|
||||
|
||||
impl ResidentLayer {
|
||||
/// Release the eviction guard, converting back into a plain [`Layer`].
|
||||
///
|
||||
/// You can access the [`Layer`] also by using `as_ref`.
|
||||
pub(crate) fn drop_eviction_guard(self) -> Layer {
|
||||
self.into()
|
||||
}
|
||||
@@ -1313,7 +1341,7 @@ impl AsRef<Layer> for ResidentLayer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Allow slimming down if we don't want the `2*usize` with eviction candidates?
|
||||
/// Drop the eviction guard.
|
||||
impl From<ResidentLayer> for Layer {
|
||||
fn from(value: ResidentLayer) -> Self {
|
||||
value.owner
|
||||
@@ -1501,6 +1529,8 @@ enum EvictionCancelled {
|
||||
AlreadyReinitialized,
|
||||
/// Not evicted because of a pending reinitialization
|
||||
LostToDownload,
|
||||
/// After eviction, there was a new layer access which cancelled the eviction.
|
||||
UpgradedBackOnAccess,
|
||||
}
|
||||
|
||||
impl EvictionCancelled {
|
||||
@@ -1513,6 +1543,7 @@ impl EvictionCancelled {
|
||||
EvictionCancelled::RemoveFailed => "remove_failed",
|
||||
EvictionCancelled::AlreadyReinitialized => "already_reinitialized",
|
||||
EvictionCancelled::LostToDownload => "lost_to_download",
|
||||
EvictionCancelled::UpgradedBackOnAccess => "upgraded_back_on_access",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use tokio::{
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::{id::TenantTimelineId, sync::gate::Gate};
|
||||
|
||||
use std::cmp::{max, min, Ordering};
|
||||
use std::collections::{BinaryHeap, HashMap, HashSet};
|
||||
@@ -310,6 +310,13 @@ pub struct Timeline {
|
||||
/// Load or creation time information about the disk_consistent_lsn and when the loading
|
||||
/// happened. Used for consumption metrics.
|
||||
pub(crate) loaded_at: (Lsn, SystemTime),
|
||||
|
||||
/// Gate to prevent shutdown completing while I/O is still happening to this timeline's data
|
||||
pub(crate) gate: Gate,
|
||||
|
||||
/// Cancellation token scoped to this timeline: anything doing long-running work relating
|
||||
/// to the timeline should drop out when this token fires.
|
||||
pub(crate) cancel: CancellationToken,
|
||||
}
|
||||
|
||||
pub struct WalReceiverInfo {
|
||||
@@ -786,7 +793,11 @@ impl Timeline {
|
||||
// as an empty timeline. Also in unit tests, when we use the timeline
|
||||
// as a simple key-value store, ignoring the datadir layout. Log the
|
||||
// error but continue.
|
||||
error!("could not compact, repartitioning keyspace failed: {err:?}");
|
||||
//
|
||||
// Suppress error when it's due to cancellation
|
||||
if !self.cancel.is_cancelled() {
|
||||
error!("could not compact, repartitioning keyspace failed: {err:?}");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -884,7 +895,12 @@ impl Timeline {
|
||||
pub async fn shutdown(self: &Arc<Self>, freeze_and_flush: bool) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// Signal any subscribers to our cancellation token to drop out
|
||||
tracing::debug!("Cancelling CancellationToken");
|
||||
self.cancel.cancel();
|
||||
|
||||
// prevent writes to the InMemoryLayer
|
||||
tracing::debug!("Waiting for WalReceiverManager...");
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::WalReceiverManager),
|
||||
Some(self.tenant_id),
|
||||
@@ -920,6 +936,16 @@ impl Timeline {
|
||||
warn!("failed to await for frozen and flushed uploads: {e:#}");
|
||||
}
|
||||
}
|
||||
|
||||
// Page request handlers might be waiting for LSN to advance: they do not respect Timeline::cancel
|
||||
// while doing so.
|
||||
self.last_record_lsn.shutdown();
|
||||
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(self.timeline_id)).await;
|
||||
|
||||
// Finally wait until any gate-holders are complete
|
||||
self.gate.close().await;
|
||||
}
|
||||
|
||||
pub fn set_state(&self, new_state: TimelineState) {
|
||||
@@ -1048,6 +1074,11 @@ impl Timeline {
|
||||
/// Like [`evict_layer_batch`](Self::evict_layer_batch), but for just one layer.
|
||||
/// Additional case `Ok(None)` covers the case where the layer could not be found by its `layer_file_name`.
|
||||
pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
|
||||
let _gate = self
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|_| anyhow::anyhow!("Shutting down"))?;
|
||||
|
||||
let Some(local_layer) = self.find_layer(layer_file_name).await else {
|
||||
return Ok(None);
|
||||
};
|
||||
@@ -1063,9 +1094,8 @@ impl Timeline {
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow::anyhow!("remote storage not configured; cannot evict"))?;
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
let results = self
|
||||
.evict_layer_batch(remote_client, &[local_layer], &cancel)
|
||||
.evict_layer_batch(remote_client, &[local_layer])
|
||||
.await?;
|
||||
assert_eq!(results.len(), 1);
|
||||
let result: Option<Result<(), EvictionError>> = results.into_iter().next().unwrap();
|
||||
@@ -1080,15 +1110,18 @@ impl Timeline {
|
||||
pub(crate) async fn evict_layers(
|
||||
&self,
|
||||
layers_to_evict: &[Layer],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
|
||||
let _gate = self
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|_| anyhow::anyhow!("Shutting down"))?;
|
||||
|
||||
let remote_client = self
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.context("timeline must have RemoteTimelineClient")?;
|
||||
|
||||
self.evict_layer_batch(remote_client, layers_to_evict, cancel)
|
||||
.await
|
||||
self.evict_layer_batch(remote_client, layers_to_evict).await
|
||||
}
|
||||
|
||||
/// Evict multiple layers at once, continuing through errors.
|
||||
@@ -1109,7 +1142,6 @@ impl Timeline {
|
||||
&self,
|
||||
remote_client: &Arc<RemoteTimelineClient>,
|
||||
layers_to_evict: &[Layer],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
|
||||
// ensure that the layers have finished uploading
|
||||
// (don't hold the layer_removal_cs while we do it, we're not removing anything yet)
|
||||
@@ -1157,7 +1189,7 @@ impl Timeline {
|
||||
};
|
||||
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {},
|
||||
_ = self.cancel.cancelled() => {},
|
||||
_ = join => {}
|
||||
}
|
||||
|
||||
@@ -1267,6 +1299,7 @@ impl Timeline {
|
||||
initial_logical_size_can_start: Option<completion::Barrier>,
|
||||
initial_logical_size_attempt: Option<completion::Completion>,
|
||||
state: TimelineState,
|
||||
cancel: CancellationToken,
|
||||
) -> Arc<Self> {
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn();
|
||||
let (state, _) = watch::channel(state);
|
||||
@@ -1367,6 +1400,8 @@ impl Timeline {
|
||||
|
||||
initial_logical_size_can_start,
|
||||
initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
|
||||
cancel,
|
||||
gate: Gate::new(format!("Timeline<{tenant_id}/{timeline_id}>")),
|
||||
};
|
||||
result.repartition_threshold =
|
||||
result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;
|
||||
@@ -2030,6 +2065,10 @@ impl Timeline {
|
||||
let mut cont_lsn = Lsn(request_lsn.0 + 1);
|
||||
|
||||
'outer: loop {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(PageReconstructError::Cancelled);
|
||||
}
|
||||
|
||||
// The function should have updated 'state'
|
||||
//info!("CALLED for {} at {}: {:?} with {} records, cached {}", key, cont_lsn, result, reconstruct_state.records.len(), cached_lsn);
|
||||
match result {
|
||||
@@ -2936,13 +2975,10 @@ struct CompactLevel0Phase1StatsBuilder {
|
||||
new_deltas_size: Option<u64>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(serde::Serialize)]
|
||||
struct CompactLevel0Phase1Stats {
|
||||
version: u64,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
tenant_id: TenantId,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
timeline_id: TimelineId,
|
||||
read_lock_acquisition_micros: RecordedDuration,
|
||||
read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
|
||||
@@ -4369,25 +4405,10 @@ mod tests {
|
||||
.expect("should had been resident")
|
||||
.drop_eviction_guard();
|
||||
|
||||
let cancel = tokio_util::sync::CancellationToken::new();
|
||||
let batch = [layer];
|
||||
|
||||
let first = {
|
||||
let cancel = cancel.child_token();
|
||||
async {
|
||||
let cancel = cancel;
|
||||
timeline
|
||||
.evict_layer_batch(&rc, &batch, &cancel)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
};
|
||||
let second = async {
|
||||
timeline
|
||||
.evict_layer_batch(&rc, &batch, &cancel)
|
||||
.await
|
||||
.unwrap()
|
||||
};
|
||||
let first = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
|
||||
let second = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
|
||||
|
||||
let (first, second) = tokio::join!(first, second);
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ use crate::{
|
||||
deletion_queue::DeletionQueueClient,
|
||||
task_mgr::{self, TaskKind},
|
||||
tenant::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id,
|
||||
metadata::TimelineMetadata,
|
||||
remote_timeline_client::{
|
||||
self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
|
||||
@@ -30,6 +31,11 @@ use super::{Timeline, TimelineResources};
|
||||
|
||||
/// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
|
||||
async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
// Notify any timeline work to drop out of loops/requests
|
||||
tracing::debug!("Cancelling CancellationToken");
|
||||
timeline.cancel.cancel();
|
||||
|
||||
// Stop the walreceiver first.
|
||||
debug!("waiting for wal receiver to shutdown");
|
||||
let maybe_started_walreceiver = { timeline.walreceiver.lock().unwrap().take() };
|
||||
@@ -74,6 +80,11 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
"failpoint: timeline-delete-before-index-deleted-at"
|
||||
))?
|
||||
});
|
||||
|
||||
tracing::debug!("Waiting for gate...");
|
||||
timeline.gate.close().await;
|
||||
tracing::debug!("Shutdown complete");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -277,10 +277,7 @@ impl Timeline {
|
||||
Some(c) => c,
|
||||
};
|
||||
|
||||
let results = match self
|
||||
.evict_layer_batch(remote_client, &candidates, cancel)
|
||||
.await
|
||||
{
|
||||
let results = match self.evict_layer_batch(remote_client, &candidates).await {
|
||||
Err(pre_err) => {
|
||||
stats.errors += candidates.len();
|
||||
error!("could not do any evictions: {pre_err:#}");
|
||||
@@ -344,20 +341,7 @@ impl Timeline {
|
||||
// Make one of the tenant's timelines draw the short straw and run the calculation.
|
||||
// The others wait until the calculation is done so that they take into account the
|
||||
// imitated accesses that the winner made.
|
||||
//
|
||||
// It is critical we are responsive to cancellation here. Otherwise, we deadlock with
|
||||
// tenant deletion (holds TENANTS in read mode) any other task that attempts to
|
||||
// acquire TENANTS in write mode before we here call get_tenant.
|
||||
// See https://github.com/neondatabase/neon/issues/5284.
|
||||
let res = tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
res = crate::tenant::mgr::get_tenant(self.tenant_id, true) => {
|
||||
res
|
||||
}
|
||||
};
|
||||
let tenant = match res {
|
||||
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_id, true) {
|
||||
Ok(t) => t,
|
||||
Err(_) => {
|
||||
return ControlFlow::Break(());
|
||||
|
||||
@@ -426,7 +426,7 @@ impl ConnectionManagerState {
|
||||
timeline,
|
||||
new_sk.wal_source_connconf,
|
||||
events_sender,
|
||||
cancellation,
|
||||
cancellation.clone(),
|
||||
connect_timeout,
|
||||
ctx,
|
||||
node_id,
|
||||
@@ -447,7 +447,14 @@ impl ConnectionManagerState {
|
||||
}
|
||||
WalReceiverError::Other(e) => {
|
||||
// give out an error to have task_mgr give it a really verbose logging
|
||||
Err(e).context("walreceiver connection handling failure")
|
||||
if cancellation.is_cancelled() {
|
||||
// Ideally we would learn about this via some path other than Other, but
|
||||
// that requires refactoring all the intermediate layers of ingest code
|
||||
// that only emit anyhow::Error
|
||||
Ok(())
|
||||
} else {
|
||||
Err(e).context("walreceiver connection handling failure")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -596,21 +596,21 @@ trait CloseFileDescriptors: CommandExt {
|
||||
|
||||
impl<C: CommandExt> CloseFileDescriptors for C {
|
||||
fn close_fds(&mut self) -> &mut Command {
|
||||
// SAFETY: Code executed inside pre_exec should have async-signal-safety,
|
||||
// which means it should be safe to execute inside a signal handler.
|
||||
// The precise meaning depends on platform. See `man signal-safety`
|
||||
// for the linux definition.
|
||||
//
|
||||
// The set_fds_cloexec_threadsafe function is documented to be
|
||||
// async-signal-safe.
|
||||
//
|
||||
// Aside from this function, the rest of the code is re-entrant and
|
||||
// doesn't make any syscalls. We're just passing constants.
|
||||
//
|
||||
// NOTE: It's easy to indirectly cause a malloc or lock a mutex,
|
||||
// which is not async-signal-safe. Be careful.
|
||||
unsafe {
|
||||
self.pre_exec(move || {
|
||||
// SAFETY: Code executed inside pre_exec should have async-signal-safety,
|
||||
// which means it should be safe to execute inside a signal handler.
|
||||
// The precise meaning depends on platform. See `man signal-safety`
|
||||
// for the linux definition.
|
||||
//
|
||||
// The set_fds_cloexec_threadsafe function is documented to be
|
||||
// async-signal-safe.
|
||||
//
|
||||
// Aside from this function, the rest of the code is re-entrant and
|
||||
// doesn't make any syscalls. We're just passing constants.
|
||||
//
|
||||
// NOTE: It's easy to indirectly cause a malloc or lock a mutex,
|
||||
// which is not async-signal-safe. Be careful.
|
||||
close_fds::set_fds_cloexec_threadsafe(3, &[]);
|
||||
Ok(())
|
||||
})
|
||||
|
||||
@@ -19,7 +19,10 @@
|
||||
#include "access/xlog.h"
|
||||
#include "access/xlogutils.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "c.h"
|
||||
#include "postmaster/interrupt.h"
|
||||
|
||||
#include "libpq-fe.h"
|
||||
#include "libpq/pqformat.h"
|
||||
@@ -61,23 +64,63 @@ int flush_every_n_requests = 8;
|
||||
int n_reconnect_attempts = 0;
|
||||
int max_reconnect_attempts = 60;
|
||||
|
||||
#define MAX_PAGESERVER_CONNSTRING_SIZE 256
|
||||
|
||||
typedef struct
|
||||
{
|
||||
LWLockId lock;
|
||||
pg_atomic_uint64 update_counter;
|
||||
char pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
|
||||
} PagestoreShmemState;
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
static void walproposer_shmem_request(void);
|
||||
#endif
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
static PagestoreShmemState *pagestore_shared;
|
||||
static uint64 pagestore_local_counter = 0;
|
||||
static char local_pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
|
||||
|
||||
bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
|
||||
|
||||
static bool pageserver_flush(void);
|
||||
static void pageserver_disconnect(void);
|
||||
|
||||
|
||||
static pqsigfunc prev_signal_handler;
|
||||
static bool
|
||||
CheckPageserverConnstring(char **newval, void **extra, GucSource source)
|
||||
{
|
||||
return strlen(*newval) < MAX_PAGESERVER_CONNSTRING_SIZE;
|
||||
}
|
||||
|
||||
static void
|
||||
pageserver_sighup_handler(SIGNAL_ARGS)
|
||||
AssignPageserverConnstring(const char *newval, void *extra)
|
||||
{
|
||||
if (prev_signal_handler)
|
||||
{
|
||||
prev_signal_handler(postgres_signal_arg);
|
||||
}
|
||||
neon_log(LOG, "Received SIGHUP, disconnecting pageserver. New pageserver connstring is %s", page_server_connstring);
|
||||
pageserver_disconnect();
|
||||
if(!pagestore_shared)
|
||||
return;
|
||||
LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE);
|
||||
strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE);
|
||||
pg_atomic_fetch_add_u64(&pagestore_shared->update_counter, 1);
|
||||
LWLockRelease(pagestore_shared->lock);
|
||||
}
|
||||
|
||||
static bool
|
||||
CheckConnstringUpdated()
|
||||
{
|
||||
if(!pagestore_shared)
|
||||
return false;
|
||||
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter);
|
||||
}
|
||||
|
||||
static void
|
||||
ReloadConnstring()
|
||||
{
|
||||
if(!pagestore_shared)
|
||||
return;
|
||||
LWLockAcquire(pagestore_shared->lock, LW_SHARED);
|
||||
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
|
||||
pagestore_local_counter = pg_atomic_read_u64(&pagestore_shared->update_counter);
|
||||
LWLockRelease(pagestore_shared->lock);
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -91,6 +134,11 @@ pageserver_connect(int elevel)
|
||||
|
||||
Assert(!connected);
|
||||
|
||||
if(CheckConnstringUpdated())
|
||||
{
|
||||
ReloadConnstring();
|
||||
}
|
||||
|
||||
/*
|
||||
* Connect using the connection string we got from the
|
||||
* neon.pageserver_connstring GUC. If the NEON_AUTH_TOKEN environment
|
||||
@@ -110,7 +158,7 @@ pageserver_connect(int elevel)
|
||||
n++;
|
||||
}
|
||||
keywords[n] = "dbname";
|
||||
values[n] = page_server_connstring;
|
||||
values[n] = local_pageserver_connstring;
|
||||
n++;
|
||||
keywords[n] = NULL;
|
||||
values[n] = NULL;
|
||||
@@ -254,6 +302,12 @@ pageserver_send(NeonRequest * request)
|
||||
{
|
||||
StringInfoData req_buff;
|
||||
|
||||
if(CheckConnstringUpdated())
|
||||
{
|
||||
pageserver_disconnect();
|
||||
ReloadConnstring();
|
||||
}
|
||||
|
||||
/* If the connection was lost for some reason, reconnect */
|
||||
if (connected && PQstatus(pageserver_conn) == CONNECTION_BAD)
|
||||
{
|
||||
@@ -274,6 +328,7 @@ pageserver_send(NeonRequest * request)
|
||||
{
|
||||
while (!pageserver_connect(n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR))
|
||||
{
|
||||
HandleMainLoopInterrupts();
|
||||
n_reconnect_attempts += 1;
|
||||
pg_usleep(RECONNECT_INTERVAL_USEC);
|
||||
}
|
||||
@@ -391,7 +446,8 @@ pageserver_flush(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
page_server_api api = {
|
||||
page_server_api api =
|
||||
{
|
||||
.send = pageserver_send,
|
||||
.flush = pageserver_flush,
|
||||
.receive = pageserver_receive
|
||||
@@ -405,12 +461,72 @@ check_neon_id(char **newval, void **extra, GucSource source)
|
||||
return **newval == '\0' || HexDecodeString(id, *newval, 16);
|
||||
}
|
||||
|
||||
static Size
|
||||
PagestoreShmemSize(void)
|
||||
{
|
||||
return sizeof(PagestoreShmemState);
|
||||
}
|
||||
|
||||
static bool
|
||||
PagestoreShmemInit(void)
|
||||
{
|
||||
bool found;
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
pagestore_shared = ShmemInitStruct("libpagestore shared state",
|
||||
PagestoreShmemSize(),
|
||||
&found);
|
||||
if(!found)
|
||||
{
|
||||
pagestore_shared->lock = &(GetNamedLWLockTranche("neon_libpagestore")->lock);
|
||||
pg_atomic_init_u64(&pagestore_shared->update_counter, 0);
|
||||
AssignPageserverConnstring(page_server_connstring, NULL);
|
||||
}
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
return found;
|
||||
}
|
||||
|
||||
static void
|
||||
pagestore_shmem_startup_hook(void)
|
||||
{
|
||||
if(prev_shmem_startup_hook)
|
||||
prev_shmem_startup_hook();
|
||||
|
||||
PagestoreShmemInit();
|
||||
}
|
||||
|
||||
static void
|
||||
pagestore_shmem_request(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
if(prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
|
||||
RequestAddinShmemSpace(PagestoreShmemSize());
|
||||
RequestNamedLWLockTranche("neon_libpagestore", 1);
|
||||
}
|
||||
|
||||
static void
|
||||
pagestore_prepare_shmem(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = pagestore_shmem_request;
|
||||
#else
|
||||
pagestore_shmem_request();
|
||||
#endif
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = pagestore_shmem_startup_hook;
|
||||
}
|
||||
|
||||
/*
|
||||
* Module initialization function
|
||||
*/
|
||||
void
|
||||
pg_init_libpagestore(void)
|
||||
{
|
||||
pagestore_prepare_shmem();
|
||||
|
||||
DefineCustomStringVariable("neon.pageserver_connstring",
|
||||
"connection string to the page server",
|
||||
NULL,
|
||||
@@ -418,7 +534,7 @@ pg_init_libpagestore(void)
|
||||
"",
|
||||
PGC_SIGHUP,
|
||||
0, /* no flags required */
|
||||
NULL, NULL, NULL);
|
||||
CheckPageserverConnstring, AssignPageserverConnstring, NULL);
|
||||
|
||||
DefineCustomStringVariable("neon.timeline_id",
|
||||
"Neon timeline_id the server is running on",
|
||||
@@ -499,7 +615,5 @@ pg_init_libpagestore(void)
|
||||
redo_read_buffer_filter = neon_redo_read_buffer_filter;
|
||||
}
|
||||
|
||||
prev_signal_handler = pqsignal(SIGHUP, pageserver_sighup_handler);
|
||||
|
||||
lfc_init();
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ typedef enum
|
||||
T_NeonNblocksRequest,
|
||||
T_NeonGetPageRequest,
|
||||
T_NeonDbSizeRequest,
|
||||
T_NeonPagestoreNoopRequest,
|
||||
|
||||
/* pagestore -> pagestore_client */
|
||||
T_NeonExistsResponse = 100,
|
||||
@@ -40,6 +41,7 @@ typedef enum
|
||||
T_NeonGetPageResponse,
|
||||
T_NeonErrorResponse,
|
||||
T_NeonDbSizeResponse,
|
||||
T_NeonPagestoreNoopResponse,
|
||||
} NeonMessageTag;
|
||||
|
||||
/* base struct for c-style inheritance */
|
||||
|
||||
@@ -88,7 +88,7 @@ static void StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd);
|
||||
static void WalSndLoop(WalProposer *wp);
|
||||
static void XLogBroadcastWalProposer(WalProposer *wp);
|
||||
|
||||
static void XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr);
|
||||
static void XLogWalPropWrite(WalProposer *wp, char *buf, Size nbytes, XLogRecPtr recptr);
|
||||
static void XLogWalPropClose(XLogRecPtr recptr);
|
||||
|
||||
static void
|
||||
@@ -1241,7 +1241,7 @@ WalProposerRecovery(Safekeeper *sk, TimeLineID timeline, XLogRecPtr startpos, XL
|
||||
rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE;
|
||||
|
||||
/* write WAL to disk */
|
||||
XLogWalPropWrite(&buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
|
||||
XLogWalPropWrite(sk->wp, &buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
|
||||
|
||||
ereport(DEBUG1,
|
||||
(errmsg("Recover message %X/%X length %d",
|
||||
@@ -1283,11 +1283,24 @@ static XLogSegNo walpropSegNo = 0;
|
||||
* Write XLOG data to disk.
|
||||
*/
|
||||
static void
|
||||
XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr)
|
||||
XLogWalPropWrite(WalProposer *wp, char *buf, Size nbytes, XLogRecPtr recptr)
|
||||
{
|
||||
int startoff;
|
||||
int byteswritten;
|
||||
|
||||
/*
|
||||
* Apart from walproposer, basebackup LSN page is also written out by
|
||||
* postgres itself which writes WAL only in pages, and in basebackup it is
|
||||
* inherently dummy (only safekeepers have historic WAL). Update WAL buffers
|
||||
* here to avoid dummy page overwriting correct one we download here. Ugly,
|
||||
* but alternatives are about the same ugly. We won't need that if we switch
|
||||
* to on-demand WAL download from safekeepers, without writing to disk.
|
||||
*
|
||||
* https://github.com/neondatabase/neon/issues/5749
|
||||
*/
|
||||
if (!wp->config->syncSafekeepers)
|
||||
XLogUpdateWalBuffers(buf, recptr, nbytes);
|
||||
|
||||
while (nbytes > 0)
|
||||
{
|
||||
int segbytes;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! User credentials used in authentication.
|
||||
|
||||
use crate::{auth::password_hack::parse_endpoint_param, error::UserFacingError};
|
||||
use crate::{
|
||||
auth::password_hack::parse_endpoint_param, error::UserFacingError, proxy::neon_options,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use pq_proto::StartupMessageParams;
|
||||
use std::collections::HashSet;
|
||||
@@ -38,6 +40,8 @@ pub struct ClientCredentials<'a> {
|
||||
pub user: &'a str,
|
||||
// TODO: this is a severe misnomer! We should think of a new name ASAP.
|
||||
pub project: Option<String>,
|
||||
|
||||
pub cache_key: String,
|
||||
}
|
||||
|
||||
impl ClientCredentials<'_> {
|
||||
@@ -53,6 +57,7 @@ impl<'a> ClientCredentials<'a> {
|
||||
ClientCredentials {
|
||||
user: "",
|
||||
project: None,
|
||||
cache_key: "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,7 +125,17 @@ impl<'a> ClientCredentials<'a> {
|
||||
|
||||
info!(user, project = project.as_deref(), "credentials");
|
||||
|
||||
Ok(Self { user, project })
|
||||
let cache_key = format!(
|
||||
"{}{}",
|
||||
project.as_deref().unwrap_or(""),
|
||||
neon_options(params).unwrap_or("".to_string())
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
user,
|
||||
project,
|
||||
cache_key,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,6 +191,7 @@ mod tests {
|
||||
let creds = ClientCredentials::parse(&options, sni, common_names)?;
|
||||
assert_eq!(creds.user, "john_doe");
|
||||
assert_eq!(creds.project.as_deref(), Some("foo"));
|
||||
assert_eq!(creds.cache_key, "foo");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -303,4 +319,23 @@ mod tests {
|
||||
_ => panic!("bad error: {err:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_neon_options() -> anyhow::Result<()> {
|
||||
let options = StartupMessageParams::new([
|
||||
("user", "john_doe"),
|
||||
("options", "neon_lsn:0/2 neon_endpoint_type:read_write"),
|
||||
]);
|
||||
|
||||
let sni = Some("project.localhost");
|
||||
let common_names = Some(["localhost".into()].into());
|
||||
let creds = ClientCredentials::parse(&options, sni, common_names)?;
|
||||
assert_eq!(creds.project.as_deref(), Some("project"));
|
||||
assert_eq!(
|
||||
creds.cache_key,
|
||||
"projectneon_endpoint_type:read_write neon_lsn:0/2"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::{
|
||||
cancellation::CancelClosure,
|
||||
console::errors::WakeComputeError,
|
||||
error::{io_error, UserFacingError},
|
||||
proxy::is_neon_param,
|
||||
};
|
||||
use futures::{FutureExt, TryFutureExt};
|
||||
use itertools::Itertools;
|
||||
@@ -278,7 +279,7 @@ fn filtered_options(params: &StartupMessageParams) -> Option<String> {
|
||||
#[allow(unstable_name_collisions)]
|
||||
let options: String = params
|
||||
.options_raw()?
|
||||
.filter(|opt| parse_endpoint_param(opt).is_none())
|
||||
.filter(|opt| parse_endpoint_param(opt).is_none() && !is_neon_param(opt))
|
||||
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
||||
.collect();
|
||||
|
||||
@@ -313,5 +314,11 @@ mod tests {
|
||||
|
||||
let params = StartupMessageParams::new([("options", "project = foo")]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo"));
|
||||
|
||||
let params = StartupMessageParams::new([(
|
||||
"options",
|
||||
"project = foo neon_endpoint_type:read_write neon_lsn:0/2",
|
||||
)]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ pub struct ConsoleError {
|
||||
#[derive(Deserialize)]
|
||||
pub struct GetRoleSecret {
|
||||
pub role_secret: Box<str>,
|
||||
pub allowed_ips: Option<Vec<Box<str>>>,
|
||||
}
|
||||
|
||||
// Manually implement debug to omit sensitive info.
|
||||
@@ -187,4 +188,31 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_wake_compute() -> anyhow::Result<()> {
|
||||
let json = json!({
|
||||
"address": "0.0.0.0",
|
||||
"aux": dummy_aux(),
|
||||
});
|
||||
let _: WakeCompute = serde_json::from_str(&json.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_get_role_secret() -> anyhow::Result<()> {
|
||||
// Empty `allowed_ips` field.
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
});
|
||||
let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
|
||||
// Empty `allowed_ips` field.
|
||||
let json = json!({
|
||||
"role_secret": "secret",
|
||||
"allowed_ips": ["8.8.8.8"],
|
||||
});
|
||||
let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,6 +178,7 @@ pub struct ConsoleReqExtra<'a> {
|
||||
pub session_id: uuid::Uuid,
|
||||
/// Name of client application, if set.
|
||||
pub application_name: Option<&'a str>,
|
||||
pub options: Option<&'a str>,
|
||||
}
|
||||
|
||||
/// Auth secret which is managed by the cloud.
|
||||
|
||||
@@ -99,6 +99,7 @@ impl Api {
|
||||
.query(&[
|
||||
("application_name", extra.application_name),
|
||||
("project", Some(project)),
|
||||
("options", extra.options),
|
||||
])
|
||||
.build()?;
|
||||
|
||||
@@ -151,7 +152,7 @@ impl super::Api for Api {
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
creds: &ClientCredentials,
|
||||
) -> Result<CachedNodeInfo, WakeComputeError> {
|
||||
let key = creds.project().expect("impossible");
|
||||
let key: &str = &creds.cache_key;
|
||||
|
||||
// Every time we do a wakeup http request, the compute node will stay up
|
||||
// for some time (highly depends on the console's scale-to-zero policy);
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use std::convert::Infallible;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
|
||||
@@ -3,10 +3,9 @@
|
||||
use std::ffi::CStr;
|
||||
|
||||
pub fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> {
|
||||
let pos = bytes.iter().position(|&x| x == 0)?;
|
||||
let (cstr, other) = bytes.split_at(pos + 1);
|
||||
// SAFETY: we've already checked that there's a terminator
|
||||
Some((unsafe { CStr::from_bytes_with_nul_unchecked(cstr) }, other))
|
||||
let cstr = CStr::from_bytes_until_nul(bytes).ok()?;
|
||||
let (_, other) = bytes.split_at(cstr.to_bytes_with_nul().len());
|
||||
Some((cstr, other))
|
||||
}
|
||||
|
||||
/// See <https://doc.rust-lang.org/std/primitive.slice.html#method.split_array_ref>.
|
||||
|
||||
@@ -15,10 +15,12 @@ use crate::{
|
||||
use anyhow::{bail, Context};
|
||||
use async_trait::async_trait;
|
||||
use futures::TryFutureExt;
|
||||
use itertools::Itertools;
|
||||
use metrics::{exponential_buckets, register_int_counter_vec, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
|
||||
use prometheus::{register_histogram_vec, HistogramVec};
|
||||
use regex::Regex;
|
||||
use std::{error::Error, io, ops::ControlFlow, sync::Arc, time::Instant};
|
||||
use tokio::{
|
||||
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
|
||||
@@ -881,9 +883,12 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
|
||||
allow_self_signed_compute,
|
||||
} = self;
|
||||
|
||||
let console_options = neon_options(params);
|
||||
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id, // aka this connection's id
|
||||
application_name: params.get("application_name"),
|
||||
options: console_options.as_deref(),
|
||||
};
|
||||
|
||||
let mut latency_timer = LatencyTimer::new(mode.protocol_label());
|
||||
@@ -945,3 +950,27 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
|
||||
proxy_pass(stream, node.stream, &aux).await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn neon_options(params: &StartupMessageParams) -> Option<String> {
|
||||
#[allow(unstable_name_collisions)]
|
||||
let options: String = params
|
||||
.options_raw()?
|
||||
.filter(|opt| is_neon_param(opt))
|
||||
.sorted() // we sort it to use as cache key
|
||||
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
||||
.collect();
|
||||
|
||||
// Don't even bother with empty options.
|
||||
if options.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(options)
|
||||
}
|
||||
|
||||
pub fn is_neon_param(bytes: &str) -> bool {
|
||||
static RE: OnceCell<Regex> = OnceCell::new();
|
||||
RE.get_or_init(|| Regex::new(r"^neon_\w+:").unwrap());
|
||||
|
||||
RE.get().unwrap().is_match(bytes)
|
||||
}
|
||||
|
||||
@@ -440,6 +440,7 @@ fn helper_create_connect_info(
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id: uuid::Uuid::new_v4(),
|
||||
application_name: Some("TEST"),
|
||||
options: None,
|
||||
};
|
||||
let creds = auth::BackendType::Test(mechanism);
|
||||
(cache, extra, creds)
|
||||
|
||||
@@ -22,7 +22,10 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
|
||||
|
||||
use crate::{
|
||||
auth, console,
|
||||
proxy::{LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER, NUM_DB_CONNECTIONS_OPENED_COUNTER},
|
||||
proxy::{
|
||||
neon_options, LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER,
|
||||
NUM_DB_CONNECTIONS_OPENED_COUNTER,
|
||||
},
|
||||
usage_metrics::{Ids, MetricCounter, USAGE_METRICS},
|
||||
};
|
||||
use crate::{compute, config};
|
||||
@@ -41,6 +44,7 @@ pub struct ConnInfo {
|
||||
pub dbname: String,
|
||||
pub hostname: String,
|
||||
pub password: String,
|
||||
pub options: Option<String>,
|
||||
}
|
||||
|
||||
impl ConnInfo {
|
||||
@@ -401,26 +405,25 @@ async fn connect_to_compute(
|
||||
let tls = config.tls_config.as_ref();
|
||||
let common_names = tls.and_then(|tls| tls.common_names.clone());
|
||||
|
||||
let credential_params = StartupMessageParams::new([
|
||||
let params = StartupMessageParams::new([
|
||||
("user", &conn_info.username),
|
||||
("database", &conn_info.dbname),
|
||||
("application_name", APP_NAME),
|
||||
("options", conn_info.options.as_deref().unwrap_or("")),
|
||||
]);
|
||||
|
||||
let creds = config
|
||||
.auth_backend
|
||||
.as_ref()
|
||||
.map(|_| {
|
||||
auth::ClientCredentials::parse(
|
||||
&credential_params,
|
||||
Some(&conn_info.hostname),
|
||||
common_names,
|
||||
)
|
||||
})
|
||||
.map(|_| auth::ClientCredentials::parse(¶ms, Some(&conn_info.hostname), common_names))
|
||||
.transpose()?;
|
||||
|
||||
let console_options = neon_options(¶ms);
|
||||
|
||||
let extra = console::ConsoleReqExtra {
|
||||
session_id: uuid::Uuid::new_v4(),
|
||||
application_name: Some(APP_NAME),
|
||||
options: console_options.as_deref(),
|
||||
};
|
||||
|
||||
let node_info = creds
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user