Compare commits

..

1 Commits

Author SHA1 Message Date
Konstantin Knizhnik
2da33f89fa Make it possible to specify wal_level in initdb 2023-11-02 22:07:11 +02:00
58 changed files with 450 additions and 1570 deletions

View File

@@ -17,9 +17,9 @@ assignees: ''
## Implementation ideas ## Implementation ideas
```[tasklist]
## Tasks ## Tasks
``` - [ ]
## Other related tasks and Epics ## Other related tasks and Epics
- -

23
Cargo.lock generated
View File

@@ -3550,7 +3550,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres" name = "postgres"
version = "0.19.4" version = "0.19.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048" source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
dependencies = [ dependencies = [
"bytes", "bytes",
"fallible-iterator", "fallible-iterator",
@@ -3563,7 +3563,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres-native-tls" name = "postgres-native-tls"
version = "0.5.0" version = "0.5.0"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048" source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
dependencies = [ dependencies = [
"native-tls", "native-tls",
"tokio", "tokio",
@@ -3574,7 +3574,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres-protocol" name = "postgres-protocol"
version = "0.6.4" version = "0.6.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048" source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
dependencies = [ dependencies = [
"base64 0.20.0", "base64 0.20.0",
"byteorder", "byteorder",
@@ -3592,7 +3592,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres-types" name = "postgres-types"
version = "0.2.4" version = "0.2.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048" source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
dependencies = [ dependencies = [
"bytes", "bytes",
"fallible-iterator", "fallible-iterator",
@@ -4419,7 +4419,6 @@ dependencies = [
"itertools", "itertools",
"pageserver", "pageserver",
"rand 0.8.5", "rand 0.8.5",
"remote_storage",
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
@@ -4478,7 +4477,6 @@ dependencies = [
"tokio", "tokio",
"tokio-io-timeout", "tokio-io-timeout",
"tokio-postgres", "tokio-postgres",
"tokio-stream",
"toml_edit", "toml_edit",
"tracing", "tracing",
"url", "url",
@@ -4681,16 +4679,6 @@ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]]
name = "serde_assert"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eda563240c1288b044209be1f0d38bb4d15044fb3e00dc354fbc922ab4733e80"
dependencies = [
"hashbrown 0.13.2",
"serde",
]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.183" version = "1.0.183"
@@ -5408,7 +5396,7 @@ dependencies = [
[[package]] [[package]]
name = "tokio-postgres" name = "tokio-postgres"
version = "0.7.7" version = "0.7.7"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=ce7260db5998fe27167da42503905a12e7ad9048#ce7260db5998fe27167da42503905a12e7ad9048" source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"byteorder", "byteorder",
@@ -5977,7 +5965,6 @@ dependencies = [
"routerify", "routerify",
"sentry", "sentry",
"serde", "serde",
"serde_assert",
"serde_json", "serde_json",
"serde_with", "serde_with",
"signal-hook", "signal-hook",

View File

@@ -124,7 +124,6 @@ sentry = { version = "0.31", default-features = false, features = ["backtrace",
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1" serde_json = "1"
serde_with = "2.0" serde_with = "2.0"
serde_assert = "0.5.0"
sha2 = "0.10.2" sha2 = "0.10.2"
signal-hook = "0.3" signal-hook = "0.3"
smallvec = "1.11" smallvec = "1.11"
@@ -162,11 +161,11 @@ env_logger = "0.10"
log = "0.4" log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
## Other git libraries ## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
@@ -203,7 +202,7 @@ tonic-build = "0.9"
# This is only needed for proxy's tests. # This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead. # TODO: we should probably fork `tokio-postgres-rustls` instead.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="ce7260db5998fe27167da42503905a12e7ad9048" } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
################# Binary contents sections ################# Binary contents sections

View File

@@ -68,7 +68,7 @@ pub fn get_spec_from_control_plane(
base_uri: &str, base_uri: &str,
compute_id: &str, compute_id: &str,
) -> Result<Option<ComputeSpec>> { ) -> Result<Option<ComputeSpec>> {
let cp_uri = format!("{base_uri}/compute/api/v2/computes/{compute_id}/spec"); let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") { let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
Ok(v) => v, Ok(v) => v,
Err(_) => "".to_string(), Err(_) => "".to_string(),

View File

@@ -2,6 +2,7 @@ use crate::{background_process, local_env::LocalEnv};
use anyhow::anyhow; use anyhow::anyhow;
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::{path::PathBuf, process::Child}; use std::{path::PathBuf, process::Child};
use utils::id::{NodeId, TenantId}; use utils::id::{NodeId, TenantId};
@@ -13,8 +14,10 @@ pub struct AttachmentService {
const COMMAND: &str = "attachment_service"; const COMMAND: &str = "attachment_service";
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct AttachHookRequest { pub struct AttachHookRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
pub node_id: Option<NodeId>, pub node_id: Option<NodeId>,
} }

View File

@@ -46,6 +46,7 @@ use std::time::Duration;
use anyhow::{anyhow, bail, Context, Result}; use anyhow::{anyhow, bail, Context, Result};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::id::{NodeId, TenantId, TimelineId}; use utils::id::{NodeId, TenantId, TimelineId};
use crate::local_env::LocalEnv; use crate::local_env::LocalEnv;
@@ -56,10 +57,13 @@ use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::{Cluster, ComputeMode, ComputeSpec}; use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
// contents of an endpoint.json file // contents of an endpoint.json file
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct EndpointConf { pub struct EndpointConf {
endpoint_id: String, endpoint_id: String,
#[serde_as(as = "DisplayFromStr")]
tenant_id: TenantId, tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
timeline_id: TimelineId, timeline_id: TimelineId,
mode: ComputeMode, mode: ComputeMode,
pg_port: u16, pg_port: u16,

View File

@@ -8,6 +8,7 @@ use anyhow::{bail, ensure, Context};
use postgres_backend::AuthType; use postgres_backend::AuthType;
use reqwest::Url; use reqwest::Url;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::HashMap; use std::collections::HashMap;
use std::env; use std::env;
use std::fs; use std::fs;
@@ -32,6 +33,7 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for // to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// an example. // an example.
// //
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv { pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and // Base directory for all the nodes (the pageserver, safekeepers and
@@ -57,6 +59,7 @@ pub struct LocalEnv {
// Default tenant ID to use with the 'neon_local' command line utility, when // Default tenant ID to use with the 'neon_local' command line utility, when
// --tenant_id is not explicitly specified. // --tenant_id is not explicitly specified.
#[serde(default)] #[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub default_tenant_id: Option<TenantId>, pub default_tenant_id: Option<TenantId>,
// used to issue tokens during e.g pg start // used to issue tokens during e.g pg start
@@ -81,6 +84,7 @@ pub struct LocalEnv {
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here, // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error. // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table". // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
#[serde_as(as = "HashMap<_, Vec<(DisplayFromStr, DisplayFromStr)>>")]
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>, branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
} }

View File

@@ -6,6 +6,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::id::{TenantId, TimelineId}; use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn; use utils::lsn::Lsn;
@@ -18,6 +19,7 @@ pub type PgIdent = String;
/// Cluster spec or configuration represented as an optional number of /// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description. /// delta operations + final cluster state description.
#[serde_as]
#[derive(Clone, Debug, Default, Deserialize, Serialize)] #[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct ComputeSpec { pub struct ComputeSpec {
pub format_version: f32, pub format_version: f32,
@@ -48,12 +50,12 @@ pub struct ComputeSpec {
// these, and instead set the "neon.tenant_id", "neon.timeline_id", // these, and instead set the "neon.tenant_id", "neon.timeline_id",
// etc. GUCs in cluster.settings. TODO: Once the control plane has been // etc. GUCs in cluster.settings. TODO: Once the control plane has been
// updated to fill these fields, we can make these non optional. // updated to fill these fields, we can make these non optional.
#[serde_as(as = "Option<DisplayFromStr>")]
pub tenant_id: Option<TenantId>, pub tenant_id: Option<TenantId>,
#[serde_as(as = "Option<DisplayFromStr>")]
pub timeline_id: Option<TimelineId>, pub timeline_id: Option<TimelineId>,
#[serde_as(as = "Option<DisplayFromStr>")]
pub pageserver_connstring: Option<String>, pub pageserver_connstring: Option<String>,
#[serde(default)] #[serde(default)]
pub safekeeper_connstrings: Vec<String>, pub safekeeper_connstrings: Vec<String>,
@@ -138,13 +140,14 @@ impl RemoteExtSpec {
} }
} }
#[serde_as]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)] #[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
pub enum ComputeMode { pub enum ComputeMode {
/// A read-write node /// A read-write node
#[default] #[default]
Primary, Primary,
/// A read-only node, pinned at a particular LSN /// A read-only node, pinned at a particular LSN
Static(Lsn), Static(#[serde_as(as = "DisplayFromStr")] Lsn),
/// A read-only node that follows the tip of the branch in hot standby mode /// A read-only node that follows the tip of the branch in hot standby mode
/// ///
/// Future versions may want to distinguish between replicas with hot standby /// Future versions may want to distinguish between replicas with hot standby

View File

@@ -4,6 +4,7 @@
//! See docs/rfcs/025-generation-numbers.md //! See docs/rfcs/025-generation-numbers.md
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::id::{NodeId, TenantId}; use utils::id::{NodeId, TenantId};
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@@ -11,8 +12,10 @@ pub struct ReAttachRequest {
pub node_id: NodeId, pub node_id: NodeId,
} }
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct ReAttachResponseTenant { pub struct ReAttachResponseTenant {
#[serde_as(as = "DisplayFromStr")]
pub id: TenantId, pub id: TenantId,
pub gen: u32, pub gen: u32,
} }
@@ -22,8 +25,10 @@ pub struct ReAttachResponse {
pub tenants: Vec<ReAttachResponseTenant>, pub tenants: Vec<ReAttachResponseTenant>,
} }
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct ValidateRequestTenant { pub struct ValidateRequestTenant {
#[serde_as(as = "DisplayFromStr")]
pub id: TenantId, pub id: TenantId,
pub gen: u32, pub gen: u32,
} }
@@ -38,8 +43,10 @@ pub struct ValidateResponse {
pub tenants: Vec<ValidateResponseTenant>, pub tenants: Vec<ValidateResponseTenant>,
} }
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct ValidateResponseTenant { pub struct ValidateResponseTenant {
#[serde_as(as = "DisplayFromStr")]
pub id: TenantId, pub id: TenantId,
pub valid: bool, pub valid: bool,
} }

View File

@@ -6,7 +6,7 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt}; use byteorder::{BigEndian, ReadBytesExt};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::serde_as; use serde_with::{serde_as, DisplayFromStr};
use strum_macros; use strum_macros;
use utils::{ use utils::{
completion, completion,
@@ -174,19 +174,25 @@ pub enum TimelineState {
Broken { reason: String, backtrace: String }, Broken { reason: String, backtrace: String },
} }
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest { pub struct TimelineCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub new_timeline_id: TimelineId, pub new_timeline_id: TimelineId,
#[serde(default)] #[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>, pub ancestor_timeline_id: Option<TimelineId>,
#[serde(default)] #[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_start_lsn: Option<Lsn>, pub ancestor_start_lsn: Option<Lsn>,
pub pg_version: Option<u32>, pub pg_version: Option<u32>,
} }
#[serde_as]
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct TenantCreateRequest { pub struct TenantCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub new_tenant_id: TenantId, pub new_tenant_id: TenantId,
#[serde(default)] #[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@@ -195,6 +201,7 @@ pub struct TenantCreateRequest {
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
} }
#[serde_as]
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct TenantLoadRequest { pub struct TenantLoadRequest {
@@ -271,26 +278,31 @@ pub struct LocationConfig {
pub tenant_conf: TenantConfig, pub tenant_conf: TenantConfig,
} }
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
#[serde(transparent)] #[serde(transparent)]
pub struct TenantCreateResponse(pub TenantId); pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub TenantId);
#[derive(Serialize)] #[derive(Serialize)]
pub struct StatusResponse { pub struct StatusResponse {
pub id: NodeId, pub id: NodeId,
} }
#[serde_as]
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct TenantLocationConfigRequest { pub struct TenantLocationConfigRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde(flatten)] #[serde(flatten)]
pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
} }
#[serde_as]
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct TenantConfigRequest { pub struct TenantConfigRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde(flatten)] #[serde(flatten)]
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
@@ -362,8 +374,10 @@ pub enum TenantAttachmentStatus {
Failed { reason: String }, Failed { reason: String },
} }
#[serde_as]
#[derive(Serialize, Deserialize, Clone)] #[derive(Serialize, Deserialize, Clone)]
pub struct TenantInfo { pub struct TenantInfo {
#[serde_as(as = "DisplayFromStr")]
pub id: TenantId, pub id: TenantId,
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
pub state: TenantState, pub state: TenantState,
@@ -374,22 +388,33 @@ pub struct TenantInfo {
} }
/// This represents the output of the "timeline_detail" and "timeline_list" API calls. /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo { pub struct TimelineInfo {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>, pub ancestor_timeline_id: Option<TimelineId>,
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_lsn: Option<Lsn>, pub ancestor_lsn: Option<Lsn>,
#[serde_as(as = "DisplayFromStr")]
pub last_record_lsn: Lsn, pub last_record_lsn: Lsn,
#[serde_as(as = "Option<DisplayFromStr>")]
pub prev_record_lsn: Option<Lsn>, pub prev_record_lsn: Option<Lsn>,
#[serde_as(as = "DisplayFromStr")]
pub latest_gc_cutoff_lsn: Lsn, pub latest_gc_cutoff_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub disk_consistent_lsn: Lsn, pub disk_consistent_lsn: Lsn,
/// The LSN that we have successfully uploaded to remote storage /// The LSN that we have successfully uploaded to remote storage
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn: Lsn, pub remote_consistent_lsn: Lsn,
/// The LSN that we are advertizing to safekeepers /// The LSN that we are advertizing to safekeepers
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn_visible: Lsn, pub remote_consistent_lsn_visible: Lsn,
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
@@ -401,6 +426,7 @@ pub struct TimelineInfo {
pub timeline_dir_layer_file_size_sum: Option<u64>, pub timeline_dir_layer_file_size_sum: Option<u64>,
pub wal_source_connstr: Option<String>, pub wal_source_connstr: Option<String>,
#[serde_as(as = "Option<DisplayFromStr>")]
pub last_received_msg_lsn: Option<Lsn>, pub last_received_msg_lsn: Option<Lsn>,
/// the timestamp (in microseconds) of the last received message /// the timestamp (in microseconds) of the last received message
pub last_received_msg_ts: Option<u128>, pub last_received_msg_ts: Option<u128>,
@@ -497,13 +523,23 @@ pub struct LayerAccessStats {
pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>, pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
} }
#[serde_as]
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind")] #[serde(tag = "kind")]
pub enum InMemoryLayerInfo { pub enum InMemoryLayerInfo {
Open { lsn_start: Lsn }, Open {
Frozen { lsn_start: Lsn, lsn_end: Lsn }, #[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
},
Frozen {
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
#[serde_as(as = "DisplayFromStr")]
lsn_end: Lsn,
},
} }
#[serde_as]
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind")] #[serde(tag = "kind")]
pub enum HistoricLayerInfo { pub enum HistoricLayerInfo {
@@ -511,7 +547,9 @@ pub enum HistoricLayerInfo {
layer_file_name: String, layer_file_name: String,
layer_file_size: u64, layer_file_size: u64,
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn, lsn_start: Lsn,
#[serde_as(as = "DisplayFromStr")]
lsn_end: Lsn, lsn_end: Lsn,
remote: bool, remote: bool,
access_stats: LayerAccessStats, access_stats: LayerAccessStats,
@@ -520,6 +558,7 @@ pub enum HistoricLayerInfo {
layer_file_name: String, layer_file_name: String,
layer_file_size: u64, layer_file_size: u64,
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn, lsn_start: Lsn,
remote: bool, remote: bool,
access_stats: LayerAccessStats, access_stats: LayerAccessStats,

View File

@@ -1,18 +1,23 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::{ use utils::{
id::{NodeId, TenantId, TimelineId}, id::{NodeId, TenantId, TimelineId},
lsn::Lsn, lsn::Lsn,
}; };
#[serde_as]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest { pub struct TimelineCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
pub peer_ids: Option<Vec<NodeId>>, pub peer_ids: Option<Vec<NodeId>>,
pub pg_version: u32, pub pg_version: u32,
pub system_id: Option<u64>, pub system_id: Option<u64>,
pub wal_seg_size: Option<u32>, pub wal_seg_size: Option<u32>,
#[serde_as(as = "DisplayFromStr")]
pub commit_lsn: Lsn, pub commit_lsn: Lsn,
// If not passed, it is assigned to the beginning of commit_lsn segment. // If not passed, it is assigned to the beginning of commit_lsn segment.
pub local_start_lsn: Option<Lsn>, pub local_start_lsn: Option<Lsn>,
@@ -23,6 +28,7 @@ fn lsn_invalid() -> Lsn {
} }
/// Data about safekeeper's timeline, mirrors broker.proto. /// Data about safekeeper's timeline, mirrors broker.proto.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo { pub struct SkTimelineInfo {
/// Term. /// Term.
@@ -30,19 +36,25 @@ pub struct SkTimelineInfo {
/// Term of the last entry. /// Term of the last entry.
pub last_log_term: Option<u64>, pub last_log_term: Option<u64>,
/// LSN of the last record. /// LSN of the last record.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub flush_lsn: Lsn, pub flush_lsn: Lsn,
/// Up to which LSN safekeeper regards its WAL as committed. /// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub commit_lsn: Lsn, pub commit_lsn: Lsn,
/// LSN up to which safekeeper has backed WAL. /// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub backup_lsn: Lsn, pub backup_lsn: Lsn,
/// LSN of last checkpoint uploaded by pageserver. /// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub remote_consistent_lsn: Lsn, pub remote_consistent_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub peer_horizon_lsn: Lsn, pub peer_horizon_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub local_start_lsn: Lsn, pub local_start_lsn: Lsn,
/// A connection string to use for WAL receiving. /// A connection string to use for WAL receiving.

View File

@@ -55,7 +55,6 @@ bytes.workspace = true
criterion.workspace = true criterion.workspace = true
hex-literal.workspace = true hex-literal.workspace = true
camino-tempfile.workspace = true camino-tempfile.workspace = true
serde_assert.workspace = true
[[bench]] [[bench]]
name = "benchmarks" name = "benchmarks"

View File

@@ -9,6 +9,7 @@ use jsonwebtoken::{
decode, encode, Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode, Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use crate::id::TenantId; use crate::id::TenantId;
@@ -31,9 +32,11 @@ pub enum Scope {
} }
/// JWT payload. See docs/authentication.md for the format /// JWT payload. See docs/authentication.md for the format
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Claims { pub struct Claims {
#[serde(default)] #[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub tenant_id: Option<TenantId>, pub tenant_id: Option<TenantId>,
pub scope: Scope, pub scope: Scope,
} }

View File

@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
/// ///
/// See docs/rfcs/025-generation-numbers.md for detail on how generation /// See docs/rfcs/025-generation-numbers.md for detail on how generation
/// numbers are used. /// numbers are used.
#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
pub enum Generation { pub enum Generation {
// Generations with this magic value will not add a suffix to S3 keys, and will not // Generations with this magic value will not add a suffix to S3 keys, and will not
// be included in persisted index_part.json. This value is only to be used // be included in persisted index_part.json. This value is only to be used

View File

@@ -1,41 +0,0 @@
/// Byte-slice wrapper whose `Debug` output is array-syntax compatible hex, so that a
/// failed byte comparison in a test prints both sides in a readable form.
///
/// # Usage
///
/// ```
/// use utils::Hex;
///
/// let actual = serialize_something();
/// let expected = [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64];
///
/// // the type implements PartialEq and on mismatch, both sides are printed in 16 wide multiline
/// // output suffixed with an array style length for easier comparisons.
/// assert_eq!(Hex(&actual), Hex(&expected));
///
/// // with `let expected = [0x68];` the error would have been:
/// // assertion `left == right` failed
/// // left: [0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64; 11]
/// // right: [0x68; 1]
/// # fn serialize_something() -> Vec<u8> { "hello world".as_bytes().to_vec() }
/// ```
#[derive(PartialEq)]
pub struct Hex<'a>(pub &'a [u8]);

impl std::fmt::Debug for Hex<'_> {
    /// Writes `[0x.., 0x.., ...; len]`, starting a new line after every 16 bytes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("[")?;

        for (row, chunk) in self.0.chunks(16).enumerate() {
            // Rows after the first are introduced by a separator and a line break.
            // `chunks` never yields an empty chunk, so no emptiness check is needed.
            if row != 0 {
                f.write_str(", \n")?;
            }

            let mut bytes = chunk.iter();
            if let Some(first) = bytes.next() {
                write!(f, "0x{first:02x}")?;
            }
            for byte in bytes {
                write!(f, ", 0x{byte:02x}")?;
            }
        }

        // Array-style length suffix, e.g. `; 11]`.
        write!(f, "; {}]", self.0.len())
    }
}

View File

@@ -14,11 +14,6 @@ use tracing::{self, debug, info, info_span, warn, Instrument};
use std::future::Future; use std::future::Future;
use std::str::FromStr; use std::str::FromStr;
use bytes::{Bytes, BytesMut};
use std::io::Write as _;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| { static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!( register_int_counter!(
"libmetrics_metric_handler_requests_total", "libmetrics_metric_handler_requests_total",
@@ -151,89 +146,94 @@ impl Drop for RequestCancelled {
} }
} }
/// An [`std::io::Write`] implementation on top of a channel sending [`bytes::Bytes`] chunks.
pub struct ChannelWriter {
// bytes accepted by `write` that have not been sent over `tx` yet
buffer: BytesMut,
// channel on which every flushed chunk is sent as `Ok(Bytes)`
pub tx: mpsc::Sender<std::io::Result<Bytes>>,
// running total of bytes handed to `tx` by `flush0`
written: usize,
}
impl ChannelWriter {
/// Creates a writer that buffers into a `BytesMut` and sends flushed chunks over `tx`.
///
/// Panics if `buf_len` is zero.
pub fn new(buf_len: usize, tx: mpsc::Sender<std::io::Result<Bytes>>) -> Self {
assert_ne!(buf_len, 0);
ChannelWriter {
// split about half off the buffer from the start, because we flush depending on
// capacity. first flush will come sooner than without this, but now resizes will
// have better chance of picking up the "other" half. not guaranteed of course.
buffer: BytesMut::with_capacity(buf_len).split_off(buf_len / 2),
tx,
written: 0,
}
}
/// Sends everything currently buffered as a single chunk and returns the number of bytes sent.
///
/// Returns `Ok(0)` without touching the channel when the buffer is empty. If either the
/// send or the follow-up `reserve` fails, the error is reported as `BrokenPipe`.
///
/// NOTE(review): uses `Handle::current().block_on`, so this presumably has to be called
/// from blocking code that still has a Tokio runtime handle available -- confirm at call sites.
pub fn flush0(&mut self) -> std::io::Result<usize> {
let n = self.buffer.len();
if n == 0 {
return Ok(0);
}
tracing::trace!(n, "flushing");
// detach the filled part; `self.buffer` keeps whatever capacity is left behind it
let ready = self.buffer.split().freeze();
// not ideal to call from blocking code to block_on, but we are sure that this
// operation does not spawn_blocking other tasks
let res: Result<(), ()> = tokio::runtime::Handle::current().block_on(async {
self.tx.send(Ok(ready)).await.map_err(|_| ())?;
// throttle sending to allow reuse of our buffer in `write`.
// (the permit returned by `reserve` is dropped right away; only the wait matters)
self.tx.reserve().await.map_err(|_| ())?;
// now the response task has picked up the buffer and hopefully started
// sending it to the client.
Ok(())
});
if res.is_err() {
return Err(std::io::ErrorKind::BrokenPipe.into());
}
// only count bytes once they were successfully handed to the channel
self.written += n;
Ok(n)
}
/// Total number of bytes successfully flushed to the channel so far.
pub fn flushed_bytes(&self) -> usize {
self.written
}
}
impl std::io::Write for ChannelWriter {
    /// Buffers `buf`, flushing the internal buffer to the channel once it is full.
    ///
    /// Always consumes the whole input: on success the returned count equals
    /// `buf.len()`. An error is returned only when the intermediate flush fails.
    fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
        let remaining = self.buffer.capacity() - self.buffer.len();
        let original_len = buf.len();

        if remaining < buf.len() {
            // Top the buffer up with exactly the bytes that still fit into the spare
            // capacity, then flush. The previous code took `buf.len() - remaining`
            // bytes here (the part that does NOT fit), which pushed the buffer past
            // its capacity before flushing.
            let (fits, rest) = buf.split_at(remaining);
            self.buffer.extend_from_slice(fits);
            buf = rest;
            self.flush0()?;
        }

        // assume that this will often under normal operation just move the pointer back to the
        // beginning of allocation, because previous split off parts are already sent and
        // dropped.
        self.buffer.extend_from_slice(buf);
        Ok(original_len)
    }

    /// Sends any buffered bytes to the channel, discarding the byte count.
    fn flush(&mut self) -> std::io::Result<()> {
        self.flush0().map(|_| ())
    }
}
async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
use bytes::{Bytes, BytesMut};
use std::io::Write as _;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
SERVE_METRICS_COUNT.inc(); SERVE_METRICS_COUNT.inc();
/// An [`std::io::Write`] implementation on top of a channel sending [`bytes::Bytes`] chunks.
struct ChannelWriter {
// bytes accepted by `write` that have not been sent over `tx` yet
buffer: BytesMut,
// channel on which every flushed chunk is sent as `Ok(Bytes)`
tx: mpsc::Sender<std::io::Result<Bytes>>,
// running total of bytes handed to `tx` by `flush0`
written: usize,
}
impl ChannelWriter {
/// Creates a writer that buffers into a `BytesMut` and sends flushed chunks over `tx`.
///
/// Panics if `buf_len` is zero.
fn new(buf_len: usize, tx: mpsc::Sender<std::io::Result<Bytes>>) -> Self {
assert_ne!(buf_len, 0);
ChannelWriter {
// split about half off the buffer from the start, because we flush depending on
// capacity. first flush will come sooner than without this, but now resizes will
// have better chance of picking up the "other" half. not guaranteed of course.
buffer: BytesMut::with_capacity(buf_len).split_off(buf_len / 2),
tx,
written: 0,
}
}
/// Sends everything currently buffered as a single chunk and returns the number of bytes sent.
///
/// Returns `Ok(0)` without touching the channel when the buffer is empty. If either the
/// send or the follow-up `reserve` fails, the error is reported as `BrokenPipe`.
///
/// NOTE(review): uses `Handle::current().block_on`, so this presumably has to be called
/// from blocking code that still has a Tokio runtime handle available -- confirm at call sites.
fn flush0(&mut self) -> std::io::Result<usize> {
let n = self.buffer.len();
if n == 0 {
return Ok(0);
}
tracing::trace!(n, "flushing");
// detach the filled part; `self.buffer` keeps whatever capacity is left behind it
let ready = self.buffer.split().freeze();
// not ideal to call from blocking code to block_on, but we are sure that this
// operation does not spawn_blocking other tasks
let res: Result<(), ()> = tokio::runtime::Handle::current().block_on(async {
self.tx.send(Ok(ready)).await.map_err(|_| ())?;
// throttle sending to allow reuse of our buffer in `write`.
// (the permit returned by `reserve` is dropped right away; only the wait matters)
self.tx.reserve().await.map_err(|_| ())?;
// now the response task has picked up the buffer and hopefully started
// sending it to the client.
Ok(())
});
if res.is_err() {
return Err(std::io::ErrorKind::BrokenPipe.into());
}
// only count bytes once they were successfully handed to the channel
self.written += n;
Ok(n)
}
/// Total number of bytes successfully flushed to the channel so far.
fn flushed_bytes(&self) -> usize {
self.written
}
}
impl std::io::Write for ChannelWriter {
    /// Buffers `buf`, flushing the internal buffer to the channel once it is full.
    ///
    /// Always consumes the whole input: on success the returned count equals
    /// `buf.len()`. An error is returned only when the intermediate flush fails.
    fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
        let remaining = self.buffer.capacity() - self.buffer.len();
        let original_len = buf.len();

        if remaining < buf.len() {
            // Top the buffer up with exactly the bytes that still fit into the spare
            // capacity, then flush. The previous code took `buf.len() - remaining`
            // bytes here (the part that does NOT fit), which pushed the buffer past
            // its capacity before flushing.
            let (fits, rest) = buf.split_at(remaining);
            self.buffer.extend_from_slice(fits);
            buf = rest;
            self.flush0()?;
        }

        // assume that this will often under normal operation just move the pointer back to the
        // beginning of allocation, because previous split off parts are already sent and
        // dropped.
        self.buffer.extend_from_slice(buf);
        Ok(original_len)
    }

    /// Sends any buffered bytes to the channel, discarding the byte count.
    fn flush(&mut self) -> std::io::Result<()> {
        self.flush0().map(|_| ())
    }
}
let started_at = std::time::Instant::now(); let started_at = std::time::Instant::now();
let (tx, rx) = mpsc::channel(1); let (tx, rx) = mpsc::channel(1);

View File

@@ -3,7 +3,6 @@ use std::{fmt, str::FromStr};
use anyhow::Context; use anyhow::Context;
use hex::FromHex; use hex::FromHex;
use rand::Rng; use rand::Rng;
use serde::de::Visitor;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use thiserror::Error; use thiserror::Error;
@@ -18,74 +17,12 @@ pub enum IdError {
/// ///
/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look /// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look
/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`. /// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] ///
/// Use `#[serde_as(as = "DisplayFromStr")]` to (de)serialize it as hex string instead: `ad50847381e248feaac9876cc71ae418`.
/// Check the `serde_with::serde_as` documentation for options for more complex types.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
struct Id([u8; 16]); struct Id([u8; 16]);
impl Serialize for Id {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
if serializer.is_human_readable() {
serializer.collect_str(self)
} else {
self.0.serialize(serializer)
}
}
}
impl<'de> Deserialize<'de> for Id {
    /// Mirror of the `Serialize` impl: expects a string from human-readable
    /// formats and a 16-element tuple of bytes from all others.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Remembers which representation was requested so that `expecting`
        // can describe it in error messages.
        struct IdVisitor {
            human_readable: bool,
        }

        impl<'de> Visitor<'de> for IdVisitor {
            type Value = Id;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                let description = if self.human_readable {
                    "value in form of hex string"
                } else {
                    "value in form of integer array([u8; 16])"
                };
                formatter.write_str(description)
            }

            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                // Reuse the array deserializer on top of the sequence access.
                let raw: [u8; 16] =
                    Deserialize::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
                Ok(Id::from(raw))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Id::from_str(v).map_err(E::custom)
            }
        }

        let human_readable = deserializer.is_human_readable();
        let visitor = IdVisitor { human_readable };
        if human_readable {
            deserializer.deserialize_str(visitor)
        } else {
            deserializer.deserialize_tuple(16, visitor)
        }
    }
}
impl Id { impl Id {
pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id { pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
let mut arr = [0u8; 16]; let mut arr = [0u8; 16];
@@ -371,112 +308,3 @@ impl fmt::Display for NodeId {
write!(f, "{}", self.0) write!(f, "{}", self.0)
} }
} }
// Tests for the serde representations of `Id` and of the id types constructed from `[u8; 16]`.
#[cfg(test)]
mod tests {
use serde_assert::{Deserializer, Serializer, Token, Tokens};
use crate::bin_ser::BeSer;
use super::*;
// With a non-human-readable serializer the id must be emitted as a 16-element tuple of u8
// tokens, and those tokens must deserialize back to the same id.
#[test]
fn test_id_serde_non_human_readable() {
let original_id = Id([
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
]);
let expected_tokens = Tokens(vec![
Token::Tuple { len: 16 },
Token::U8(173),
Token::U8(80),
Token::U8(132),
Token::U8(115),
Token::U8(129),
Token::U8(226),
Token::U8(72),
Token::U8(254),
Token::U8(170),
Token::U8(201),
Token::U8(135),
Token::U8(108),
Token::U8(199),
Token::U8(26),
Token::U8(228),
Token::U8(24),
Token::TupleEnd,
]);
let serializer = Serializer::builder().is_human_readable(false).build();
let serialized_tokens = original_id.serialize(&serializer).unwrap();
assert_eq!(serialized_tokens, expected_tokens);
let mut deserializer = Deserializer::builder()
.is_human_readable(false)
.tokens(serialized_tokens)
.build();
let deserialized_id = Id::deserialize(&mut deserializer).unwrap();
assert_eq!(deserialized_id, original_id);
}
// With a human-readable serializer the id must be emitted as a single hex string token,
// and that string must deserialize back to the same id.
#[test]
fn test_id_serde_human_readable() {
let original_id = Id([
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
]);
let expected_tokens = Tokens(vec![Token::Str(String::from(
"ad50847381e248feaac9876cc71ae418",
))]);
let serializer = Serializer::builder().is_human_readable(true).build();
let serialized_tokens = original_id.serialize(&serializer).unwrap();
assert_eq!(serialized_tokens, expected_tokens);
// the deserializer is fed a freshly built token list rather than `serialized_tokens`
let mut deserializer = Deserializer::builder()
.is_human_readable(true)
.tokens(Tokens(vec![Token::Str(String::from(
"ad50847381e248feaac9876cc71ae418",
))]))
.build();
assert_eq!(Id::deserialize(&mut deserializer).unwrap(), original_id);
}
// Builds `$type` from the given 16 bytes, checks that `ser()` yields exactly those bytes,
// and that `des()` of the serialized form gives back the original value.
macro_rules! roundtrip_type {
($type:ty, $expected_bytes:expr) => {{
let expected_bytes: [u8; 16] = $expected_bytes;
let original_id = <$type>::from(expected_bytes);
let ser_bytes = original_id.ser().unwrap();
assert_eq!(ser_bytes, expected_bytes);
let des_id = <$type>::des(&ser_bytes).unwrap();
assert_eq!(des_id, original_id);
}};
}
// Binary round trip for the base `Id` type.
#[test]
fn test_id_bincode_serde() {
let expected_bytes = [
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
];
roundtrip_type!(Id, expected_bytes);
}
// Binary round trip for `TenantId`.
#[test]
fn test_tenant_id_bincode_serde() {
let expected_bytes = [
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
];
roundtrip_type!(TenantId, expected_bytes);
}
// Binary round trip for `TimelineId`.
#[test]
fn test_timeline_id_bincode_serde() {
let expected_bytes = [
173, 80, 132, 115, 129, 226, 72, 254, 170, 201, 135, 108, 199, 26, 228, 24,
];
roundtrip_type!(TimelineId, expected_bytes);
}
}

View File

@@ -24,10 +24,6 @@ pub mod auth;
// utility functions and helper traits for unified unique id generation/serialization etc. // utility functions and helper traits for unified unique id generation/serialization etc.
pub mod id; pub mod id;
mod hex;
pub use hex::Hex;
// http endpoint utils // http endpoint utils
pub mod http; pub mod http;

View File

@@ -1,7 +1,7 @@
#![warn(missing_docs)] #![warn(missing_docs)]
use camino::Utf8Path; use camino::Utf8Path;
use serde::{de::Visitor, Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::{Add, AddAssign}; use std::ops::{Add, AddAssign};
use std::str::FromStr; use std::str::FromStr;
@@ -13,114 +13,10 @@ use crate::seqwait::MonotonicCounter;
pub const XLOG_BLCKSZ: u32 = 8192; pub const XLOG_BLCKSZ: u32 = 8192;
/// A Postgres LSN (Log Sequence Number), also known as an XLogRecPtr /// A Postgres LSN (Log Sequence Number), also known as an XLogRecPtr
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash)] #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct Lsn(pub u64); pub struct Lsn(pub u64);
impl Serialize for Lsn {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
if serializer.is_human_readable() {
serializer.collect_str(self)
} else {
self.0.serialize(serializer)
}
}
}
impl<'de> Deserialize<'de> for Lsn {
    /// Mirror of the `Serialize` impl: expects a string from human-readable
    /// formats and a `u64` from all others.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Remembers which representation was requested so that `expecting`
        // can describe it in error messages.
        struct LsnVisitor {
            human_readable: bool,
        }

        impl<'de> Visitor<'de> for LsnVisitor {
            type Value = Lsn;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                let description = if self.human_readable {
                    "value in form of hex string({upper_u32_hex}/{lower_u32_hex}) representing u64 integer"
                } else {
                    "value in form of integer(u64)"
                };
                formatter.write_str(description)
            }

            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ok(Lsn(v))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Lsn::from_str(v).map_err(E::custom)
            }
        }

        let human_readable = deserializer.is_human_readable();
        let visitor = LsnVisitor { human_readable };
        if human_readable {
            deserializer.deserialize_str(visitor)
        } else {
            deserializer.deserialize_u64(visitor)
        }
    }
}
/// Serde helpers that always represent an `Lsn` as a plain `u64`.
///
/// ### Example
///
/// ```rust
/// # use serde::{Serialize, Deserialize};
/// use utils::lsn::Lsn;
///
/// #[derive(PartialEq, Serialize, Deserialize, Debug)]
/// struct Foo {
///   #[serde(with = "utils::lsn::serde_as_u64")]
///   always_u64: Lsn,
/// }
///
/// let orig = Foo { always_u64: Lsn(1234) };
///
/// let res = serde_json::to_string(&orig).unwrap();
/// assert_eq!(res, r#"{"always_u64":1234}"#);
///
/// let foo = serde_json::from_str::<Foo>(&res).unwrap();
/// assert_eq!(foo, orig);
/// ```
///
pub mod serde_as_u64 {
    use super::Lsn;

    /// Writes the inner `u64` of the Lsn, whether or not the format is human readable.
    ///
    /// Intended for use through `#[serde(with = "...")]` or `#[serde(serialize_with = "...")]`.
    pub fn serialize<S: serde::Serializer>(lsn: &Lsn, serializer: S) -> Result<S::Ok, S::Error> {
        serde::Serialize::serialize(&lsn.0, serializer)
    }

    /// Reads a `u64` and wraps it in an Lsn, whether or not the format is human readable.
    ///
    /// Intended for use through `#[serde(with = "...")]` or `#[serde(deserialize_with = "...")]`.
    pub fn deserialize<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result<Lsn, D::Error> {
        let raw = <u64 as serde::Deserialize>::deserialize(deserializer)?;
        Ok(Lsn(raw))
    }
}
/// We tried to parse an LSN from a string, but failed /// We tried to parse an LSN from a string, but failed
#[derive(Debug, PartialEq, Eq, thiserror::Error)] #[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[error("LsnParseError")] #[error("LsnParseError")]
@@ -368,13 +264,8 @@ impl MonotonicCounter<Lsn> for RecordLsn {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::bin_ser::BeSer;
use super::*; use super::*;
use serde::ser::Serialize;
use serde_assert::{Deserializer, Serializer, Token, Tokens};
#[test] #[test]
fn test_lsn_strings() { fn test_lsn_strings() {
assert_eq!("12345678/AAAA5555".parse(), Ok(Lsn(0x12345678AAAA5555))); assert_eq!("12345678/AAAA5555".parse(), Ok(Lsn(0x12345678AAAA5555)));
@@ -450,95 +341,4 @@ mod tests {
assert_eq!(lsn.fetch_max(Lsn(6000)), Lsn(5678)); assert_eq!(lsn.fetch_max(Lsn(6000)), Lsn(5678));
assert_eq!(lsn.fetch_max(Lsn(5000)), Lsn(6000)); assert_eq!(lsn.fetch_max(Lsn(5000)), Lsn(6000));
} }
#[test]
fn test_lsn_serde() {
    // Checks both serde representations of `Lsn` and that they cannot be mixed up.
    //
    // The names and comments below follow the `is_human_readable` flag that is
    // actually passed to the (de)serializer; they were previously swapped.
    let original_lsn = Lsn(0x0123456789abcdef);

    // is_human_readable(false) => raw u64 token
    let expected_non_readable_tokens = Tokens(vec![Token::U64(0x0123456789abcdef)]);
    // is_human_readable(true) => "{upper_u32_hex}/{lower_u32_hex}" string token
    let expected_readable_tokens = Tokens(vec![Token::Str(String::from("1234567/89ABCDEF"))]);

    // Testing NON human_readable ser/de
    let serializer = Serializer::builder().is_human_readable(false).build();
    let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
    assert_eq!(non_readable_ser_tokens, expected_non_readable_tokens);

    let mut deserializer = Deserializer::builder()
        .is_human_readable(false)
        .tokens(non_readable_ser_tokens)
        .build();
    let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();
    assert_eq!(des_lsn, original_lsn);

    // Testing human_readable ser/de
    let serializer = Serializer::builder().is_human_readable(true).build();
    let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();
    assert_eq!(readable_ser_tokens, expected_readable_tokens);

    let mut deserializer = Deserializer::builder()
        .is_human_readable(true)
        .tokens(readable_ser_tokens)
        .build();
    let des_lsn = Lsn::deserialize(&mut deserializer).unwrap();
    assert_eq!(des_lsn, original_lsn);

    // Testing mismatching ser/de: tokens produced for one representation must be
    // rejected by a deserializer configured for the other.
    let serializer = Serializer::builder().is_human_readable(false).build();
    let non_readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();

    let mut deserializer = Deserializer::builder()
        .is_human_readable(true)
        .tokens(non_readable_ser_tokens)
        .build();
    Lsn::deserialize(&mut deserializer).unwrap_err();

    let serializer = Serializer::builder().is_human_readable(true).build();
    let readable_ser_tokens = original_lsn.serialize(&serializer).unwrap();

    let mut deserializer = Deserializer::builder()
        .is_human_readable(false)
        .tokens(readable_ser_tokens)
        .build();
    Lsn::deserialize(&mut deserializer).unwrap_err();
}
#[test]
fn test_lsn_ensure_roundtrip() {
    // Serialize with a non-human-readable serializer, feed the tokens straight
    // back into a matching deserializer, and expect the same value.
    let lsn_in = Lsn(0xaaaabbbb);

    let tokens = lsn_in
        .serialize(&Serializer::builder().is_human_readable(false).build())
        .unwrap();

    let mut token_reader = Deserializer::builder()
        .is_human_readable(false)
        .tokens(tokens)
        .build();

    let lsn_out = Lsn::deserialize(&mut token_reader).unwrap();
    assert_eq!(lsn_out, lsn_in);
}
#[test]
fn test_lsn_bincode_serde() {
    // `ser()` must produce exactly these eight bytes, and `des()` must invert it.
    let expected_bytes = [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef];
    let source = Lsn(0x0123456789abcdef);

    let encoded = source.ser().unwrap();
    assert_eq!(encoded, expected_bytes);

    let decoded = Lsn::des(&encoded).unwrap();
    assert_eq!(decoded, source);
}
#[test]
fn test_lsn_bincode_ensure_roundtrip() {
    // Distinct byte values make any reordering during (de)serialization visible.
    let expected_bytes = vec![0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
    let source = Lsn(0x01_02_03_04_05_06_07_08);

    let encoded = source.ser().unwrap();
    assert_eq!(encoded, expected_bytes);

    let decoded = Lsn::des(&encoded).unwrap();
    assert_eq!(decoded, source);
}
} }

View File

@@ -3,6 +3,7 @@ use std::time::{Duration, SystemTime};
use bytes::{Buf, BufMut, Bytes, BytesMut}; use bytes::{Buf, BufMut, Bytes, BytesMut};
use pq_proto::{read_cstr, PG_EPOCH}; use pq_proto::{read_cstr, PG_EPOCH};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use tracing::{trace, warn}; use tracing::{trace, warn};
use crate::lsn::Lsn; use crate::lsn::Lsn;
@@ -14,17 +15,21 @@ use crate::lsn::Lsn;
/// ///
/// serde Serialize is used only for human readable dump to json (e.g. in /// serde Serialize is used only for human readable dump to json (e.g. in
/// safekeepers debug_dump). /// safekeepers debug_dump).
#[serde_as]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PageserverFeedback { pub struct PageserverFeedback {
/// Last known size of the timeline. Used to enforce timeline size limit. /// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64, pub current_timeline_size: u64,
/// LSN last received and ingested by the pageserver. Controls backpressure. /// LSN last received and ingested by the pageserver. Controls backpressure.
#[serde_as(as = "DisplayFromStr")]
pub last_received_lsn: Lsn, pub last_received_lsn: Lsn,
/// LSN up to which data is persisted by the pageserver to its local disc. /// LSN up to which data is persisted by the pageserver to its local disc.
/// Controls backpressure. /// Controls backpressure.
#[serde_as(as = "DisplayFromStr")]
pub disk_consistent_lsn: Lsn, pub disk_consistent_lsn: Lsn,
/// LSN up to which data is persisted by the pageserver on s3; safekeepers /// LSN up to which data is persisted by the pageserver on s3; safekeepers
/// consider WAL before it can be removed. /// consider WAL before it can be removed.
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn: Lsn, pub remote_consistent_lsn: Lsn,
// Serialize with RFC3339 format. // Serialize with RFC3339 format.
#[serde(with = "serde_systemtime")] #[serde(with = "serde_systemtime")]

View File

@@ -3,6 +3,7 @@ use anyhow::Context;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use consumption_metrics::EventType; use consumption_metrics::EventType;
use futures::stream::StreamExt; use futures::stream::StreamExt;
use serde_with::serde_as;
use std::{sync::Arc, time::SystemTime}; use std::{sync::Arc, time::SystemTime};
use utils::{ use utils::{
id::{TenantId, TimelineId}, id::{TenantId, TimelineId},
@@ -41,10 +42,13 @@ pub(super) enum Name {
/// ///
/// This is a denormalization done at the MetricsKey const methods; these should not be constructed /// This is a denormalization done at the MetricsKey const methods; these should not be constructed
/// elsewhere. /// elsewhere.
#[serde_with::serde_as]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub(crate) struct MetricsKey { pub(crate) struct MetricsKey {
#[serde_as(as = "serde_with::DisplayFromStr")]
pub(super) tenant_id: TenantId, pub(super) tenant_id: TenantId,
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub(super) timeline_id: Option<TimelineId>, pub(super) timeline_id: Option<TimelineId>,

View File

@@ -1,4 +1,5 @@
use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE}; use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE};
use serde_with::serde_as;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::Instrument; use tracing::Instrument;
@@ -6,9 +7,12 @@ use super::{metrics::Name, Cache, MetricsKey, RawMetric};
use utils::id::{TenantId, TimelineId}; use utils::id::{TenantId, TimelineId};
/// How the metrics from pageserver are identified. /// How the metrics from pageserver are identified.
#[serde_with::serde_as]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)] #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]
struct Ids { struct Ids {
#[serde_as(as = "serde_with::DisplayFromStr")]
pub(super) tenant_id: TenantId, pub(super) tenant_id: TenantId,
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub(super) timeline_id: Option<TimelineId>, pub(super) timeline_id: Option<TimelineId>,
} }

View File

@@ -18,6 +18,7 @@ use hex::FromHex;
use remote_storage::{GenericRemoteStorage, RemotePath}; use remote_storage::{GenericRemoteStorage, RemotePath};
use serde::Deserialize; use serde::Deserialize;
use serde::Serialize; use serde::Serialize;
use serde_with::serde_as;
use thiserror::Error; use thiserror::Error;
use tokio; use tokio;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
@@ -214,6 +215,7 @@ where
/// during recovery as startup. /// during recovery as startup.
const TEMP_SUFFIX: &str = "tmp"; const TEMP_SUFFIX: &str = "tmp";
#[serde_as]
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
struct DeletionList { struct DeletionList {
/// Serialization version, for future use /// Serialization version, for future use
@@ -242,6 +244,7 @@ struct DeletionList {
validated: bool, validated: bool,
} }
#[serde_as]
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
struct DeletionHeader { struct DeletionHeader {
/// Serialization version, for future use /// Serialization version, for future use

View File

@@ -17,6 +17,7 @@ use pageserver_api::models::{
TenantLoadRequest, TenantLocationConfigRequest, TenantLoadRequest, TenantLocationConfigRequest,
}; };
use remote_storage::GenericRemoteStorage; use remote_storage::GenericRemoteStorage;
use serde_with::{serde_as, DisplayFromStr};
use tenant_size_model::{SizeResult, StorageModel}; use tenant_size_model::{SizeResult, StorageModel};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::*; use tracing::*;
@@ -498,8 +499,10 @@ async fn get_lsn_by_timestamp_handler(
let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?; let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;
if version.unwrap_or(0) > 1 { if version.unwrap_or(0) > 1 {
#[serde_as]
#[derive(serde::Serialize)] #[derive(serde::Serialize)]
struct Result { struct Result {
#[serde_as(as = "DisplayFromStr")]
lsn: Lsn, lsn: Lsn,
kind: &'static str, kind: &'static str,
} }
@@ -808,8 +811,10 @@ async fn tenant_size_handler(
} }
/// The type resides in the pageserver not to expose `ModelInputs`. /// The type resides in the pageserver not to expose `ModelInputs`.
#[serde_with::serde_as]
#[derive(serde::Serialize)] #[derive(serde::Serialize)]
struct TenantHistorySize { struct TenantHistorySize {
#[serde_as(as = "serde_with::DisplayFromStr")]
id: TenantId, id: TenantId,
/// Size is a mixture of WAL and logical size, so the unit is bytes. /// Size is a mixture of WAL and logical size, so the unit is bytes.
/// ///

View File

@@ -3340,6 +3340,7 @@ fn run_initdb(
.args(["-D", initdb_target_dir.as_ref()]) .args(["-D", initdb_target_dir.as_ref()])
.args(["-U", &conf.superuser]) .args(["-U", &conf.superuser])
.args(["-E", "utf8"]) .args(["-E", "utf8"])
.args(["-l", "logical"])
.arg("--no-instructions") .arg("--no-instructions")
// This is only used for a temporary installation that is deleted shortly after, // This is only used for a temporary installation that is deleted shortly after,
// so no need to fsync it // so no need to fsync it

View File

@@ -406,123 +406,4 @@ mod tests {
METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION
); );
} }
// Checks that `ser()` of a `TimelineMetadata` equals its `to_bytes()` output prefixed with the
// big-endian length of that output, and that `des()` of those bytes gives back the metadata
// with a header recomputed from the serialized body.
#[test]
fn test_metadata_bincode_serde() {
let original_metadata = TimelineMetadata::new(
Lsn(0x200),
Some(Lsn(0x100)),
Some(TIMELINE_ID),
Lsn(0),
Lsn(0),
Lsn(0),
// Any version will do here, so use the default
crate::DEFAULT_PG_VERSION,
);
let metadata_bytes = original_metadata
.to_bytes()
.expect("Cannot create bytes array from metadata");
let metadata_bincode_be_bytes = original_metadata
.ser()
.expect("Cannot serialize the metadata");
// 8 bytes for the length of the vector
assert_eq!(metadata_bincode_be_bytes.len(), 8 + metadata_bytes.len());
// build the expected encoding by hand: length prefix followed by the `to_bytes()` payload
let expected_bincode_bytes = {
let mut temp = vec![];
let len_bytes = metadata_bytes.len().to_be_bytes();
temp.extend_from_slice(&len_bytes);
temp.extend_from_slice(&metadata_bytes);
temp
};
assert_eq!(metadata_bincode_be_bytes, expected_bincode_bytes);
let deserialized_metadata = TimelineMetadata::des(&metadata_bincode_be_bytes).unwrap();
// Deserialized metadata has the metadata header, which is different from the serialized one.
// Reference: TimelineMetaData::to_bytes()
let expected_metadata = {
let mut temp_metadata = original_metadata;
let body_bytes = temp_metadata
.body
.ser()
.expect("Cannot serialize the metadata body");
let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
// header fields are derived from the serialized body: total size and crc32c checksum
let hdr = TimelineMetadataHeader {
size: metadata_size as u16,
format_version: METADATA_FORMAT_VERSION,
checksum: crc32c::crc32c(&body_bytes),
};
temp_metadata.hdr = hdr;
temp_metadata
};
assert_eq!(deserialized_metadata, expected_metadata);
}
// Pins the exact serialized bytes of a known `TimelineMetadata` and checks that deserializing
// them gives back the metadata with a header recomputed from the serialized body.
#[test]
fn test_metadata_bincode_serde_ensure_roundtrip() {
let original_metadata = TimelineMetadata::new(
Lsn(0x200),
Some(Lsn(0x100)),
Some(TIMELINE_ID),
Lsn(0),
Lsn(0),
Lsn(0),
// Any version will do here, so use the default
crate::DEFAULT_PG_VERSION,
);
// literal fixture: any change to the serialized layout must show up as a diff here
let expected_bytes = vec![
/* bincode length encoding bytes */
0, 0, 0, 0, 0, 0, 2, 0, // 8 bytes for the length of the serialized vector
/* TimelineMetadataHeader */
4, 37, 101, 34, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)
/* TimelineMetadataBodyV2 */
0, 0, 0, 0, 0, 0, 2, 0, // disk_consistent_lsn (8 bytes)
1, 0, 0, 0, 0, 0, 0, 1, 0, // prev_record_lsn (9 bytes)
1, 17, 34, 51, 68, 85, 102, 119, 136, 17, 34, 51, 68, 85, 102, 119,
136, // ancestor_timeline (17 bytes)
0, 0, 0, 0, 0, 0, 0, 0, // ancestor_lsn (8 bytes)
0, 0, 0, 0, 0, 0, 0, 0, // latest_gc_cutoff_lsn (8 bytes)
0, 0, 0, 0, 0, 0, 0, 0, // initdb_lsn (8 bytes)
0, 0, 0, 15, // pg_version (4 bytes)
/* padding bytes */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
];
let metadata_ser_bytes = original_metadata.ser().unwrap();
assert_eq!(metadata_ser_bytes, expected_bytes);
// the deserialized value is expected to carry a header derived from the serialized body
let expected_metadata = {
let mut temp_metadata = original_metadata;
let body_bytes = temp_metadata
.body
.ser()
.expect("Cannot serialize the metadata body");
let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
let hdr = TimelineMetadataHeader {
size: metadata_size as u16,
format_version: METADATA_FORMAT_VERSION,
checksum: crc32c::crc32c(&body_bytes),
};
temp_metadata.hdr = hdr;
temp_metadata
};
let des_metadata = TimelineMetadata::des(&metadata_ser_bytes).unwrap();
assert_eq!(des_metadata, expected_metadata);
}
} }

View File

@@ -1542,7 +1542,7 @@ pub fn remote_index_path(
} }
/// Given the key of an index, parse out the generation part of the name /// Given the key of an index, parse out the generation part of the name
pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> { pub(crate) fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
let file_name = match path.get_path().file_name() { let file_name = match path.get_path().file_name() {
Some(f) => f, Some(f) => f,
None => { None => {

View File

@@ -6,6 +6,7 @@ use std::collections::HashMap;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::bin_ser::SerializeError; use utils::bin_ser::SerializeError;
use crate::tenant::metadata::TimelineMetadata; use crate::tenant::metadata::TimelineMetadata;
@@ -57,6 +58,7 @@ impl LayerFileMetadata {
/// ///
/// This type needs to be backwards and forwards compatible. When changing the fields, /// This type needs to be backwards and forwards compatible. When changing the fields,
/// remember to add a test case for the changed version. /// remember to add a test case for the changed version.
#[serde_as]
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct IndexPart { pub struct IndexPart {
/// Debugging aid describing the version of this type. /// Debugging aid describing the version of this type.
@@ -76,6 +78,7 @@ pub struct IndexPart {
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata. // 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
// It's duplicated for convenience when reading the serialized structure, but is // It's duplicated for convenience when reading the serialized structure, but is
// private because internally we would read from metadata instead. // private because internally we would read from metadata instead.
#[serde_as(as = "DisplayFromStr")]
disk_consistent_lsn: Lsn, disk_consistent_lsn: Lsn,
#[serde(rename = "metadata_bytes")] #[serde(rename = "metadata_bytes")]
@@ -152,7 +155,7 @@ pub struct IndexLayerMetadata {
#[serde(default = "Generation::none")] #[serde(default = "Generation::none")]
#[serde(skip_serializing_if = "Generation::is_none")] #[serde(skip_serializing_if = "Generation::is_none")]
pub generation: Generation, pub(super) generation: Generation,
} }
impl From<LayerFileMetadata> for IndexLayerMetadata { impl From<LayerFileMetadata> for IndexLayerMetadata {

View File

@@ -29,6 +29,7 @@ use tenant_size_model::{Segment, StorageModel};
/// needs. We will convert this into a StorageModel when it's time to perform /// needs. We will convert this into a StorageModel when it's time to perform
/// the calculation. /// the calculation.
/// ///
#[serde_with::serde_as]
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct ModelInputs { pub struct ModelInputs {
pub segments: Vec<SegmentMeta>, pub segments: Vec<SegmentMeta>,
@@ -36,9 +37,11 @@ pub struct ModelInputs {
} }
/// A [`Segment`], with some extra information for display purposes /// A [`Segment`], with some extra information for display purposes
#[serde_with::serde_as]
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SegmentMeta { pub struct SegmentMeta {
pub segment: Segment, pub segment: Segment,
#[serde_as(as = "serde_with::DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
pub kind: LsnKind, pub kind: LsnKind,
} }
@@ -74,22 +77,32 @@ pub enum LsnKind {
/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as /// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as
/// part of [`ModelInputs`] from the HTTP api, explaining the inputs. /// part of [`ModelInputs`] from the HTTP api, explaining the inputs.
#[serde_with::serde_as]
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct TimelineInputs { pub struct TimelineInputs {
#[serde_as(as = "serde_with::DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
pub ancestor_id: Option<TimelineId>, pub ancestor_id: Option<TimelineId>,
#[serde_as(as = "serde_with::DisplayFromStr")]
ancestor_lsn: Lsn, ancestor_lsn: Lsn,
#[serde_as(as = "serde_with::DisplayFromStr")]
last_record: Lsn, last_record: Lsn,
#[serde_as(as = "serde_with::DisplayFromStr")]
latest_gc_cutoff: Lsn, latest_gc_cutoff: Lsn,
#[serde_as(as = "serde_with::DisplayFromStr")]
horizon_cutoff: Lsn, horizon_cutoff: Lsn,
#[serde_as(as = "serde_with::DisplayFromStr")]
pitr_cutoff: Lsn, pitr_cutoff: Lsn,
/// Cutoff point based on GC settings /// Cutoff point based on GC settings
#[serde_as(as = "serde_with::DisplayFromStr")]
next_gc_cutoff: Lsn, next_gc_cutoff: Lsn,
/// Cutoff point calculated from the user-supplied 'max_retention_period' /// Cutoff point calculated from the user-supplied 'max_retention_period'
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
retention_param_cutoff: Option<Lsn>, retention_param_cutoff: Option<Lsn>,
} }

View File

@@ -2936,10 +2936,13 @@ struct CompactLevel0Phase1StatsBuilder {
new_deltas_size: Option<u64>, new_deltas_size: Option<u64>,
} }
#[serde_as]
#[derive(serde::Serialize)] #[derive(serde::Serialize)]
struct CompactLevel0Phase1Stats { struct CompactLevel0Phase1Stats {
version: u64, version: u64,
#[serde_as(as = "serde_with::DisplayFromStr")]
tenant_id: TenantId, tenant_id: TenantId,
#[serde_as(as = "serde_with::DisplayFromStr")]
timeline_id: TimelineId, timeline_id: TimelineId,
read_lock_acquisition_micros: RecordedDuration, read_lock_acquisition_micros: RecordedDuration,
read_lock_held_spawn_blocking_startup_micros: RecordedDuration, read_lock_held_spawn_blocking_startup_micros: RecordedDuration,

View File

@@ -88,7 +88,7 @@ static void StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd);
static void WalSndLoop(WalProposer *wp); static void WalSndLoop(WalProposer *wp);
static void XLogBroadcastWalProposer(WalProposer *wp); static void XLogBroadcastWalProposer(WalProposer *wp);
static void XLogWalPropWrite(WalProposer *wp, char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr);
static void XLogWalPropClose(XLogRecPtr recptr); static void XLogWalPropClose(XLogRecPtr recptr);
static void static void
@@ -1241,7 +1241,7 @@ WalProposerRecovery(Safekeeper *sk, TimeLineID timeline, XLogRecPtr startpos, XL
rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE; rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE;
/* write WAL to disk */ /* write WAL to disk */
XLogWalPropWrite(sk->wp, &buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn); XLogWalPropWrite(&buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
ereport(DEBUG1, ereport(DEBUG1,
(errmsg("Recover message %X/%X length %d", (errmsg("Recover message %X/%X length %d",
@@ -1283,24 +1283,11 @@ static XLogSegNo walpropSegNo = 0;
* Write XLOG data to disk. * Write XLOG data to disk.
*/ */
static void static void
XLogWalPropWrite(WalProposer *wp, char *buf, Size nbytes, XLogRecPtr recptr) XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr)
{ {
int startoff; int startoff;
int byteswritten; int byteswritten;
/*
* Apart from walproposer, basebackup LSN page is also written out by
* postgres itself which writes WAL only in pages, and in basebackup it is
* inherently dummy (only safekeepers have historic WAL). Update WAL buffers
* here to avoid dummy page overwriting correct one we download here. Ugly,
* but alternatives are about the same ugly. We won't need that if we switch
* to on-demand WAL download from safekeepers, without writing to disk.
*
* https://github.com/neondatabase/neon/issues/5749
*/
if (!wp->config->syncSafekeepers)
XLogUpdateWalBuffers(buf, recptr, nbytes);
while (nbytes > 0) while (nbytes > 0)
{ {
int segbytes; int segbytes;

View File

@@ -470,26 +470,30 @@ async fn query_to_json<T: GenericClient>(
} }
.and_then(|s| s.parse::<i64>().ok()); .and_then(|s| s.parse::<i64>().ok());
let mut fields = vec![]; let fields = if !rows.is_empty() {
let mut columns = vec![]; rows[0]
.columns()
for c in row_stream.columns() { .iter()
fields.push(json!({ .map(|c| {
"name": Value::String(c.name().to_owned()), json!({
"dataTypeID": Value::Number(c.type_().oid().into()), "name": Value::String(c.name().to_owned()),
"tableID": c.table_oid(), "dataTypeID": Value::Number(c.type_().oid().into()),
"columnID": c.column_id(), "tableID": c.table_oid(),
"dataTypeSize": c.type_size(), "columnID": c.column_id(),
"dataTypeModifier": c.type_modifier(), "dataTypeSize": c.type_size(),
"format": "text", "dataTypeModifier": c.type_modifier(),
})); "format": "text",
columns.push(client.get_type(c.type_oid()).await?); })
} })
.collect::<Vec<_>>()
} else {
Vec::new()
};
// convert rows to JSON // convert rows to JSON
let rows = rows let rows = rows
.iter() .iter()
.map(|row| pg_text_row_to_json(row, &columns, raw_output, array_mode)) .map(|row| pg_text_row_to_json(row, raw_output, array_mode))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
// resulting JSON format is based on the format of node-postgres result // resulting JSON format is based on the format of node-postgres result
@@ -510,28 +514,22 @@ async fn query_to_json<T: GenericClient>(
// //
pub fn pg_text_row_to_json( pub fn pg_text_row_to_json(
row: &Row, row: &Row,
columns: &[Type],
raw_output: bool, raw_output: bool,
array_mode: bool, array_mode: bool,
) -> Result<Value, anyhow::Error> { ) -> Result<Value, anyhow::Error> {
let iter = row let iter = row.columns().iter().enumerate().map(|(i, column)| {
.columns() let name = column.name();
.iter() let pg_value = row.as_text(i)?;
.zip(columns) let json_value = if raw_output {
.enumerate() match pg_value {
.map(|(i, (column, typ))| { Some(v) => Value::String(v.to_string()),
let name = column.name(); None => Value::Null,
let pg_value = row.as_text(i)?; }
let json_value = if raw_output { } else {
match pg_value { pg_text_to_json(pg_value, column.type_())?
Some(v) => Value::String(v.to_string()), };
None => Value::Null, Ok((name.to_string(), json_value))
} });
} else {
pg_text_to_json(pg_value, typ)?
};
Ok((name.to_string(), json_value))
});
if array_mode { if array_mode {
// drop keys and aggregate into array // drop keys and aggregate into array

View File

@@ -33,7 +33,6 @@ reqwest = { workspace = true, default-features = false, features = ["rustls-tls"
aws-config = { workspace = true, default-features = false, features = ["rustls", "credentials-sso"] } aws-config = { workspace = true, default-features = false, features = ["rustls", "credentials-sso"] }
pageserver = { path = "../pageserver" } pageserver = { path = "../pageserver" }
remote_storage = { path = "../libs/remote_storage" }
tracing.workspace = true tracing.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true

View File

@@ -1,18 +1,13 @@
use std::collections::HashSet; use std::collections::HashSet;
use anyhow::Context; use anyhow::Context;
use aws_sdk_s3::{types::ObjectIdentifier, Client}; use aws_sdk_s3::Client;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
use utils::generation::Generation;
use crate::cloud_admin_api::BranchData; use crate::cloud_admin_api::BranchData;
use crate::metadata_stream::stream_listing; use crate::{download_object_with_retries, list_objects_with_retries, RootTarget};
use crate::{download_object_with_retries, RootTarget};
use futures_util::{pin_mut, StreamExt};
use pageserver::tenant::remote_timeline_client::parse_remote_index_path;
use pageserver::tenant::storage_layer::LayerFileName; use pageserver::tenant::storage_layer::LayerFileName;
use pageserver::tenant::IndexPart; use pageserver::tenant::IndexPart;
use remote_storage::RemotePath;
use utils::id::TenantTimelineId; use utils::id::TenantTimelineId;
pub(crate) struct TimelineAnalysis { pub(crate) struct TimelineAnalysis {
@@ -73,7 +68,6 @@ pub(crate) async fn branch_cleanup_and_check_errors(
match s3_data.blob_data { match s3_data.blob_data {
BlobDataParseResult::Parsed { BlobDataParseResult::Parsed {
index_part, index_part,
index_part_generation,
mut s3_layers, mut s3_layers,
} => { } => {
if !IndexPart::KNOWN_VERSIONS.contains(&index_part.get_version()) { if !IndexPart::KNOWN_VERSIONS.contains(&index_part.get_version()) {
@@ -113,62 +107,33 @@ pub(crate) async fn branch_cleanup_and_check_errors(
)) ))
} }
let layer_map_key = (layer, metadata.generation); if !s3_layers.remove(&layer) {
if !s3_layers.remove(&layer_map_key) {
// FIXME: this will emit false positives if an index was
// uploaded concurrently with our scan. To make this check
// correct, we need to try sending a HEAD request for the
// layer we think is missing.
result.errors.push(format!( result.errors.push(format!(
"index_part.json contains a layer {}{} that is not present in remote storage", "index_part.json contains a layer {} that is not present in S3",
layer_map_key.0.file_name(), layer.file_name(),
layer_map_key.1.get_suffix()
)) ))
} }
} }
let orphan_layers: Vec<(LayerFileName, Generation)> = s3_layers if !s3_layers.is_empty() {
.into_iter()
.filter(|(_layer_name, gen)|
// A layer is only considered orphaned if it has a generation below
// the index. If the generation is >= the index, then the layer may
// be an upload from a running pageserver, or even an upload from
// a new generation that didn't upload an index yet.
//
// Even so, a layer that is not referenced by the index could just
// be something enqueued for deletion, so while this check is valid
// for indicating that a layer is garbage, it is not an indicator
// of a problem.
gen < &index_part_generation)
.collect();
if !orphan_layers.is_empty() {
result.errors.push(format!( result.errors.push(format!(
"index_part.json does not contain layers from S3: {:?}", "index_part.json does not contain layers from S3: {:?}",
orphan_layers s3_layers
.iter() .iter()
.map(|(layer_name, gen)| format!( .map(|layer_name| layer_name.file_name())
"{}{}",
layer_name.file_name(),
gen.get_suffix()
))
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
)); ));
result.garbage_keys.extend(orphan_layers.iter().map( result
|(layer_name, layer_gen)| { .garbage_keys
.extend(s3_layers.iter().map(|layer_name| {
let mut key = s3_root.timeline_root(id).prefix_in_bucket; let mut key = s3_root.timeline_root(id).prefix_in_bucket;
let delimiter = s3_root.delimiter(); let delimiter = s3_root.delimiter();
if !key.ends_with(delimiter) { if !key.ends_with(delimiter) {
key.push_str(delimiter); key.push_str(delimiter);
} }
key.push_str(&format!( key.push_str(&layer_name.file_name());
"{}{}",
&layer_name.file_name(),
layer_gen.get_suffix()
));
key key
}, }));
));
} }
} }
BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend( BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
@@ -213,96 +178,69 @@ pub(crate) struct S3TimelineBlobData {
pub(crate) enum BlobDataParseResult { pub(crate) enum BlobDataParseResult {
Parsed { Parsed {
index_part: IndexPart, index_part: IndexPart,
index_part_generation: Generation, s3_layers: HashSet<LayerFileName>,
s3_layers: HashSet<(LayerFileName, Generation)>,
}, },
Incorrect(Vec<String>), Incorrect(Vec<String>),
} }
fn parse_layer_object_name(name: &str) -> Result<(LayerFileName, Generation), String> {
match name.rsplit_once('-') {
// FIXME: this is gross, just use a regex?
Some((layer_filename, gen)) if gen.len() == 8 => {
let layer = layer_filename.parse::<LayerFileName>()?;
let gen =
Generation::parse_suffix(gen).ok_or("Malformed generation suffix".to_string())?;
Ok((layer, gen))
}
_ => Ok((name.parse::<LayerFileName>()?, Generation::none())),
}
}
pub(crate) async fn list_timeline_blobs( pub(crate) async fn list_timeline_blobs(
s3_client: &Client, s3_client: &Client,
id: TenantTimelineId, id: TenantTimelineId,
s3_root: &RootTarget, s3_root: &RootTarget,
) -> anyhow::Result<S3TimelineBlobData> { ) -> anyhow::Result<S3TimelineBlobData> {
let mut s3_layers = HashSet::new(); let mut s3_layers = HashSet::new();
let mut index_part_object = None;
let timeline_dir_target = s3_root.timeline_root(&id);
let mut continuation_token = None;
let mut errors = Vec::new(); let mut errors = Vec::new();
let mut keys_to_remove = Vec::new(); let mut keys_to_remove = Vec::new();
let mut timeline_dir_target = s3_root.timeline_root(&id); loop {
timeline_dir_target.delimiter = String::new(); let fetch_response =
list_objects_with_retries(s3_client, &timeline_dir_target, continuation_token.clone())
.await?;
let mut index_parts: Vec<ObjectIdentifier> = Vec::new(); let subdirectories = fetch_response.common_prefixes().unwrap_or_default();
if !subdirectories.is_empty() {
errors.push(format!(
"S3 list response should not contain any subdirectories, but got {subdirectories:?}"
));
}
let stream = stream_listing(s3_client, &timeline_dir_target); for (object, key) in fetch_response
pin_mut!(stream); .contents()
while let Some(obj) = stream.next().await { .unwrap_or_default()
let obj = obj?; .iter()
let key = match obj.key() { .filter_map(|object| Some((object, object.key()?)))
Some(k) => k, {
None => continue, let blob_name = key.strip_prefix(&timeline_dir_target.prefix_in_bucket);
}; match blob_name {
Some("index_part.json") => index_part_object = Some(object.clone()),
let blob_name = key.strip_prefix(&timeline_dir_target.prefix_in_bucket); Some(maybe_layer_name) => match maybe_layer_name.parse::<LayerFileName>() {
match blob_name { Ok(new_layer) => {
Some(name) if name.starts_with("index_part.json") => { s3_layers.insert(new_layer);
tracing::info!("Index key {key}"); }
index_parts.push(obj) Err(e) => {
} errors.push(
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) { format!("S3 list response got an object with key {key} that is not a layer name: {e}"),
Ok((new_layer, gen)) => { );
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen); keys_to_remove.push(key.to_string());
s3_layers.insert((new_layer, gen)); }
} },
Err(e) => { None => {
tracing::info!("Error parsing key {maybe_layer_name}"); errors.push(format!("S3 list response got an object with odd key {key}"));
errors.push(
format!("S3 list response got an object with key {key} that is not a layer name: {e}"),
);
keys_to_remove.push(key.to_string()); keys_to_remove.push(key.to_string());
} }
},
None => {
tracing::info!("Peculiar key {}", key);
errors.push(format!("S3 list response got an object with odd key {key}"));
keys_to_remove.push(key.to_string());
} }
} }
}
// Choose the index_part with the highest generation match fetch_response.next_continuation_token {
let (index_part_object, index_part_generation) = match index_parts Some(new_token) => continuation_token = Some(new_token),
.iter() None => break,
.filter_map(|k| {
let key = k.key().unwrap();
// Stripping the index key to the last part, because RemotePath doesn't
// like absolute paths, and depending on prefix_in_bucket it's possible
// for the keys we read back to start with a slash.
let basename = key.rsplit_once('/').unwrap().1;
parse_remote_index_path(RemotePath::from_string(basename).unwrap()).map(|g| (k, g))
})
.max_by_key(|i| i.1)
.map(|(k, g)| (k.clone(), g))
{
Some((key, gen)) => (Some(key), gen),
None => {
// Legacy/missing case: one or zero index parts, which did not have a generation
(index_parts.pop(), Generation::none())
} }
}; }
if index_part_object.is_none() { if index_part_object.is_none() {
errors.push("S3 list response got no index_part.json file".to_string()); errors.push("S3 list response got no index_part.json file".to_string());
@@ -323,7 +261,6 @@ pub(crate) async fn list_timeline_blobs(
return Ok(S3TimelineBlobData { return Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Parsed { blob_data: BlobDataParseResult::Parsed {
index_part, index_part,
index_part_generation,
s3_layers, s3_layers,
}, },
keys_to_remove, keys_to_remove,

View File

@@ -5,7 +5,6 @@ use std::time::Duration;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use hex::FromHex; use hex::FromHex;
use pageserver::tenant::Tenant;
use reqwest::{header, Client, StatusCode, Url}; use reqwest::{header, Client, StatusCode, Url};
use serde::Deserialize; use serde::Deserialize;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
@@ -119,18 +118,13 @@ fn from_nullable_id<'de, D>(deserializer: D) -> Result<TenantId, D::Error>
where where
D: serde::de::Deserializer<'de>, D: serde::de::Deserializer<'de>,
{ {
if deserializer.is_human_readable() { let id_str = String::deserialize(deserializer)?;
let id_str = String::deserialize(deserializer)?; if id_str.is_empty() {
if id_str.is_empty() { // This is a bogus value, but for the purposes of the scrubber all that
// This is a bogus value, but for the purposes of the scrubber all that // matters is that it doesn't collide with any real IDs.
// matters is that it doesn't collide with any real IDs. Ok(TenantId::from([0u8; 16]))
Ok(TenantId::from([0u8; 16]))
} else {
TenantId::from_hex(&id_str).map_err(|e| serde::de::Error::custom(format!("{e}")))
}
} else { } else {
let id_arr = <[u8; 16]>::deserialize(deserializer)?; TenantId::from_hex(&id_str).map_err(|e| serde::de::Error::custom(format!("{e}")))
Ok(TenantId::from(id_arr))
} }
} }
@@ -159,6 +153,7 @@ pub struct ProjectData {
pub maintenance_set: Option<String>, pub maintenance_set: Option<String>,
} }
#[serde_with::serde_as]
#[derive(Debug, serde::Deserialize)] #[derive(Debug, serde::Deserialize)]
pub struct BranchData { pub struct BranchData {
pub id: BranchId, pub id: BranchId,
@@ -166,10 +161,12 @@ pub struct BranchData {
pub updated_at: DateTime<Utc>, pub updated_at: DateTime<Utc>,
pub name: String, pub name: String,
pub project_id: ProjectId, pub project_id: ProjectId,
#[serde_as(as = "serde_with::DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
#[serde(default)] #[serde(default)]
pub parent_id: Option<BranchId>, pub parent_id: Option<BranchId>,
#[serde(default)] #[serde(default)]
#[serde_as(as = "Option<serde_with::DisplayFromStr>")]
pub parent_lsn: Option<Lsn>, pub parent_lsn: Option<Lsn>,
pub default: bool, pub default: bool,
pub deleted: bool, pub deleted: bool,

View File

@@ -34,9 +34,6 @@ const CLOUD_ADMIN_API_TOKEN_ENV_VAR: &str = "CLOUD_ADMIN_API_TOKEN";
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct S3Target { pub struct S3Target {
pub bucket_name: String, pub bucket_name: String,
/// This `prefix_in_bucket` is only equal to the PS/SK config of the same
/// name for the RootTarget: other instances of S3Target will have prefix_in_bucket
/// with extra parts.
pub prefix_in_bucket: String, pub prefix_in_bucket: String,
pub delimiter: String, pub delimiter: String,
} }
@@ -80,13 +77,9 @@ impl Display for NodeKind {
impl S3Target { impl S3Target {
pub fn with_sub_segment(&self, new_segment: &str) -> Self { pub fn with_sub_segment(&self, new_segment: &str) -> Self {
let mut new_self = self.clone(); let mut new_self = self.clone();
if new_self.prefix_in_bucket.is_empty() { let _ = new_self.prefix_in_bucket.pop();
new_self.prefix_in_bucket = format!("/{}/", new_segment); new_self.prefix_in_bucket =
} else { [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
let _ = new_self.prefix_in_bucket.pop();
new_self.prefix_in_bucket =
[&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
}
new_self new_self
} }
} }
@@ -98,10 +91,10 @@ pub enum RootTarget {
} }
impl RootTarget { impl RootTarget {
pub fn tenants_root(&self) -> S3Target { pub fn tenants_root(&self) -> &S3Target {
match self { match self {
Self::Pageserver(root) => root.with_sub_segment(TENANTS_SEGMENT_NAME), Self::Pageserver(root) => root,
Self::Safekeeper(root) => root.with_sub_segment("wal"), Self::Safekeeper(root) => root,
} }
} }
@@ -140,7 +133,6 @@ impl RootTarget {
pub struct BucketConfig { pub struct BucketConfig {
pub region: String, pub region: String,
pub bucket: String, pub bucket: String,
pub prefix_in_bucket: Option<String>,
/// Use SSO if this is set, else rely on AWS_* environment vars /// Use SSO if this is set, else rely on AWS_* environment vars
pub sso_account_id: Option<String>, pub sso_account_id: Option<String>,
@@ -163,12 +155,10 @@ impl BucketConfig {
let sso_account_id = env::var("SSO_ACCOUNT_ID").ok(); let sso_account_id = env::var("SSO_ACCOUNT_ID").ok();
let region = env::var("REGION").context("'REGION' param retrieval")?; let region = env::var("REGION").context("'REGION' param retrieval")?;
let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?; let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?;
let prefix_in_bucket = env::var("BUCKET_PREFIX").ok();
Ok(Self { Ok(Self {
region, region,
bucket, bucket,
prefix_in_bucket,
sso_account_id, sso_account_id,
}) })
} }
@@ -201,14 +191,14 @@ pub fn init_logging(file_name: &str) -> WorkerGuard {
.with_target(false) .with_target(false)
.with_ansi(false) .with_ansi(false)
.with_writer(file_writer); .with_writer(file_writer);
let stderr_logs = fmt::Layer::new() let stdout_logs = fmt::Layer::new()
.with_ansi(std::io::stderr().is_terminal()) .with_ansi(std::io::stdout().is_terminal())
.with_target(false) .with_target(false)
.with_writer(std::io::stderr); .with_writer(std::io::stdout);
tracing_subscriber::registry() tracing_subscriber::registry()
.with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"))) .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
.with(file_logs) .with(file_logs)
.with(stderr_logs) .with(stdout_logs)
.init(); .init();
guard guard
@@ -260,20 +250,15 @@ fn init_remote(
let bucket_region = Region::new(bucket_config.region); let bucket_region = Region::new(bucket_config.region);
let delimiter = "/".to_string(); let delimiter = "/".to_string();
let s3_client = Arc::new(init_s3_client(bucket_config.sso_account_id, bucket_region)); let s3_client = Arc::new(init_s3_client(bucket_config.sso_account_id, bucket_region));
let s3_root = match node_kind { let s3_root = match node_kind {
NodeKind::Pageserver => RootTarget::Pageserver(S3Target { NodeKind::Pageserver => RootTarget::Pageserver(S3Target {
bucket_name: bucket_config.bucket, bucket_name: bucket_config.bucket,
prefix_in_bucket: bucket_config prefix_in_bucket: ["pageserver", "v1", TENANTS_SEGMENT_NAME, ""].join(&delimiter),
.prefix_in_bucket
.unwrap_or("pageserver/v1".to_string()),
delimiter, delimiter,
}), }),
NodeKind::Safekeeper => RootTarget::Safekeeper(S3Target { NodeKind::Safekeeper => RootTarget::Safekeeper(S3Target {
bucket_name: bucket_config.bucket, bucket_name: bucket_config.bucket,
prefix_in_bucket: bucket_config prefix_in_bucket: ["safekeeper", "v1", "wal", ""].join(&delimiter),
.prefix_in_bucket
.unwrap_or("safekeeper/v1".to_string()),
delimiter, delimiter,
}), }),
}; };

View File

@@ -31,10 +31,7 @@ enum Command {
#[arg(short, long, default_value_t = PurgeMode::DeletedOnly)] #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
mode: PurgeMode, mode: PurgeMode,
}, },
ScanMetadata { ScanMetadata {},
#[arg(short, long, default_value_t = false)]
json: bool,
},
} }
#[tokio::main] #[tokio::main]
@@ -57,17 +54,13 @@ async fn main() -> anyhow::Result<()> {
)); ));
match cli.command { match cli.command {
Command::ScanMetadata { json } => match scan_metadata(bucket_config).await { Command::ScanMetadata {} => match scan_metadata(bucket_config).await {
Err(e) => { Err(e) => {
tracing::error!("Failed: {e}"); tracing::error!("Failed: {e}");
Err(e) Err(e)
} }
Ok(summary) => { Ok(summary) => {
if json { println!("{}", summary.summary_string());
println!("{}", serde_json::to_string(&summary).unwrap())
} else {
println!("{}", summary.summary_string());
}
if summary.is_fatal() { if summary.is_fatal() {
Err(anyhow::anyhow!("Fatal scrub errors detected")) Err(anyhow::anyhow!("Fatal scrub errors detected"))
} else { } else {

View File

@@ -13,10 +13,10 @@ pub fn stream_tenants<'a>(
) -> impl Stream<Item = anyhow::Result<TenantId>> + 'a { ) -> impl Stream<Item = anyhow::Result<TenantId>> + 'a {
try_stream! { try_stream! {
let mut continuation_token = None; let mut continuation_token = None;
let tenants_target = target.tenants_root();
loop { loop {
let tenants_target = target.tenants_root();
let fetch_response = let fetch_response =
list_objects_with_retries(s3_client, &tenants_target, continuation_token.clone()).await?; list_objects_with_retries(s3_client, tenants_target, continuation_token.clone()).await?;
let new_entry_ids = fetch_response let new_entry_ids = fetch_response
.common_prefixes() .common_prefixes()

View File

@@ -10,10 +10,8 @@ use aws_sdk_s3::Client;
use futures_util::{pin_mut, StreamExt, TryStreamExt}; use futures_util::{pin_mut, StreamExt, TryStreamExt};
use histogram::Histogram; use histogram::Histogram;
use pageserver::tenant::IndexPart; use pageserver::tenant::IndexPart;
use serde::Serialize;
use utils::id::TenantTimelineId; use utils::id::TenantTimelineId;
#[derive(Serialize)]
pub struct MetadataSummary { pub struct MetadataSummary {
count: usize, count: usize,
with_errors: HashSet<TenantTimelineId>, with_errors: HashSet<TenantTimelineId>,
@@ -27,9 +25,7 @@ pub struct MetadataSummary {
} }
/// A histogram plus minimum and maximum tracking /// A histogram plus minimum and maximum tracking
#[derive(Serialize)]
struct MinMaxHisto { struct MinMaxHisto {
#[serde(skip)]
histo: Histogram, histo: Histogram,
min: u64, min: u64,
max: u64, max: u64,
@@ -113,7 +109,6 @@ impl MetadataSummary {
self.count += 1; self.count += 1;
if let BlobDataParseResult::Parsed { if let BlobDataParseResult::Parsed {
index_part, index_part,
index_part_generation: _,
s3_layers: _, s3_layers: _,
} = &data.blob_data } = &data.blob_data
{ {

View File

@@ -47,7 +47,6 @@ pq_proto.workspace = true
remote_storage.workspace = true remote_storage.workspace = true
safekeeper_api.workspace = true safekeeper_api.workspace = true
storage_broker.workspace = true storage_broker.workspace = true
tokio-stream.workspace = true
utils.workspace = true utils.workspace = true
workspace_hack.workspace = true workspace_hack.workspace = true

View File

@@ -13,7 +13,7 @@ use utils::{
}; };
/// Persistent consensus state of the acceptor. /// Persistent consensus state of the acceptor.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
struct AcceptorStateV1 { struct AcceptorStateV1 {
/// acceptor's last term it voted for (advanced in 1 phase) /// acceptor's last term it voted for (advanced in 1 phase)
term: Term, term: Term,
@@ -21,7 +21,7 @@ struct AcceptorStateV1 {
epoch: Term, epoch: Term,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
struct SafeKeeperStateV1 { struct SafeKeeperStateV1 {
/// persistent acceptor state /// persistent acceptor state
acceptor_state: AcceptorStateV1, acceptor_state: AcceptorStateV1,
@@ -50,7 +50,7 @@ pub struct ServerInfoV2 {
pub wal_seg_size: u32, pub wal_seg_size: u32,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperStateV2 { pub struct SafeKeeperStateV2 {
/// persistent acceptor state /// persistent acceptor state
pub acceptor_state: AcceptorState, pub acceptor_state: AcceptorState,
@@ -81,7 +81,7 @@ pub struct ServerInfoV3 {
pub wal_seg_size: u32, pub wal_seg_size: u32,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperStateV3 { pub struct SafeKeeperStateV3 {
/// persistent acceptor state /// persistent acceptor state
pub acceptor_state: AcceptorState, pub acceptor_state: AcceptorState,
@@ -101,7 +101,7 @@ pub struct SafeKeeperStateV3 {
pub wal_start_lsn: Lsn, pub wal_start_lsn: Lsn,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperStateV4 { pub struct SafeKeeperStateV4 {
#[serde(with = "hex")] #[serde(with = "hex")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
@@ -264,245 +264,3 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
} }
bail!("unsupported safekeeper control file version {}", version) bail!("unsupported safekeeper control file version {}", version)
} }
#[cfg(test)]
mod tests {
use std::str::FromStr;
use utils::{id::NodeId, Hex};
use crate::safekeeper::PersistedPeerInfo;
use super::*;
#[test]
fn roundtrip_v1() {
let tenant_id = TenantId::from_str("cf0480929707ee75372337efaa5ecf96").unwrap();
let timeline_id = TimelineId::from_str("112ded66422aa5e953e5440fa5427ac4").unwrap();
let state = SafeKeeperStateV1 {
acceptor_state: AcceptorStateV1 {
term: 42,
epoch: 43,
},
server: ServerInfoV2 {
pg_version: 14,
system_id: 0x1234567887654321,
tenant_id,
timeline_id,
wal_seg_size: 0x12345678,
},
proposer_uuid: {
let mut arr = timeline_id.as_arr();
arr.reverse();
arr
},
commit_lsn: Lsn(1234567800),
truncate_lsn: Lsn(123456780),
wal_start_lsn: Lsn(1234567800 - 8),
};
let ser = state.ser().unwrap();
#[rustfmt::skip]
let expected = [
// term
0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// epoch
0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// pg_version
0x0e, 0x00, 0x00, 0x00,
// system_id
0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
// tenant_id
0xcf, 0x04, 0x80, 0x92, 0x97, 0x07, 0xee, 0x75, 0x37, 0x23, 0x37, 0xef, 0xaa, 0x5e, 0xcf, 0x96,
// timeline_id
0x11, 0x2d, 0xed, 0x66, 0x42, 0x2a, 0xa5, 0xe9, 0x53, 0xe5, 0x44, 0x0f, 0xa5, 0x42, 0x7a, 0xc4,
// wal_seg_size
0x78, 0x56, 0x34, 0x12,
// proposer_uuid
0xc4, 0x7a, 0x42, 0xa5, 0x0f, 0x44, 0xe5, 0x53, 0xe9, 0xa5, 0x2a, 0x42, 0x66, 0xed, 0x2d, 0x11,
// commit_lsn
0x78, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
// truncate_lsn
0x0c, 0xcd, 0x5b, 0x07, 0x00, 0x00, 0x00, 0x00,
// wal_start_lsn
0x70, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
];
assert_eq!(Hex(&ser), Hex(&expected));
let deser = SafeKeeperStateV1::des(&ser).unwrap();
assert_eq!(state, deser);
}
#[test]
fn roundtrip_v2() {
    // Fixed ids keep the serialized bytes below reproducible.
    let tenant_id = TenantId::from_str("cf0480929707ee75372337efaa5ecf96").unwrap();
    let timeline_id = TimelineId::from_str("112ded66422aa5e953e5440fa5427ac4").unwrap();

    // The proposer uuid is the timeline id's byte array, reversed.
    let mut proposer_uuid = timeline_id.as_arr();
    proposer_uuid.reverse();

    let state = SafeKeeperStateV2 {
        acceptor_state: AcceptorState {
            term: 42,
            term_history: TermHistory(vec![TermLsn {
                lsn: Lsn(0x1),
                term: 41,
            }]),
        },
        server: ServerInfoV2 {
            pg_version: 14,
            system_id: 0x1234567887654321,
            tenant_id,
            timeline_id,
            wal_seg_size: 0x12345678,
        },
        proposer_uuid,
        commit_lsn: Lsn(1234567800),
        truncate_lsn: Lsn(123456780),
        wal_start_lsn: Lsn(1234567800 - 8),
    };

    let encoded = state.ser().unwrap();

    // One field per row; integers show up as little-endian bytes.
    #[rustfmt::skip]
    let expected = [
        // acceptor_state.term (42)
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // acceptor_state.term_history: element count (1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].term (41) -- emitted before the lsn, unlike the literal above
        0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].lsn (0x1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // server.pg_version (14)
        0x0e, 0x00, 0x00, 0x00,
        // server.system_id
        0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        // server.tenant_id (16 raw bytes)
        0xcf, 0x04, 0x80, 0x92, 0x97, 0x07, 0xee, 0x75, 0x37, 0x23, 0x37, 0xef, 0xaa, 0x5e, 0xcf, 0x96,
        // server.timeline_id (16 raw bytes)
        0x11, 0x2d, 0xed, 0x66, 0x42, 0x2a, 0xa5, 0xe9, 0x53, 0xe5, 0x44, 0x0f, 0xa5, 0x42, 0x7a, 0xc4,
        // server.wal_seg_size
        0x78, 0x56, 0x34, 0x12,
        // proposer_uuid (timeline_id bytes reversed)
        0xc4, 0x7a, 0x42, 0xa5, 0x0f, 0x44, 0xe5, 0x53, 0xe9, 0xa5, 0x2a, 0x42, 0x66, 0xed, 0x2d, 0x11,
        // commit_lsn (1234567800)
        0x78, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // truncate_lsn (123456780)
        0x0c, 0xcd, 0x5b, 0x07, 0x00, 0x00, 0x00, 0x00,
        // wal_start_lsn (1234567800 - 8)
        0x70, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
    ];

    assert_eq!(Hex(&encoded), Hex(&expected));

    // Decoding the bytes must give back the original value.
    let decoded = SafeKeeperStateV2::des(&encoded).unwrap();
    assert_eq!(state, decoded);
}
#[test]
fn roundtrip_v3() {
    // Fixed ids keep the serialized bytes below reproducible.
    let tenant_id = TenantId::from_str("cf0480929707ee75372337efaa5ecf96").unwrap();
    let timeline_id = TimelineId::from_str("112ded66422aa5e953e5440fa5427ac4").unwrap();

    // The proposer uuid is the timeline id's byte array, reversed.
    let mut proposer_uuid = timeline_id.as_arr();
    proposer_uuid.reverse();

    let state = SafeKeeperStateV3 {
        acceptor_state: AcceptorState {
            term: 42,
            term_history: TermHistory(vec![TermLsn {
                lsn: Lsn(0x1),
                term: 41,
            }]),
        },
        server: ServerInfoV3 {
            pg_version: 14,
            system_id: 0x1234567887654321,
            tenant_id,
            timeline_id,
            wal_seg_size: 0x12345678,
        },
        proposer_uuid,
        commit_lsn: Lsn(1234567800),
        truncate_lsn: Lsn(123456780),
        wal_start_lsn: Lsn(1234567800 - 8),
    };

    let encoded = state.ser().unwrap();

    // One field per group. In this version the ids and the uuid appear as a
    // length prefix (0x20 = 32) followed by 32 ASCII hex characters.
    #[rustfmt::skip]
    let expected = [
        // acceptor_state.term (42)
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // acceptor_state.term_history: element count (1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].term (41) -- emitted before the lsn, unlike the literal above
        0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].lsn (0x1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // server.pg_version (14)
        0x0e, 0x00, 0x00, 0x00,
        // server.system_id
        0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        // server.tenant_id: length prefix, then "cf0480929707ee75372337efaa5ecf96"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x66, 0x30, 0x34, 0x38, 0x30, 0x39, 0x32, 0x39, 0x37, 0x30, 0x37, 0x65, 0x65, 0x37, 0x35,
        0x33, 0x37, 0x32, 0x33, 0x33, 0x37, 0x65, 0x66, 0x61, 0x61, 0x35, 0x65, 0x63, 0x66, 0x39, 0x36,
        // server.timeline_id: length prefix, then "112ded66422aa5e953e5440fa5427ac4"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x31, 0x31, 0x32, 0x64, 0x65, 0x64, 0x36, 0x36, 0x34, 0x32, 0x32, 0x61, 0x61, 0x35, 0x65, 0x39,
        0x35, 0x33, 0x65, 0x35, 0x34, 0x34, 0x30, 0x66, 0x61, 0x35, 0x34, 0x32, 0x37, 0x61, 0x63, 0x34,
        // server.wal_seg_size
        0x78, 0x56, 0x34, 0x12,
        // proposer_uuid: length prefix, then "c47a42a50f44e553e9a52a4266ed2d11"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x34, 0x37, 0x61, 0x34, 0x32, 0x61, 0x35, 0x30, 0x66, 0x34, 0x34, 0x65, 0x35, 0x35, 0x33,
        0x65, 0x39, 0x61, 0x35, 0x32, 0x61, 0x34, 0x32, 0x36, 0x36, 0x65, 0x64, 0x32, 0x64, 0x31, 0x31,
        // commit_lsn (1234567800)
        0x78, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // truncate_lsn (123456780)
        0x0c, 0xcd, 0x5b, 0x07, 0x00, 0x00, 0x00, 0x00,
        // wal_start_lsn (1234567800 - 8)
        0x70, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
    ];

    assert_eq!(Hex(&encoded), Hex(&expected));

    // Decoding the bytes must give back the original value.
    let decoded = SafeKeeperStateV3::des(&encoded).unwrap();
    assert_eq!(state, decoded);
}
#[test]
fn roundtrip_v4() {
    // Fixed ids keep the serialized bytes below reproducible.
    let tenant_id = TenantId::from_str("cf0480929707ee75372337efaa5ecf96").unwrap();
    let timeline_id = TimelineId::from_str("112ded66422aa5e953e5440fa5427ac4").unwrap();

    // The proposer uuid is the timeline id's byte array, reversed.
    let mut proposer_uuid = timeline_id.as_arr();
    proposer_uuid.reverse();

    let state = SafeKeeperStateV4 {
        tenant_id,
        timeline_id,
        acceptor_state: AcceptorState {
            term: 42,
            term_history: TermHistory(vec![TermLsn {
                lsn: Lsn(0x1),
                term: 41,
            }]),
        },
        server: ServerInfo {
            pg_version: 14,
            system_id: 0x1234567887654321,
            wal_seg_size: 0x12345678,
        },
        proposer_uuid,
        peers: PersistedPeers(vec![(
            NodeId(1),
            PersistedPeerInfo {
                backup_lsn: Lsn(1234567000),
                term: 42,
                flush_lsn: Lsn(1234567800 - 8),
                commit_lsn: Lsn(1234567600),
            },
        )]),
        commit_lsn: Lsn(1234567800),
        s3_wal_lsn: Lsn(1234567300),
        peer_horizon_lsn: Lsn(9999999),
        remote_consistent_lsn: Lsn(1234560000),
    };

    let encoded = state.ser().unwrap();

    // One field per group. The ids and the uuid appear as a length prefix
    // (0x20 = 32) followed by 32 ASCII hex characters. Note that `peers` comes
    // last in the byte stream even though the literal above lists it earlier.
    #[rustfmt::skip]
    let expected = [
        // tenant_id: length prefix, then "cf0480929707ee75372337efaa5ecf96"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x66, 0x30, 0x34, 0x38, 0x30, 0x39, 0x32, 0x39, 0x37, 0x30, 0x37, 0x65, 0x65, 0x37, 0x35,
        0x33, 0x37, 0x32, 0x33, 0x33, 0x37, 0x65, 0x66, 0x61, 0x61, 0x35, 0x65, 0x63, 0x66, 0x39, 0x36,
        // timeline_id: length prefix, then "112ded66422aa5e953e5440fa5427ac4"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x31, 0x31, 0x32, 0x64, 0x65, 0x64, 0x36, 0x36, 0x34, 0x32, 0x32, 0x61, 0x61, 0x35, 0x65, 0x39,
        0x35, 0x33, 0x65, 0x35, 0x34, 0x34, 0x30, 0x66, 0x61, 0x35, 0x34, 0x32, 0x37, 0x61, 0x63, 0x34,
        // acceptor_state.term (42)
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // acceptor_state.term_history: element count (1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].term (41) -- emitted before the lsn, unlike the literal above
        0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0].lsn (0x1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // server.pg_version (14)
        0x0e, 0x00, 0x00, 0x00,
        // server.system_id
        0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        // server.wal_seg_size
        0x78, 0x56, 0x34, 0x12,
        // proposer_uuid: length prefix, then "c47a42a50f44e553e9a52a4266ed2d11"
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x34, 0x37, 0x61, 0x34, 0x32, 0x61, 0x35, 0x30, 0x66, 0x34, 0x34, 0x65, 0x35, 0x35, 0x33,
        0x65, 0x39, 0x61, 0x35, 0x32, 0x61, 0x34, 0x32, 0x36, 0x36, 0x65, 0x64, 0x32, 0x64, 0x31, 0x31,
        // commit_lsn (1234567800)
        0x78, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // s3_wal_lsn (1234567300)
        0x84, 0x00, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peer_horizon_lsn (9999999)
        0x7f, 0x96, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00,
        // remote_consistent_lsn (1234560000)
        0x00, 0xe4, 0x95, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peers: element count (1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0]: node id (1)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0].backup_lsn (1234567000)
        0x58, 0xff, 0x95, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peers[0].term (42)
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0].flush_lsn (1234567800 - 8)
        0x70, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peers[0].commit_lsn (1234567600)
        0xb0, 0x01, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
    ];

    assert_eq!(Hex(&encoded), Hex(&expected));

    // Decoding the bytes must give back the original value.
    let decoded = SafeKeeperStateV4::des(&encoded).unwrap();
    assert_eq!(state, decoded);
}
}

View File

@@ -5,7 +5,6 @@ use std::fs::DirEntry;
use std::io::BufReader; use std::io::BufReader;
use std::io::Read; use std::io::Read;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc;
use anyhow::Result; use anyhow::Result;
use camino::Utf8Path; use camino::Utf8Path;
@@ -14,6 +13,7 @@ use postgres_ffi::XLogSegNo;
use serde::Deserialize; use serde::Deserialize;
use serde::Serialize; use serde::Serialize;
use serde_with::{serde_as, DisplayFromStr};
use utils::id::NodeId; use utils::id::NodeId;
use utils::id::TenantTimelineId; use utils::id::TenantTimelineId;
use utils::id::{TenantId, TimelineId}; use utils::id::{TenantId, TimelineId};
@@ -28,7 +28,7 @@ use crate::send_wal::WalSenderState;
use crate::GlobalTimelines; use crate::GlobalTimelines;
/// Various filters that influence the resulting JSON output. /// Various filters that influence the resulting JSON output.
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize)]
pub struct Args { pub struct Args {
/// Dump all available safekeeper state. False by default. /// Dump all available safekeeper state. False by default.
pub dump_all: bool, pub dump_all: bool,
@@ -53,76 +53,15 @@ pub struct Args {
} }
/// Response for debug dump request. /// Response for debug dump request.
#[derive(Debug, Serialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct Response { pub struct Response {
pub start_time: DateTime<Utc>, pub start_time: DateTime<Utc>,
pub finish_time: DateTime<Utc>, pub finish_time: DateTime<Utc>,
pub timelines: Vec<TimelineDumpSer>, pub timelines: Vec<Timeline>,
pub timelines_count: usize, pub timelines_count: usize,
pub config: Config, pub config: Config,
} }
/// One timeline entry of the debug dump `Response`.
///
/// The dump itself is not stored here: the `Serialize` impl for this type
/// runs `build_from_tli_dump` on `runtime` and serializes the result.
pub struct TimelineDumpSer {
// Timeline whose state is dumped when this value is serialized.
pub tli: Arc<crate::timeline::Timeline>,
// Filters passed on to `build_from_tli_dump`.
pub args: Args,
// Runtime used to block on the async dump builder during serialization.
// Left out of the `Debug` output.
pub runtime: Arc<tokio::runtime::Runtime>,
}
impl std::fmt::Debug for TimelineDumpSer {
    /// Prints the timeline's `ttid` and the dump `args`; the `runtime` field
    /// is deliberately left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("TimelineDumpSer");
        out.field("tli", &self.tli.ttid);
        out.field("args", &self.args);
        out.finish()
    }
}
impl Serialize for TimelineDumpSer {
    /// Builds the timeline dump at serialization time and forwards it to
    /// `serializer`.
    ///
    /// The async builder is driven to completion with `block_on` on the stored
    /// runtime, so this call blocks the current thread until the dump is ready.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let tli = self.tli.clone();
        let args = self.args.clone();
        let dump_future = build_from_tli_dump(tli, args);
        let dump = self.runtime.block_on(dump_future);
        dump.serialize(serializer)
    }
}
/// Collects the parts of a timeline dump that `args` asks for.
///
/// Each of `control_file`, `memory` and `disk_content` is `None` unless the
/// matching `dump_*` flag in `args` is set.
async fn build_from_tli_dump(timeline: Arc<crate::timeline::Timeline>, args: Args) -> Timeline {
    // Control file state; the term history is blanked unless it was requested.
    let mut control_file = None;
    if args.dump_control_file {
        let mut state = timeline.get_state().await.1;
        if !args.dump_term_history {
            state.acceptor_state.term_history = TermHistory(vec![]);
        }
        control_file = Some(state);
    }

    let memory = match args.dump_memory {
        true => Some(timeline.memory_dump().await),
        false => None,
    };

    let disk_content = match args.dump_disk_content {
        // build_disk_content can fail, but we don't want to fail the whole
        // request because of that, so an error just becomes `None`.
        true => build_disk_content(&timeline.timeline_dir).ok(),
        false => None,
    };

    let ttid = &timeline.ttid;
    Timeline {
        tenant_id: ttid.tenant_id,
        timeline_id: ttid.timeline_id,
        control_file,
        memory,
        disk_content,
    }
}
/// Safekeeper configuration. /// Safekeeper configuration.
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct Config { pub struct Config {
@@ -135,9 +74,12 @@ pub struct Config {
pub wal_backup_enabled: bool, pub wal_backup_enabled: bool,
} }
#[serde_as]
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct Timeline { pub struct Timeline {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
pub control_file: Option<SafeKeeperState>, pub control_file: Option<SafeKeeperState>,
pub memory: Option<Memory>, pub memory: Option<Memory>,
@@ -198,12 +140,8 @@ pub async fn build(args: Args) -> Result<Response> {
GlobalTimelines::get_all() GlobalTimelines::get_all()
}; };
// TODO: return Stream instead of Vec
let mut timelines = Vec::new(); let mut timelines = Vec::new();
let runtime = Arc::new(
tokio::runtime::Builder::new_current_thread()
.build()
.unwrap(),
);
for tli in ptrs_snapshot { for tli in ptrs_snapshot {
let ttid = tli.ttid; let ttid = tli.ttid;
if let Some(tenant_id) = args.tenant_id { if let Some(tenant_id) = args.tenant_id {
@@ -217,11 +155,38 @@ pub async fn build(args: Args) -> Result<Response> {
} }
} }
timelines.push(TimelineDumpSer { let control_file = if args.dump_control_file {
tli, let mut state = tli.get_state().await.1;
args: args.clone(), if !args.dump_term_history {
runtime: runtime.clone(), state.acceptor_state.term_history = TermHistory(vec![]);
}); }
Some(state)
} else {
None
};
let memory = if args.dump_memory {
Some(tli.memory_dump().await)
} else {
None
};
let disk_content = if args.dump_disk_content {
// build_disk_content can fail, but we don't want to fail the whole
// request because of that.
build_disk_content(&tli.timeline_dir).ok()
} else {
None
};
let timeline = Timeline {
tenant_id: ttid.tenant_id,
timeline_id: ttid.timeline_id,
control_file,
memory,
disk_content,
};
timelines.push(timeline);
} }
let config = GlobalTimelines::get_global_config(); let config = GlobalTimelines::get_global_config();

View File

@@ -4,6 +4,7 @@ use once_cell::sync::Lazy;
use postgres_ffi::WAL_SEGMENT_SIZE; use postgres_ffi::WAL_SEGMENT_SIZE;
use safekeeper_api::models::SkTimelineInfo; use safekeeper_api::models::SkTimelineInfo;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
@@ -12,12 +13,7 @@ use storage_broker::proto::SafekeeperTimelineInfo;
use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
use tokio::fs::File; use tokio::fs::File;
use tokio::io::AsyncReadExt; use tokio::io::AsyncReadExt;
use utils::http::endpoint::request_span;
use std::io::Write as _;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::info_span;
use utils::http::endpoint::{request_span, ChannelWriter};
use crate::receive_wal::WalReceiverState; use crate::receive_wal::WalReceiverState;
use crate::safekeeper::Term; use crate::safekeeper::Term;
@@ -66,9 +62,11 @@ fn get_conf(request: &Request<Body>) -> &SafeKeeperConf {
/// Same as TermLsn, but serializes LSN using display serializer /// Same as TermLsn, but serializes LSN using display serializer
/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response. /// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response.
#[serde_as]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct TermSwitchApiEntry { pub struct TermSwitchApiEntry {
pub term: Term, pub term: Term,
#[serde_as(as = "DisplayFromStr")]
pub lsn: Lsn, pub lsn: Lsn,
} }
@@ -90,18 +88,28 @@ pub struct AcceptorStateStatus {
} }
/// Info about timeline on safekeeper ready for reporting. /// Info about timeline on safekeeper ready for reporting.
#[serde_as]
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct TimelineStatus { pub struct TimelineStatus {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
pub acceptor_state: AcceptorStateStatus, pub acceptor_state: AcceptorStateStatus,
pub pg_info: ServerInfo, pub pg_info: ServerInfo,
#[serde_as(as = "DisplayFromStr")]
pub flush_lsn: Lsn, pub flush_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub timeline_start_lsn: Lsn, pub timeline_start_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub local_start_lsn: Lsn, pub local_start_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub commit_lsn: Lsn, pub commit_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub backup_lsn: Lsn, pub backup_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub peer_horizon_lsn: Lsn, pub peer_horizon_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn: Lsn, pub remote_consistent_lsn: Lsn,
pub peers: Vec<PeerInfo>, pub peers: Vec<PeerInfo>,
pub walsenders: Vec<WalSenderState>, pub walsenders: Vec<WalSenderState>,
@@ -365,52 +373,8 @@ async fn dump_debug_handler(mut request: Request<Body>) -> Result<Response<Body>
.await .await
.map_err(ApiError::InternalServerError)?; .map_err(ApiError::InternalServerError)?;
let started_at = std::time::Instant::now(); // TODO: use streaming response
json_response(StatusCode::OK, resp)
let (tx, rx) = mpsc::channel(1);
let body = Body::wrap_stream(ReceiverStream::new(rx));
let mut writer = ChannelWriter::new(128 * 1024, tx);
let response = Response::builder()
.status(200)
.header(hyper::header::CONTENT_TYPE, "application/octet-stream")
.body(body)
.unwrap();
let span = info_span!("blocking");
tokio::task::spawn_blocking(move || {
let _span = span.entered();
let res = serde_json::to_writer(&mut writer, &resp)
.map_err(std::io::Error::from)
.and_then(|_| writer.flush());
match res {
Ok(()) => {
tracing::info!(
bytes = writer.flushed_bytes(),
elapsed_ms = started_at.elapsed().as_millis(),
"responded /v1/debug_dump"
);
}
Err(e) => {
tracing::warn!("failed to write out /v1/debug_dump response: {e:#}");
// semantics of this error are quite... unclear. we want to error the stream out to
// abort the response to somehow notify the client that we failed.
//
// though, most likely the reason for failure is that the receiver is already gone.
drop(
writer
.tx
.blocking_send(Err(std::io::ErrorKind::BrokenPipe.into())),
);
}
}
});
Ok(response)
} }
/// Safekeeper http router. /// Safekeeper http router.

View File

@@ -44,11 +44,8 @@ pub struct AppendLogicalMessage {
// fields from AppendRequestHeader // fields from AppendRequestHeader
pub term: Term, pub term: Term,
#[serde(with = "utils::lsn::serde_as_u64")]
pub epoch_start_lsn: Lsn, pub epoch_start_lsn: Lsn,
#[serde(with = "utils::lsn::serde_as_u64")]
pub begin_lsn: Lsn, pub begin_lsn: Lsn,
#[serde(with = "utils::lsn::serde_as_u64")]
pub truncate_lsn: Lsn, pub truncate_lsn: Lsn,
pub pg_version: u32, pub pg_version: u32,
} }

View File

@@ -1,4 +1,3 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
@@ -6,6 +5,8 @@ use tokio::io::AsyncWriteExt;
use tracing::info; use tracing::info;
use utils::id::{TenantId, TenantTimelineId, TimelineId}; use utils::id::{TenantId, TenantTimelineId, TimelineId};
use serde_with::{serde_as, DisplayFromStr};
use crate::{ use crate::{
control_file, debug_dump, control_file, debug_dump,
http::routes::TimelineStatus, http::routes::TimelineStatus,
@@ -14,9 +15,12 @@ use crate::{
}; };
/// Info about timeline on safekeeper ready for reporting. /// Info about timeline on safekeeper ready for reporting.
#[serde_as]
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct Request { pub struct Request {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId, pub timeline_id: TimelineId,
pub http_hosts: Vec<String>, pub http_hosts: Vec<String>,
} }
@@ -28,16 +32,6 @@ pub struct Response {
// TODO: add more fields? // TODO: add more fields?
} }
/// Response for debug dump request.
///
/// Deserializable counterpart of the debug dump response: `pull_timeline`
/// parses the body of `GET /v1/debug_dump` into this type.
#[derive(Debug, Serialize, Deserialize)]
pub struct DebugDumpResponse {
pub start_time: DateTime<Utc>,
pub finish_time: DateTime<Utc>,
// Per-timeline dumps, already materialized as plain data.
pub timelines: Vec<debug_dump::Timeline>,
pub timelines_count: usize,
pub config: debug_dump::Config,
}
/// Find the most advanced safekeeper and pull timeline from it. /// Find the most advanced safekeeper and pull timeline from it.
pub async fn handle_request(request: Request) -> Result<Response> { pub async fn handle_request(request: Request) -> Result<Response> {
let existing_tli = GlobalTimelines::get(TenantTimelineId::new( let existing_tli = GlobalTimelines::get(TenantTimelineId::new(
@@ -109,7 +103,7 @@ async fn pull_timeline(status: TimelineStatus, host: String) -> Result<Response>
// Implementing our own scp over HTTP. // Implementing our own scp over HTTP.
// At first, we need to fetch list of files from safekeeper. // At first, we need to fetch list of files from safekeeper.
let dump: DebugDumpResponse = client let dump: debug_dump::Response = client
.get(format!( .get(format!(
"{}/v1/debug_dump?dump_all=true&tenant_id={}&timeline_id={}", "{}/v1/debug_dump?dump_all=true&tenant_id={}&timeline_id={}",
host, status.tenant_id, status.timeline_id host, status.tenant_id, status.timeline_id

View File

@@ -52,7 +52,7 @@ impl From<(Term, Lsn)> for TermLsn {
} }
} }
#[derive(Clone, Serialize, Deserialize, PartialEq)] #[derive(Clone, Serialize, Deserialize)]
pub struct TermHistory(pub Vec<TermLsn>); pub struct TermHistory(pub Vec<TermLsn>);
impl TermHistory { impl TermHistory {
@@ -178,7 +178,7 @@ impl fmt::Debug for TermHistory {
pub type PgUuid = [u8; 16]; pub type PgUuid = [u8; 16];
/// Persistent consensus state of the acceptor. /// Persistent consensus state of the acceptor.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcceptorState { pub struct AcceptorState {
/// acceptor's last term it voted for (advanced in 1 phase) /// acceptor's last term it voted for (advanced in 1 phase)
pub term: Term, pub term: Term,
@@ -209,16 +209,16 @@ pub struct ServerInfo {
pub wal_seg_size: u32, pub wal_seg_size: u32,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedPeerInfo { pub struct PersistedPeerInfo {
/// LSN up to which safekeeper offloaded WAL to s3. /// LSN up to which safekeeper offloaded WAL to s3.
pub backup_lsn: Lsn, backup_lsn: Lsn,
/// Term of the last entry. /// Term of the last entry.
pub term: Term, term: Term,
/// LSN of the last record. /// LSN of the last record.
pub flush_lsn: Lsn, flush_lsn: Lsn,
/// Up to which LSN safekeeper regards its WAL as committed. /// Up to which LSN safekeeper regards its WAL as committed.
pub commit_lsn: Lsn, commit_lsn: Lsn,
} }
impl PersistedPeerInfo { impl PersistedPeerInfo {
@@ -232,12 +232,12 @@ impl PersistedPeerInfo {
} }
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedPeers(pub Vec<(NodeId, PersistedPeerInfo)>); pub struct PersistedPeers(pub Vec<(NodeId, PersistedPeerInfo)>);
/// Persistent information stored on safekeeper node /// Persistent information stored on safekeeper node
/// On disk data is prefixed by magic and format version and followed by checksum. /// On disk data is prefixed by magic and format version and followed by checksum.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperState { pub struct SafeKeeperState {
#[serde(with = "hex")] #[serde(with = "hex")]
pub tenant_id: TenantId, pub tenant_id: TenantId,
@@ -1096,7 +1096,7 @@ mod tests {
use super::*; use super::*;
use crate::wal_storage::Storage; use crate::wal_storage::Storage;
use std::{ops::Deref, str::FromStr, time::Instant}; use std::{ops::Deref, time::Instant};
// fake storage for tests // fake storage for tests
struct InMemoryState { struct InMemoryState {
@@ -1314,98 +1314,4 @@ mod tests {
}) })
); );
} }
#[test]
fn test_sk_state_bincode_serde_roundtrip() {
    use utils::Hex;

    // Fixed ids keep the serialized bytes below reproducible.
    let tenant_id = TenantId::from_str("cf0480929707ee75372337efaa5ecf96").unwrap();
    let timeline_id = TimelineId::from_str("112ded66422aa5e953e5440fa5427ac4").unwrap();

    // The proposer uuid is the timeline id's byte array, reversed.
    let mut proposer_uuid = timeline_id.as_arr();
    proposer_uuid.reverse();

    let state = SafeKeeperState {
        tenant_id,
        timeline_id,
        acceptor_state: AcceptorState {
            term: 42,
            term_history: TermHistory(vec![TermLsn {
                lsn: Lsn(0x1),
                term: 41,
            }]),
        },
        server: ServerInfo {
            pg_version: 14,
            system_id: 0x1234567887654321,
            wal_seg_size: 0x12345678,
        },
        proposer_uuid,
        timeline_start_lsn: Lsn(0x12345600),
        local_start_lsn: Lsn(0x12),
        commit_lsn: Lsn(1234567800),
        backup_lsn: Lsn(1234567300),
        peer_horizon_lsn: Lsn(9999999),
        remote_consistent_lsn: Lsn(1234560000),
        peers: PersistedPeers(vec![(
            NodeId(1),
            PersistedPeerInfo {
                backup_lsn: Lsn(1234567000),
                term: 42,
                flush_lsn: Lsn(1234567800 - 8),
                commit_lsn: Lsn(1234567600),
            },
        )]),
    };

    let encoded = state.ser().unwrap();

    #[rustfmt::skip]
    let expected = [
        // tenant_id as length prefixed hex
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x66, 0x30, 0x34, 0x38, 0x30, 0x39, 0x32, 0x39, 0x37, 0x30, 0x37, 0x65, 0x65, 0x37, 0x35,
        0x33, 0x37, 0x32, 0x33, 0x33, 0x37, 0x65, 0x66, 0x61, 0x61, 0x35, 0x65, 0x63, 0x66, 0x39, 0x36,
        // timeline_id as length prefixed hex
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x31, 0x31, 0x32, 0x64, 0x65, 0x64, 0x36, 0x36, 0x34, 0x32, 0x32, 0x61, 0x61, 0x35, 0x65, 0x39,
        0x35, 0x33, 0x65, 0x35, 0x34, 0x34, 0x30, 0x66, 0x61, 0x35, 0x34, 0x32, 0x37, 0x61, 0x63, 0x34,
        // term
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history length prefix
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // term_history[0]: term (41) first, then lsn (0x1). This is the reverse
        // of the order written in the literal above; presumably it follows the
        // field declaration order of TermLsn -- verify against the struct.
        0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // pg_version
        0x0e, 0x00, 0x00, 0x00,
        // system_id
        0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        // wal_seg_size
        0x78, 0x56, 0x34, 0x12,
        // proposer_uuid as length prefixed hex
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x63, 0x34, 0x37, 0x61, 0x34, 0x32, 0x61, 0x35, 0x30, 0x66, 0x34, 0x34, 0x65, 0x35, 0x35, 0x33,
        0x65, 0x39, 0x61, 0x35, 0x32, 0x61, 0x34, 0x32, 0x36, 0x36, 0x65, 0x64, 0x32, 0x64, 0x31, 0x31,
        // timeline_start_lsn
        0x00, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00,
        // local_start_lsn
        0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // commit_lsn
        0x78, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // backup_lsn
        0x84, 0x00, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peer_horizon_lsn
        0x7f, 0x96, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00,
        // remote_consistent_lsn
        0x00, 0xe4, 0x95, 0x49, 0x00, 0x00, 0x00, 0x00,
        // length prefix for peers
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0]: node id
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0].backup_lsn
        0x58, 0xff, 0x95, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peers[0].term
        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // peers[0].flush_lsn
        0x70, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
        // peers[0].commit_lsn
        0xb0, 0x01, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00,
    ];

    assert_eq!(Hex(&encoded), Hex(&expected));

    // Decoding the bytes must give back the original value.
    let decoded = SafeKeeperState::des(&encoded).unwrap();
    assert_eq!(decoded, state);
}
} }

View File

@@ -16,6 +16,7 @@ use postgres_ffi::get_current_timestamp;
use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; use postgres_ffi::{TimestampTz, MAX_SEND_SIZE};
use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use utils::id::TenantTimelineId; use utils::id::TenantTimelineId;
use utils::lsn::AtomicLsn; use utils::lsn::AtomicLsn;
@@ -312,8 +313,10 @@ impl WalSendersShared {
} }
// Serialized is used only for pretty printing in json. // Serialized is used only for pretty printing in json.
#[serde_as]
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalSenderState { pub struct WalSenderState {
#[serde_as(as = "DisplayFromStr")]
ttid: TenantTimelineId, ttid: TenantTimelineId,
addr: SocketAddr, addr: SocketAddr,
conn_id: ConnectionId, conn_id: ConnectionId,

View File

@@ -5,8 +5,10 @@ use anyhow::{anyhow, bail, Result};
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use postgres_ffi::XLogSegNo; use postgres_ffi::XLogSegNo;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use tokio::fs; use tokio::fs;
use serde_with::DisplayFromStr;
use std::cmp::max; use std::cmp::max;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@@ -40,6 +42,7 @@ use crate::SafeKeeperConf;
use crate::{debug_dump, wal_storage}; use crate::{debug_dump, wal_storage};
/// Things safekeeper should know about timeline state on peers. /// Things safekeeper should know about timeline state on peers.
#[serde_as]
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeerInfo { pub struct PeerInfo {
pub sk_id: NodeId, pub sk_id: NodeId,
@@ -47,10 +50,13 @@ pub struct PeerInfo {
/// Term of the last entry. /// Term of the last entry.
pub last_log_term: Term, pub last_log_term: Term,
/// LSN of the last record. /// LSN of the last record.
#[serde_as(as = "DisplayFromStr")]
pub flush_lsn: Lsn, pub flush_lsn: Lsn,
#[serde_as(as = "DisplayFromStr")]
pub commit_lsn: Lsn, pub commit_lsn: Lsn,
/// Since which LSN safekeeper has WAL. TODO: remove this once we fill new /// Since which LSN safekeeper has WAL. TODO: remove this once we fill new
/// sk since backup_lsn. /// sk since backup_lsn.
#[serde_as(as = "DisplayFromStr")]
pub local_start_lsn: Lsn, pub local_start_lsn: Lsn,
/// When info was received. Serde annotations are not very useful but make /// When info was received. Serde annotations are not very useful but make
/// the code compile -- we don't rely on this field externally. /// the code compile -- we don't rely on this field externally.

View File

@@ -2868,7 +2868,7 @@ class SafekeeperHttpClient(requests.Session):
params = params or {} params = params or {}
res = self.get(f"http://localhost:{self.port}/v1/debug_dump", params=params) res = self.get(f"http://localhost:{self.port}/v1/debug_dump", params=params)
res.raise_for_status() res.raise_for_status()
res_json = json.loads(res.text) res_json = res.json()
assert isinstance(res_json, dict) assert isinstance(res_json, dict)
return res_json return res_json
@@ -2968,33 +2968,24 @@ class S3Scrubber:
self.env = env self.env = env
self.log_dir = log_dir self.log_dir = log_dir
def scrubber_cli(self, args: list[str], timeout) -> str: def scrubber_cli(self, args, timeout):
assert isinstance(self.env.pageserver_remote_storage, S3Storage) assert isinstance(self.env.pageserver_remote_storage, S3Storage)
s3_storage = self.env.pageserver_remote_storage s3_storage = self.env.pageserver_remote_storage
env = { env = {
"REGION": s3_storage.bucket_region, "REGION": s3_storage.bucket_region,
"BUCKET": s3_storage.bucket_name, "BUCKET": s3_storage.bucket_name,
"BUCKET_PREFIX": s3_storage.prefix_in_bucket,
"RUST_LOG": "DEBUG",
} }
env.update(s3_storage.access_env_vars()) env.update(s3_storage.access_env_vars())
if s3_storage.endpoint is not None: if s3_storage.endpoint is not None:
env.update({"AWS_ENDPOINT_URL": s3_storage.endpoint}) env.update({"AWS_ENDPOINT_URL": s3_storage.endpoint})
base_args = [str(self.env.neon_binpath / "s3_scrubber")] base_args = [self.env.neon_binpath / "s3_scrubber"]
args = base_args + args args = base_args + args
(output_path, stdout, status_code) = subprocess_capture( (output_path, _, status_code) = subprocess_capture(
self.log_dir, self.log_dir, args, echo_stderr=True, echo_stdout=True, env=env, check=False
args,
echo_stderr=True,
echo_stdout=True,
env=env,
check=False,
capture_stdout=True,
timeout=timeout,
) )
if status_code: if status_code:
log.warning(f"Scrub command {args} failed") log.warning(f"Scrub command {args} failed")
@@ -3003,18 +2994,8 @@ class S3Scrubber:
raise RuntimeError("Remote storage scrub failed") raise RuntimeError("Remote storage scrub failed")
assert stdout is not None def scan_metadata(self):
return stdout self.scrubber_cli(["scan-metadata"], timeout=30)
def scan_metadata(self) -> Any:
stdout = self.scrubber_cli(["scan-metadata", "--json"], timeout=30)
try:
return json.loads(stdout)
except:
log.error("Failed to decode JSON output from `scan-metadata`. Dumping stdout:")
log.error(stdout)
raise
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path: def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:

View File

@@ -35,7 +35,6 @@ def subprocess_capture(
echo_stderr=False, echo_stderr=False,
echo_stdout=False, echo_stdout=False,
capture_stdout=False, capture_stdout=False,
timeout=None,
**kwargs: Any, **kwargs: Any,
) -> Tuple[str, Optional[str], int]: ) -> Tuple[str, Optional[str], int]:
"""Run a process and bifurcate its output to files and the `log` logger """Run a process and bifurcate its output to files and the `log` logger
@@ -105,7 +104,7 @@ def subprocess_capture(
stderr_handler = OutputHandler(p.stderr, stderr_f, echo=echo_stderr, capture=False) stderr_handler = OutputHandler(p.stderr, stderr_f, echo=echo_stderr, capture=False)
stderr_handler.start() stderr_handler.start()
r = p.wait(timeout=timeout) r = p.wait()
stdout_handler.join() stdout_handler.join()
stderr_handler.join() stderr_handler.join()

View File

@@ -72,7 +72,7 @@ class DdlForwardingContext:
self.dbs: Dict[str, str] = {} self.dbs: Dict[str, str] = {}
self.roles: Dict[str, str] = {} self.roles: Dict[str, str] = {}
self.fail = False self.fail = False
endpoint = "/test/roles_and_databases" endpoint = "/management/api/v2/roles_and_databases"
ddl_url = f"http://{host}:{port}{endpoint}" ddl_url = f"http://{host}:{port}{endpoint}"
self.pg.configure( self.pg.configure(
[ [

View File

@@ -1,14 +1,11 @@
import time import time
import pytest
from fixtures.log_helper import log from fixtures.log_helper import log
from fixtures.neon_fixtures import ( from fixtures.neon_fixtures import (
NeonEnv, NeonEnv,
logical_replication_sync, logical_replication_sync,
wait_for_last_flush_lsn, wait_for_last_flush_lsn,
) )
from fixtures.types import Lsn
from fixtures.utils import query_scalar
def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg): def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
@@ -150,89 +147,3 @@ COMMIT;
endpoint.start() endpoint.start()
# it must be gone (but walproposer slot still exists, hence 1) # it must be gone (but walproposer slot still exists, hence 1)
assert endpoint.safe_psql("select count(*) from pg_replication_slots")[0][0] == 1 assert endpoint.safe_psql("select count(*) from pg_replication_slots")[0][0] == 1
# Test compute start at LSN page of which starts with contrecord
# https://github.com/neondatabase/neon/issues/5749
def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
    """Restart compute at an LSN whose WAL page begins with a continuation record.

    Regression test for https://github.com/neondatabase/neon/issues/5749.

    Outline:
      1. Create a publication on the Neon endpoint and a subscription on the
         vanilla Postgres instance, sync them, then stop the subscriber.
      2. Emit transactional logical messages to estimate the fixed WAL
         overhead of such a message.
      3. Emit one more message sized to spill slightly past the next 8192-byte
         boundary of the WAL position.
      4. Restart the endpoint, write more data, restart the subscriber and
         check both sides agree on ``sum(somedata)``.

    The sizing in steps 2-3 is heuristic; the test skips itself when the
    resulting position is not where it was aimed.
    """
    wal_page_size = 8192
    current_lsn_sql = "select pg_current_wal_lsn()"

    def read_wal_lsn(cursor):
        # Current WAL position as reported by pg_current_wal_lsn().
        return Lsn(query_scalar(cursor, current_lsn_sql))

    env = neon_simple_env
    env.neon_cli.create_branch("init")
    endpoint = env.endpoints.create_start("init")
    tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]

    # Publisher side: schema, one seed row and the publication.
    # NOTE: the name `cur` is reused for every cursor in this test on purpose,
    # so that earlier cursors are released at the same points as before.
    cur = endpoint.connect().cursor()
    cur.execute("create table t(key int, value text)")
    cur.execute("CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int);")
    cur.execute("insert into replication_example values (1, 2)")
    cur.execute("create publication pub1 for table replication_example")

    # Subscriber side: matching schema, then subscribe and wait for the sync.
    vanilla_pg.start()
    vanilla_pg.safe_psql("create table t(pk integer primary key, value text)")
    vanilla_pg.safe_psql("CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int);")
    log.info(f"ep connstr is {endpoint.connstr()}, subscriber connstr {vanilla_pg.connstr()}")
    # Single quotes are doubled because the connstr is embedded in a SQL literal.
    connstr = endpoint.connstr().replace("'", "''")
    vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1")
    logical_replication_sync(vanilla_pg, endpoint)
    vanilla_pg.stop()

    with endpoint.cursor() as cur:
        # Estimate how much WAL a logical message costs on top of its payload.
        # Several attempts are made and only the last measurement is kept:
        # the first attempt sometimes produces a much larger message before
        # the value stabilizes (reason unknown).
        for _ in range(3):
            start_lsn = read_wal_lsn(cur)
            log.info(f"current_lsn={start_lsn}")
            # A non-transactional logical message is only XLogInsert'ed, not
            # written out, so a transactional one is used here even though it
            # drags a commit record along. An alternative would be something
            # like
            #   select neon_xlogflush(pg_current_wal_insert_lsn());
            # but that is not much better, and that particular call complains
            # with 'xlog flush request 0/282C018 is not satisfied' because
            # pg_current_wal_insert_lsn skips page headers.
            payload = "blahblah"
            cur.execute(f"select pg_logical_emit_message(true, 'pref', '{payload}')")
            end_lsn = read_wal_lsn(cur)
            lsn_diff = end_lsn - start_lsn
            logical_message_base = end_lsn - start_lsn - len(payload)
            log.info(
                f"before {start_lsn}, after {end_lsn}, lsn diff is {lsn_diff}, base {logical_message_base}"
            )

        # Now write a logical message sized to land exactly where we want.
        start_lsn = read_wal_lsn(cur)
        log.info(f"current_lsn={start_lsn}")
        curr_lsn = read_wal_lsn(cur)
        offs = int(curr_lsn) % wal_page_size
        till_page = wal_page_size - offs
        # The extra 8 subtracted here was found experimentally; its origin is
        # not understood.
        payload_len = till_page - logical_message_base - 8
        log.info(f"current_lsn={curr_lsn}, offs {offs}, till_page {till_page}")

        # The length above would stop exactly at the page boundary; a
        # contrecord is wanted, so make the message slightly longer.
        payload_len += 8

        filler = "a" * payload_len
        cur.execute(f"select pg_logical_emit_message(true, 'pref', 'f{filler}')")
        supposedly_contrecord_end = read_wal_lsn(cur)
        log.info(f"supposedly_page_boundary={supposedly_contrecord_end}")
        # Hitting the boundary relies on very fuzzy arithmetic, so give up
        # quietly instead of failing when the target offset was missed.
        if int(supposedly_contrecord_end) % wal_page_size != 32:
            pytest.skip("missed page boundary, bad luck")

        cur.execute("insert into replication_example values (2, 3)")

    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
    endpoint.stop().start()

    cur = endpoint.connect().cursor()
    # This insert should flush the current WAL page.
    cur.execute("insert into replication_example values (3, 4)")
    vanilla_pg.start()
    logical_replication_sync(vanilla_pg, endpoint)

    sum_sql = "select sum(somedata) from replication_example"
    subscriber_sum = vanilla_pg.safe_psql(sum_sql)
    publisher_sum = endpoint.safe_psql(sum_sql)
    assert subscriber_sum == publisher_sum

View File

@@ -21,7 +21,6 @@ from fixtures.neon_fixtures import (
NeonEnv, NeonEnv,
NeonEnvBuilder, NeonEnvBuilder,
PgBin, PgBin,
S3Scrubber,
last_flush_lsn_upload, last_flush_lsn_upload,
wait_for_last_flush_lsn, wait_for_last_flush_lsn,
) )
@@ -235,22 +234,8 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
assert len(suffixed_objects) > 0 assert len(suffixed_objects) > 0
assert len(legacy_objects) > 0 assert len(legacy_objects) > 0
# Flush through deletions to get a clean state for scrub: we are implicitly validating
# that our generations-enabled pageserver was able to do deletions of layers
# from earlier which don't have a generation.
env.pageserver.http_client().deletion_queue_flush(execute=True)
assert get_deletion_queue_unexpected_errors(env.pageserver.http_client()) == 0 assert get_deletion_queue_unexpected_errors(env.pageserver.http_client()) == 0
# Having written a mixture of generation-aware and legacy index_part.json,
# ensure the scrubber handles the situation as expected.
metadata_summary = S3Scrubber(
neon_env_builder.test_output_dir, neon_env_builder
).scan_metadata()
assert metadata_summary["count"] == 1 # Scrubber should have seen our timeline
assert not metadata_summary["with_errors"]
assert not metadata_summary["with_warnings"]
def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_generations = True neon_env_builder.enable_generations = True

View File

@@ -432,47 +432,3 @@ def test_sql_over_http_pool_idle(static_proxy: NeonProxy):
query(200, "BEGIN") query(200, "BEGIN")
pid2 = query(200, GET_CONNECTION_PID_QUERY)["rows"][0]["pid"] pid2 = query(200, GET_CONNECTION_PID_QUERY)["rows"][0]["pid"]
assert pid1 != pid2 assert pid1 != pid2
@pytest.mark.timeout(60)
def test_sql_over_http_pool_dos(static_proxy: NeonProxy):
    """An oversized SQL-over-HTTP result is rejected with a 400 response.

    Creates a superuser and a one-value enum type, then issues a query that
    cross-joins two 5000-row series (25,000,000 rows) through the HTTP
    endpoint. The proxy is expected to answer with status 400 and a message
    naming the 10485760-byte response limit.
    """
    static_proxy.safe_psql("create user http_auth with password 'http' superuser")
    static_proxy.safe_psql("CREATE TYPE foo AS ENUM ('foo')")

    # Large enough that the response size limit should be reached quickly.
    oversized_sql = "select * from generate_series(1, 5000) a cross join generate_series(1, 5000) b cross join (select 'foo'::foo) c;"

    response = static_proxy.http_query(
        oversized_sql,
        [],
        user="http_auth",
        password="http",
        expected_code=400,
    )

    assert "response is too large (max is 10485760 bytes)" in response["message"]
def test_sql_over_http_pool_custom_types(static_proxy: NeonProxy):
    """An array of a user-defined enum type is returned as a list of labels.

    Creates a superuser and a three-value enum type, selects an array of that
    enum through the HTTP endpoint (expecting status 200), and checks that the
    ``data`` column of the first row is the list of enum labels.
    """
    static_proxy.safe_psql("create user http_auth with password 'http' superuser")
    static_proxy.safe_psql("CREATE TYPE foo AS ENUM ('foo','bar','baz')")

    credentials = {"user": "http_auth", "password": "http"}
    enum_array_sql = "select array['foo'::foo, 'bar'::foo, 'baz'::foo] as data"

    response = static_proxy.http_query(
        enum_array_sql,
        [],
        expected_code=200,
        **credentials,
    )

    first_row = response["rows"][0]
    assert first_row["data"] == ["foo", "bar", "baz"]

View File

@@ -1,5 +1,5 @@
{ {
"postgres-v16": "763000f1d0873b827829c41f2f6f799ffc0de55c", "postgres-v16": "825a713a4fc833a0924bf22ad34681de97c155a0",
"postgres-v15": "bc88f539312fcc4bb292ce94ae9db09ab6656e8a", "postgres-v15": "e45d092e4d2b09057671190a1c9d9292293cd6b7",
"postgres-v14": "dd067cf656f6810a25aca6025633d32d02c5085a" "postgres-v14": "1224330eee058ed6013840e2a6dace5af82150ac"
} }