Mirror of https://github.com/neondatabase/neon.git, synced 2025-12-28 00:23:00 +00:00
Update storage components to edition 2024 (#10919)
Updates storage components to edition 2024. We like to stay on the latest edition if possible. There are no functional changes, but some code had to change to accommodate the edition's breaking changes. The PR has two commits:

* the first commit updates the storage crates to edition 2024 and appeases `cargo clippy` by changing code. I accidentally ran the formatter on some files that had other edits.
* the second commit performs a `cargo fmt`.

I would recommend a closer review of the first commit and a lighter review of the second one (as it just runs `cargo fmt`).

Part of https://github.com/neondatabase/neon/issues/10918
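The dominant mechanical change in the diff below is the `gen` keyword: Rust 2024 reserves `gen`, so struct fields named `gen` and calls to `rand::Rng::gen` must switch to the raw-identifier form `r#gen`. The rest of the churn is formatter output: under the repo's rustfmt settings and the 2024 style edition, nested `use` trees are flattened into one `use` per path and names inside braces are re-sorted. A minimal sketch of the keyword change, assuming the `rand` 0.8 API used elsewhere in the diff (the struct is modeled on `ReAttachResponseTenant` from the upcall API):

```rust
// Illustrative only — not code from this PR.
use rand::Rng;

struct ReAttachResponseTenant {
    // Edition 2021 allowed `gen: Option<u32>`; edition 2024 requires `r#gen`.
    r#gen: Option<u32>,
}

fn main() {
    let mut rng = rand::thread_rng();
    // `rng.gen::<u32>()` becomes `rng.r#gen::<u32>()` under edition 2024.
    let value = rng.r#gen::<u32>();
    let tenant = ReAttachResponseTenant { r#gen: Some(value) };
    println!("gen = {:?}", tenant.r#gen);
}
```

Raw identifiers are purely syntactic, so renaming `gen` to `r#gen` does not change serde field names or any wire format, which is why the PR can claim no functional changes.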
@@ -1,7 +1,7 @@
 [package]
 name = "pageserver_api"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true
 
 [features]

@@ -9,19 +9,18 @@ pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN
 pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
 pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
 
+use std::collections::HashMap;
+use std::num::{NonZeroU64, NonZeroUsize};
+use std::str::FromStr;
+use std::time::Duration;
+
 use postgres_backend::AuthType;
 use remote_storage::RemoteStorageConfig;
 use serde_with::serde_as;
-use std::{
-    collections::HashMap,
-    num::{NonZeroU64, NonZeroUsize},
-    str::FromStr,
-    time::Duration,
-};
-use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol};
+use utils::logging::LogFormat;
+use utils::postgres_client::PostgresClientProtocol;
 
-use crate::models::ImageCompressionAlgorithm;
-use crate::models::LsnLease;
+use crate::models::{ImageCompressionAlgorithm, LsnLease};
 
 // Certain metadata (e.g. externally-addressable name, AZ) is delivered
 // as a separate structure. This information is not neeed by the pageserver

@@ -367,10 +366,10 @@ pub struct TenantConfigToml {
 }
 
 pub mod defaults {
-    use crate::models::ImageCompressionAlgorithm;
-
     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
 
+    use crate::models::ImageCompressionAlgorithm;
+
     pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
 

@@ -9,11 +9,8 @@ use std::time::{Duration, Instant};
 use serde::{Deserialize, Serialize};
 use utils::id::{NodeId, TenantId};
 
-use crate::models::PageserverUtilization;
-use crate::{
-    models::{ShardParameters, TenantConfig},
-    shard::{ShardStripeSize, TenantShardId},
-};
+use crate::models::{PageserverUtilization, ShardParameters, TenantConfig};
+use crate::shard::{ShardStripeSize, TenantShardId};
 
 #[derive(Serialize, Deserialize, Debug)]
 #[serde(deny_unknown_fields)]

@@ -354,7 +351,7 @@ impl FromStr for SkSchedulingPolicy {
             _ => {
                 return Err(anyhow::anyhow!(
                     "Unknown scheduling policy '{s}', try active,pause,decomissioned"
-                ))
+                ));
             }
         })
     }

@@ -457,9 +454,10 @@ pub struct SafekeeperSchedulingPolicyRequest {
 
 #[cfg(test)]
 mod test {
-    use super::*;
     use serde_json;
 
+    use super::*;
+
     /// Check stability of PlacementPolicy's serialization
     #[test]
     fn placement_policy_encoding() -> anyhow::Result<()> {

@@ -1,11 +1,12 @@
-use anyhow::{bail, Result};
-use byteorder::{ByteOrder, BE};
+use std::fmt;
+use std::ops::Range;
+
+use anyhow::{Result, bail};
+use byteorder::{BE, ByteOrder};
 use bytes::Bytes;
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
-use postgres_ffi::Oid;
-use postgres_ffi::RepOriginId;
+use postgres_ffi::{Oid, RepOriginId};
 use serde::{Deserialize, Serialize};
-use std::{fmt, ops::Range};
 use utils::const_assert;
 
 use crate::reltag::{BlockNumber, RelTag, SlruKind};

@@ -954,25 +955,22 @@ impl std::str::FromStr for Key {
 mod tests {
     use std::str::FromStr;
 
-    use crate::key::is_metadata_key_slice;
-    use crate::key::Key;
-
-    use rand::Rng;
-    use rand::SeedableRng;
+    use rand::{Rng, SeedableRng};
 
     use super::AUX_KEY_PREFIX;
+    use crate::key::{Key, is_metadata_key_slice};
 
     #[test]
     fn display_fromstr_bijection() {
         let mut rng = rand::rngs::StdRng::seed_from_u64(42);
 
         let key = Key {
-            field1: rng.gen(),
-            field2: rng.gen(),
-            field3: rng.gen(),
-            field4: rng.gen(),
-            field5: rng.gen(),
-            field6: rng.gen(),
+            field1: rng.r#gen(),
+            field2: rng.r#gen(),
+            field3: rng.r#gen(),
+            field4: rng.r#gen(),
+            field5: rng.r#gen(),
+            field6: rng.r#gen(),
         };
 
         assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());

@@ -1,11 +1,10 @@
-use postgres_ffi::BLCKSZ;
 use std::ops::Range;
 
-use crate::{
-    key::Key,
-    shard::{ShardCount, ShardIdentity},
-};
 use itertools::Itertools;
+use postgres_ffi::BLCKSZ;
+
+use crate::key::Key;
+use crate::shard::{ShardCount, ShardIdentity};
 
 ///
 /// Represents a set of Keys, in a compact form.

@@ -609,15 +608,13 @@ pub fn singleton_range(key: Key) -> Range<Key> {
 
 #[cfg(test)]
 mod tests {
+    use std::fmt::Write;
+
     use rand::{RngCore, SeedableRng};
 
-    use crate::{
-        models::ShardParameters,
-        shard::{ShardCount, ShardNumber},
-    };
-
     use super::*;
-    use std::fmt::Write;
+    use crate::models::ShardParameters;
+    use crate::shard::{ShardCount, ShardNumber};
 
     // Helper function to create a key range.
     //

@@ -2,38 +2,30 @@ pub mod detach_ancestor;
 pub mod partitioning;
 pub mod utilization;
 
-#[cfg(feature = "testing")]
-use camino::Utf8PathBuf;
-pub use utilization::PageserverUtilization;
-
 use core::ops::Range;
-use std::{
-    collections::HashMap,
-    fmt::Display,
-    io::{BufRead, Read},
-    num::{NonZeroU32, NonZeroU64, NonZeroUsize},
-    str::FromStr,
-    time::{Duration, SystemTime},
-};
+use std::collections::HashMap;
+use std::fmt::Display;
+use std::io::{BufRead, Read};
+use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
+use std::str::FromStr;
+use std::time::{Duration, SystemTime};
 
 use byteorder::{BigEndian, ReadBytesExt};
+use bytes::{Buf, BufMut, Bytes, BytesMut};
+#[cfg(feature = "testing")]
+use camino::Utf8PathBuf;
 use postgres_ffi::BLCKSZ;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use serde_with::serde_as;
-use utils::{
-    completion,
-    id::{NodeId, TenantId, TimelineId},
-    lsn::Lsn,
-    postgres_client::PostgresClientProtocol,
-    serde_system_time,
-};
+pub use utilization::PageserverUtilization;
+use utils::id::{NodeId, TenantId, TimelineId};
+use utils::lsn::Lsn;
+use utils::postgres_client::PostgresClientProtocol;
+use utils::{completion, serde_system_time};
 
-use crate::{
-    key::{CompactKey, Key},
-    reltag::RelTag,
-    shard::{ShardCount, ShardStripeSize, TenantShardId},
-};
-use bytes::{Buf, BufMut, Bytes, BytesMut};
+use crate::key::{CompactKey, Key};
+use crate::reltag::RelTag;
+use crate::shard::{ShardCount, ShardStripeSize, TenantShardId};
 
 /// The state of a tenant in this pageserver.
 ///

@@ -332,7 +324,8 @@ pub struct ImportPgdataIdempotencyKey(pub String);
 
 impl ImportPgdataIdempotencyKey {
     pub fn random() -> Self {
-        use rand::{distributions::Alphanumeric, Rng};
+        use rand::Rng;
+        use rand::distributions::Alphanumeric;
         Self(
             rand::thread_rng()
                 .sample_iter(&Alphanumeric)

@@ -2288,9 +2281,10 @@ impl Default for PageTraceEvent {
 
 #[cfg(test)]
 mod tests {
-    use serde_json::json;
     use std::str::FromStr;
 
+    use serde_json::json;
+
     use super::*;
 
     #[test]

@@ -1,5 +1,7 @@
 use std::time::SystemTime;
-use utils::{serde_percent::Percent, serde_system_time};
+
+use utils::serde_percent::Percent;
+use utils::serde_system_time;
 
 /// Pageserver current utilization and scoring for how good candidate the pageserver would be for
 /// the next tenant.

@@ -131,12 +133,12 @@ impl PageserverUtilization {
 
 /// Test helper
 pub mod test_utilization {
-    use super::PageserverUtilization;
     use std::time::SystemTime;
-    use utils::{
-        serde_percent::Percent,
-        serde_system_time::{self},
-    };
+
+    use utils::serde_percent::Percent;
+    use utils::serde_system_time::{self};
+
+    use super::PageserverUtilization;
 
     // Parameters of the imaginary node used for test utilization instances
     const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024;

@@ -1,7 +1,7 @@
 //! This module defines the WAL record format used within the pageserver.
 
 use bytes::Bytes;
-use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember};
+use postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record};
 use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId};
 use serde::{Deserialize, Serialize};
 use utils::bin_ser::DeserializeError;

@@ -1,10 +1,10 @@
-use serde::{Deserialize, Serialize};
 use std::cmp::Ordering;
 use std::fmt;
 
-use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
-use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM};
 use postgres_ffi::Oid;
+use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
+use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
+use serde::{Deserialize, Serialize};
 
 ///
 /// Relation data file segment id throughout the Postgres cluster.

@@ -33,12 +33,13 @@
 
 use std::hash::{Hash, Hasher};
 
-use crate::{key::Key, models::ShardParameters};
+#[doc(inline)]
+pub use ::utils::shard::*;
 use postgres_ffi::relfile_utils::INIT_FORKNUM;
 use serde::{Deserialize, Serialize};
 
-#[doc(inline)]
-pub use ::utils::shard::*;
+use crate::key::Key;
+use crate::models::ShardParameters;
 
 /// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],
 /// and to check whether that [`ShardNumber`] is the same as the current shard.

@@ -337,7 +338,8 @@ pub fn describe(
 mod tests {
     use std::str::FromStr;
 
-    use utils::{id::TenantId, Hex};
+    use utils::Hex;
+    use utils::id::TenantId;
 
     use super::*;
 

@@ -6,9 +6,9 @@
 use serde::{Deserialize, Serialize};
 use utils::id::NodeId;
 
-use crate::{
-    controller_api::NodeRegisterRequest, models::LocationConfigMode, shard::TenantShardId,
-};
+use crate::controller_api::NodeRegisterRequest;
+use crate::models::LocationConfigMode;
+use crate::shard::TenantShardId;
 
 /// Upcall message sent by the pageserver to the configured `control_plane_api` on
 /// startup.

@@ -30,7 +30,7 @@ fn default_mode() -> LocationConfigMode {
 pub struct ReAttachResponseTenant {
     pub id: TenantShardId,
     /// Mandatory if LocationConfigMode is None or set to an Attached* mode
-    pub gen: Option<u32>,
+    pub r#gen: Option<u32>,
 
     /// Default value only for backward compat: this field should be set
     #[serde(default = "default_mode")]

@@ -44,7 +44,7 @@ pub struct ReAttachResponse {
 #[derive(Serialize, Deserialize)]
 pub struct ValidateRequestTenant {
     pub id: TenantShardId,
-    pub gen: u32,
+    pub r#gen: u32,
 }
 
 #[derive(Serialize, Deserialize)]

@@ -7,10 +7,11 @@
 //! Note that the [`Value`] type is used for the permananent storage format, so any
 //! changes to it must be backwards compatible.
 
-use crate::record::NeonWalRecord;
 use bytes::Bytes;
 use serde::{Deserialize, Serialize};
 
+use crate::record::NeonWalRecord;
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub enum Value {
     /// An Image value contains a full copy of the value

@@ -83,11 +84,11 @@ impl ValueBytes {
 
 #[cfg(test)]
 mod test {
-    use super::*;
-
     use bytes::Bytes;
     use utils::bin_ser::BeSer;
 
+    use super::*;
+
     macro_rules! roundtrip {
         ($orig:expr, $expected:expr) => {{
             let orig: Value = $orig;

@@ -1,7 +1,7 @@
 [package]
 name = "remote_storage"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true
 
 [dependencies]

@@ -2,33 +2,26 @@
 
 use std::borrow::Cow;
 use std::collections::HashMap;
-use std::env;
 use std::fmt::Display;
-use std::io;
 use std::num::NonZeroU32;
 use std::pin::Pin;
 use std::str::FromStr;
 use std::sync::Arc;
-use std::time::Duration;
-use std::time::SystemTime;
+use std::time::{Duration, SystemTime};
+use std::{env, io};
 
-use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
-use anyhow::Context;
-use anyhow::Result;
+use anyhow::{Context, Result};
 use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
-use azure_core::HttpClient;
-use azure_core::TransportOptions;
-use azure_core::{Continuable, RetryOptions};
+use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions};
 use azure_storage::StorageCredentials;
 use azure_storage_blobs::blob::CopyStatus;
-use azure_storage_blobs::prelude::ClientBuilder;
-use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
+use azure_storage_blobs::blob::operations::GetBlobBuilder;
+use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient};
 use bytes::Bytes;
+use futures::FutureExt;
 use futures::future::Either;
 use futures::stream::Stream;
-use futures::FutureExt;
-use futures_util::StreamExt;
-use futures_util::TryStreamExt;
+use futures_util::{StreamExt, TryStreamExt};
 use http_types::{StatusCode, Url};
 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;

@@ -36,12 +29,13 @@ use tracing::debug;
 use utils::backoff;
 use utils::backoff::exponential_backoff_duration_seconds;
 
-use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
-use crate::DownloadKind;
+use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
+use crate::config::AzureConfig;
+use crate::error::Cancelled;
+use crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests};
 use crate::{
-    config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
-    DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
-    TimeTravelError, TimeoutOrCancel,
+    ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode,
+    ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
 };
 
 pub struct AzureBlobStorage {

@@ -1,8 +1,10 @@
-use std::{fmt::Debug, num::NonZeroUsize, str::FromStr, time::Duration};
+use std::fmt::Debug;
+use std::num::NonZeroUsize;
+use std::str::FromStr;
+use std::time::Duration;
 
 use aws_sdk_s3::types::StorageClass;
 use camino::Utf8PathBuf;
 
 use serde::{Deserialize, Serialize};
 
 use crate::{

@@ -18,40 +18,35 @@ mod s3_bucket;
 mod simulate_failures;
 mod support;
 
-use std::{
-    collections::HashMap,
-    fmt::Debug,
-    num::NonZeroU32,
-    ops::Bound,
-    pin::{pin, Pin},
-    sync::Arc,
-    time::SystemTime,
-};
+use std::collections::HashMap;
+use std::fmt::Debug;
+use std::num::NonZeroU32;
+use std::ops::Bound;
+use std::pin::{Pin, pin};
+use std::sync::Arc;
+use std::time::SystemTime;
 
 use anyhow::Context;
-use camino::{Utf8Path, Utf8PathBuf};
+/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here.
+pub use azure_core::Etag;
 use bytes::Bytes;
-use futures::{stream::Stream, StreamExt};
+use camino::{Utf8Path, Utf8PathBuf};
+pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
+use futures::StreamExt;
+use futures::stream::Stream;
 use itertools::Itertools as _;
+use s3_bucket::RequestKind;
 use serde::{Deserialize, Serialize};
 use tokio::sync::Semaphore;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 
-pub use self::{
-    azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
-    simulate_failures::UnreliableWrapper,
-};
-use s3_bucket::RequestKind;
+pub use self::azure_blob::AzureBlobStorage;
+pub use self::local_fs::LocalFs;
+pub use self::s3_bucket::S3Bucket;
+pub use self::simulate_failures::UnreliableWrapper;
 
 pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};
 
-/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here.
-pub use azure_core::Etag;
-
-pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
-
 /// Default concurrency limit for S3 operations
 ///
 /// Currently, sync happens with AWS S3, that has two limits on requests per second:

@@ -640,8 +635,13 @@ impl GenericRemoteStorage {
                 let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
                 let access_key_id =
                     std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
-                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
-                    s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
+                info!(
+                    "Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
+                    s3_config.bucket_name,
+                    s3_config.bucket_region,
+                    s3_config.prefix_in_bucket,
+                    s3_config.endpoint
+                );
                 Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?))
             }
             RemoteStorageKind::AzureContainer(azure_config) => {

@@ -649,8 +649,12 @@
                     .storage_account
                     .as_deref()
                     .unwrap_or("<AZURE_STORAGE_ACCOUNT>");
-                info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
-                    azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
+                info!(
+                    "Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
+                    azure_config.container_name,
+                    azure_config.container_region,
+                    azure_config.prefix_in_container
+                );
                 Self::AzureBlob(Arc::new(AzureBlobStorage::new(
                     azure_config,
                     timeout,

@@ -4,31 +4,26 @@
 //! This storage used in tests, but can also be used in cases when a certain persistent
 //! volume is mounted to the local FS.
 
-use std::{
-    collections::HashSet,
-    io::ErrorKind,
-    num::NonZeroU32,
-    time::{Duration, SystemTime, UNIX_EPOCH},
-};
+use std::collections::HashSet;
+use std::io::ErrorKind;
+use std::num::NonZeroU32;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
-use anyhow::{bail, ensure, Context};
+use anyhow::{Context, bail, ensure};
 use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
 use futures::stream::Stream;
-use tokio::{
-    fs,
-    io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
-};
-use tokio_util::{io::ReaderStream, sync::CancellationToken};
+use tokio::fs;
+use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
+use tokio_util::io::ReaderStream;
+use tokio_util::sync::CancellationToken;
 use utils::crashsafe::path_with_suffix_extension;
 
-use crate::{
-    Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
-    TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
-};
-
 use super::{RemoteStorage, StorageMetadata};
-use crate::Etag;
+use crate::{
+    Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject,
+    REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel,
+};
 
 const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";

@@ -91,7 +86,8 @@ impl LocalFs {
 
     #[cfg(test)]
     async fn list_all(&self) -> anyhow::Result<Vec<RemotePath>> {
-        use std::{future::Future, pin::Pin};
+        use std::future::Future;
+        use std::pin::Pin;
         fn get_all_files<'a, P>(
             directory_path: P,
         ) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Utf8PathBuf>>> + Send + Sync + 'a>>

@@ -284,7 +280,9 @@ impl LocalFs {
         })?;
 
         if bytes_read < from_size_bytes {
-            bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes");
+            bail!(
+                "Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"
+            );
         }
         // Check if there is any extra data after the given size.
         let mut from = buffer_to_read.into_inner();

@@ -642,10 +640,13 @@ fn mock_etag(meta: &std::fs::Metadata) -> Etag {
 
 #[cfg(test)]
 mod fs_tests {
-    use super::*;
+    use std::collections::HashMap;
+    use std::io::Write;
+    use std::ops::Bound;
 
     use camino_tempfile::tempdir;
-    use std::{collections::HashMap, io::Write, ops::Bound};
+
+    use super::*;
 
     async fn read_and_check_metadata(
         storage: &LocalFs,

@@ -736,9 +737,14 @@ mod fs_tests {
         );
 
         let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
-        match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
+        match storage
+            .download(&non_existing_path, &DownloadOpts::default(), &cancel)
+            .await
+        {
             Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
-            other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
+            other => panic!(
+                "Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"
+            ),
         }
         Ok(())
     }

@@ -1,5 +1,5 @@
 use metrics::{
-    register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, IntCounter,
+    Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec,
 };
 use once_cell::sync::Lazy;
 

@@ -16,8 +16,8 @@ pub(crate) enum RequestKind {
     Head = 6,
 }
 
-use scopeguard::ScopeGuard;
 use RequestKind::*;
+use scopeguard::ScopeGuard;
 
 impl RequestKind {
     const fn as_str(&self) -> &'static str {

@@ -4,56 +4,50 @@
 //! allowing multiple api users to independently work with the same S3 bucket, if
 //! their bucket prefixes are both specified and different.
 
-use std::{
-    borrow::Cow,
-    collections::HashMap,
-    num::NonZeroU32,
-    pin::Pin,
-    sync::Arc,
-    task::{Context, Poll},
-    time::{Duration, SystemTime},
-};
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::num::NonZeroU32;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::time::{Duration, SystemTime};
 
-use anyhow::{anyhow, Context as _};
-use aws_config::{
-    default_provider::credentials::DefaultCredentialsChain,
-    retry::{RetryConfigBuilder, RetryMode},
-    BehaviorVersion,
-};
-use aws_sdk_s3::{
-    config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep},
-    error::SdkError,
-    operation::{get_object::GetObjectError, head_object::HeadObjectError},
-    types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass},
-    Client,
-};
+use anyhow::{Context as _, anyhow};
+use aws_config::BehaviorVersion;
+use aws_config::default_provider::credentials::DefaultCredentialsChain;
+use aws_config::retry::{RetryConfigBuilder, RetryMode};
+use aws_sdk_s3::Client;
+use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep};
+use aws_sdk_s3::error::SdkError;
+use aws_sdk_s3::operation::get_object::GetObjectError;
+use aws_sdk_s3::operation::head_object::HeadObjectError;
+use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass};
 use aws_smithy_async::rt::sleep::TokioSleep;
-use http_body_util::StreamBody;
-use http_types::StatusCode;
-use aws_smithy_types::{body::SdkBody, DateTime};
-use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
+use aws_smithy_types::DateTime;
+use aws_smithy_types::body::SdkBody;
+use aws_smithy_types::byte_stream::ByteStream;
+use aws_smithy_types::date_time::ConversionError;
 use bytes::Bytes;
 use futures::stream::Stream;
 use futures_util::StreamExt;
+use http_body_util::StreamBody;
+use http_types::StatusCode;
 use hyper::body::Frame;
 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
 use utils::backoff;
 
 use super::StorageMetadata;
-use crate::{
-    config::S3Config,
-    error::Cancelled,
-    metrics::{start_counting_cancelled_wait, start_measuring_requests},
-    support::PermitCarrying,
-    ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
-    RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE_S3,
-    REMOTE_STORAGE_PREFIX_SEPARATOR,
-};
-
-use crate::metrics::AttemptOutcome;
+use crate::config::S3Config;
+use crate::error::Cancelled;
 pub(super) use crate::metrics::RequestKind;
+use crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests};
+use crate::support::PermitCarrying;
+use crate::{
+    ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
+    MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage,
+    TimeTravelError, TimeoutOrCancel,
+};
 
 /// AWS S3 storage.
 pub struct S3Bucket {

@@ -958,8 +952,10 @@ impl RemoteStorage for S3Bucket {
                     version_id, key, ..
                 } = &vd;
                 if version_id == "null" {
-                    return Err(TimeTravelError::Other(anyhow!("Received ListVersions response for key={key} with version_id='null', \
-                        indicating either disabled versioning, or legacy objects with null version id values")));
+                    return Err(TimeTravelError::Other(anyhow!(
+                        "Received ListVersions response for key={key} with version_id='null', \
+                        indicating either disabled versioning, or legacy objects with null version id values"
+                    )));
                 }
                 tracing::trace!(
                     "Parsing version key={key} version_id={version_id} kind={:?}",

@@ -1126,9 +1122,10 @@ impl VerOrDelete {
 
 #[cfg(test)]
 mod tests {
-    use camino::Utf8Path;
     use std::num::NonZeroUsize;
 
+    use camino::Utf8Path;
+
     use crate::{RemotePath, S3Bucket, S3Config};
 
     #[tokio::test]

@@ -1,14 +1,15 @@
 //! This module provides a wrapper around a real RemoteStorage implementation that
 //! causes the first N attempts at each upload or download operatio to fail. For
 //! testing purposes.
-use bytes::Bytes;
-use futures::stream::Stream;
-use futures::StreamExt;
 use std::collections::HashMap;
+use std::collections::hash_map::Entry;
 use std::num::NonZeroU32;
-use std::sync::Mutex;
+use std::sync::{Arc, Mutex};
 use std::time::SystemTime;
-use std::{collections::hash_map::Entry, sync::Arc};
+
+use bytes::Bytes;
+use futures::StreamExt;
+use futures::stream::Stream;
 use tokio_util::sync::CancellationToken;
 
 use crate::{

@@ -1,9 +1,7 @@
-use std::{
-    future::Future,
-    pin::Pin,
-    task::{Context, Poll},
-    time::Duration,
-};
+use std::future::Future;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+use std::time::Duration;
 
 use bytes::Bytes;
 use futures_util::Stream;

@@ -114,9 +112,10 @@ pub(crate) fn cancel_or_timeout(
 
 #[cfg(test)]
 mod tests {
+    use futures::stream::StreamExt;
+
     use super::*;
     use crate::DownloadError;
-    use futures::stream::StreamExt;
 
     #[tokio::test(start_paused = true)]
     async fn cancelled_download_stream() {

@@ -1,19 +1,20 @@
+use std::collections::HashSet;
+use std::num::NonZeroU32;
+use std::ops::Bound;
+use std::sync::Arc;
+
 use anyhow::Context;
 use camino::Utf8Path;
 use futures::StreamExt;
 use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
-use std::ops::Bound;
-use std::sync::Arc;
-use std::{collections::HashSet, num::NonZeroU32};
 use test_context::test_context;
 use tokio_util::sync::CancellationToken;
 use tracing::debug;
 
-use crate::common::{download_to_vec, upload_stream, wrap_stream};
-
 use super::{
     MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, MaybeEnabledStorageWithTestBlobs,
 };
+use crate::common::{download_to_vec, upload_stream, wrap_stream};
 
 /// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries.
 /// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.

@@ -62,7 +63,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
         .into_iter()
         .collect::<HashSet<_>>();
     assert_eq!(
-        root_remote_prefixes, HashSet::from([base_prefix.clone()]),
+        root_remote_prefixes,
+        HashSet::from([base_prefix.clone()]),
         "remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
     );
 

@@ -84,7 +86,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
         .difference(&nested_remote_prefixes)
         .collect::<HashSet<_>>();
     assert_eq!(
-        remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
+        remote_only_prefixes.len() + missing_uploaded_prefixes.len(),
+        0,
         "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
     );
 

@@ -119,7 +122,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
         .difference(&nested_remote_prefixes_combined)
         .collect::<HashSet<_>>();
     assert_eq!(
-        remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
+        remote_only_prefixes.len() + missing_uploaded_prefixes.len(),
+        0,
         "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
     );
 

@@ -1,9 +1,9 @@
+use std::collections::HashSet;
 use std::env;
 use std::num::NonZeroUsize;
 use std::ops::ControlFlow;
 use std::sync::Arc;
-use std::time::UNIX_EPOCH;
-use std::{collections::HashSet, time::Duration};
+use std::time::{Duration, UNIX_EPOCH};
 
 use anyhow::Context;
 use remote_storage::{

@@ -208,7 +208,7 @@ async fn create_azure_client(
         .as_millis();
 
     // because nanos can be the same for two threads so can millis, add randomness
-    let random = rand::thread_rng().gen::<u32>();
+    let random = rand::thread_rng().r#gen::<u32>();
 
     let remote_storage_config = RemoteStorageConfig {
         storage: RemoteStorageKind::AzureContainer(AzureConfig {

@@ -1,13 +1,12 @@
+use std::collections::HashSet;
 use std::env;
 use std::fmt::{Debug, Display};
 use std::future::Future;
 use std::num::NonZeroUsize;
 use std::ops::ControlFlow;
 use std::sync::Arc;
-use std::time::{Duration, UNIX_EPOCH};
-use std::{collections::HashSet, time::SystemTime};
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
-use crate::common::{download_to_vec, upload_stream};
 use anyhow::Context;
 use camino::Utf8Path;
 use futures_util::StreamExt;

@@ -15,12 +14,13 @@ use remote_storage::{
     DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
     RemoteStorageConfig, RemoteStorageKind, S3Config,
 };
-use test_context::test_context;
-use test_context::AsyncTestContext;
+use test_context::{AsyncTestContext, test_context};
 use tokio::io::AsyncBufReadExt;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 
+use crate::common::{download_to_vec, upload_stream};
+
 mod common;
 
 #[path = "common/tests.rs"]

@@ -128,8 +128,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
             let t0_hwt = t0 + half_wt;
             let t1_hwt = t1 - half_wt;
             if !(t0_hwt..=t1_hwt).contains(&last_modified) {
-                panic!("last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
-                    This likely means a large lock discrepancy between S3 and the local clock.");
+                panic!(
+                    "last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
+                    This likely means a large lock discrepancy between S3 and the local clock."
+                );
             }
         }
 

@@ -383,7 +385,7 @@ async fn create_s3_client(
         .as_millis();
 
     // because nanos can be the same for two threads so can millis, add randomness
-    let random = rand::thread_rng().gen::<u32>();
+    let random = rand::thread_rng().r#gen::<u32>();
 
     let remote_storage_config = RemoteStorageConfig {
         storage: RemoteStorageKind::AwsS3(S3Config {

@@ -1,7 +1,7 @@
 [package]
 name = "safekeeper_api"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true
 
 [dependencies]

@@ -2,7 +2,8 @@
 //! rfcs/035-safekeeper-dynamic-membership-change.md
 //! for details.
 
-use std::{collections::HashSet, fmt::Display};
+use std::collections::HashSet;
+use std::fmt::Display;
 
 use anyhow;
 use anyhow::bail;

@@ -148,9 +149,10 @@ impl Display for Configuration {
 
 #[cfg(test)]
 mod tests {
-    use super::{MemberSet, SafekeeperId};
     use utils::id::NodeId;
 
+    use super::{MemberSet, SafekeeperId};
+
     #[test]
     fn test_member_set() {
         let mut members = MemberSet::empty();

@@ -1,18 +1,17 @@
 //! Types used in safekeeper http API. Many of them are also reused internally.
 
+use std::net::SocketAddr;
+
 use pageserver_api::shard::ShardIdentity;
 use postgres_ffi::TimestampTz;
 use serde::{Deserialize, Serialize};
-use std::net::SocketAddr;
 use tokio::time::Instant;
+use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
+use utils::lsn::Lsn;
+use utils::pageserver_feedback::PageserverFeedback;
 
-use utils::{
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-    lsn::Lsn,
-    pageserver_feedback::PageserverFeedback,
-};
-
-use crate::{membership::Configuration, ServerInfo, Term};
+use crate::membership::Configuration;
+use crate::{ServerInfo, Term};
 
 #[derive(Debug, Serialize)]
 pub struct SafekeeperStatus {

@@ -1,7 +1,7 @@
 [package]
 name = "pageserver"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true
 
 [features]

@@ -1,22 +1,20 @@
-use std::{env, num::NonZeroUsize};
+use std::env;
+use std::num::NonZeroUsize;
 
 use bytes::Bytes;
 use camino::Utf8PathBuf;
-use criterion::{criterion_group, criterion_main, Criterion};
-use pageserver::{
-    config::PageServerConf,
-    context::{DownloadBehavior, RequestContext},
-    l0_flush::{L0FlushConfig, L0FlushGlobalState},
-    page_cache,
-    task_mgr::TaskKind,
-    tenant::storage_layer::InMemoryLayer,
-    virtual_file,
-};
-use pageserver_api::{key::Key, shard::TenantShardId, value::Value};
-use utils::{
-    bin_ser::BeSer,
-    id::{TenantId, TimelineId},
-};
+use criterion::{Criterion, criterion_group, criterion_main};
+use pageserver::config::PageServerConf;
+use pageserver::context::{DownloadBehavior, RequestContext};
+use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};
+use pageserver::task_mgr::TaskKind;
+use pageserver::tenant::storage_layer::InMemoryLayer;
+use pageserver::{page_cache, virtual_file};
+use pageserver_api::key::Key;
+use pageserver_api::shard::TenantShardId;
+use pageserver_api::value::Value;
+use utils::bin_ser::BeSer;
+use utils::id::{TenantId, TimelineId};
 use wal_decoder::serialized_batch::SerializedValueBatch;
 
 // A very cheap hash for generating non-sequential keys.

@@ -1,23 +1,21 @@
-use criterion::measurement::WallTime;
-use pageserver::keyspace::{KeyPartitioning, KeySpace};
-use pageserver::tenant::layer_map::LayerMap;
-use pageserver::tenant::storage_layer::LayerName;
-use pageserver::tenant::storage_layer::PersistentLayerDesc;
-use pageserver_api::key::Key;
-use pageserver_api::shard::TenantShardId;
-use rand::prelude::{SeedableRng, SliceRandom, StdRng};
 use std::cmp::{max, min};
 use std::fs::File;
 use std::io::{BufRead, BufReader};
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::time::Instant;
 
+use criterion::measurement::WallTime;
+use criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main};
+use pageserver::keyspace::{KeyPartitioning, KeySpace};
+use pageserver::tenant::layer_map::LayerMap;
+use pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc};
+use pageserver_api::key::Key;
+use pageserver_api::shard::TenantShardId;
+use rand::prelude::{SeedableRng, SliceRandom, StdRng};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion};
-
 fn fixture_path(relative: &str) -> PathBuf {
     PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative)
 }

@@ -56,20 +56,23 @@
 //! medium/128 time: [10.412 ms 10.574 ms 10.718 ms]
 //! ```
 
+use std::future::Future;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
 use anyhow::Context;
 use bytes::{Buf, Bytes};
 use criterion::{BenchmarkId, Criterion};
 use once_cell::sync::Lazy;
-use pageserver::{config::PageServerConf, walredo::PostgresRedoManager};
+use pageserver::config::PageServerConf;
+use pageserver::walredo::PostgresRedoManager;
+use pageserver_api::key::Key;
 use pageserver_api::record::NeonWalRecord;
-use pageserver_api::{key::Key, shard::TenantShardId};
-use std::{
-    future::Future,
-    sync::Arc,
-    time::{Duration, Instant},
-};
-use tokio::{sync::Barrier, task::JoinSet};
-use utils::{id::TenantId, lsn::Lsn};
+use pageserver_api::shard::TenantShardId;
+use tokio::sync::Barrier;
+use tokio::task::JoinSet;
+use utils::id::TenantId;
+use utils::lsn::Lsn;
 
 fn bench(c: &mut Criterion) {
     macro_rules! bench_group {

@@ -1,15 +1,15 @@
|
|||||||
//! Upload queue benchmarks.
|
//! Upload queue benchmarks.
|
||||||
|
|
||||||
use std::str::FromStr as _;
|
use std::str::FromStr as _;
|
||||||
use std::sync::atomic::AtomicU32;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::AtomicU32;
|
||||||
|
|
||||||
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
|
use criterion::{Bencher, Criterion, criterion_group, criterion_main};
|
||||||
|
use pageserver::tenant::IndexPart;
|
||||||
use pageserver::tenant::metadata::TimelineMetadata;
|
use pageserver::tenant::metadata::TimelineMetadata;
|
||||||
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
|
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||||
use pageserver::tenant::storage_layer::LayerName;
|
use pageserver::tenant::storage_layer::LayerName;
|
||||||
use pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask};
|
use pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask};
|
||||||
use pageserver::tenant::IndexPart;
|
|
||||||
use pprof::criterion::{Output, PProfProfiler};
|
use pprof::criterion::{Output, PProfProfiler};
|
||||||
use utils::generation::Generation;
|
use utils::generation::Generation;
|
||||||
use utils::shard::{ShardCount, ShardIndex, ShardNumber};
|
use utils::shard::{ShardCount, ShardIndex, ShardNumber};
|
||||||
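
The hunk above also reorders items inside braces, for example `{criterion_group, criterion_main, Bencher, Criterion}` becoming `{Bencher, Criterion, criterion_group, criterion_main}`. This matches the 2024 style edition's version sorting of use lists, under which uppercase names sort ahead of lowercase ones (roughly ASCII order, with embedded numbers compared numerically). A stand-alone illustration:

    // Version-sorted use list: the types come first, the lowercase
    // function last, mirroring the criterion import in the hunk above.
    use std::sync::atomic::{AtomicU32, Ordering, fence};

    fn main() {
        let counter = AtomicU32::new(0);
        counter.fetch_add(1, Ordering::SeqCst);
        fence(Ordering::SeqCst); // lowercase `fence` sorts after the types
        println!("{}", counter.load(Ordering::SeqCst));
    }
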
|
|||||||
@@ -221,12 +221,12 @@ where
|
|||||||
// performed implicitly when `top` is dropped).
|
// performed implicitly when `top` is dropped).
|
||||||
if let Some(mut top) = this.heap.peek_mut() {
|
if let Some(mut top) = this.heap.peek_mut() {
|
||||||
match top.deref_mut() {
|
match top.deref_mut() {
|
||||||
LazyLoadLayer::Unloaded(ref mut l) => {
|
LazyLoadLayer::Unloaded(l) => {
|
||||||
let fut = l.load_keys(this.ctx);
|
let fut = l.load_keys(this.ctx);
|
||||||
this.load_future.set(Some(Box::pin(fut)));
|
this.load_future.set(Some(Box::pin(fut)));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
LazyLoadLayer::Loaded(ref mut entries) => {
|
LazyLoadLayer::Loaded(entries) => {
|
||||||
let result = entries.pop_front().unwrap();
|
let result = entries.pop_front().unwrap();
|
||||||
if entries.is_empty() {
|
if entries.is_empty() {
|
||||||
std::collections::binary_heap::PeekMut::pop(top);
|
std::collections::binary_heap::PeekMut::pop(top);
|
||||||
|
|||||||
@@ -40,9 +40,7 @@ impl Stats {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) fn add(&mut self, other: &Self) {
|
pub(crate) fn add(&mut self, other: &Self) {
|
||||||
let Self {
|
let Self { latency_histo } = self;
|
||||||
ref mut latency_histo,
|
|
||||||
} = self;
|
|
||||||
latency_histo.add(&other.latency_histo).unwrap();
|
latency_histo.add(&other.latency_histo).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
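
The two hunks above drop explicit `ref mut` binders from patterns. Because the scrutinee is already a mutable reference, match ergonomics puts the bindings in by-reference mode, and edition 2024 turns a redundant `ref`/`ref mut`/`mut` modifier in that position into a hard error. A compilable sketch with illustrative types (not the pageserver ones):

    enum LazyLayer {
        Unloaded(String),
        Loaded(Vec<u8>),
    }

    fn poke(layer: &mut LazyLayer) {
        match layer {
            // Edition 2021 tolerated `LazyLayer::Unloaded(ref mut name)`;
            // under edition 2024 that is an error, and the plain binding
            // below already has type `&mut String`.
            LazyLayer::Unloaded(name) => name.push('!'),
            LazyLayer::Loaded(bytes) => bytes.push(0),
        }
    }

    fn main() {
        let mut layer = LazyLayer::Unloaded("delta".to_string());
        poke(&mut layer);
    }
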
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
pub(crate) const _ASSERT_U64_EQ_USIZE: () = {
|
pub(crate) const _ASSERT_U64_EQ_USIZE: () = {
|
||||||
if std::mem::size_of::<usize>() != std::mem::size_of::<u64>() {
|
if std::mem::size_of::<usize>() != std::mem::size_of::<u64>() {
|
||||||
panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information");
|
panic!(
|
||||||
|
"the traits defined in this module assume that usize and u64 can be converted to each other without loss of information"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
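
The hunk above changes nothing but layout: the overlong `panic!` string moves onto its own line. This reflow, repeated in many hunks below, appears to come from running `cargo fmt` under the 2024 style edition, which splits a macro call whose single string-literal argument exceeds the line width instead of leaving the call on one long line. The shape of the change:

    fn assert_lossless(width: usize) {
        if width != 8 {
            // Before (one overlong line, left alone by older rustfmt):
            // panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information");
            // After:
            panic!(
                "the traits defined in this module assume that usize and u64 can be converted to each other without loss of information"
            );
        }
    }

    fn main() {
        assert_lossless(std::mem::size_of::<usize>());
    }
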
|
|||||||
@@ -2,7 +2,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use ::metrics::IntGauge;
|
use ::metrics::IntGauge;
|
||||||
use bytes::{Buf, BufMut, Bytes};
|
use bytes::{Buf, BufMut, Bytes};
|
||||||
use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
|
use pageserver_api::key::{AUX_KEY_PREFIX, Key, METADATA_KEY_SIZE};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
// BEGIN Copyright (c) 2017 Servo Contributors
|
// BEGIN Copyright (c) 2017 Servo Contributors
|
||||||
|
|||||||
@@ -10,33 +10,31 @@
|
|||||||
//! This module is responsible for creation of such tarball
|
//! This module is responsible for creation of such tarball
|
||||||
//! from data stored in object storage.
|
//! from data stored in object storage.
|
||||||
//!
|
//!
|
||||||
use anyhow::{anyhow, Context};
|
|
||||||
use bytes::{BufMut, Bytes, BytesMut};
|
|
||||||
use fail::fail_point;
|
|
||||||
use pageserver_api::key::{rel_block_to_key, Key};
|
|
||||||
use postgres_ffi::pg_constants;
|
|
||||||
use std::fmt::Write as FmtWrite;
|
use std::fmt::Write as FmtWrite;
|
||||||
use std::time::{Instant, SystemTime};
|
use std::time::{Instant, SystemTime};
|
||||||
|
|
||||||
|
use anyhow::{Context, anyhow};
|
||||||
|
use bytes::{BufMut, Bytes, BytesMut};
|
||||||
|
use fail::fail_point;
|
||||||
|
use pageserver_api::key::{Key, rel_block_to_key};
|
||||||
|
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||||
|
use postgres_ffi::pg_constants::{
|
||||||
|
DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID, PG_HBA, PGDATA_SPECIAL_FILES,
|
||||||
|
};
|
||||||
|
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
|
||||||
|
use postgres_ffi::{
|
||||||
|
BLCKSZ, PG_TLI, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName, dispatch_pgversion, pg_constants,
|
||||||
|
};
|
||||||
use tokio::io;
|
use tokio::io;
|
||||||
use tokio::io::AsyncWrite;
|
use tokio::io::AsyncWrite;
|
||||||
use tracing::*;
|
|
||||||
|
|
||||||
use tokio_tar::{Builder, EntryType, Header};
|
use tokio_tar::{Builder, EntryType, Header};
|
||||||
|
use tracing::*;
|
||||||
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::pgdatadir_mapping::Version;
|
use crate::pgdatadir_mapping::Version;
|
||||||
use crate::tenant::storage_layer::IoConcurrency;
|
|
||||||
use crate::tenant::Timeline;
|
use crate::tenant::Timeline;
|
||||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
use crate::tenant::storage_layer::IoConcurrency;
|
||||||
|
|
||||||
use postgres_ffi::dispatch_pgversion;
|
|
||||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
|
||||||
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
|
|
||||||
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
|
|
||||||
use postgres_ffi::XLogFileName;
|
|
||||||
use postgres_ffi::PG_TLI;
|
|
||||||
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
|
|
||||||
use utils::lsn::Lsn;
|
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum BasebackupError {
|
pub enum BasebackupError {
|
||||||
|
|||||||
@@ -3,49 +3,41 @@
|
|||||||
//! Main entry point for the Page Server executable.
|
//! Main entry point for the Page Server executable.
|
||||||
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::env::{var, VarError};
|
use std::env::{VarError, var};
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context};
|
use anyhow::{Context, anyhow};
|
||||||
use camino::Utf8Path;
|
use camino::Utf8Path;
|
||||||
use clap::{Arg, ArgAction, Command};
|
use clap::{Arg, ArgAction, Command};
|
||||||
|
use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric};
|
||||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
use metrics::set_build_info_metric;
|
||||||
use pageserver::config::PageserverIdentity;
|
use pageserver::config::{PageServerConf, PageserverIdentity};
|
||||||
use pageserver::controller_upcall_client::ControllerUpcallClient;
|
use pageserver::controller_upcall_client::ControllerUpcallClient;
|
||||||
|
use pageserver::deletion_queue::DeletionQueue;
|
||||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||||
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
||||||
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
|
use pageserver::task_mgr::{
|
||||||
use pageserver::tenant::{secondary, TenantSharedResources};
|
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
|
||||||
use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener};
|
};
|
||||||
|
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
|
||||||
|
use pageserver::{
|
||||||
|
CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service,
|
||||||
|
task_mgr, virtual_file,
|
||||||
|
};
|
||||||
|
use postgres_backend::AuthType;
|
||||||
use remote_storage::GenericRemoteStorage;
|
use remote_storage::GenericRemoteStorage;
|
||||||
use tokio::signal::unix::SignalKind;
|
use tokio::signal::unix::SignalKind;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
use utils::auth::{JwtAuth, SwappableJwtAuth};
|
||||||
use metrics::set_build_info_metric;
|
|
||||||
use pageserver::{
|
|
||||||
config::PageServerConf,
|
|
||||||
deletion_queue::DeletionQueue,
|
|
||||||
http, page_cache, page_service, task_mgr,
|
|
||||||
task_mgr::{BACKGROUND_RUNTIME, MGMT_REQUEST_RUNTIME},
|
|
||||||
tenant::mgr,
|
|
||||||
virtual_file,
|
|
||||||
};
|
|
||||||
use postgres_backend::AuthType;
|
|
||||||
use utils::crashsafe::syncfs;
|
use utils::crashsafe::syncfs;
|
||||||
use utils::failpoint_support;
|
|
||||||
use utils::logging::TracingErrorLayerEnablement;
|
use utils::logging::TracingErrorLayerEnablement;
|
||||||
use utils::{
|
use utils::sentry_init::init_sentry;
|
||||||
auth::{JwtAuth, SwappableJwtAuth},
|
use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener};
|
||||||
logging, project_build_tag, project_git_version,
|
|
||||||
sentry_init::init_sentry,
|
|
||||||
tcp_listener,
|
|
||||||
};
|
|
||||||
|
|
||||||
project_git_version!(GIT_VERSION);
|
project_git_version!(GIT_VERSION);
|
||||||
project_build_tag!(BUILD_TAG);
|
project_build_tag!(BUILD_TAG);
|
||||||
@@ -57,7 +49,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
|||||||
/// This adds roughly 3% overhead for allocations on average, which is acceptable considering
|
/// This adds roughly 3% overhead for allocations on average, which is acceptable considering
|
||||||
/// performance-sensitive code will avoid allocations as far as possible anyway.
|
/// performance-sensitive code will avoid allocations as far as possible anyway.
|
||||||
#[allow(non_upper_case_globals)]
|
#[allow(non_upper_case_globals)]
|
||||||
#[export_name = "malloc_conf"]
|
#[unsafe(export_name = "malloc_conf")]
|
||||||
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
|
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
|
||||||
|
|
||||||
const PID_FILE_NAME: &str = "pageserver.pid";
|
const PID_FILE_NAME: &str = "pageserver.pid";
|
||||||
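
The `#[export_name = ...]` change above is an edition requirement rather than a style one: edition 2024 classifies `export_name`, `no_mangle` and `link_section` as unsafe attributes, since they can clash with or override arbitrary symbols, so they must be written inside `unsafe(...)`. A stand-alone sketch with a hypothetical symbol name:

    // `demo_malloc_conf` is an illustrative exported symbol, not the real
    // jemalloc configuration hook wired up in the hunk above.
    #[allow(non_upper_case_globals)]
    #[unsafe(export_name = "demo_malloc_conf")]
    pub static demo_conf: &[u8] = b"prof:true,lg_prof_sample:21\0";

    fn main() {
        println!("{} bytes behind the exported symbol", demo_conf.len());
    }
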
@@ -85,6 +77,9 @@ fn main() -> anyhow::Result<()> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize failpoints support
|
||||||
|
let scenario = failpoint_support::init();
|
||||||
|
|
||||||
let workdir = arg_matches
|
let workdir = arg_matches
|
||||||
.get_one::<String>("workdir")
|
.get_one::<String>("workdir")
|
||||||
.map(Utf8Path::new)
|
.map(Utf8Path::new)
|
||||||
@@ -178,9 +173,6 @@ fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize failpoints support
|
|
||||||
let scenario = failpoint_support::init();
|
|
||||||
|
|
||||||
// Basic initialization of things that don't change after startup
|
// Basic initialization of things that don't change after startup
|
||||||
tracing::info!("Initializing virtual_file...");
|
tracing::info!("Initializing virtual_file...");
|
||||||
virtual_file::init(
|
virtual_file::init(
|
||||||
@@ -217,7 +209,9 @@ fn initialize_config(
|
|||||||
Ok(mut f) => {
|
Ok(mut f) => {
|
||||||
let md = f.metadata().context("stat config file")?;
|
let md = f.metadata().context("stat config file")?;
|
||||||
if !md.is_file() {
|
if !md.is_file() {
|
||||||
anyhow::bail!("Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ...");
|
anyhow::bail!(
|
||||||
|
"Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..."
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
@@ -225,7 +219,9 @@ fn initialize_config(
|
|||||||
toml_edit::de::from_str::<PageserverIdentity>(&s)?
|
toml_edit::de::from_str::<PageserverIdentity>(&s)?
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
anyhow::bail!("Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ...");
|
anyhow::bail!(
|
||||||
|
"Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..."
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -401,11 +397,9 @@ fn start_pageserver(
|
|||||||
Err(VarError::NotPresent) => {
|
Err(VarError::NotPresent) => {
|
||||||
info!("No JWT token for authentication with Safekeeper detected");
|
info!("No JWT token for authentication with Safekeeper detected");
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => return Err(e).with_context(
|
||||||
return Err(e).with_context(|| {
|
|| "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable",
|
||||||
"Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable"
|
),
|
||||||
})
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Top-level cancellation token for the process
|
// Top-level cancellation token for the process
|
||||||
@@ -711,7 +705,9 @@ async fn create_remote_storage_client(
|
|||||||
// wrapper that simulates failures.
|
// wrapper that simulates failures.
|
||||||
if conf.test_remote_failures > 0 {
|
if conf.test_remote_failures > 0 {
|
||||||
if !cfg!(feature = "testing") {
|
if !cfg!(feature = "testing") {
|
||||||
anyhow::bail!("test_remote_failures option is not available because pageserver was compiled without the 'testing' feature");
|
anyhow::bail!(
|
||||||
|
"test_remote_failures option is not available because pageserver was compiled without the 'testing' feature"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
info!(
|
info!(
|
||||||
"Simulating remote failures for first {} attempts of each op",
|
"Simulating remote failures for first {} attempts of each op",
|
||||||
|
|||||||
@@ -1,14 +1,10 @@
|
|||||||
use std::{
|
use std::io::{Read, Write, stdin, stdout};
|
||||||
io::{stdin, stdout, Read, Write},
|
use std::time::Duration;
|
||||||
time::Duration,
|
|
||||||
};
|
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
|
use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
|
||||||
use utils::{
|
use utils::id::{TenantId, TimelineId};
|
||||||
id::{TenantId, TimelineId},
|
use utils::lsn::Lsn;
|
||||||
lsn::Lsn,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(clap::Parser)]
|
#[derive(clap::Parser)]
|
||||||
struct Args {
|
struct Args {
|
||||||
|
|||||||
@@ -4,36 +4,29 @@
|
|||||||
//! file, or on the command line.
|
//! file, or on the command line.
|
||||||
//! See also `settings.md` for better description on every parameter.
|
//! See also `settings.md` for better description on every parameter.
|
||||||
|
|
||||||
use anyhow::{bail, ensure, Context};
|
|
||||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
|
||||||
use pageserver_api::{
|
|
||||||
config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes},
|
|
||||||
shard::TenantShardId,
|
|
||||||
};
|
|
||||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use storage_broker::Uri;
|
|
||||||
use utils::logging::SecretString;
|
|
||||||
use utils::postgres_client::PostgresClientProtocol;
|
|
||||||
|
|
||||||
use once_cell::sync::OnceCell;
|
|
||||||
use reqwest::Url;
|
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context, bail, ensure};
|
||||||
use camino::{Utf8Path, Utf8PathBuf};
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
|
use once_cell::sync::OnceCell;
|
||||||
|
use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
|
||||||
|
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||||
|
use pageserver_api::shard::TenantShardId;
|
||||||
use postgres_backend::AuthType;
|
use postgres_backend::AuthType;
|
||||||
use utils::{
|
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||||
id::{NodeId, TimelineId},
|
use reqwest::Url;
|
||||||
logging::LogFormat,
|
use storage_broker::Uri;
|
||||||
};
|
use utils::id::{NodeId, TimelineId};
|
||||||
|
use utils::logging::{LogFormat, SecretString};
|
||||||
|
use utils::postgres_client::PostgresClientProtocol;
|
||||||
|
|
||||||
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
|
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
|
||||||
use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||||
use crate::virtual_file;
|
|
||||||
use crate::virtual_file::io_engine;
|
use crate::virtual_file::io_engine;
|
||||||
use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME};
|
use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};
|
||||||
|
|
||||||
/// Global state of pageserver.
|
/// Global state of pageserver.
|
||||||
///
|
///
|
||||||
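
Beyond sorting, the hunk above regroups the config module's imports into blank-line-separated blocks: `std` first, then external crates, then `crate`-local paths. The layout matches rustfmt's `group_imports = "StdExternalCrate"` behavior, though whether that option is enabled or the grouping was done by hand is not visible in this diff. The resulting pattern, with the non-std groups sketched in comments so the snippet stands alone:

    // 1. standard library
    use std::num::NonZeroUsize;
    use std::time::Duration;

    // 2. external crates would follow, e.g.
    //    use camino::{Utf8Path, Utf8PathBuf};
    //
    // 3. then crate-local imports, e.g.
    //    use crate::virtual_file::io_engine;

    fn main() {
        let eviction_period = Duration::from_secs(60);
        println!("{:?} every {:?}", NonZeroUsize::new(128), eviction_period);
    }
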
@@ -440,7 +433,9 @@ impl PageServerConf {
|
|||||||
io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise
|
io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise
|
||||||
io_engine::FeatureTestResult::Worse { engine, remark } => {
|
io_engine::FeatureTestResult::Worse { engine, remark } => {
|
||||||
// TODO: bubble this up to the caller so we can tracing::warn! it.
|
// TODO: bubble this up to the caller so we can tracing::warn! it.
|
||||||
eprintln!("auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}");
|
eprintln!(
|
||||||
|
"auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"
|
||||||
|
);
|
||||||
engine
|
engine
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,13 +1,9 @@
|
|||||||
//! Periodically collect consumption metrics for all active tenants
|
//! Periodically collect consumption metrics for all active tenants
|
||||||
//! and push them to a HTTP endpoint.
|
//! and push them to a HTTP endpoint.
|
||||||
use crate::config::PageServerConf;
|
use std::collections::HashMap;
|
||||||
use crate::consumption_metrics::metrics::MetricsKey;
|
use std::sync::Arc;
|
||||||
use crate::consumption_metrics::upload::KeyGen as _;
|
use std::time::{Duration, SystemTime};
|
||||||
use crate::context::{DownloadBehavior, RequestContext};
|
|
||||||
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
|
|
||||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
|
||||||
use crate::tenant::tasks::BackgroundLoopKind;
|
|
||||||
use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant};
|
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
use consumption_metrics::EventType;
|
use consumption_metrics::EventType;
|
||||||
use itertools::Itertools as _;
|
use itertools::Itertools as _;
|
||||||
@@ -15,14 +11,21 @@ use pageserver_api::models::TenantState;
|
|||||||
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
|
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
|
||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::{Duration, SystemTime};
|
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
use utils::id::NodeId;
|
use utils::id::NodeId;
|
||||||
|
|
||||||
|
use crate::config::PageServerConf;
|
||||||
|
use crate::consumption_metrics::metrics::MetricsKey;
|
||||||
|
use crate::consumption_metrics::upload::KeyGen as _;
|
||||||
|
use crate::context::{DownloadBehavior, RequestContext};
|
||||||
|
use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
|
||||||
|
use crate::tenant::mgr::TenantManager;
|
||||||
|
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||||
|
use crate::tenant::tasks::BackgroundLoopKind;
|
||||||
|
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
||||||
|
|
||||||
mod disk_cache;
|
mod disk_cache;
|
||||||
mod metrics;
|
mod metrics;
|
||||||
mod upload;
|
mod upload;
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
use anyhow::Context;
|
|
||||||
use camino::{Utf8Path, Utf8PathBuf};
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use crate::consumption_metrics::NewMetricsRefRoot;
|
use anyhow::Context;
|
||||||
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
|
|
||||||
use super::{NewMetricsRoot, NewRawMetric, RawMetric};
|
use super::{NewMetricsRoot, NewRawMetric, RawMetric};
|
||||||
|
use crate::consumption_metrics::NewMetricsRefRoot;
|
||||||
|
|
||||||
pub(super) fn read_metrics_from_serde_value(
|
pub(super) fn read_metrics_from_serde_value(
|
||||||
json_value: serde_json::Value,
|
json_value: serde_json::Value,
|
||||||
|
|||||||
@@ -1,15 +1,16 @@
|
|||||||
use crate::tenant::mgr::TenantManager;
|
use std::sync::Arc;
|
||||||
use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
|
use std::time::SystemTime;
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use consumption_metrics::EventType;
|
use consumption_metrics::EventType;
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use std::{sync::Arc, time::SystemTime};
|
use utils::id::{TenantId, TimelineId};
|
||||||
use utils::{
|
use utils::lsn::Lsn;
|
||||||
id::{TenantId, TimelineId},
|
|
||||||
lsn::Lsn,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{Cache, NewRawMetric};
|
use super::{Cache, NewRawMetric};
|
||||||
|
use crate::context::RequestContext;
|
||||||
|
use crate::tenant::mgr::TenantManager;
|
||||||
|
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
|
||||||
|
|
||||||
/// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events`
|
/// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events`
|
||||||
/// instead of static str.
|
/// instead of static str.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use crate::consumption_metrics::RawMetric;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use std::collections::HashMap;
|
use crate::consumption_metrics::RawMetric;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn startup_collected_timeline_metrics_before_advancing() {
|
fn startup_collected_timeline_metrics_before_advancing() {
|
||||||
|
|||||||
@@ -2,15 +2,16 @@ use std::error::Error as _;
|
|||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE};
|
use consumption_metrics::{CHUNK_SIZE, Event, EventChunk, IdempotencyKey};
|
||||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::Instrument;
|
use tracing::Instrument;
|
||||||
|
|
||||||
use super::{metrics::Name, Cache, MetricsKey, NewRawMetric, RawMetric};
|
|
||||||
use utils::id::{TenantId, TimelineId};
|
use utils::id::{TenantId, TimelineId};
|
||||||
|
|
||||||
|
use super::metrics::Name;
|
||||||
|
use super::{Cache, MetricsKey, NewRawMetric, RawMetric};
|
||||||
|
|
||||||
/// How the metrics from pageserver are identified.
|
/// How the metrics from pageserver are identified.
|
||||||
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]
|
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]
|
||||||
struct Ids {
|
struct Ids {
|
||||||
@@ -438,14 +439,13 @@ async fn upload(
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::consumption_metrics::{
|
|
||||||
disk_cache::read_metrics_from_serde_value, NewMetricsRefRoot,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::consumption_metrics::NewMetricsRefRoot;
|
||||||
|
use crate::consumption_metrics::disk_cache::read_metrics_from_serde_value;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn chunked_serialization() {
|
fn chunked_serialization() {
|
||||||
let examples = metric_samples();
|
let examples = metric_samples();
|
||||||
|
|||||||
@@ -1,21 +1,23 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use futures::Future;
|
use futures::Future;
|
||||||
use pageserver_api::{
|
use pageserver_api::config::NodeMetadata;
|
||||||
controller_api::{AvailabilityZone, NodeRegisterRequest},
|
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
|
||||||
shard::TenantShardId,
|
use pageserver_api::shard::TenantShardId;
|
||||||
upcall_api::{
|
use pageserver_api::upcall_api::{
|
||||||
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
||||||
ValidateRequestTenant, ValidateResponse,
|
ValidateRequestTenant, ValidateResponse,
|
||||||
},
|
|
||||||
};
|
};
|
||||||
use serde::{de::DeserializeOwned, Serialize};
|
use serde::Serialize;
|
||||||
|
use serde::de::DeserializeOwned;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
|
use utils::generation::Generation;
|
||||||
|
use utils::id::NodeId;
|
||||||
|
use utils::{backoff, failpoint_support};
|
||||||
|
|
||||||
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
|
use crate::config::PageServerConf;
|
||||||
use pageserver_api::config::NodeMetadata;
|
use crate::virtual_file::on_fatal_io_error;
|
||||||
|
|
||||||
/// The Pageserver's client for using the storage controller upcall API: this is a small API
|
/// The Pageserver's client for using the storage controller upcall API: this is a small API
|
||||||
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
|
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
|
||||||
@@ -157,14 +159,18 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
|||||||
match az_id_from_metadata {
|
match az_id_from_metadata {
|
||||||
Some(az_id) => Some(AvailabilityZone(az_id)),
|
Some(az_id) => Some(AvailabilityZone(az_id)),
|
||||||
None => {
|
None => {
|
||||||
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
|
tracing::warn!(
|
||||||
|
"metadata.json does not contain an 'availability_zone_id' field"
|
||||||
|
);
|
||||||
conf.availability_zone.clone().map(AvailabilityZone)
|
conf.availability_zone.clone().map(AvailabilityZone)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if az_id.is_none() {
|
if az_id.is_none() {
|
||||||
panic!("Availability zone id could not be inferred from metadata.json or pageserver config");
|
panic!(
|
||||||
|
"Availability zone id could not be inferred from metadata.json or pageserver config"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(NodeRegisterRequest {
|
Some(NodeRegisterRequest {
|
||||||
@@ -236,7 +242,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|(id, generation)| ValidateRequestTenant {
|
.map(|(id, generation)| ValidateRequestTenant {
|
||||||
id: *id,
|
id: *id,
|
||||||
gen: (*generation).into().expect(
|
r#gen: (*generation).into().expect(
|
||||||
"Generation should always be valid for a Tenant doing deletions",
|
"Generation should always be valid for a Tenant doing deletions",
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
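
The `gen:` to `r#gen:` switch above is forced by the edition: `gen` is a reserved keyword in Rust 2024 (held for `gen` blocks). The raw identifier keeps the field's existing name, and with it the serialized form, while later hunks simply rename locals and parameters to `gen_` where the name itself does not matter. A compilable sketch with an illustrative struct, not the real `ValidateRequestTenant`:

    struct TenantEntry {
        r#gen: u32, // raw identifier keeps the field named `gen`
    }

    fn suffix_for(gen_: u32) -> String {
        // Locals and parameters renamed `gen_`, as in the deletion-queue
        // test hunks below.
        format!("-{gen_:08x}")
    }

    fn main() {
        let entry = TenantEntry { r#gen: 7 };
        println!("layer{}", suffix_for(entry.r#gen));
    }
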
|
|||||||
@@ -6,38 +6,31 @@ use std::collections::HashMap;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
|
||||||
use crate::metrics;
|
|
||||||
use crate::tenant::remote_timeline_client::remote_timeline_path;
|
|
||||||
use crate::tenant::remote_timeline_client::LayerFileMetadata;
|
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
|
||||||
use crate::virtual_file::VirtualFile;
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
|
use deleter::DeleterMessage;
|
||||||
|
use list_writer::ListWriterQueueMessage;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||||
use serde::Deserialize;
|
use serde::{Deserialize, Serialize};
|
||||||
use serde::Serialize;
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::Instrument;
|
use tracing::{Instrument, debug, error};
|
||||||
use tracing::{debug, error};
|
|
||||||
use utils::crashsafe::path_with_suffix_extension;
|
use utils::crashsafe::path_with_suffix_extension;
|
||||||
use utils::generation::Generation;
|
use utils::generation::Generation;
|
||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
use utils::lsn::AtomicLsn;
|
use utils::lsn::{AtomicLsn, Lsn};
|
||||||
use utils::lsn::Lsn;
|
|
||||||
|
|
||||||
use self::deleter::Deleter;
|
|
||||||
use self::list_writer::DeletionOp;
|
|
||||||
use self::list_writer::ListWriter;
|
|
||||||
use self::list_writer::RecoverOp;
|
|
||||||
use self::validator::Validator;
|
|
||||||
use deleter::DeleterMessage;
|
|
||||||
use list_writer::ListWriterQueueMessage;
|
|
||||||
use validator::ValidatorQueueMessage;
|
use validator::ValidatorQueueMessage;
|
||||||
|
|
||||||
use crate::{config::PageServerConf, tenant::storage_layer::LayerName};
|
use self::deleter::Deleter;
|
||||||
|
use self::list_writer::{DeletionOp, ListWriter, RecoverOp};
|
||||||
|
use self::validator::Validator;
|
||||||
|
use crate::config::PageServerConf;
|
||||||
|
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
||||||
|
use crate::metrics;
|
||||||
|
use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path};
|
||||||
|
use crate::tenant::storage_layer::LayerName;
|
||||||
|
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
|
||||||
|
|
||||||
// TODO: configurable for how long to wait before executing deletions
|
// TODO: configurable for how long to wait before executing deletions
|
||||||
|
|
||||||
@@ -664,21 +657,22 @@ impl DeletionQueue {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
use std::io::ErrorKind;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use camino::Utf8Path;
|
use camino::Utf8Path;
|
||||||
use hex_literal::hex;
|
use hex_literal::hex;
|
||||||
use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant};
|
use pageserver_api::key::Key;
|
||||||
use std::{io::ErrorKind, time::Duration};
|
use pageserver_api::shard::ShardIndex;
|
||||||
use tracing::info;
|
use pageserver_api::upcall_api::ReAttachResponseTenant;
|
||||||
|
|
||||||
use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
|
use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
|
use tracing::info;
|
||||||
use crate::{
|
|
||||||
controller_upcall_client::RetryForeverError,
|
|
||||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::controller_upcall_client::RetryForeverError;
|
||||||
|
use crate::tenant::harness::TenantHarness;
|
||||||
|
use crate::tenant::storage_layer::DeltaLayerName;
|
||||||
pub const TIMELINE_ID: TimelineId =
|
pub const TIMELINE_ID: TimelineId =
|
||||||
TimelineId::from_array(hex!("11223344556677881122334455667788"));
|
TimelineId::from_array(hex!("11223344556677881122334455667788"));
|
||||||
|
|
||||||
@@ -724,26 +718,26 @@ mod test {
|
|||||||
.expect("Failed to join workers for previous deletion queue");
|
.expect("Failed to join workers for previous deletion queue");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_latest_generation(&self, gen: Generation) {
|
fn set_latest_generation(&self, gen_: Generation) {
|
||||||
let tenant_shard_id = self.harness.tenant_shard_id;
|
let tenant_shard_id = self.harness.tenant_shard_id;
|
||||||
self.mock_control_plane
|
self.mock_control_plane
|
||||||
.latest_generation
|
.latest_generation
|
||||||
.lock()
|
.lock()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.insert(tenant_shard_id, gen);
|
.insert(tenant_shard_id, gen_);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns remote layer file name, suitable for use in assert_remote_files
|
/// Returns remote layer file name, suitable for use in assert_remote_files
|
||||||
fn write_remote_layer(
|
fn write_remote_layer(
|
||||||
&self,
|
&self,
|
||||||
file_name: LayerName,
|
file_name: LayerName,
|
||||||
gen: Generation,
|
gen_: Generation,
|
||||||
) -> anyhow::Result<String> {
|
) -> anyhow::Result<String> {
|
||||||
let tenant_shard_id = self.harness.tenant_shard_id;
|
let tenant_shard_id = self.harness.tenant_shard_id;
|
||||||
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
|
||||||
let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());
|
let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());
|
||||||
std::fs::create_dir_all(&remote_timeline_path)?;
|
std::fs::create_dir_all(&remote_timeline_path)?;
|
||||||
let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix());
|
let remote_layer_file_name = format!("{}{}", file_name, gen_.get_suffix());
|
||||||
|
|
||||||
let content: Vec<u8> = format!("placeholder contents of {file_name}").into();
|
let content: Vec<u8> = format!("placeholder contents of {file_name}").into();
|
||||||
|
|
||||||
@@ -1098,11 +1092,12 @@ mod test {
|
|||||||
/// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it.
|
/// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod mock {
|
pub(crate) mod mock {
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::tenant::remote_timeline_client::remote_layer_path;
|
use crate::tenant::remote_timeline_client::remote_layer_path;
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
||||||
|
|
||||||
pub struct ConsumerState {
|
pub struct ConsumerState {
|
||||||
rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,
|
rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,
|
||||||
|
|||||||
@@ -6,21 +6,16 @@
|
|||||||
//! number of full-sized DeleteObjects requests, rather than a larger number of
|
//! number of full-sized DeleteObjects requests, rather than a larger number of
|
||||||
//! smaller requests.
|
//! smaller requests.
|
||||||
|
|
||||||
use remote_storage::GenericRemoteStorage;
|
|
||||||
use remote_storage::RemotePath;
|
|
||||||
use remote_storage::TimeoutOrCancel;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::info;
|
use tracing::{info, warn};
|
||||||
use tracing::warn;
|
use utils::{backoff, pausable_failpoint};
|
||||||
use utils::backoff;
|
|
||||||
use utils::pausable_failpoint;
|
|
||||||
|
|
||||||
|
use super::{DeletionQueueError, FlushOp};
|
||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
|
|
||||||
use super::DeletionQueueError;
|
|
||||||
use super::FlushOp;
|
|
||||||
|
|
||||||
const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);
|
const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
pub(super) enum DeleterMessage {
|
pub(super) enum DeleterMessage {
|
||||||
|
|||||||
@@ -10,11 +10,6 @@
|
|||||||
//!
|
//!
|
||||||
//! DeletionLists are passed onwards to the Validator.
|
//! DeletionLists are passed onwards to the Validator.
|
||||||
|
|
||||||
use super::DeletionHeader;
|
|
||||||
use super::DeletionList;
|
|
||||||
use super::FlushOp;
|
|
||||||
use super::ValidatorQueueMessage;
|
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::create_dir_all;
|
use std::fs::create_dir_all;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -23,20 +18,17 @@ use pageserver_api::shard::TenantShardId;
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use remote_storage::RemotePath;
|
use remote_storage::RemotePath;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::debug;
|
use tracing::{debug, info, warn};
|
||||||
use tracing::info;
|
|
||||||
use tracing::warn;
|
|
||||||
use utils::generation::Generation;
|
use utils::generation::Generation;
|
||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
|
|
||||||
|
use super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage};
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::deletion_queue::TEMP_SUFFIX;
|
use crate::deletion_queue::TEMP_SUFFIX;
|
||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
use crate::tenant::remote_timeline_client::remote_layer_path;
|
use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path};
|
||||||
use crate::tenant::remote_timeline_client::LayerFileMetadata;
|
|
||||||
use crate::tenant::storage_layer::LayerName;
|
use crate::tenant::storage_layer::LayerName;
|
||||||
use crate::virtual_file::on_fatal_io_error;
|
use crate::virtual_file::{MaybeFatalIo, on_fatal_io_error};
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
|
||||||
|
|
||||||
// The number of keys in a DeletionList before we will proactively persist it
|
// The number of keys in a DeletionList before we will proactively persist it
|
||||||
// (without reaching a flush deadline). This aims to deliver objects of the order
|
// (without reaching a flush deadline). This aims to deliver objects of the order
|
||||||
|
|||||||
@@ -20,22 +20,14 @@ use std::time::Duration;
|
|||||||
|
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::debug;
|
use tracing::{debug, info, warn};
|
||||||
use tracing::info;
|
|
||||||
use tracing::warn;
|
|
||||||
|
|
||||||
use crate::config::PageServerConf;
|
|
||||||
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
|
||||||
use crate::controller_upcall_client::RetryForeverError;
|
|
||||||
use crate::metrics;
|
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
|
||||||
|
|
||||||
use super::deleter::DeleterMessage;
|
use super::deleter::DeleterMessage;
|
||||||
use super::DeletionHeader;
|
use super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates};
|
||||||
use super::DeletionList;
|
use crate::config::PageServerConf;
|
||||||
use super::DeletionQueueError;
|
use crate::controller_upcall_client::{ControlPlaneGenerationsApi, RetryForeverError};
|
||||||
use super::FlushOp;
|
use crate::metrics;
|
||||||
use super::VisibleLsnUpdates;
|
use crate::virtual_file::MaybeFatalIo;
|
||||||
|
|
||||||
// After this length of time, do any validation work that is pending,
|
// After this length of time, do any validation work that is pending,
|
||||||
// even if we haven't accumulated many keys to delete.
|
// even if we haven't accumulated many keys to delete.
|
||||||
@@ -190,7 +182,10 @@ where
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If we failed validation, then do not apply any of the projected updates
|
// If we failed validation, then do not apply any of the projected updates
|
||||||
info!("Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", tenant_lsn_state.generation);
|
info!(
|
||||||
|
"Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}",
|
||||||
|
tenant_lsn_state.generation
|
||||||
|
);
|
||||||
metrics::DELETION_QUEUE.dropped_lsn_updates.inc();
|
metrics::DELETION_QUEUE.dropped_lsn_updates.inc();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,30 +41,31 @@
|
|||||||
// - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
|
// - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
|
||||||
// reading these fields. We use the Debug impl for semi-structured logging, though.
|
// reading these fields. We use the Debug impl for semi-structured logging, though.
|
||||||
|
|
||||||
use std::{sync::Arc, time::SystemTime};
|
use std::sync::Arc;
|
||||||
|
use std::time::SystemTime;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId};
|
use pageserver_api::config::DiskUsageEvictionTaskConfig;
|
||||||
|
use pageserver_api::shard::TenantShardId;
|
||||||
use remote_storage::GenericRemoteStorage;
|
use remote_storage::GenericRemoteStorage;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{debug, error, info, instrument, warn, Instrument};
|
use tracing::{Instrument, debug, error, info, instrument, warn};
|
||||||
use utils::{completion, id::TimelineId};
|
use utils::completion;
|
||||||
|
use utils::id::TimelineId;
|
||||||
|
|
||||||
use crate::{
|
use crate::config::PageServerConf;
|
||||||
config::PageServerConf,
|
use crate::metrics::disk_usage_based_eviction::METRICS;
|
||||||
metrics::disk_usage_based_eviction::METRICS,
|
use crate::task_mgr::{self, BACKGROUND_RUNTIME};
|
||||||
task_mgr::{self, BACKGROUND_RUNTIME},
|
use crate::tenant::mgr::TenantManager;
|
||||||
tenant::{
|
use crate::tenant::remote_timeline_client::LayerFileMetadata;
|
||||||
mgr::TenantManager,
|
use crate::tenant::secondary::SecondaryTenant;
|
||||||
remote_timeline_client::LayerFileMetadata,
|
use crate::tenant::storage_layer::{
|
||||||
secondary::SecondaryTenant,
|
AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint,
|
||||||
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint},
|
|
||||||
tasks::sleep_random,
|
|
||||||
},
|
|
||||||
CancellableTask, DiskUsageEvictionTask,
|
|
||||||
};
|
};
|
||||||
|
use crate::tenant::tasks::sleep_random;
|
||||||
|
use crate::{CancellableTask, DiskUsageEvictionTask};
|
||||||
|
|
||||||
/// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
|
/// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
|
||||||
/// partitioning.
|
/// partitioning.
|
||||||
@@ -1007,10 +1008,14 @@ async fn collect_eviction_candidates(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug_assert!(EvictionPartition::Above < EvictionPartition::Below,
|
debug_assert!(
|
||||||
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
|
EvictionPartition::Above < EvictionPartition::Below,
|
||||||
debug_assert!(EvictionPartition::EvictNow < EvictionPartition::Above,
|
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"
|
||||||
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
|
);
|
||||||
|
debug_assert!(
|
||||||
|
EvictionPartition::EvictNow < EvictionPartition::Above,
|
||||||
|
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"
|
||||||
|
);
|
||||||
|
|
||||||
eviction_order.sort(&mut candidates);
|
eviction_order.sort(&mut candidates);
|
||||||
|
|
||||||
@@ -1157,9 +1162,8 @@ mod filesystem_level_usage {
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use camino::Utf8Path;
|
use camino::Utf8Path;
|
||||||
|
|
||||||
use crate::statvfs::Statvfs;
|
|
||||||
|
|
||||||
use super::DiskUsageEvictionTaskConfig;
|
use super::DiskUsageEvictionTaskConfig;
|
||||||
|
use crate::statvfs::Statvfs;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Usage<'a> {
|
pub struct Usage<'a> {
|
||||||
@@ -1224,10 +1228,12 @@ mod filesystem_level_usage {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_usage_pct_pressure() {
|
fn max_usage_pct_pressure() {
|
||||||
use super::Usage as _;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use utils::serde_percent::Percent;
|
use utils::serde_percent::Percent;
|
||||||
|
|
||||||
|
use super::Usage as _;
|
||||||
|
|
||||||
let mut usage = Usage {
|
let mut usage = Usage {
|
||||||
config: &DiskUsageEvictionTaskConfig {
|
config: &DiskUsageEvictionTaskConfig {
|
||||||
max_usage_pct: Percent::new(85).unwrap(),
|
max_usage_pct: Percent::new(85).unwrap(),
|
||||||
|
|||||||
@@ -2,125 +2,83 @@
|
|||||||
//! Management HTTP API
|
//! Management HTTP API
|
||||||
//!
|
//!
|
||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::{BinaryHeap, HashMap};
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{Context, Result, anyhow};
|
||||||
use enumset::EnumSet;
|
use enumset::EnumSet;
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use futures::StreamExt;
|
use futures::{StreamExt, TryFutureExt};
|
||||||
use futures::TryFutureExt;
|
|
||||||
use http_utils::endpoint::{
|
use http_utils::endpoint::{
|
||||||
profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
|
self, attach_openapi_ui, auth_middleware, check_permission_with, profile_cpu_handler,
|
||||||
|
profile_heap_handler, prometheus_metrics_handler, request_span,
|
||||||
};
|
};
|
||||||
|
use http_utils::error::{ApiError, HttpErrorBody};
|
||||||
use http_utils::failpoints::failpoints_handler;
|
use http_utils::failpoints::failpoints_handler;
|
||||||
use http_utils::request::must_parse_query_param;
|
use http_utils::json::{json_request, json_request_maybe, json_response};
|
||||||
use http_utils::request::{get_request_param, must_get_query_param, parse_query_param};
|
use http_utils::request::{
|
||||||
|
get_request_param, must_get_query_param, must_parse_query_param, parse_query_param,
|
||||||
|
parse_request_param,
|
||||||
|
};
|
||||||
|
use http_utils::{RequestExt, RouterBuilder};
|
||||||
use humantime::format_rfc3339;
|
use humantime::format_rfc3339;
|
||||||
use hyper::header;
|
use hyper::{Body, Request, Response, StatusCode, Uri, header};
|
||||||
use hyper::StatusCode;
|
|
||||||
use hyper::{Body, Request, Response, Uri};
|
|
||||||
use metrics::launch_timestamp::LaunchTimestamp;
|
use metrics::launch_timestamp::LaunchTimestamp;
|
||||||
use pageserver_api::models::virtual_file::IoMode;
|
use pageserver_api::models::virtual_file::IoMode;
|
||||||
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
|
use pageserver_api::models::{
|
||||||
use pageserver_api::models::IngestAuxFilesRequest;
|
DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest,
|
||||||
use pageserver_api::models::ListAuxFilesRequest;
|
LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest,
|
||||||
use pageserver_api::models::LocationConfig;
|
OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
|
||||||
use pageserver_api::models::LocationConfigListResponse;
|
TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,
|
||||||
use pageserver_api::models::LocationConfigMode;
|
TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,
|
||||||
use pageserver_api::models::LsnLease;
|
TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,
|
||||||
use pageserver_api::models::LsnLeaseRequest;
|
TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest,
|
||||||
use pageserver_api::models::OffloadedTimelineInfo;
|
TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode,
|
||||||
use pageserver_api::models::PageTraceEvent;
|
TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo,
|
||||||
use pageserver_api::models::ShardParameters;
|
TimelinesInfoAndOffloaded, TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse,
|
||||||
use pageserver_api::models::TenantConfigPatchRequest;
|
};
|
||||||
use pageserver_api::models::TenantDetails;
|
use pageserver_api::shard::{ShardCount, TenantShardId};
|
||||||
use pageserver_api::models::TenantLocationConfigRequest;
|
use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};
|
||||||
use pageserver_api::models::TenantLocationConfigResponse;
|
|
||||||
use pageserver_api::models::TenantScanRemoteStorageResponse;
|
|
||||||
use pageserver_api::models::TenantScanRemoteStorageShard;
|
|
||||||
use pageserver_api::models::TenantShardLocation;
|
|
||||||
use pageserver_api::models::TenantShardSplitRequest;
|
|
||||||
use pageserver_api::models::TenantShardSplitResponse;
|
|
||||||
use pageserver_api::models::TenantSorting;
|
|
||||||
use pageserver_api::models::TenantState;
|
|
||||||
use pageserver_api::models::TenantWaitLsnRequest;
|
|
||||||
use pageserver_api::models::TimelineArchivalConfigRequest;
|
|
||||||
use pageserver_api::models::TimelineCreateRequestMode;
|
|
||||||
use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
|
|
||||||
use pageserver_api::models::TimelinesInfoAndOffloaded;
|
|
||||||
use pageserver_api::models::TopTenantShardItem;
|
|
||||||
-use pageserver_api::models::TopTenantShardsRequest;
-use pageserver_api::models::TopTenantShardsResponse;
-use pageserver_api::shard::ShardCount;
-use pageserver_api::shard::TenantShardId;
-use remote_storage::DownloadError;
-use remote_storage::GenericRemoteStorage;
-use remote_storage::TimeTravelError;
 use scopeguard::defer;
-use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel};
+use tenant_size_model::svg::SvgBranchKind;
+use tenant_size_model::{SizeResult, StorageModel};
 use tokio::time::Instant;
 use tokio_util::io::StreamReader;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
+use utils::auth::SwappableJwtAuth;
+use utils::generation::Generation;
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;

 use crate::config::PageServerConf;
-use crate::context::RequestContextBuilder;
-use crate::context::{DownloadBehavior, RequestContext};
+use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
 use crate::deletion_queue::DeletionQueueClient;
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::{LocationConf, TenantConfOpt};
-use crate::tenant::mgr::GetActiveTenantError;
 use crate::tenant::mgr::{
-    GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError,
-    TenantSlotUpsertError, TenantStateError,
+    GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError,
+    TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError,
+};
+use crate::tenant::remote_timeline_client::{
+    download_index_part, list_remote_tenant_shards, list_remote_timelines,
 };
-use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
-use crate::tenant::remote_timeline_client;
-use crate::tenant::remote_timeline_client::download_index_part;
-use crate::tenant::remote_timeline_client::list_remote_tenant_shards;
-use crate::tenant::remote_timeline_client::list_remote_timelines;
 use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
-use crate::tenant::storage_layer::IoConcurrency;
-use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::storage_layer::LayerName;
-use crate::tenant::timeline::import_pgdata;
-use crate::tenant::timeline::offload::offload_timeline;
-use crate::tenant::timeline::offload::OffloadError;
-use crate::tenant::timeline::CompactFlags;
-use crate::tenant::timeline::CompactOptions;
-use crate::tenant::timeline::CompactRequest;
-use crate::tenant::timeline::CompactionError;
-use crate::tenant::timeline::Timeline;
-use crate::tenant::timeline::WaitLsnTimeout;
-use crate::tenant::timeline::WaitLsnWaiter;
-use crate::tenant::GetTimelineError;
-use crate::tenant::OffloadedTimeline;
-use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
-use crate::DEFAULT_PG_VERSION;
-use crate::{disk_usage_eviction_task, tenant};
-use http_utils::{
-    endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with},
-    error::{ApiError, HttpErrorBody},
-    json::{json_request, json_request_maybe, json_response},
-    request::parse_request_param,
-    RequestExt, RouterBuilder,
+use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
+use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
+use crate::tenant::timeline::{
+    CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
+    WaitLsnWaiter, import_pgdata,
 };
-use pageserver_api::models::{
-    StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
-    TimelineInfo,
-};
-use utils::{
-    auth::SwappableJwtAuth,
-    generation::Generation,
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
+use crate::tenant::{
+    GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError,
+    remote_timeline_client,
 };
+use crate::{DEFAULT_PG_VERSION, disk_usage_eviction_task, tenant};

 // For APIs that require an Active tenant, how long should we block waiting for that state?
 // This is not functionally necessary (clients will retry), but avoids generating a lot of
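
Most of the hunk above is the mechanical part of the edition move: long runs of single-item `use` statements are merged into nested braces and regrouped (std first, then external crates, then `crate::`), whether by hand or by the formatter's import-merging behavior. The imports are equivalent either way; a minimal sketch, assuming nothing beyond the standard library:

// Before formatting: one item per line.
// use std::time::Duration;
// use std::time::Instant;
//
// After: a single nested use; the compiled meaning is identical.
use std::time::{Duration, Instant};

fn main() {
    let started = Instant::now();
    let elapsed: Duration = started.elapsed();
    println!("startup took {elapsed:?}");
}
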
@@ -1128,12 +1086,12 @@ async fn tenant_list_handler(
             ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
         })?
         .iter()
-        .map(|(id, state, gen)| TenantInfo {
+        .map(|(id, state, gen_)| TenantInfo {
             id: *id,
             state: state.clone(),
             current_physical_size: None,
             attachment_status: state.attachment_status(),
-            generation: (*gen)
+            generation: (*gen_)
                 .into()
                 .expect("Tenants are always attached with a generation"),
             gc_blocking: None,
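
The `gen` to `gen_` rename in this hunk is one of the few genuine breaking changes in the diff: `gen` is reserved as a keyword in Rust edition 2024 (for future `gen` blocks), so it can no longer be used as a plain binding name. A minimal sketch of the two available spellings; the `gen_` suffix matches this commit's choice, and `r#gen` is the raw-identifier alternative:

fn main() {
    // On edition 2024, `let gen = 1;` no longer compiles.
    let gen_ = 1u64; // rename, as the pageserver code does
    let r#gen = 2u64; // or escape the keyword with a raw identifier
    println!("{gen_} {}", r#gen);
}
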
@@ -1670,9 +1628,8 @@ async fn block_or_unblock_gc(
     request: Request<Body>,
     block: bool,
 ) -> Result<Response<Body>, ApiError> {
-    use crate::tenant::{
-        remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized,
-    };
+    use crate::tenant::remote_timeline_client::WaitCompletionError;
+    use crate::tenant::upload_queue::NotInitialized;
     let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;

@@ -2058,7 +2015,9 @@ async fn tenant_time_travel_remote_storage_handler(
         )));
     }

-    tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}");
+    tracing::info!(
+        "Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}"
+    );

     remote_timeline_client::upload::time_travel_recover_tenant(
         &state.remote_storage,
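
A large share of this commit's churn follows the pattern just above: under the 2024 style rules, `cargo fmt` breaks macro calls whose single string-literal argument overflows the line, moving the literal onto its own line. A sketch with `println!` standing in for `tracing::info!` (the variable is made up for the example):

fn main() {
    let timestamp_raw = "2024-01-01T00:00:00Z"; // hypothetical value
    // One over-long line before; a three-line call after formatting:
    println!(
        "Issuing time travel request internally. timestamp={timestamp_raw}"
    );
}
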
@@ -2459,9 +2418,10 @@ async fn timeline_detach_ancestor_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    use crate::tenant::timeline::detach_ancestor;
     use pageserver_api::models::detach_ancestor::AncestorDetached;

+    use crate::tenant::timeline::detach_ancestor;
+
     let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;

@@ -2806,14 +2766,19 @@ async fn tenant_scan_remote_handler(
         .await
         {
             Ok((index_part, index_generation, _index_mtime)) => {
-                tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
-                    index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn());
+                tracing::info!(
+                    "Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
+                    index_part.layer_metadata.len(),
+                    index_part.metadata.disk_consistent_lsn()
+                );
                 generation = std::cmp::max(generation, index_generation);
             }
             Err(DownloadError::NotFound) => {
                 // This is normal for tenants that were created with multiple shards: they have an unsharded path
                 // containing the timeline's initdb tarball but no index. Otherwise it is a bit strange.
-                tracing::info!("Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping");
+                tracing::info!(
+                    "Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping"
+                );
                 continue;
             }
             Err(e) => {

@@ -3432,7 +3397,9 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow
         anyhow::bail!("unexpected non-zero bytes after the tar archive");
     }
     if trailing_bytes % 512 != 0 {
-        anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive");
+        anyhow::bail!(
+            "unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"
+        );
     }
     Ok(())
 }

@@ -4,14 +4,22 @@
 //!
 use std::path::{Path, PathBuf};

-use anyhow::{bail, ensure, Context, Result};
+use anyhow::{Context, Result, bail, ensure};
 use bytes::Bytes;
 use camino::Utf8Path;
 use futures::StreamExt;
 use pageserver_api::key::rel_block_to_key;
+use pageserver_api::reltag::{RelTag, SlruKind};
+use postgres_ffi::relfile_utils::*;
+use postgres_ffi::waldecoder::WalStreamDecoder;
+use postgres_ffi::{
+    BLCKSZ, ControlFileData, DBState_DB_SHUTDOWNED, Oid, WAL_SEGMENT_SIZE, XLogFileName,
+    pg_constants,
+};
 use tokio::io::{AsyncRead, AsyncReadExt};
 use tokio_tar::Archive;
 use tracing::*;
+use utils::lsn::Lsn;
 use wal_decoder::models::InterpretedWalRecord;
 use walkdir::WalkDir;

@@ -20,16 +28,6 @@ use crate::metrics::WAL_INGEST;
 use crate::pgdatadir_mapping::*;
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
-use pageserver_api::reltag::{RelTag, SlruKind};
-use postgres_ffi::pg_constants;
-use postgres_ffi::relfile_utils::*;
-use postgres_ffi::waldecoder::WalStreamDecoder;
-use postgres_ffi::ControlFileData;
-use postgres_ffi::DBState_DB_SHUTDOWNED;
-use postgres_ffi::Oid;
-use postgres_ffi::XLogFileName;
-use postgres_ffi::{BLCKSZ, WAL_SEGMENT_SIZE};
-use utils::lsn::Lsn;

 // Returns checkpoint LSN from controlfile
 pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {
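
Note the reordering inside the braces above (`{bail, ensure, Context, Result}` becoming `{Context, Result, bail, ensure}`): the 2024 style edition sorts import list items in ASCII order, so uppercase type names now come before lowercase function names. A minimal std-only sketch of an import list that already satisfies the new ordering:

// ASCII order puts `Ordering` before `max` and `min`, so the 2024-style
// formatter leaves this line alone.
use std::cmp::{Ordering, max, min};

fn main() {
    assert_eq!(max(3, 5), 5);
    assert_eq!(min(3, 5), 3);
    assert_eq!(3.cmp(&5), Ordering::Less);
}
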

@@ -1,4 +1,5 @@
-use std::{num::NonZeroUsize, sync::Arc};
+use std::num::NonZeroUsize;
+use std::sync::Arc;

 #[derive(Debug, PartialEq, Eq, Clone)]
 pub enum L0FlushConfig {

@@ -15,7 +15,8 @@ pub mod l0_flush;

 extern crate hyper0 as hyper;

-use futures::{stream::FuturesUnordered, StreamExt};
+use futures::StreamExt;
+use futures::stream::FuturesUnordered;
 pub use pageserver_api::keyspace;
 use tokio_util::sync::CancellationToken;
 mod assert_u64_eq_usize;

@@ -35,10 +36,8 @@ pub mod walredo;

 use camino::Utf8Path;
 use deletion_queue::DeletionQueue;
-use tenant::{
-    mgr::{BackgroundPurges, TenantManager},
-    secondary,
-};
+use tenant::mgr::{BackgroundPurges, TenantManager};
+use tenant::secondary;
 use tracing::{info, info_span};

 /// Current storage format version

@@ -350,9 +349,10 @@ async fn timed_after_cancellation<Fut: std::future::Future>(

 #[cfg(test)]
 mod timed_tests {
-    use super::timed;
     use std::time::Duration;

+    use super::timed;
+
     #[tokio::test]
     async fn timed_completes_when_inner_future_completes() {
         // A future that completes on time should have its result returned

@@ -10,11 +10,11 @@ use std::time::{Duration, Instant};
 use enum_map::{Enum as _, EnumMap};
 use futures::Future;
 use metrics::{
+    Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
+    IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
     register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
     register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
-    Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
-    IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::config::{

@@ -24,9 +24,8 @@ use pageserver_api::config::{
 use pageserver_api::models::InMemoryLayerInfo;
 use pageserver_api::shard::TenantShardId;
 use pin_project_lite::pin_project;
-use postgres_backend::{is_expected_io_error, QueryError};
+use postgres_backend::{QueryError, is_expected_io_error};
 use pq_proto::framed::ConnectionError;
-
 use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
 use strum_macros::{IntoStaticStr, VariantNames};
 use utils::id::TimelineId;

@@ -35,12 +34,12 @@ use crate::config::PageServerConf;
 use crate::context::{PageContentKind, RequestContext};
 use crate::pgdatadir_mapping::DatadirModificationStats;
 use crate::task_mgr::TaskKind;
+use crate::tenant::Timeline;
 use crate::tenant::layer_map::LayerMap;
 use crate::tenant::mgr::TenantSlot;
 use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
 use crate::tenant::tasks::BackgroundLoopKind;
 use crate::tenant::throttle::ThrottleResult;
-use crate::tenant::Timeline;

 /// Prometheus histogram buckets (in seconds) for operations in the critical
 /// path. In other words, operations that directly affect that latency of user

@@ -363,7 +362,7 @@ pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
 pub(crate) mod page_cache_eviction_metrics {
     use std::num::NonZeroUsize;

-    use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
+    use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};
     use once_cell::sync::Lazy;

     #[derive(Clone, Copy)]

@@ -722,7 +721,7 @@ pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
 });

 pub(crate) mod initial_logical_size {
-    use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
+    use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
     use once_cell::sync::Lazy;

     pub(crate) struct StartCalculation(IntCounterVec);

@@ -1105,12 +1104,17 @@ impl EvictionsWithLowResidenceDuration {
                 // - future "drop panick => abort"
                 //
                 // so just nag: (the error has the labels)
-                tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
+                tracing::warn!(
+                    "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"
+                );
             }
             Ok(()) => {
                 // to help identify cases where we double-remove the same values, let's log all
                 // deletions?
-                tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
+                tracing::info!(
+                    "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}",
+                    self.data_source
+                );
             }
         }
     }

@@ -3574,12 +3578,10 @@ impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
 }

 pub mod tokio_epoll_uring {
-    use std::{
-        collections::HashMap,
-        sync::{Arc, Mutex},
-    };
+    use std::collections::HashMap;
+    use std::sync::{Arc, Mutex};

-    use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
+    use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};
     use once_cell::sync::Lazy;

     /// Shared storage for tokio-epoll-uring thread local metrics.

@@ -3588,7 +3590,9 @@ pub mod tokio_epoll_uring {
         let slots_submission_queue_depth = register_histogram!(
             "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
             "The slots waiters queue depth of each tokio_epoll_uring system",
-            vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
+            vec![
+                1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
+            ],
         )
         .expect("failed to define a metric");
         ThreadLocalMetricsStorage {

@@ -3765,7 +3769,7 @@ pub mod tokio_epoll_uring {
 }

 pub(crate) mod tenant_throttling {
-    use metrics::{register_int_counter_vec, IntCounter};
+    use metrics::{IntCounter, register_int_counter_vec};
     use once_cell::sync::Lazy;
     use utils::shard::TenantShardId;

@@ -67,23 +67,18 @@
 //! mapping is automatically removed and the slot is marked free.
 //!

-use std::{
-    collections::{hash_map::Entry, HashMap},
-    sync::{
-        atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering},
-        Arc, Weak,
-    },
-    time::Duration,
-};
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+use std::sync::atomic::{AtomicU8, AtomicU64, AtomicUsize, Ordering};
+use std::sync::{Arc, Weak};
+use std::time::Duration;

 use anyhow::Context;
 use once_cell::sync::OnceCell;

-use crate::{
-    context::RequestContext,
-    metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
-    virtual_file::{IoBufferMut, IoPageSlice},
-};
+use crate::context::RequestContext;
+use crate::metrics::{PageCacheSizeMetrics, page_cache_eviction_metrics};
+use crate::virtual_file::{IoBufferMut, IoPageSlice};

 static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
 const TEST_PAGE_CACHE_SIZE: usize = 50;

@@ -168,11 +163,7 @@ impl Slot {
         let count_res =
             self.usage_count
                 .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {
-                    if val == 0 {
-                        None
-                    } else {
-                        Some(val - 1)
-                    }
+                    if val == 0 { None } else { Some(val - 1) }
                });

         match count_res {
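
rustfmt merely collapses the short `if`/`else` above onto one line; the interesting part is the `fetch_update` idiom it sits in, where returning `None` from the closure aborts the update. That is how the page cache refuses to decrement a usage count that is already zero. A self-contained sketch of the same pattern (the atomic here is a stand-in, not the pageserver's type):

use std::sync::atomic::{AtomicU8, Ordering};

fn main() {
    let usage_count = AtomicU8::new(1);

    // Succeeds: the closure maps 1 -> 0, and Ok carries the previous value.
    let first = usage_count.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {
        if val == 0 { None } else { Some(val - 1) }
    });
    assert_eq!(first, Ok(1));

    // Fails: the closure returns None at zero, and Err carries the current value.
    let second = usage_count.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {
        if val == 0 { None } else { Some(val - 1) }
    });
    assert_eq!(second, Err(0));
}
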

@@ -1,7 +1,15 @@
 //! The Page Service listens for client connections and serves their GetPage@LSN
 //! requests.

-use anyhow::{bail, Context};
+use std::borrow::Cow;
+use std::num::NonZeroUsize;
+use std::os::fd::AsRawFd;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::{Duration, Instant, SystemTime};
+use std::{io, str};
+
+use anyhow::{Context, bail};
 use async_compression::tokio::write::GzipEncoder;
 use bytes::Buf;
 use futures::FutureExt;

@@ -11,72 +19,57 @@ use pageserver_api::config::{
     PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
     PageServiceProtocolPipelinedExecutionStrategy,
 };
-use pageserver_api::models::{self, TenantState};
+use pageserver_api::key::rel_block_to_key;
 use pageserver_api::models::{
-    PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
+    self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
     PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
     PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
     PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
-    PagestreamProtocolVersion, PagestreamRequest,
+    PagestreamProtocolVersion, PagestreamRequest, TenantState,
 };
+use pageserver_api::reltag::SlruKind;
 use pageserver_api::shard::TenantShardId;
 use postgres_backend::{
-    is_expected_io_error, AuthType, PostgresBackend, PostgresBackendReader, QueryError,
+    AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
 };
+use postgres_ffi::BLCKSZ;
+use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use pq_proto::framed::ConnectionError;
-use pq_proto::FeStartupPacket;
-use pq_proto::{BeMessage, FeMessage, RowDescriptor};
-use std::borrow::Cow;
-use std::io;
-use std::num::NonZeroUsize;
-use std::str;
-use std::str::FromStr;
-use std::sync::Arc;
-use std::time::SystemTime;
-use std::time::{Duration, Instant};
+use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
 use strum_macros::IntoStaticStr;
-use tokio::io::{AsyncRead, AsyncWrite};
-use tokio::io::{AsyncWriteExt, BufWriter};
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
+use utils::auth::{Claims, Scope, SwappableJwtAuth};
+use utils::failpoint_support;
+use utils::id::{TenantId, TimelineId};
 use utils::logging::log_slow;
+use utils::lsn::Lsn;
+use utils::simple_rcu::RcuReadGuard;
 use utils::sync::gate::{Gate, GateGuard};
 use utils::sync::spsc_fold;
-use utils::{
-    auth::{Claims, Scope, SwappableJwtAuth},
-    failpoint_support,
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-    simple_rcu::RcuReadGuard,
-};

 use crate::auth::check_permission;
 use crate::basebackup::BasebackupError;
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::metrics::{self, SmgrOpTimer};
-use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
+use crate::metrics::{
+    self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer,
+};
 use crate::pgdatadir_mapping::Version;
-use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
-use crate::task_mgr::TaskKind;
-use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME};
-use crate::tenant::mgr::ShardSelector;
-use crate::tenant::mgr::TenantManager;
-use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult};
+use crate::span::{
+    debug_assert_current_span_has_tenant_and_timeline_id,
+    debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
+};
+use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind};
+use crate::tenant::mgr::{
+    GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager,
+};
 use crate::tenant::storage_layer::IoConcurrency;
 use crate::tenant::timeline::{self, WaitLsnError};
-use crate::tenant::GetTimelineError;
-use crate::tenant::PageReconstructError;
-use crate::tenant::Timeline;
+use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
 use crate::{basebackup, timed_after_cancellation};
-use pageserver_api::key::rel_block_to_key;
-use pageserver_api::models::PageTraceEvent;
-use pageserver_api::reltag::SlruKind;
-use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
-use postgres_ffi::BLCKSZ;
-use std::os::fd::AsRawFd;

 /// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which
 /// is not yet in state [`TenantState::Active`].
@@ -986,7 +979,7 @@ impl PageServerHandler {
                 Ok(BatchedFeMessage::GetPage {
                     span: _,
                     shard: accum_shard,
-                    pages: ref mut accum_pages,
+                    pages: accum_pages,
                     effective_request_lsn: accum_lsn,
                 }),
                 BatchedFeMessage::GetPage {

@@ -1236,12 +1229,13 @@ impl PageServerHandler {
             } => {
                 fail::fail_point!("ps::handle-pagerequest-message::exists");
                 (
-                    vec![self
-                        .handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
+                    vec![
+                        self.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
                         .instrument(span.clone())
                         .await
                         .map(|msg| (msg, timer))
-                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
+                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
+                    ],
                     span,
                 )
             }

@@ -1253,12 +1247,13 @@ impl PageServerHandler {
             } => {
                 fail::fail_point!("ps::handle-pagerequest-message::nblocks");
                 (
-                    vec![self
-                        .handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
+                    vec![
+                        self.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))
-                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
+                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
+                    ],
                     span,
                 )
             }

@@ -1297,12 +1292,13 @@ impl PageServerHandler {
             } => {
                 fail::fail_point!("ps::handle-pagerequest-message::dbsize");
                 (
-                    vec![self
-                        .handle_db_size_request(&*shard.upgrade()?, &req, ctx)
+                    vec![
+                        self.handle_db_size_request(&*shard.upgrade()?, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))
-                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
+                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
+                    ],
                     span,
                 )
             }

@@ -1314,12 +1310,13 @@ impl PageServerHandler {
             } => {
                 fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
                 (
-                    vec![self
-                        .handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
+                    vec![
+                        self.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
                        .instrument(span.clone())
                        .await
                        .map(|msg| (msg, timer))
-                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
+                        .map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
+                    ],
                     span,
                 )
             }

@@ -2112,7 +2109,9 @@ impl PageServerHandler {
         set_tracing_field_shard_id(&timeline);

         if timeline.is_archived() == Some(true) {
-            tracing::info!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it.");
+            tracing::info!(
+                "timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it."
+            );
             return Err(QueryError::NotFound("timeline is archived".into()));
         }

@@ -6,6 +6,36 @@
 //! walingest.rs handles a few things like implicit relation creation and extension.
 //! Clarify that)
 //!
+use std::collections::{BTreeMap, HashMap, HashSet, hash_map};
+use std::ops::{ControlFlow, Range};
+
+use anyhow::{Context, ensure};
+use bytes::{Buf, Bytes, BytesMut};
+use enum_map::Enum;
+use itertools::Itertools;
+use pageserver_api::key::{
+    AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists,
+    TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range,
+    rel_size_to_key, rel_tag_sparse_key, rel_tag_sparse_key_range, relmap_file_key,
+    repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
+    slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
+};
+use pageserver_api::keyspace::SparseKeySpace;
+use pageserver_api::record::NeonWalRecord;
+use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
+use pageserver_api::shard::ShardIdentity;
+use pageserver_api::value::Value;
+use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
+use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId};
+use serde::{Deserialize, Serialize};
+use strum::IntoEnumIterator;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, info, trace, warn};
+use utils::bin_ser::{BeSer, DeserializeError};
+use utils::lsn::Lsn;
+use utils::pausable_failpoint;
+use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
+
 use super::tenant::{PageReconstructError, Timeline};
 use crate::aux_file;
 use crate::context::RequestContext;

@@ -19,37 +49,6 @@ use crate::span::{
 };
 use crate::tenant::storage_layer::IoConcurrency;
 use crate::tenant::timeline::GetVectoredError;
-use anyhow::{ensure, Context};
-use bytes::{Buf, Bytes, BytesMut};
-use enum_map::Enum;
-use itertools::Itertools;
-use pageserver_api::key::{
-    dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
-    rel_tag_sparse_key_range, relmap_file_key, repl_origin_key, repl_origin_key_range,
-    slru_block_to_key, slru_dir_to_key, slru_segment_key_range, slru_segment_size_to_key,
-    twophase_file_key, twophase_key_range, CompactKey, RelDirExists, AUX_FILES_KEY, CHECKPOINT_KEY,
-    CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
-};
-use pageserver_api::key::{rel_tag_sparse_key, Key};
-use pageserver_api::keyspace::SparseKeySpace;
-use pageserver_api::record::NeonWalRecord;
-use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
-use pageserver_api::shard::ShardIdentity;
-use pageserver_api::value::Value;
-use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
-use postgres_ffi::BLCKSZ;
-use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
-use serde::{Deserialize, Serialize};
-use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
-use std::ops::ControlFlow;
-use std::ops::Range;
-use strum::IntoEnumIterator;
-use tokio_util::sync::CancellationToken;
-use tracing::{debug, info, trace, warn};
-use utils::bin_ser::DeserializeError;
-use utils::pausable_failpoint;
-use utils::{bin_ser::BeSer, lsn::Lsn};
-use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};

 /// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached.
 pub const MAX_AUX_FILE_DELTAS: usize = 1024;

@@ -327,16 +326,16 @@ impl Timeline {
         let clone = match &res {
             Ok(buf) => Ok(buf.clone()),
             Err(err) => Err(match err {
-                PageReconstructError::Cancelled => {
-                    PageReconstructError::Cancelled
-                }
-                x @ PageReconstructError::Other(_) |
-                x @ PageReconstructError::AncestorLsnTimeout(_) |
-                x @ PageReconstructError::WalRedo(_) |
-                x @ PageReconstructError::MissingKey(_) => {
-                    PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}"))
-                },
+                PageReconstructError::Cancelled => PageReconstructError::Cancelled,
+
+                x @ PageReconstructError::Other(_)
+                | x @ PageReconstructError::AncestorLsnTimeout(_)
+                | x @ PageReconstructError::WalRedo(_)
+                | x @ PageReconstructError::MissingKey(_) => {
+                    PageReconstructError::Other(anyhow::anyhow!(
+                        "there was more than one request for this key in the batch, error logged once: {x:?}"
+                    ))
+                }
             }),
         };

@@ -355,23 +354,23 @@ impl Timeline {
         // this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`
         // but without taking ownership of the GetVectoredError
         let err = match &err {
-            GetVectoredError::Cancelled => {
-                Err(PageReconstructError::Cancelled)
-            }
+            GetVectoredError::Cancelled => Err(PageReconstructError::Cancelled),
             // TODO: restructure get_vectored API to make this error per-key
             GetVectoredError::MissingKey(err) => {
-                Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}")))
+                Err(PageReconstructError::Other(anyhow::anyhow!(
+                    "whole vectored get request failed because one or more of the requested keys were missing: {err:?}"
+                )))
             }
             // TODO: restructure get_vectored API to make this error per-key
             GetVectoredError::GetReadyAncestorError(err) => {
-                Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}")))
+                Err(PageReconstructError::Other(anyhow::anyhow!(
+                    "whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"
+                )))
             }
             // TODO: restructure get_vectored API to make this error per-key
-            GetVectoredError::Other(err) => {
-                Err(PageReconstructError::Other(
-                    anyhow::anyhow!("whole vectored get request failed: {err:?}"),
-                ))
-            }
+            GetVectoredError::Other(err) => Err(PageReconstructError::Other(
+                anyhow::anyhow!("whole vectored get request failed: {err:?}"),
+            )),
             // TODO: we can prevent this error class by moving this check into the type system
             GetVectoredError::InvalidLsn(e) => {
                 Err(anyhow::anyhow!("invalid LSN: {e:?}").into())

@@ -379,10 +378,7 @@ impl Timeline {
             // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
             // TODO: we can prevent this error class by moving this check into the type system
             GetVectoredError::Oversized(err) => {
-                Err(anyhow::anyhow!(
-                    "batching oversized: {err:?}"
-                )
-                .into())
+                Err(anyhow::anyhow!("batching oversized: {err:?}").into())
             }
         };

@@ -715,7 +711,10 @@ impl Timeline {
         {
             Ok(res) => res,
             Err(PageReconstructError::MissingKey(e)) => {
-                warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e);
+                warn!(
+                    "Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}",
+                    e
+                );
                 // Return that we didn't find any requests smaller than the LSN, and logging the error.
                 return Ok(LsnForTimestamp::Past(min_lsn));
             }

@@ -2464,10 +2463,12 @@ impl DatadirModification<'_> {
         // modifications before ingesting DB create operations, which are the only kind that reads
         // data pages during ingest.
         if cfg!(debug_assertions) {
-            assert!(!self
-                .pending_data_batch
-                .as_ref()
-                .is_some_and(|b| b.updates_key(&key)));
+            assert!(
+                !self
+                    .pending_data_batch
+                    .as_ref()
+                    .is_some_and(|b| b.updates_key(&key))
+            );
         }
     }

@@ -2666,15 +2667,14 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
 #[cfg(test)]
 mod tests {
     use hex_literal::hex;
-    use pageserver_api::{models::ShardParameters, shard::ShardStripeSize};
-    use utils::{
-        id::TimelineId,
-        shard::{ShardCount, ShardNumber},
-    };
+    use pageserver_api::models::ShardParameters;
+    use pageserver_api::shard::ShardStripeSize;
+    use utils::id::TimelineId;
+    use utils::shard::{ShardCount, ShardNumber};

     use super::*;
-    use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION};
+    use crate::DEFAULT_PG_VERSION;
+    use crate::tenant::harness::TenantHarness;

     /// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline
     #[tokio::test]
@@ -73,11 +73,10 @@ impl Statvfs {

 pub mod mock {
     use camino::Utf8Path;
+    pub use pageserver_api::config::statvfs::mock::Behavior;
     use regex::Regex;
     use tracing::log::info;

-    pub use pageserver_api::config::statvfs::mock::Behavior;
-
     pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result<Statvfs> {
         info!("running mocked statvfs");

@@ -85,7 +84,7 @@ pub mod mock {
             Behavior::Success {
                 blocksize,
                 total_blocks,
-                ref name_filter,
+                name_filter,
             } => {
                 let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap();
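
Dropping the `ref` above (like the earlier `ref mut accum_pages` removal in page_service) is a genuine edition-2024 fix, not formatting: under the revised match ergonomics, a pattern that already matches through a reference binds its fields by reference, and spelling out `ref` in that position becomes a hard error. A small sketch with a made-up struct standing in for the config type:

struct Success {
    name_filter: Option<String>, // hypothetical field, not the real config
}

fn main() {
    let behavior = &Success {
        name_filter: Some("tenants".to_owned()),
    };
    match behavior {
        // On edition 2021 this could be written `ref name_filter`; on 2024 the
        // default binding mode is already by-reference, so the explicit `ref`
        // is rejected and the plain binding does the same thing.
        Success { name_filter } => {
            let filter: &Option<String> = name_filter;
            println!("{:?}", filter.as_deref());
        }
    }
}
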
@@ -134,7 +133,7 @@ pub mod mock {
|
|||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
return Err(anyhow::Error::new(e)
|
return Err(anyhow::Error::new(e)
|
||||||
.context(format!("get metadata of {:?}", entry.path())))
|
.context(format!("get metadata of {:?}", entry.path())));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
total += m.len();
|
total += m.len();
|
||||||
|
|||||||
@@ -40,15 +40,12 @@ use std::sync::atomic::{AtomicU64, Ordering};
|
|||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use futures::FutureExt;
|
use futures::FutureExt;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
use tokio::task_local;
|
use tokio::task_local;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
|
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
|
|
||||||
use utils::env;
|
use utils::env;
|
||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
|
|
||||||
|
|||||||
@@ -12,150 +12,99 @@
|
|||||||
//! parent timeline, and the last LSN that has been written to disk.
|
//! parent timeline, and the last LSN that has been written to disk.
|
||||||
//!
|
//!
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use std::collections::hash_map::Entry;
|
||||||
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
|
use std::fmt::{Debug, Display};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::future::Future;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
|
use std::sync::{Arc, Mutex, Weak};
|
||||||
|
use std::time::{Duration, Instant, SystemTime};
|
||||||
|
use std::{fmt, fs};
|
||||||
|
|
||||||
|
use anyhow::{Context, bail};
|
||||||
use arc_swap::ArcSwap;
|
use arc_swap::ArcSwap;
|
||||||
use camino::Utf8Path;
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
use camino::Utf8PathBuf;
|
|
||||||
use chrono::NaiveDateTime;
|
use chrono::NaiveDateTime;
|
||||||
use enumset::EnumSet;
|
use enumset::EnumSet;
|
||||||
use futures::stream::FuturesUnordered;
|
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
|
use futures::stream::FuturesUnordered;
|
||||||
use itertools::Itertools as _;
|
use itertools::Itertools as _;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use pageserver_api::models;
|
use pageserver_api::models;
|
||||||
use pageserver_api::models::CompactInfoResponse;
|
pub use pageserver_api::models::TenantState;
|
||||||
use pageserver_api::models::LsnLease;
|
use pageserver_api::models::{
|
||||||
use pageserver_api::models::TimelineArchivalState;
|
CompactInfoResponse, LsnLease, TimelineArchivalState, TimelineState, TopTenantShardItem,
|
||||||
use pageserver_api::models::TimelineState;
|
WalRedoManagerStatus,
|
||||||
use pageserver_api::models::TopTenantShardItem;
|
};
|
||||||
use pageserver_api::models::WalRedoManagerStatus;
|
use pageserver_api::shard::{ShardIdentity, ShardStripeSize, TenantShardId};
|
||||||
use pageserver_api::shard::ShardIdentity;
|
use remote_storage::{DownloadError, GenericRemoteStorage, TimeoutOrCancel};
|
||||||
use pageserver_api::shard::ShardStripeSize;
|
|
||||||
use pageserver_api::shard::TenantShardId;
|
|
||||||
use remote_storage::DownloadError;
|
|
||||||
use remote_storage::GenericRemoteStorage;
|
|
||||||
use remote_storage::TimeoutOrCancel;
|
|
||||||
use remote_timeline_client::index::GcCompactionState;
|
use remote_timeline_client::index::GcCompactionState;
|
||||||
use remote_timeline_client::manifest::{
|
use remote_timeline_client::manifest::{
|
||||||
OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION,
|
LATEST_TENANT_MANIFEST_VERSION, OffloadedTimelineManifest, TenantManifest,
|
||||||
};
|
};
|
||||||
use remote_timeline_client::UploadQueueNotReadyError;
|
use remote_timeline_client::{
|
||||||
use remote_timeline_client::FAILED_REMOTE_OP_RETRIES;
|
FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError,
|
||||||
use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD;
|
};
|
||||||
use secondary::heatmap::HeatMapTenant;
|
use secondary::heatmap::{HeatMapTenant, HeatMapTimeline};
|
||||||
use secondary::heatmap::HeatMapTimeline;
|
|
||||||
use std::collections::BTreeMap;
|
|
||||||
use std::fmt;
|
|
||||||
use std::future::Future;
|
|
||||||
use std::sync::atomic::AtomicBool;
|
|
||||||
use std::sync::Weak;
|
|
||||||
use std::time::SystemTime;
|
|
||||||
use storage_broker::BrokerClientChannel;
|
use storage_broker::BrokerClientChannel;
|
||||||
use timeline::compaction::CompactionOutcome;
|
use timeline::compaction::{CompactionOutcome, GcCompactionQueue};
|
||||||
use timeline::compaction::GcCompactionQueue;
|
use timeline::offload::{OffloadError, offload_timeline};
|
||||||
use timeline::import_pgdata;
|
use timeline::{
|
||||||
use timeline::offload::offload_timeline;
|
CompactFlags, CompactOptions, CompactionError, PreviousHeatmap, ShutdownMode, import_pgdata,
|
||||||
use timeline::offload::OffloadError;
|
};
|
||||||
use timeline::CompactFlags;
|
|
||||||
use timeline::CompactOptions;
|
|
||||||
use timeline::CompactionError;
|
|
||||||
use timeline::PreviousHeatmap;
|
|
||||||
use timeline::ShutdownMode;
|
|
||||||
use tokio::io::BufReader;
|
use tokio::io::BufReader;
|
||||||
use tokio::sync::watch;
|
use tokio::sync::{Notify, Semaphore, watch};
|
||||||
use tokio::sync::Notify;
|
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
use upload_queue::NotInitialized;
|
use upload_queue::NotInitialized;
|
||||||
use utils::backoff;
|
|
||||||
use utils::circuit_breaker::CircuitBreaker;
|
use utils::circuit_breaker::CircuitBreaker;
|
||||||
use utils::completion;
|
|
||||||
use utils::crashsafe::path_with_suffix_extension;
|
use utils::crashsafe::path_with_suffix_extension;
|
||||||
use utils::failpoint_support;
|
use utils::sync::gate::{Gate, GateGuard};
|
||||||
use utils::fs_ext;
|
use utils::timeout::{TimeoutCancellableError, timeout_cancellable};
|
||||||
use utils::pausable_failpoint;
|
|
||||||
use utils::sync::gate::Gate;
|
|
||||||
use utils::sync::gate::GateGuard;
|
|
||||||
use utils::timeout::timeout_cancellable;
|
|
||||||
use utils::timeout::TimeoutCancellableError;
|
|
||||||
use utils::try_rcu::ArcSwapExt;
|
use utils::try_rcu::ArcSwapExt;
|
||||||
use utils::zstd::create_zst_tarball;
|
use utils::zstd::{create_zst_tarball, extract_zst_tarball};
|
||||||
use utils::zstd::extract_zst_tarball;
|
use utils::{backoff, completion, failpoint_support, fs_ext, pausable_failpoint};
|
||||||
|
|
||||||
use self::config::AttachedLocationConfig;
|
use self::config::{AttachedLocationConfig, AttachmentMode, LocationConf, TenantConf};
|
||||||
use self::config::AttachmentMode;
|
|
||||||
use self::config::LocationConf;
|
|
||||||
use self::config::TenantConf;
|
|
||||||
use self::metadata::TimelineMetadata;
|
use self::metadata::TimelineMetadata;
|
||||||
use self::mgr::GetActiveTenantError;
|
use self::mgr::{GetActiveTenantError, GetTenantError};
|
||||||
use self::mgr::GetTenantError;
|
|
||||||
 use self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest};
 use self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
-use self::timeline::uninit::TimelineCreateGuard;
-use self::timeline::uninit::TimelineExclusionError;
-use self::timeline::uninit::UninitializedTimeline;
-use self::timeline::EvictionTaskTenantState;
-use self::timeline::GcCutoffs;
-use self::timeline::TimelineDeleteProgress;
-use self::timeline::TimelineResources;
-use self::timeline::WaitLsnError;
+use self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, UninitializedTimeline};
+use self::timeline::{
+    EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError,
+};
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::deletion_queue::DeletionQueueClient;
-use crate::deletion_queue::DeletionQueueError;
-use crate::import_datadir;
+use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
 use crate::l0_flush::L0FlushGlobalState;
-use crate::metrics::CONCURRENT_INITDBS;
-use crate::metrics::INITDB_RUN_TIME;
-use crate::metrics::INITDB_SEMAPHORE_ACQUISITION_TIME;
-use crate::metrics::TENANT;
 use crate::metrics::{
-    remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN,
-    TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC,
+    BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS,
+    INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_STATE_METRIC,
+    TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics,
 };
-use crate::task_mgr;
 use crate::task_mgr::TaskKind;
-use crate::tenant::config::LocationMode;
-use crate::tenant::config::TenantConfOpt;
+use crate::tenant::config::{LocationMode, TenantConfOpt};
 use crate::tenant::gc_result::GcResult;
 pub use crate::tenant::remote_timeline_client::index::IndexPart;
-use crate::tenant::remote_timeline_client::remote_initdb_archive_path;
-use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
-use crate::tenant::remote_timeline_client::INITDB_PATH;
+use crate::tenant::remote_timeline_client::{
+    INITDB_PATH, MaybeDeletedIndexPart, remote_initdb_archive_path,
+};
-use crate::tenant::storage_layer::DeltaLayer;
-use crate::tenant::storage_layer::ImageLayer;
+use crate::tenant::storage_layer::{DeltaLayer, ImageLayer};
-use crate::walingest::WalLagCooldown;
-use crate::walredo;
-use crate::InitializationOrder;
-use std::collections::hash_map::Entry;
-use std::collections::HashMap;
-use std::collections::HashSet;
-use std::fmt::Debug;
-use std::fmt::Display;
-use std::fs;
-use std::fs::File;
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::Arc;
-use std::sync::Mutex;
-use std::time::{Duration, Instant};
 
-use crate::span;
 use crate::tenant::timeline::delete::DeleteTimelineFlow;
 use crate::tenant::timeline::uninit::cleanup_timeline_directory;
 use crate::virtual_file::VirtualFile;
+use crate::walingest::WalLagCooldown;
 use crate::walredo::PostgresRedoManager;
-use crate::TEMP_FILE_SUFFIX;
-use once_cell::sync::Lazy;
-pub use pageserver_api::models::TenantState;
-use tokio::sync::Semaphore;
+use crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mgr, walredo};
 
 static INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));
-use utils::{
-    crashsafe,
-    generation::Generation,
-    id::TimelineId,
-    lsn::{Lsn, RecordLsn},
-};
+use utils::crashsafe;
+use utils::generation::Generation;
+use utils::id::TimelineId;
+use utils::lsn::{Lsn, RecordLsn};
 
 pub mod blob_io;
 pub mod block_io;
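The rewritten import block above shows the layout this migration standardizes on: `std` imports first, then external crates, then `crate`-local paths, one `use` per module with braces merged, and each group separated by a blank line. A self-contained sketch of the same shape (module and item names are stand-ins for the pageserver's own, and `once_cell` is an assumed dependency):

use std::collections::HashMap;
use std::sync::Arc;

use once_cell::sync::Lazy;

use crate::metrics::TENANT;

mod metrics {
    // Stand-in for a real metrics constant; illustrative only.
    pub static TENANT: &str = "tenant";
}

static REGISTRY: Lazy<Arc<HashMap<String, &'static str>>> = Lazy::new(|| {
    let mut m = HashMap::new();
    m.insert("name".to_string(), TENANT);
    Arc::new(m)
});

fn main() {
    println!("{}", REGISTRY["name"]);
}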
@@ -184,9 +133,9 @@ mod gc_block;
 mod gc_result;
 pub(crate) mod throttle;
 
-pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 pub(crate) use timeline::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
 
+pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 // re-export for use in walreceiver
 pub use crate::tenant::timeline::WalReceiverInfo;
 
@@ -251,7 +200,9 @@ impl AttachedTenantConf {
                 Ok(Self::new(location_conf.tenant_conf, *attach_conf))
             }
             LocationMode::Secondary(_) => {
-                anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode")
+                anyhow::bail!(
+                    "Attempted to construct AttachedTenantConf from a LocationConf in secondary mode"
+                )
             }
         }
     }
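Most of the hunks that follow are pure `cargo fmt` churn under the new style edition: a macro call whose only argument is an over-width string literal gets broken so the literal sits on its own indented line instead of overshooting the line limit. A hedged before/after sketch of that shape (`before`, `after`, and the `secondary` flag are illustrative, with `anyhow` as an assumed dependency):

// Shape prior to the reformat: the call stays on one over-long line.
fn before(secondary: bool) -> anyhow::Result<()> {
    if secondary {
        anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode")
    }
    Ok(())
}

// Shape after the reformat: the literal moves onto its own line.
fn after(secondary: bool) -> anyhow::Result<()> {
    if secondary {
        anyhow::bail!(
            "Attempted to construct AttachedTenantConf from a LocationConf in secondary mode"
        )
    }
    Ok(())
}

fn main() {
    assert!(before(true).is_err());
    assert!(after(true).is_err());
}

The same rewrap recurs below for `panic!`, `unreachable!`, `info!`, `error!`, and `warn!` calls.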
@@ -465,7 +416,9 @@ impl WalredoManagerId {
         static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
         let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
         if id == 0 {
-            panic!("WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique");
+            panic!(
+                "WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique"
+            );
         }
         Self(id)
     }
@@ -1229,7 +1182,9 @@ impl Tenant {
         match cause {
             LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
             LoadTimelineCause::ImportPgdata { .. } => {
-                unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3")
+                unreachable!(
+                    "ImportPgdata should not be reloading timeline import is done and persisted as such in s3"
+                )
             }
         }
         let mut guard = self.timelines_creating.lock().unwrap();
@@ -1262,8 +1217,8 @@ impl Tenant {
             // We should never try and load the same timeline twice during startup
             Entry::Occupied(_) => {
                 unreachable!(
                     "Timeline {tenant_id}/{timeline_id} already exists in the tenant map"
                 );
             }
             Entry::Vacant(v) => {
                 v.insert(Arc::clone(&timeline));
@@ -1657,7 +1612,9 @@ impl Tenant {
         failpoint_support::sleep_millis_async!("before-attaching-tenant");
 
         let Some(preload) = preload else {
-            anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624");
+            anyhow::bail!(
+                "local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624"
+            );
         };
 
         let mut offloaded_timeline_ids = HashSet::new();
@@ -2041,7 +1998,7 @@ impl Tenant {
         remote_storage: GenericRemoteStorage,
         previous_heatmap: Option<PreviousHeatmap>,
         cancel: CancellationToken,
-    ) -> impl Future<Output = TimelinePreload> {
+    ) -> impl Future<Output = TimelinePreload> + use<> {
         let client = self.build_timeline_client(timeline_id, remote_storage);
         async move {
             debug_assert_current_span_has_tenant_and_timeline_id();
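The `+ use<>` bound above is one of the few substantive edition changes in this file. In Rust 2024, return-position `impl Trait` captures every in-scope lifetime by default, so this future would otherwise be tied to the `&self` borrow and could no longer be driven as an independent task. The empty precise-capturing bound `use<>` opts back out by declaring that the opaque type captures nothing. A minimal sketch of the rule under illustrative names (`Preloader`, `preload`), with `tokio` as an assumed dependency:

use std::future::Future;

struct Preloader {
    base_delay_ms: u64,
}

impl Preloader {
    // Without `+ use<>`, the 2024 edition would make this future capture the
    // lifetime of `&self`, so it could not outlive the borrow. `use<>` is
    // legal here because the async block only moves a copied integer.
    fn preload(&self) -> impl Future<Output = u64> + use<> {
        let delay = self.base_delay_ms; // copy out while `self` is borrowed
        async move { delay * 2 }
    }
}

#[tokio::main]
async fn main() {
    let fut = {
        let p = Preloader { base_delay_ms: 21 };
        p.preload()
        // `p` is dropped here; the future stays valid thanks to `use<>`.
    };
    assert_eq!(fut.await, 42);
}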
@@ -2736,7 +2693,9 @@ impl Tenant {
                 timeline
             }
             CreateTimelineResult::ImportSpawned(timeline) => {
-                info!("import task spawned, timeline will become visible and activated once the import is done");
+                info!(
+                    "import task spawned, timeline will become visible and activated once the import is done"
+                );
                 timeline
             }
         };
@@ -2782,7 +2741,7 @@ impl Tenant {
         {
             StartCreatingTimelineResult::CreateGuard(guard) => guard,
             StartCreatingTimelineResult::Idempotent(timeline) => {
-                return Ok(CreateTimelineResult::Idempotent(timeline))
+                return Ok(CreateTimelineResult::Idempotent(timeline));
             }
         };
 
@@ -2916,7 +2875,9 @@ impl Tenant {
         let index_part = match index_part {
             MaybeDeletedIndexPart::Deleted(_) => {
                 // likely concurrent delete call, cplane should prevent this
-                anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but")
+                anyhow::bail!(
+                    "index part says deleted but we are not done creating yet, this should not happen but"
+                )
             }
             MaybeDeletedIndexPart::IndexPart(p) => p,
         };
@@ -3907,7 +3868,9 @@ where
     if !later.is_empty() {
        for (missing_id, orphan_ids) in later {
            for (orphan_id, _) in orphan_ids {
-                error!("could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded");
+                error!(
+                    "could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded"
+                );
            }
        }
        bail!("could not load tenant because some timelines are missing ancestors");
@@ -4827,7 +4790,10 @@ impl Tenant {
         let gc_info = src_timeline.gc_info.read().unwrap();
         let planned_cutoff = gc_info.min_cutoff();
         if gc_info.lsn_covered_by_lease(start_lsn) {
-            tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *applied_gc_cutoff_lsn);
+            tracing::info!(
+                "skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease",
+                *applied_gc_cutoff_lsn
+            );
         } else {
             src_timeline
                 .check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn)
@@ -4973,7 +4939,9 @@ impl Tenant {
             }
             // Idempotent <=> CreateTimelineIdempotency is identical
             (x, y) if x == y => {
-                info!("timeline already exists and idempotency matches, succeeding request");
+                info!(
+                    "timeline already exists and idempotency matches, succeeding request"
+                );
                 // fallthrough
             }
             (_, _) => {
@@ -5055,7 +5023,7 @@ impl Tenant {
         {
             StartCreatingTimelineResult::CreateGuard(guard) => guard,
             StartCreatingTimelineResult::Idempotent(timeline) => {
-                return Ok(CreateTimelineResult::Idempotent(timeline))
+                return Ok(CreateTimelineResult::Idempotent(timeline));
             }
         };
 
@@ -5260,7 +5228,9 @@ impl Tenant {
             .create_timeline_files(&create_guard.timeline_path)
             .await
         {
-            error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}");
+            error!(
+                "Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}"
+            );
             cleanup_timeline_directory(create_guard);
             return Err(e);
         }
@@ -5625,20 +5595,19 @@ pub async fn dump_layerfile_from_path(
 #[cfg(test)]
 pub(crate) mod harness {
     use bytes::{Bytes, BytesMut};
+    use hex_literal::hex;
     use once_cell::sync::OnceCell;
+    use pageserver_api::key::Key;
     use pageserver_api::models::ShardParameters;
+    use pageserver_api::record::NeonWalRecord;
     use pageserver_api::shard::ShardIndex;
+    use utils::id::TenantId;
     use utils::logging;
 
+    use super::*;
     use crate::deletion_queue::mock::MockDeletionQueue;
     use crate::l0_flush::L0FlushConfig;
     use crate::walredo::apply_neon;
-    use pageserver_api::key::Key;
-    use pageserver_api::record::NeonWalRecord;
-
-    use super::*;
-    use hex_literal::hex;
-    use utils::id::TenantId;
 
     pub const TIMELINE_ID: TimelineId =
         TimelineId::from_array(hex!("11223344556677881122334455667788"));
@@ -5919,34 +5888,34 @@ pub(crate) mod harness {
 mod tests {
     use std::collections::{BTreeMap, BTreeSet};
 
-    use super::*;
-    use crate::keyspace::KeySpaceAccum;
-    use crate::tenant::harness::*;
-    use crate::tenant::timeline::CompactFlags;
-    use crate::DEFAULT_PG_VERSION;
     use bytes::{Bytes, BytesMut};
     use hex_literal::hex;
     use itertools::Itertools;
-    use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
+    #[cfg(feature = "testing")]
+    use models::CompactLsnRange;
+    use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
     use pageserver_api::keyspace::KeySpace;
     use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
+    #[cfg(feature = "testing")]
+    use pageserver_api::record::NeonWalRecord;
     use pageserver_api::value::Value;
     use pageserver_compaction::helpers::overlaps_with;
-    use rand::{thread_rng, Rng};
+    use rand::{Rng, thread_rng};
    use storage_layer::{IoConcurrency, PersistentLayerKey};
    use tests::storage_layer::ValuesReconstructState;
    use tests::timeline::{GetVectoredError, ShutdownMode};
+    #[cfg(feature = "testing")]
+    use timeline::GcInfo;
+    #[cfg(feature = "testing")]
+    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
    use timeline::{CompactOptions, DeltaLayerTestDesc};
    use utils::id::TenantId;
 
-    #[cfg(feature = "testing")]
-    use models::CompactLsnRange;
-    #[cfg(feature = "testing")]
-    use pageserver_api::record::NeonWalRecord;
-    #[cfg(feature = "testing")]
-    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
-    #[cfg(feature = "testing")]
-    use timeline::GcInfo;
+    use super::*;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::keyspace::KeySpaceAccum;
+    use crate::tenant::harness::*;
+    use crate::tenant::timeline::CompactFlags;
 
     static TEST_KEY: Lazy<Key> =
         Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));
@@ -6196,11 +6165,12 @@ mod tests {
             panic!("wrong error type")
         };
         assert!(err.to_string().contains("invalid branch start lsn"));
-        assert!(err
-            .source()
-            .unwrap()
-            .to_string()
-            .contains("we might've already garbage collected needed data"))
+        assert!(
+            err.source()
+                .unwrap()
+                .to_string()
+                .contains("we might've already garbage collected needed data")
+        )
     }
 }
 
@@ -6229,11 +6199,12 @@ mod tests {
             panic!("wrong error type");
         };
         assert!(&err.to_string().contains("invalid branch start lsn"));
-        assert!(&err
-            .source()
-            .unwrap()
-            .to_string()
-            .contains("is earlier than latest GC cutoff"));
+        assert!(
+            &err.source()
+                .unwrap()
+                .to_string()
+                .contains("is earlier than latest GC cutoff")
+        );
     }
 }
 
@@ -7542,10 +7513,12 @@ mod tests {
            }
        }
 
-        assert!(!harness
-            .conf
-            .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID)
-            .exists());
+        assert!(
+            !harness
+                .conf
+                .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID)
+                .exists()
+        );
 
        Ok(())
    }
@@ -7746,7 +7719,10 @@ mod tests {
 
        let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();
 
-        assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}");
+        assert!(
+            after_num_l0_delta_files < before_num_l0_delta_files,
+            "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}"
+        );
 
        assert_eq!(
            tline.get(test_key, lsn, &ctx).await?,
@@ -7913,7 +7889,10 @@ mod tests {
        let (_, after_delta_file_accessed) =
            scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
                .await?;
-        assert!(after_delta_file_accessed < before_delta_file_accessed, "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}");
+        assert!(
+            after_delta_file_accessed < before_delta_file_accessed,
+            "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}"
+        );
        // Given that we already produced an image layer, there should be no delta layer needed for the scan, but still setting a low threshold there for unforeseen circumstances.
        assert!(
            after_delta_file_accessed <= 2,
@@ -7967,10 +7946,12 @@ mod tests {
            get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?,
            Some(test_img("data key 1"))
        );
-        assert!(get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx)
-            .await
-            .unwrap_err()
-            .is_missing_key_error());
+        assert!(
+            get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx)
+                .await
+                .unwrap_err()
+                .is_missing_key_error()
+        );
        assert!(
            get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx)
                .await
@@ -14,6 +14,9 @@
 //! len < 128: 0XXXXXXX
 //! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX
 //!
+use std::cmp::min;
+use std::io::{Error, ErrorKind};
+
 use async_compression::Level;
 use bytes::{BufMut, BytesMut};
 use pageserver_api::models::ImageCompressionAlgorithm;
@@ -24,10 +27,8 @@ use tracing::warn;
 use crate::context::RequestContext;
 use crate::page_cache::PAGE_SZ;
 use crate::tenant::block_io::BlockCursor;
-use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
 use crate::virtual_file::VirtualFile;
-use std::cmp::min;
-use std::io::{Error, ErrorKind};
+use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
 
 #[derive(Copy, Clone, Debug)]
 pub struct CompressionInfo {
@@ -414,12 +415,15 @@ impl BlobWriter<false> {
 
 #[cfg(test)]
 pub(crate) mod tests {
-    use super::*;
-    use crate::{context::DownloadBehavior, task_mgr::TaskKind, tenant::block_io::BlockReaderRef};
     use camino::Utf8PathBuf;
     use camino_tempfile::Utf8TempDir;
     use rand::{Rng, SeedableRng};
 
+    use super::*;
+    use crate::context::DownloadBehavior;
+    use crate::task_mgr::TaskKind;
+    use crate::tenant::block_io::BlockReaderRef;
+
     async fn round_trip_test<const BUFFERED: bool>(blobs: &[Vec<u8>]) -> Result<(), Error> {
         round_trip_test_compressed::<BUFFERED>(blobs, false).await
     }
@@ -486,7 +490,7 @@ pub(crate) mod tests {
 
     pub(crate) fn random_array(len: usize) -> Vec<u8> {
         let mut rng = rand::thread_rng();
-        (0..len).map(|_| rng.gen()).collect::<_>()
+        (0..len).map(|_| rng.r#gen()).collect::<_>()
     }
 
     #[tokio::test]
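`gen` is a reserved keyword in Rust 2024 (held for future `gen` blocks), so calls to rand's `Rng::gen` must now go through the raw-identifier form `r#gen`, as in the hunk above and several below. A minimal sketch, assuming rand 0.8 (later rand releases sidestep the collision by renaming the method):

use rand::Rng;

fn main() {
    let mut rng = rand::thread_rng();
    // Edition 2021 accepted `rng.gen::<u8>()`; in edition 2024 `gen` is a
    // keyword, so the method must be called through a raw identifier:
    let byte: u8 = rng.r#gen();
    // Longer names like `gen_range` are unaffected.
    let dice = rng.gen_range(1..=6);
    println!("byte={byte} dice={dice}");
}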
@@ -544,9 +548,9 @@ pub(crate) mod tests {
         let mut rng = rand::rngs::StdRng::seed_from_u64(42);
         let blobs = (0..1024)
             .map(|_| {
-                let mut sz: u16 = rng.gen();
+                let mut sz: u16 = rng.r#gen();
                 // Make 50% of the arrays small
-                if rng.gen() {
+                if rng.r#gen() {
                     sz &= 63;
                 }
                 random_array(sz.into())
@@ -2,14 +2,16 @@
 //! Low-level Block-oriented I/O functions
 //!
 
+use std::ops::Deref;
+
+use bytes::Bytes;
+
 use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
 use crate::context::RequestContext;
-use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ};
+use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult};
 #[cfg(test)]
 use crate::virtual_file::IoBufferMut;
 use crate::virtual_file::VirtualFile;
-use bytes::Bytes;
-use std::ops::Deref;
 
 /// This is implemented by anything that can read 8 kB (PAGE_SZ)
 /// blocks, using the page cache
@@ -63,9 +63,9 @@ pub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {
             && overlaps_with(&layer.key_range, &other_layer.key_range)
         {
             let err = format!(
                 "layer violates the layer map LSN split assumption: layer {} intersects with layer {}",
                 layer, other_layer
             );
             return Some(err);
         }
     }
@@ -8,16 +8,17 @@
 //! We cannot use global or default config instead, because wrong settings
 //! may lead to a data loss.
 //!
+use std::num::NonZeroU64;
+use std::time::Duration;
+
 pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf;
-use pageserver_api::models::CompactionAlgorithmSettings;
-use pageserver_api::models::EvictionPolicy;
-use pageserver_api::models::{self, TenantConfigPatch};
+use pageserver_api::models::{
+    self, CompactionAlgorithmSettings, EvictionPolicy, TenantConfigPatch,
+};
 use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
 use serde::de::IntoDeserializer;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use std::num::NonZeroU64;
-use std::time::Duration;
 use utils::generation::Generation;
 use utils::postgres_client::PostgresClientProtocol;
@@ -739,9 +740,10 @@ impl From<TenantConfOpt> for models::TenantConfig {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
     use models::TenantConfig;
 
+    use super::*;
+
     #[test]
     fn de_serializing_pageserver_config_omits_empty_values() {
         let small_conf = TenantConfOpt {
@@ -18,27 +18,23 @@
 //! - An Iterator interface would be more convenient for the callers than the
 //! 'visit' function
 //!
+use std::cmp::Ordering;
+use std::iter::Rev;
+use std::ops::{Range, RangeInclusive};
+use std::{io, result};
+
 use async_stream::try_stream;
-use byteorder::{ReadBytesExt, BE};
+use byteorder::{BE, ReadBytesExt};
 use bytes::{BufMut, Bytes, BytesMut};
 use either::Either;
 use futures::{Stream, StreamExt};
 use hex;
-use std::{
-    cmp::Ordering,
-    io,
-    iter::Rev,
-    ops::{Range, RangeInclusive},
-    result,
-};
 use thiserror::Error;
 use tracing::error;
 
-use crate::{
-    context::{DownloadBehavior, RequestContext},
-    task_mgr::TaskKind,
-    tenant::block_io::{BlockReader, BlockWriter},
-};
+use crate::context::{DownloadBehavior, RequestContext};
+use crate::task_mgr::TaskKind;
+use crate::tenant::block_io::{BlockReader, BlockWriter};
 
 // The maximum size of a value stored in the B-tree. 5 bytes is enough currently.
 pub const VALUE_SZ: usize = 5;
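The `use byteorder::{BE, ReadBytesExt};` line above also reflects the 2024 style edition's new sort order inside braces: names now compare with uppercase sorting before lowercase, which is why `BE` precedes `ReadBytesExt` here, `Rng` precedes `thread_rng` in the test imports earlier, and `PAGE_SZ` precedes `PageReadGuard` in `block_io`. A compilable sketch assuming the `byteorder` and `rand` 0.8 crates:

use byteorder::{BE, ByteOrder}; // `BE` sorts before `ByteOrder` ('E' < 'y')
use rand::{Rng, thread_rng};    // `Rng` sorts before `thread_rng`

fn main() {
    let n: u16 = thread_rng().gen_range(0..=u16::MAX);
    let mut buf = [0u8; 2];
    BE::write_u16(&mut buf, n);
    println!("{n} -> {buf:?}");
}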
@@ -833,12 +829,14 @@ impl<const L: usize> BuildNode<L> {
 
 #[cfg(test)]
 pub(crate) mod tests {
-    use super::*;
-    use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef};
-    use rand::Rng;
     use std::collections::BTreeMap;
     use std::sync::atomic::{AtomicUsize, Ordering};
 
+    use rand::Rng;
+
+    use super::*;
+    use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef};
 
     #[derive(Clone, Default)]
     pub(crate) struct TestDisk {
         blocks: Vec<Bytes>,
@@ -1115,7 +1113,7 @@ pub(crate) mod tests {
 
         // Test get() operations on random keys, most of which will not exist
         for _ in 0..100000 {
-            let key_int = rand::thread_rng().gen::<u128>();
+            let key_int = rand::thread_rng().r#gen::<u128>();
             let search_key = u128::to_be_bytes(key_int);
             assert!(reader.get(&search_key, &ctx).await? == all_data.get(&key_int).cloned());
         }
@@ -1,6 +1,17 @@
 //! Implementation of append-only file data structure
 //! used to keep in-memory layers spilled on disk.
 
+use std::io;
+use std::sync::Arc;
+use std::sync::atomic::AtomicU64;
+
+use camino::Utf8PathBuf;
+use num_traits::Num;
+use pageserver_api::shard::TenantShardId;
+use tokio_epoll_uring::{BoundedBuf, Slice};
+use tracing::error;
+use utils::id::TimelineId;
+
 use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
 use crate::config::PageServerConf;
 use crate::context::RequestContext;
@@ -9,17 +20,7 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
 use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
 use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
 use crate::virtual_file::owned_buffers_io::write::Buffer;
-use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile};
-use camino::Utf8PathBuf;
-use num_traits::Num;
-use pageserver_api::shard::TenantShardId;
-use tokio_epoll_uring::{BoundedBuf, Slice};
-use tracing::error;
-
-use std::io;
-use std::sync::atomic::AtomicU64;
-use std::sync::Arc;
-use utils::id::TimelineId;
+use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};
 
 pub struct EphemeralFile {
     _tenant_shard_id: TenantShardId,
@@ -319,13 +320,14 @@ pub fn is_ephemeral_file(filename: &str) -> bool {
 
 #[cfg(test)]
 mod tests {
+    use std::fs;
+    use std::str::FromStr;
+
     use rand::Rng;
 
     use super::*;
     use crate::context::DownloadBehavior;
     use crate::task_mgr::TaskKind;
-    use std::fs;
-    use std::str::FromStr;
 
     fn harness(
         test_name: &str,
@@ -1,4 +1,5 @@
-use std::{collections::HashMap, sync::Arc};
+use std::collections::HashMap;
+use std::sync::Arc;
 
 use utils::id::TimelineId;
 
@@ -1,8 +1,9 @@
-use anyhow::Result;
-use serde::Serialize;
 use std::ops::AddAssign;
 use std::time::Duration;
 
+use anyhow::Result;
+use serde::Serialize;
+
 ///
 /// Result of performing GC
 ///
@@ -46,24 +46,24 @@
 mod historic_layer_coverage;
 mod layer_coverage;
 
-use crate::context::RequestContext;
-use crate::keyspace::KeyPartitioning;
-use crate::tenant::storage_layer::InMemoryLayer;
-use anyhow::Result;
-use pageserver_api::key::Key;
-use pageserver_api::keyspace::{KeySpace, KeySpaceAccum};
-use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze};
 use std::collections::{HashMap, VecDeque};
 use std::iter::Peekable;
 use std::ops::Range;
 use std::sync::Arc;
 
+use anyhow::Result;
+use historic_layer_coverage::BufferedHistoricLayerCoverage;
+pub use historic_layer_coverage::LayerKey;
+use pageserver_api::key::Key;
+use pageserver_api::keyspace::{KeySpace, KeySpaceAccum};
+use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze};
 use tokio::sync::watch;
 use utils::lsn::Lsn;
 
-use historic_layer_coverage::BufferedHistoricLayerCoverage;
-pub use historic_layer_coverage::LayerKey;
-
 use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc};
+use crate::context::RequestContext;
+use crate::keyspace::KeyPartitioning;
+use crate::tenant::storage_layer::InMemoryLayer;
 
 ///
 /// LayerMap tracks what layers exist on a timeline.
@@ -1066,18 +1066,17 @@ impl LayerMap {
 
 #[cfg(test)]
 mod tests {
-    use crate::tenant::{storage_layer::LayerName, IndexPart};
-    use pageserver_api::{
-        key::DBDIR_KEY,
-        keyspace::{KeySpace, KeySpaceRandomAccum},
-    };
-    use std::{collections::HashMap, path::PathBuf};
-    use utils::{
-        id::{TenantId, TimelineId},
-        shard::TenantShardId,
-    };
+    use std::collections::HashMap;
+    use std::path::PathBuf;
+
+    use pageserver_api::key::DBDIR_KEY;
+    use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
+    use utils::id::{TenantId, TimelineId};
+    use utils::shard::TenantShardId;
 
     use super::*;
+    use crate::tenant::IndexPart;
+    use crate::tenant::storage_layer::LayerName;
 
     #[derive(Clone)]
     struct LayerDesc {
@@ -1417,9 +1416,11 @@ mod tests {
         assert!(!shadow.ranges.is_empty());
 
         // At least some layers should be marked covered
-        assert!(layer_visibilities
-            .iter()
-            .any(|i| matches!(i.1, LayerVisibilityHint::Covered)));
+        assert!(
+            layer_visibilities
+                .iter()
+                .any(|i| matches!(i.1, LayerVisibilityHint::Covered))
+        );
 
         let layer_visibilities = layer_visibilities.into_iter().collect::<HashMap<_, _>>();
 
@@ -3,9 +3,8 @@ use std::ops::Range;
 
 use tracing::info;
 
-use crate::tenant::storage_layer::PersistentLayerDesc;
-
 use super::layer_coverage::LayerCoverageTuple;
+use crate::tenant::storage_layer::PersistentLayerDesc;
 
 /// Layers in this module are identified and indexed by this data.
 ///
@@ -19,8 +19,9 @@
 
 use anyhow::ensure;
 use serde::{Deserialize, Serialize};
-use utils::bin_ser::SerializeError;
-use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
+use utils::bin_ser::{BeSer, SerializeError};
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
 
 /// Use special format number to enable backward compatibility.
 const METADATA_FORMAT_VERSION: u16 = 4;
@@ -345,9 +346,10 @@ impl TimelineMetadata {
 }
 
 pub(crate) mod modern_serde {
-    use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};
     use serde::{Deserialize, Serialize};
 
+    use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};
+
     pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<TimelineMetadata, D::Error>
     where
         D: serde::de::Deserializer<'de>,
@@ -1,34 +1,42 @@
 //! This module acts as a switchboard to access different repositories managed by this
 //! page server.
 
-use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
-use futures::StreamExt;
-use itertools::Itertools;
-use pageserver_api::key::Key;
-use pageserver_api::models::LocationConfigMode;
-use pageserver_api::shard::{
-    ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
-};
-use pageserver_api::upcall_api::ReAttachResponseTenant;
-use rand::{distributions::Alphanumeric, Rng};
-use remote_storage::TimeoutOrCancel;
 use std::borrow::Cow;
 use std::cmp::Ordering;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::ops::Deref;
 use std::sync::Arc;
 use std::time::Duration;
-use sysinfo::SystemExt;
-use tokio::fs;
 
 use anyhow::Context;
+use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
+use futures::StreamExt;
+use itertools::Itertools;
 use once_cell::sync::Lazy;
+use pageserver_api::key::Key;
+use pageserver_api::models::LocationConfigMode;
+use pageserver_api::shard::{
+    ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
+};
+use pageserver_api::upcall_api::ReAttachResponseTenant;
+use rand::Rng;
+use rand::distributions::Alphanumeric;
+use remote_storage::TimeoutOrCancel;
+use sysinfo::SystemExt;
+use tokio::fs;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
+use utils::crashsafe::path_with_suffix_extension;
+use utils::fs_ext::PathExt;
+use utils::generation::Generation;
+use utils::id::{TenantId, TimelineId};
 use utils::{backoff, completion, crashsafe};
 
+use super::remote_timeline_client::remote_tenant_path;
+use super::secondary::SecondaryTenant;
+use super::timeline::detach_ancestor::{self, PreparedTimelineDetach};
+use super::{GlobalShutDown, TenantSharedResources};
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::controller_upcall_client::{
@@ -37,7 +45,7 @@ use crate::controller_upcall_client::{
 use crate::deletion_queue::DeletionQueueClient;
 use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
 use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
-use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
+use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind};
 use crate::tenant::config::{
     AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
 };
@@ -48,16 +56,6 @@ use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Ten
 use crate::virtual_file::MaybeFatalIo;
 use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
 
-use utils::crashsafe::path_with_suffix_extension;
-use utils::fs_ext::PathExt;
-use utils::generation::Generation;
-use utils::id::{TenantId, TimelineId};
-
-use super::remote_timeline_client::remote_tenant_path;
-use super::secondary::SecondaryTenant;
-use super::timeline::detach_ancestor::{self, PreparedTimelineDetach};
-use super::{GlobalShutDown, TenantSharedResources};
-
 /// For a tenant that appears in TenantsMap, it may either be
 /// - `Attached`: has a full Tenant object, is elegible to service
 ///   reads and ingest WAL.
@@ -140,7 +138,7 @@ impl TenantStartupMode {
     /// If this returns None, the re-attach struct is in an invalid state and
     /// should be ignored in the response.
     fn from_reattach_tenant(rart: ReAttachResponseTenant) -> Option<Self> {
-        match (rart.mode, rart.gen) {
+        match (rart.mode, rart.r#gen) {
             (LocationConfigMode::Detached, _) => None,
             (LocationConfigMode::Secondary, _) => Some(Self::Secondary),
             (LocationConfigMode::AttachedMulti, Some(g)) => {
@@ -376,7 +374,7 @@ async fn init_load_generations(
                 TenantStartupMode::Attached((_mode, generation)) => Some(generation),
                 TenantStartupMode::Secondary => None,
             }
-            .map(|gen| (*id, *gen))
+            .map(|gen_| (*id, *gen_))
         })
         .collect();
     resources.deletion_queue_client.recover(attached_tenants)?;
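The two hunks above show both available fixes for identifiers that collide with the new `gen` keyword: escape the name as a raw identifier (`rart.r#gen`), or simply rename it (`|gen|` becomes `|gen_|`). A minimal sketch with illustrative names (`ReAttach` stands in for the real re-attach response type):

struct ReAttach {
    r#gen: Option<u32>, // a field named `gen` must be spelled `r#gen` in edition 2024
}

fn main() {
    let rart = ReAttach { r#gen: Some(7) };

    // 1. Raw identifier: keep the name, escape the keyword.
    if let Some(g) = rart.r#gen {
        println!("generation {g}");
    }

    // 2. Rename: closure parameters can just pick a non-keyword name.
    let gens = [1u32, 2, 3];
    let doubled: Vec<u32> = gens.iter().map(|gen_| gen_ * 2).collect();
    println!("{doubled:?}");
}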
@@ -502,7 +500,9 @@ pub async fn init_tenant_mgr(
|
|||||||
.total_memory();
|
.total_memory();
|
||||||
let max_ephemeral_layer_bytes =
|
let max_ephemeral_layer_bytes =
|
||||||
conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024);
|
conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024);
|
||||||
tracing::info!("Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory");
|
tracing::info!(
|
||||||
|
"Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory"
|
||||||
|
);
|
||||||
inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store(
|
inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store(
|
||||||
max_ephemeral_layer_bytes,
|
max_ephemeral_layer_bytes,
|
||||||
std::sync::atomic::Ordering::Relaxed,
|
std::sync::atomic::Ordering::Relaxed,
|
||||||
@@ -700,10 +700,11 @@ fn tenant_spawn(
|
|||||||
// to avoid impacting prod runtime performance.
|
// to avoid impacting prod runtime performance.
|
||||||
assert!(!crate::is_temporary(tenant_path));
|
assert!(!crate::is_temporary(tenant_path));
|
||||||
debug_assert!(tenant_path.is_dir());
|
debug_assert!(tenant_path.is_dir());
|
||||||
debug_assert!(conf
|
debug_assert!(
|
||||||
.tenant_location_config_path(&tenant_shard_id)
|
conf.tenant_location_config_path(&tenant_shard_id)
|
||||||
.try_exists()
|
.try_exists()
|
||||||
.unwrap());
|
.unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
Tenant::spawn(
|
Tenant::spawn(
|
||||||
conf,
|
conf,
|
||||||
@@ -791,7 +792,9 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
|
|||||||
(total_in_progress, total_attached)
|
(total_in_progress, total_attached)
|
||||||
}
|
}
|
||||||
TenantsMap::ShuttingDown(_) => {
|
TenantsMap::ShuttingDown(_) => {
|
||||||
error!("already shutting down, this function isn't supposed to be called more than once");
|
error!(
|
||||||
|
"already shutting down, this function isn't supposed to be called more than once"
|
||||||
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1016,9 +1019,9 @@ impl TenantManager {
|
|||||||
Ok(Ok(_)) => return Ok(Some(tenant)),
|
Ok(Ok(_)) => return Ok(Some(tenant)),
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
timeout_ms = flush_timeout.as_millis(),
|
timeout_ms = flush_timeout.as_millis(),
|
||||||
"Timed out waiting for flush to remote storage, proceeding anyway."
|
"Timed out waiting for flush to remote storage, proceeding anyway."
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1194,7 +1197,9 @@ impl TenantManager {
|
|||||||
}
|
}
|
||||||
TenantSlot::Attached(tenant) => {
|
TenantSlot::Attached(tenant) => {
|
||||||
let (_guard, progress) = utils::completion::channel();
|
let (_guard, progress) = utils::completion::channel();
|
||||||
info!("Shutting down just-spawned tenant, because tenant manager is shut down");
|
info!(
|
||||||
|
"Shutting down just-spawned tenant, because tenant manager is shut down"
|
||||||
|
);
|
||||||
match tenant.shutdown(progress, ShutdownMode::Hard).await {
|
match tenant.shutdown(progress, ShutdownMode::Hard).await {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
info!("Finished shutting down just-spawned tenant");
|
info!("Finished shutting down just-spawned tenant");
|
||||||
@@ -1784,7 +1789,7 @@ impl TenantManager {
|
|||||||
_ => {
|
_ => {
|
||||||
return Err(anyhow::anyhow!(e).context(format!(
|
return Err(anyhow::anyhow!(e).context(format!(
|
||||||
"Hard linking {relative_layer} into {child_prefix}"
|
"Hard linking {relative_layer} into {child_prefix}"
|
||||||
)))
|
)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2025,8 +2030,8 @@ impl TenantManager {
|
|||||||
.wait_to_become_active(std::time::Duration::from_secs(9999))
|
.wait_to_become_active(std::time::Duration::from_secs(9999))
|
||||||
.await
|
.await
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
use pageserver_api::models::TenantState;
|
|
||||||
use GetActiveTenantError::{Cancelled, WillNotBecomeActive};
|
use GetActiveTenantError::{Cancelled, WillNotBecomeActive};
|
||||||
|
use pageserver_api::models::TenantState;
|
||||||
match e {
|
match e {
|
||||||
Cancelled | WillNotBecomeActive(TenantState::Stopping { .. }) => {
|
Cancelled | WillNotBecomeActive(TenantState::Stopping { .. }) => {
|
||||||
Error::ShuttingDown
|
Error::ShuttingDown
|
||||||
@@ -2089,7 +2094,7 @@ impl TenantManager {
|
|||||||
|
|
||||||
match selector {
|
match selector {
|
||||||
ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
|
ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
|
||||||
return ShardResolveResult::Found(tenant.clone())
|
return ShardResolveResult::Found(tenant.clone());
|
||||||
}
|
}
|
||||||
ShardSelector::Page(key) => {
|
ShardSelector::Page(key) => {
|
||||||
// First slot we see for this tenant, calculate the expected shard number
|
// First slot we see for this tenant, calculate the expected shard number
|
||||||
@@ -2486,7 +2491,7 @@ impl SlotGuard {
|
|||||||
TenantsMap::Initializing => {
|
TenantsMap::Initializing => {
|
||||||
return Err(TenantSlotUpsertError::MapState(
|
return Err(TenantSlotUpsertError::MapState(
|
||||||
TenantMapError::StillInitializing,
|
TenantMapError::StillInitializing,
|
||||||
))
|
));
|
||||||
}
|
}
|
||||||
TenantsMap::ShuttingDown(_) => {
|
TenantsMap::ShuttingDown(_) => {
|
||||||
return Err(TenantSlotUpsertError::ShuttingDown((
|
return Err(TenantSlotUpsertError::ShuttingDown((
|
||||||
@@ -2815,21 +2820,22 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
use {
|
use http_utils::error::ApiError;
|
||||||
crate::tenant::gc_result::GcResult, http_utils::error::ApiError,
|
use pageserver_api::models::TimelineGcRequest;
|
||||||
pageserver_api::models::TimelineGcRequest,
|
|
||||||
};
|
use crate::tenant::gc_result::GcResult;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use tracing::Instrument;
|
use tracing::Instrument;
|
||||||
|
|
||||||
|
use super::super::harness::TenantHarness;
|
||||||
|
use super::TenantsMap;
|
||||||
use crate::tenant::mgr::TenantSlot;
|
use crate::tenant::mgr::TenantSlot;
|
||||||
|
|
||||||
use super::{super::harness::TenantHarness, TenantsMap};
|
|
||||||
|
|
||||||
#[tokio::test(start_paused = true)]
|
#[tokio::test(start_paused = true)]
|
||||||
async fn shutdown_awaits_in_progress_tenant() {
|
async fn shutdown_awaits_in_progress_tenant() {
|
||||||
// Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully
|
// Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully
|
||||||
|
|||||||
@@ -179,78 +179,64 @@ pub mod index;
|
|||||||
pub mod manifest;
|
pub mod manifest;
|
||||||
pub(crate) mod upload;
|
pub(crate) mod upload;
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use camino::Utf8Path;
|
|
||||||
use chrono::{NaiveDateTime, Utc};
|
|
||||||
|
|
||||||
pub(crate) use download::download_initdb_tar_zst;
|
|
||||||
use index::GcCompactionState;
|
|
||||||
use pageserver_api::models::TimelineArchivalState;
|
|
||||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
|
||||||
use regex::Regex;
|
|
||||||
use scopeguard::ScopeGuard;
|
|
||||||
use tokio_util::sync::CancellationToken;
|
|
||||||
use utils::backoff::{
|
|
||||||
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
|
|
||||||
};
|
|
||||||
-use utils::pausable_failpoint;
-use utils::shard::ShardNumber;
-
 use std::collections::{HashMap, HashSet, VecDeque};
+use std::ops::DerefMut;
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Mutex, OnceLock};
 use std::time::Duration;
 
+use anyhow::Context;
+use camino::Utf8Path;
+use chrono::{NaiveDateTime, Utc};
+pub(crate) use download::{
+    download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file,
+    list_remote_tenant_shards, list_remote_timelines,
+};
+use index::GcCompactionState;
+pub(crate) use index::LayerFileMetadata;
+use pageserver_api::models::TimelineArchivalState;
+use pageserver_api::shard::{ShardIndex, TenantShardId};
+use regex::Regex;
 use remote_storage::{
     DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel,
 };
-use std::ops::DerefMut;
-use tracing::{debug, error, info, instrument, warn};
-use tracing::{info_span, Instrument};
-use utils::lsn::Lsn;
-use crate::context::RequestContext;
-use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
-use crate::metrics::{
-    MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
-    RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
-    REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
-};
-use crate::task_mgr::shutdown_token;
-use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::tenant::remote_timeline_client::download::download_retry;
-use crate::tenant::storage_layer::AsLayerDesc;
-use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable};
-use crate::tenant::TIMELINES_SEGMENT_NAME;
-use crate::{
-    config::PageServerConf,
-    task_mgr,
-    task_mgr::TaskKind,
-    task_mgr::BACKGROUND_RUNTIME,
-    tenant::metadata::TimelineMetadata,
-    tenant::upload_queue::{
-        UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, UploadTask,
-    },
-    TENANT_HEATMAP_BASENAME,
-};
+use scopeguard::ScopeGuard;
+use tokio_util::sync::CancellationToken;
+use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
+pub(crate) use upload::upload_initdb_dir;
+use utils::backoff::{
+    self, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff,
+};
 
 use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;
+use utils::pausable_failpoint;
+use utils::shard::ShardNumber;
 
 use self::index::IndexPart;
 
 use super::config::AttachedLocationConfig;
 use super::metadata::MetadataUpdate;
 use super::storage_layer::{Layer, LayerName, ResidentLayer};
 use super::timeline::import_pgdata;
 use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
 use super::{DeleteTimelineError, Generation};
-pub(crate) use download::{
-    download_index_part, download_tenant_manifest, is_temp_download_file,
-    list_remote_tenant_shards, list_remote_timelines,
-};
-pub(crate) use index::LayerFileMetadata;
-pub(crate) use upload::upload_initdb_dir;
+use crate::config::PageServerConf;
+use crate::context::RequestContext;
+use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
+use crate::metrics::{
+    MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
+    RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
+    RemoteTimelineClientMetricsCallTrackSize,
+};
+use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token};
+use crate::tenant::metadata::TimelineMetadata;
+use crate::tenant::remote_timeline_client::download::download_retry;
+use crate::tenant::storage_layer::AsLayerDesc;
+use crate::tenant::upload_queue::{
+    Delete, OpType, UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped,
+    UploadQueueStoppedDeletable, UploadTask,
+};
+use crate::tenant::{TIMELINES_SEGMENT_NAME, debug_assert_current_span_has_tenant_and_timeline_id};
+use crate::{TENANT_HEATMAP_BASENAME, task_mgr};
 
 // Occasional network issues and such can cause remote operations to fail, and
 // that's expected. If a download fails, we log it at info-level, and retry.
@@ -1091,7 +1077,11 @@ impl RemoteTimelineClient {
                 if !wanted(x) && wanted(y) {
                     // this could be avoided by having external in-memory synchronization, like
                     // timeline detach ancestor
-                    warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason");
+                    warn!(
+                        ?reason,
+                        op = "insert",
+                        "unexpected: two racing processes to enable and disable a gc blocking reason"
+                    );
                 }
 
                 // at this point, the metadata must always show that there is a parent
@@ -1145,7 +1135,11 @@ impl RemoteTimelineClient {
             (x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
             (x, y) => {
                 if !wanted(x) && wanted(y) {
-                    warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)");
+                    warn!(
+                        ?reason,
+                        op = "remove",
+                        "unexpected: two racing processes to enable and disable a gc blocking reason (remove)"
+                    );
                 }
 
                 upload_queue.dirty.gc_blocking =
@@ -1287,12 +1281,14 @@ impl RemoteTimelineClient {
 
         #[cfg(feature = "testing")]
         for (name, metadata) in &with_metadata {
-            let gen = metadata.generation;
-            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) {
-                if unexpected == gen {
+            let gen_ = metadata.generation;
+            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen_) {
+                if unexpected == gen_ {
                     tracing::error!("{name} was unlinked twice with same generation");
                 } else {
-                    tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}");
+                    tracing::error!(
+                        "{name} was unlinked twice with different generations {gen_:?} and {unexpected:?}"
+                    );
                 }
             }
         }
@@ -1354,11 +1350,11 @@ impl RemoteTimelineClient {
 
         #[cfg(feature = "testing")]
         for (name, meta) in &with_metadata {
-            let gen = meta.generation;
+            let gen_ = meta.generation;
             match upload_queue.dangling_files.remove(name) {
-                Some(same) if same == gen => { /* expected */ }
+                Some(same) if same == gen_ => { /* expected */ }
                 Some(other) => {
-                    tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}");
+                    tracing::error!("{name} was unlinked with {other:?} but deleted with {gen_:?}");
                 }
                 None => {
                     tracing::error!("{name} was unlinked but was not dangling");
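The `gen` to `gen_` renames above (and the matching ones in the image layer tests further down) are forced by the edition rather than by any behavior change: Rust 2024 reserves `gen` as a keyword for future generator blocks, so it is no longer a legal plain identifier. A minimal standalone sketch, not code from this PR, of the two spellings that remain valid:

// Minimal sketch, not code from this PR: `gen` under edition = "2024".
fn main() {
    // let gen = 0xdead0001u32; // rejected: `gen` is a reserved keyword in edition 2024
    let gen_ = 0xdead0001u32; // rename, the approach this PR takes
    let r#gen = gen_; // alternative: keep the name via a raw identifier
    assert_eq!(gen_, r#gen);
}

The rename was presumably preferred over `r#gen` because it keeps call sites and format strings such as `{gen_:?}` readable.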
@@ -1455,7 +1451,9 @@ impl RemoteTimelineClient {
         // proper stop is yet to be called. On cancel the original or some later task must call
         // `stop` or `shutdown`.
         let sg = scopeguard::guard((), |_| {
-            tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error")
+            tracing::error!(
+                "RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error"
+            )
         });
 
         let fut = {
@@ -1471,7 +1469,7 @@ impl RemoteTimelineClient {
                 scopeguard::ScopeGuard::into_inner(sg);
                 return;
             }
-            UploadQueue::Initialized(ref mut init) => init,
+            UploadQueue::Initialized(init) => init,
         };
 
         // if the queue is already stuck due to a shutdown operation which was cancelled, then
@@ -1831,7 +1829,9 @@ impl RemoteTimelineClient {
                 .map(|n| n.starts_with(IndexPart::FILE_NAME))
                 .unwrap_or(false)
         })
-        .filter_map(|o| parse_remote_index_path(o.key.clone()).map(|gen| (o.key.clone(), gen)))
+        .filter_map(|o| {
+            parse_remote_index_path(o.key.clone()).map(|gen_| (o.key.clone(), gen_))
+        })
         .max_by_key(|i| i.1)
         .map(|i| i.0.clone())
         .unwrap_or(
@@ -2023,7 +2023,7 @@ impl RemoteTimelineClient {
         }
 
         let upload_result: anyhow::Result<()> = match &task.op {
-            UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
+            UploadOp::UploadLayer(layer, layer_metadata, mode) => {
                 // TODO: check if this mechanism can be removed now that can_bypass() performs
                 // conflict checks during scheduling.
                 if let Some(OpType::FlushDeletion) = mode {
@@ -2113,7 +2113,7 @@ impl RemoteTimelineClient {
                 )
                 .await
             }
-            UploadOp::UploadMetadata { ref uploaded } => {
+            UploadOp::UploadMetadata { uploaded } => {
                 let res = upload::upload_index_part(
                     &self.storage_impl,
                     &self.tenant_shard_id,
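Dropping `ref` from these two match arms is another edition-2024 requirement, not just a cleanup: the new match-ergonomics rules reject explicit `ref`/`ref mut` binding modifiers whenever the scrutinee is matched through a reference, as it is with `match &task.op` here, because the binding is already by-reference. A standalone sketch, not this PR's actual types, with a toy enum mirroring the shape of the arm above:

// Minimal sketch, not code from this PR: why `ref` disappears in edition 2024.
enum UploadOp {
    UploadMetadata { uploaded: String },
}

fn main() {
    let op = UploadOp::UploadMetadata {
        uploaded: "index_part.json".to_owned(),
    };
    match &op {
        // Edition 2021 tolerated `UploadOp::UploadMetadata { ref uploaded }`;
        // edition 2024 rejects the redundant `ref`, since matching through
        // `&op` already binds `uploaded` as a `&String`.
        UploadOp::UploadMetadata { uploaded } => {
            let uploaded: &String = uploaded;
            println!("{uploaded}");
        }
    }
}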
@@ -2229,11 +2229,11 @@ impl RemoteTimelineClient {
         let lsn_update = {
             let mut upload_queue_guard = self.upload_queue.lock().unwrap();
             let upload_queue = match upload_queue_guard.deref_mut() {
-                UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"),
-                UploadQueue::Stopped(_stopped) => {
-                    None
-                },
-                UploadQueue::Initialized(qi) => { Some(qi) }
+                UploadQueue::Uninitialized => panic!(
+                    "callers are responsible for ensuring this is only called on an initialized queue"
+                ),
+                UploadQueue::Stopped(_stopped) => None,
+                UploadQueue::Initialized(qi) => Some(qi),
             };
 
             let upload_queue = match upload_queue {
@@ -2255,7 +2255,11 @@ impl RemoteTimelineClient {
             let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id);
             let monotone = is_later || last_updater.is_none();
 
-            assert!(monotone, "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", task.task_id);
+            assert!(
+                monotone,
+                "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}",
+                task.task_id
+            );
 
             // not taking ownership is wasteful
             upload_queue.clean.0.clone_from(uploaded);
@@ -2654,20 +2658,16 @@ pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation>
 
 #[cfg(test)]
 mod tests {
-    use super::*;
-    use crate::{
-        context::RequestContext,
-        tenant::{
-            config::AttachmentMode,
-            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::layer::local_layer_path,
-            Tenant, Timeline,
-        },
-        DEFAULT_PG_VERSION,
-    };
-
     use std::collections::HashSet;
 
+    use super::*;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::context::RequestContext;
+    use crate::tenant::config::AttachmentMode;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
+    use crate::tenant::storage_layer::layer::local_layer_path;
+    use crate::tenant::{Tenant, Timeline};
+
     pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
         format!("contents for {name}").into()
     }
@@ -8,41 +8,39 @@ use std::future::Future;
 use std::str::FromStr;
 use std::time::SystemTime;
 
-use anyhow::{anyhow, Context};
+use anyhow::{Context, anyhow};
 use camino::{Utf8Path, Utf8PathBuf};
 use pageserver_api::shard::TenantShardId;
+use remote_storage::{
+    DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
+};
 use tokio::fs::{self, File, OpenOptions};
 use tokio::io::{AsyncSeekExt, AsyncWriteExt};
 use tokio_util::io::StreamReader;
 use tokio_util::sync::CancellationToken;
 use tracing::warn;
-use utils::backoff;
+use utils::crashsafe::path_with_suffix_extension;
+use utils::id::{TenantId, TimelineId};
+use utils::{backoff, pausable_failpoint};
 
+use super::index::{IndexPart, LayerFileMetadata};
+use super::manifest::TenantManifest;
+use super::{
+    FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path,
+    parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path,
+    remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
+    remote_tenant_manifest_prefix, remote_tenant_path,
+};
+use crate::TEMP_FILE_SUFFIX;
 use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::span::{
     debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id,
 };
+use crate::tenant::Generation;
 use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
 use crate::tenant::storage_layer::LayerName;
-use crate::tenant::Generation;
-use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
-use crate::TEMP_FILE_SUFFIX;
-use remote_storage::{
-    DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
-};
-use utils::crashsafe::path_with_suffix_extension;
-use utils::id::{TenantId, TimelineId};
-use utils::pausable_failpoint;
-
-use super::index::{IndexPart, LayerFileMetadata};
-use super::manifest::TenantManifest;
-use super::{
-    parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path,
-    remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
-    remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD,
-    FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
-};
+use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
 
 ///
 /// If 'metadata' is given, we will validate that the downloaded file's size matches that
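Most of the import churn in this file (and in the files below) is the formatter's work rather than a semantic change. The 2024 style edition sorts `use` lists by version-sorting, in which ASCII uppercase names order before lowercase ones, which is what turns `use anyhow::{anyhow, Context}` into `use anyhow::{Context, anyhow}` above. The per-module flattening of nested `use std::{...}` trees and the std / external / crate grouping look like rustfmt's nightly `imports_granularity = "Module"` and `group_imports = "StdExternalCrate"` options; that is an assumption about the repo's rustfmt configuration, not something visible in the diff itself. A standalone sketch, not code from this PR, of the new ordering:

// Minimal sketch, not code from this PR: 2024-style `use` list ordering.
mod harness {
    pub const TIMELINE_ID: u32 = 1;
    pub struct TenantHarness;
}

// The 2021 style sorted roughly case-insensitively: use harness::{TenantHarness, TIMELINE_ID};
use harness::{TIMELINE_ID, TenantHarness}; // 2024 style: uppercase sorts first

fn main() {
    let _harness = TenantHarness;
    println!("{TIMELINE_ID}");
}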
@@ -207,9 +205,9 @@ async fn download_object(
         }
         #[cfg(target_os = "linux")]
         crate::virtual_file::io_engine::IoEngine::TokioEpollUring => {
-            use crate::virtual_file::owned_buffers_io;
-            use crate::virtual_file::IoBufferMut;
             use std::sync::Arc;
+
+            use crate::virtual_file::{IoBufferMut, owned_buffers_io};
             async {
                 let destination_file = Arc::new(
                     VirtualFile::create(dst_path, ctx)
@@ -7,16 +7,16 @@ use std::collections::HashMap;
 
 use chrono::NaiveDateTime;
 use pageserver_api::models::AuxFilePolicy;
+use pageserver_api::shard::ShardIndex;
 use serde::{Deserialize, Serialize};
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
 
 use super::is_same_remote_layer_path;
+use crate::tenant::Generation;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerName;
 use crate::tenant::timeline::import_pgdata;
-use crate::tenant::Generation;
-use pageserver_api::shard::ShardIndex;
-use utils::id::TimelineId;
-use utils::lsn::Lsn;
 
 /// In-memory representation of an `index_part.json` file
 ///
@@ -435,10 +435,12 @@ impl GcBlocking {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
     use std::str::FromStr;
 
     use utils::id::TimelineId;
 
+    use super::*;
+
     #[test]
     fn v1_indexpart_is_parsed() {
         let example = r#"{
@@ -1,6 +1,7 @@
 use chrono::NaiveDateTime;
 use serde::{Deserialize, Serialize};
-use utils::{id::TimelineId, lsn::Lsn};
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
 
 /// Tenant-shard scoped manifest
 #[derive(Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -1,28 +1,28 @@
 //! Helper functions to upload files to remote storage with a RemoteStorage
 
-use anyhow::{bail, Context};
+use std::io::{ErrorKind, SeekFrom};
+use std::time::SystemTime;
+
+use anyhow::{Context, bail};
 use bytes::Bytes;
 use camino::Utf8Path;
 use fail::fail_point;
 use pageserver_api::shard::TenantShardId;
-use std::io::{ErrorKind, SeekFrom};
-use std::time::SystemTime;
+use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
 use tokio::fs::{self, File};
 use tokio::io::AsyncSeekExt;
 use tokio_util::sync::CancellationToken;
+use tracing::info;
+use utils::id::{TenantId, TimelineId};
 use utils::{backoff, pausable_failpoint};
 
+use super::Generation;
 use super::index::IndexPart;
 use super::manifest::TenantManifest;
-use super::Generation;
 use crate::tenant::remote_timeline_client::{
     remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,
     remote_tenant_manifest_path,
 };
-use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
-use utils::id::{TenantId, TimelineId};
-
-use tracing::info;
 
 /// Serializes and uploads the given index part data to the remote storage.
 pub(crate) async fn upload_index_part(
@@ -134,7 +134,9 @@ pub(super) async fn upload_timeline_layer<'a>(
         .len();
 
     if metadata_size != fs_size {
-        bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
+        bail!(
+            "File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"
+        );
     }
 
     let fs_size = usize::try_from(fs_size)
@@ -3,40 +3,31 @@ pub mod heatmap;
 mod heatmap_uploader;
 mod scheduler;
 
-use std::{sync::Arc, time::SystemTime};
+use std::sync::Arc;
+use std::time::SystemTime;
 
-use crate::{
-    context::RequestContext,
-    disk_usage_eviction_task::DiskUsageEvictionInfo,
-    metrics::SECONDARY_HEATMAP_TOTAL_SIZE,
-    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
-};
-
-use self::{
-    downloader::{downloader_task, SecondaryDetail},
-    heatmap_uploader::heatmap_uploader_task,
-};
-
-use super::{
-    config::{SecondaryLocationConfig, TenantConfOpt},
-    mgr::TenantManager,
-    span::debug_assert_current_span_has_tenant_id,
-    storage_layer::LayerName,
-    GetTenantError,
-};
-
-use crate::metrics::SECONDARY_RESIDENT_PHYSICAL_SIZE;
 use metrics::UIntGauge;
-use pageserver_api::{
-    models,
-    shard::{ShardIdentity, TenantShardId},
-};
+use pageserver_api::models;
+use pageserver_api::shard::{ShardIdentity, TenantShardId};
 use remote_storage::GenericRemoteStorage;
-
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::instrument;
-use utils::{completion::Barrier, id::TimelineId, sync::gate::Gate};
+use utils::completion::Barrier;
+use utils::id::TimelineId;
+use utils::sync::gate::Gate;
+
+use self::downloader::{SecondaryDetail, downloader_task};
+use self::heatmap_uploader::heatmap_uploader_task;
+use super::GetTenantError;
+use super::config::{SecondaryLocationConfig, TenantConfOpt};
+use super::mgr::TenantManager;
+use super::span::debug_assert_current_span_has_tenant_id;
+use super::storage_layer::LayerName;
+use crate::context::RequestContext;
+use crate::disk_usage_eviction_task::DiskUsageEvictionInfo;
+use crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE};
+use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
 
 enum DownloadCommand {
     Download(TenantShardId),
@@ -1,47 +1,8 @@
-use std::{
-    collections::{HashMap, HashSet},
-    pin::Pin,
-    str::FromStr,
-    sync::Arc,
-    time::{Duration, Instant, SystemTime},
-};
+use std::collections::{HashMap, HashSet};
+use std::pin::Pin;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::{Duration, Instant, SystemTime};
 
-use crate::{
-    config::PageServerConf,
-    context::RequestContext,
-    disk_usage_eviction_task::{
-        finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
-    },
-    metrics::SECONDARY_MODE,
-    tenant::{
-        config::SecondaryLocationConfig,
-        debug_assert_current_span_has_tenant_and_timeline_id,
-        ephemeral_file::is_ephemeral_file,
-        remote_timeline_client::{
-            index::LayerFileMetadata, is_temp_download_file, FAILED_DOWNLOAD_WARN_THRESHOLD,
-            FAILED_REMOTE_OP_RETRIES,
-        },
-        span::debug_assert_current_span_has_tenant_id,
-        storage_layer::{layer::local_layer_path, LayerName, LayerVisibilityHint},
-        tasks::{warn_when_period_overrun, BackgroundLoopKind},
-    },
-    virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile},
-    TEMP_FILE_SUFFIX,
-};
-
-use super::{
-    heatmap::HeatMapLayer,
-    scheduler::{
-        self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult,
-        TenantBackgroundJobs,
-    },
-    GetTenantError, SecondaryTenant, SecondaryTenantError,
-};
-
-use crate::tenant::{
-    mgr::TenantManager,
-    remote_timeline_client::{download::download_layer_file, remote_heatmap_path},
-};
-
 use camino::Utf8PathBuf;
 use chrono::format::{DelayedFormat, StrftimeItems};
@@ -50,18 +11,43 @@ use metrics::UIntGauge;
 use pageserver_api::models::SecondaryProgress;
 use pageserver_api::shard::TenantShardId;
 use remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage};
 
 use tokio_util::sync::CancellationToken;
-use tracing::{info_span, instrument, warn, Instrument};
-use utils::{
-    backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext,
-    id::TimelineId, pausable_failpoint, serde_system_time,
-};
-
-use super::{
-    heatmap::{HeatMapTenant, HeatMapTimeline},
-    CommandRequest, DownloadCommand,
-};
+use tracing::{Instrument, info_span, instrument, warn};
+use utils::completion::Barrier;
+use utils::crashsafe::path_with_suffix_extension;
+use utils::id::TimelineId;
+use utils::{backoff, failpoint_support, fs_ext, pausable_failpoint, serde_system_time};
+
+use super::heatmap::{HeatMapLayer, HeatMapTenant, HeatMapTimeline};
+use super::scheduler::{
+    self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, period_jitter,
+    period_warmup,
+};
+use super::{
+    CommandRequest, DownloadCommand, GetTenantError, SecondaryTenant, SecondaryTenantError,
+};
+use crate::TEMP_FILE_SUFFIX;
+use crate::config::PageServerConf;
+use crate::context::RequestContext;
+use crate::disk_usage_eviction_task::{
+    DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32,
+};
+use crate::metrics::SECONDARY_MODE;
+use crate::tenant::config::SecondaryLocationConfig;
+use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
+use crate::tenant::ephemeral_file::is_ephemeral_file;
+use crate::tenant::mgr::TenantManager;
+use crate::tenant::remote_timeline_client::download::download_layer_file;
+use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
+use crate::tenant::remote_timeline_client::{
+    FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, is_temp_download_file,
+    remote_heatmap_path,
+};
+use crate::tenant::span::debug_assert_current_span_has_tenant_id;
+use crate::tenant::storage_layer::layer::local_layer_path;
+use crate::tenant::storage_layer::{LayerName, LayerVisibilityHint};
+use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};
+use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
 
 /// For each tenant, default period for how long must have passed since the last download_tenant call before
 /// calling it again. This default is replaced with the value of [`HeatMapTenant::upload_period_ms`] after first
@@ -1,11 +1,13 @@
-use std::{collections::HashMap, time::SystemTime};
-
-use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName};
+use std::collections::HashMap;
+use std::time::SystemTime;
 
 use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
+use serde_with::{DisplayFromStr, TimestampSeconds, serde_as};
+use utils::generation::Generation;
+use utils::id::TimelineId;
 
-use utils::{generation::Generation, id::TimelineId};
+use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
+use crate::tenant::storage_layer::LayerName;
 
 #[derive(Serialize, Deserialize)]
 pub(crate) struct HeatMapTenant {
@@ -1,42 +1,33 @@
-use std::{
-    collections::HashMap,
-    pin::Pin,
-    sync::{Arc, Weak},
-    time::{Duration, Instant},
-};
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::sync::{Arc, Weak};
+use std::time::{Duration, Instant};
 
-use crate::{
-    metrics::SECONDARY_MODE,
-    tenant::{
-        config::AttachmentMode,
-        mgr::{GetTenantError, TenantManager},
-        remote_timeline_client::remote_heatmap_path,
-        span::debug_assert_current_span_has_tenant_id,
-        tasks::{warn_when_period_overrun, BackgroundLoopKind},
-        Tenant,
-    },
-    virtual_file::VirtualFile,
-    TEMP_FILE_SUFFIX,
-};
-
 use futures::Future;
 use pageserver_api::shard::TenantShardId;
 use remote_storage::{GenericRemoteStorage, TimeoutOrCancel};
-
-use super::{
-    heatmap::HeatMapTenant,
-    scheduler::{
-        self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult,
-        TenantBackgroundJobs,
-    },
-    CommandRequest, SecondaryTenantError, UploadCommand,
-};
 use tokio_util::sync::CancellationToken;
-use tracing::{info_span, instrument, Instrument};
-use utils::{
-    backoff, completion::Barrier, crashsafe::path_with_suffix_extension,
-    yielding_loop::yielding_loop,
-};
+use tracing::{Instrument, info_span, instrument};
+use utils::backoff;
+use utils::completion::Barrier;
+use utils::crashsafe::path_with_suffix_extension;
+use utils::yielding_loop::yielding_loop;
+
+use super::heatmap::HeatMapTenant;
+use super::scheduler::{
+    self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, period_jitter,
+    period_warmup,
+};
+use super::{CommandRequest, SecondaryTenantError, UploadCommand};
+use crate::TEMP_FILE_SUFFIX;
+use crate::metrics::SECONDARY_MODE;
+use crate::tenant::Tenant;
+use crate::tenant::config::AttachmentMode;
+use crate::tenant::mgr::{GetTenantError, TenantManager};
+use crate::tenant::remote_timeline_client::remote_heatmap_path;
+use crate::tenant::span::debug_assert_current_span_has_tenant_id;
+use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};
+use crate::virtual_file::VirtualFile;
 
 pub(super) async fn heatmap_uploader_task(
     tenant_manager: Arc<TenantManager>,
@@ -1,16 +1,15 @@
-use futures::Future;
-use rand::Rng;
-use std::{
-    collections::HashMap,
-    marker::PhantomData,
-    pin::Pin,
-    time::{Duration, Instant},
-};
+use std::collections::HashMap;
+use std::marker::PhantomData;
+use std::pin::Pin;
+use std::time::{Duration, Instant};
 
+use futures::Future;
 use pageserver_api::shard::TenantShardId;
+use rand::Rng;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
-use utils::{completion::Barrier, yielding_loop::yielding_loop};
+use utils::completion::Barrier;
+use utils::yielding_loop::yielding_loop;
 
 use super::{CommandRequest, CommandResponse, SecondaryTenantError};
 
@@ -4,21 +4,18 @@ use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
 use tenant_size_model::svg::SvgBranchKind;
-use tokio::sync::oneshot::error::RecvError;
+use tenant_size_model::{Segment, StorageModel};
 use tokio::sync::Semaphore;
+use tokio::sync::oneshot::error::RecvError;
 use tokio_util::sync::CancellationToken;
-
-use crate::context::RequestContext;
-use crate::pgdatadir_mapping::CalculateLogicalSizeError;
-
-use super::{GcError, LogicalSizeCalculationCause, Tenant};
-use crate::tenant::{MaybeOffloaded, Timeline};
+use tracing::*;
 use utils::id::TimelineId;
 use utils::lsn::Lsn;
 
-use tracing::*;
-use tenant_size_model::{Segment, StorageModel};
+use super::{GcError, LogicalSizeCalculationCause, Tenant};
+use crate::context::RequestContext;
+use crate::pgdatadir_mapping::CalculateLogicalSizeError;
+use crate::tenant::{MaybeOffloaded, Timeline};
 
 /// Inputs to the actual tenant sizing model
 ///
@@ -498,7 +495,9 @@ async fn fill_logical_sizes(
             }
             Err(join_error) => {
                 // cannot really do anything, as this panic is likely a bug
-                error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}");
+                error!(
+                    "task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}"
+                );
 
                 have_any_error = Some(CalculateSyntheticSizeError::Fatal(
                     anyhow::anyhow!(join_error)
@@ -10,42 +10,39 @@ mod layer_desc;
 mod layer_name;
 pub mod merge_iterator;
 
-use crate::config::PageServerConf;
-use crate::context::{AccessStatsBehavior, RequestContext};
-use bytes::Bytes;
-use futures::stream::FuturesUnordered;
-use futures::StreamExt;
-use pageserver_api::key::Key;
-use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
-use pageserver_api::record::NeonWalRecord;
-use pageserver_api::value::Value;
 use std::cmp::Ordering;
 use std::collections::hash_map::Entry;
 use std::collections::{BinaryHeap, HashMap};
 use std::future::Future;
 use std::ops::Range;
 use std::pin::Pin;
-use std::sync::atomic::AtomicUsize;
 use std::sync::Arc;
+use std::sync::atomic::AtomicUsize;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
-use tracing::{trace, Instrument};
-use utils::sync::gate::GateGuard;
-
-use utils::lsn::Lsn;
 
 pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter};
+use bytes::Bytes;
 pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
+use futures::StreamExt;
+use futures::stream::FuturesUnordered;
 pub use image_layer::{ImageLayer, ImageLayerWriter};
 pub use inmemory_layer::InMemoryLayer;
+pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
 pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
 pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
-
-pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
+use pageserver_api::key::Key;
+use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
+use pageserver_api::record::NeonWalRecord;
+use pageserver_api::value::Value;
+use tracing::{Instrument, trace};
+use utils::lsn::Lsn;
+use utils::sync::gate::GateGuard;
 
 use self::inmemory_layer::InMemoryLayerFileId;
-
-use super::timeline::{GetVectoredError, ReadPath};
 use super::PageReconstructError;
+use super::timeline::{GetVectoredError, ReadPath};
+use crate::config::PageServerConf;
+use crate::context::{AccessStatsBehavior, RequestContext};
 
 pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
 where
@@ -510,6 +507,7 @@ impl IoConcurrency {
     #[cfg(test)]
     pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut<Target = Self> {
         use std::ops::{Deref, DerefMut};
+
         use tracing::info;
         use utils::sync::gate::Gate;
 
@@ -1,17 +1,22 @@
-use std::{future::Future, ops::Range, sync::Arc};
+use std::future::Future;
+use std::ops::Range;
+use std::sync::Arc;
 
 use bytes::Bytes;
-use pageserver_api::key::{Key, KEY_SIZE};
-use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId};
-
-use crate::tenant::storage_layer::Layer;
-use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline};
+use pageserver_api::key::{KEY_SIZE, Key};
 use pageserver_api::value::Value;
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
+use utils::shard::TenantShardId;
 
 use super::layer::S3_UPLOAD_LIMIT;
 use super::{
     DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer,
 };
+use crate::config::PageServerConf;
+use crate::context::RequestContext;
+use crate::tenant::Timeline;
+use crate::tenant::storage_layer::Layer;
 
 pub(crate) enum BatchWriterResult {
     Produced(ResidentLayer),
@@ -423,15 +428,10 @@ mod tests {
     use itertools::Itertools;
     use rand::{RngCore, SeedableRng};
 
-    use crate::{
-        tenant::{
-            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::AsLayerDesc,
-        },
-        DEFAULT_PG_VERSION,
-    };
-
     use super::*;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
+    use crate::tenant::storage_layer::AsLayerDesc;
 
     fn get_key(id: u32) -> Key {
         let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();
@@ -27,6 +27,38 @@
 //! "values" part. The actual page images and WAL records are stored in the
 //! "values" part.
 //!
+use std::collections::{HashMap, VecDeque};
+use std::fs::File;
+use std::io::SeekFrom;
+use std::ops::Range;
+use std::os::unix::fs::FileExt;
+use std::str::FromStr;
+use std::sync::Arc;
+
+use anyhow::{Context, Result, bail, ensure};
+use camino::{Utf8Path, Utf8PathBuf};
+use futures::StreamExt;
+use itertools::Itertools;
+use pageserver_api::config::MaxVectoredReadBytes;
+use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};
+use pageserver_api::keyspace::KeySpace;
+use pageserver_api::models::ImageCompressionAlgorithm;
+use pageserver_api::shard::TenantShardId;
+use pageserver_api::value::Value;
+use rand::Rng;
+use rand::distributions::Alphanumeric;
+use serde::{Deserialize, Serialize};
+use tokio::sync::OnceCell;
+use tokio_epoll_uring::IoBuf;
+use tracing::*;
+use utils::bin_ser::BeSer;
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;
+
+use super::{
+    AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
+    ValuesReconstructState,
+};
 use crate::config::PageServerConf;
 use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
 use crate::page_cache::{self, FileId, PAGE_SZ};
@@ -42,43 +74,8 @@ use crate::tenant::vectored_blob_io::{
     VectoredReadPlanner,
 };
 use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
-use crate::virtual_file::IoBufferMut;
-use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
-use crate::TEMP_FILE_SUFFIX;
-use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
-use anyhow::{bail, ensure, Context, Result};
-use camino::{Utf8Path, Utf8PathBuf};
-use futures::StreamExt;
-use itertools::Itertools;
-use pageserver_api::config::MaxVectoredReadBytes;
-use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE};
-use pageserver_api::keyspace::KeySpace;
-use pageserver_api::models::ImageCompressionAlgorithm;
-use pageserver_api::shard::TenantShardId;
-use pageserver_api::value::Value;
-use rand::{distributions::Alphanumeric, Rng};
-use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, VecDeque};
-use std::fs::File;
-use std::io::SeekFrom;
-use std::ops::Range;
-use std::os::unix::fs::FileExt;
-use std::str::FromStr;
-use std::sync::Arc;
-use tokio::sync::OnceCell;
-use tokio_epoll_uring::IoBuf;
-use tracing::*;
-
-use utils::{
-    bin_ser::BeSer,
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
-
-use super::{
-    AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
-    ValuesReconstructState,
-};
+use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
+use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
 
 ///
 /// Header stored in the beginning of the file
@@ -1130,10 +1127,11 @@ impl DeltaLayerInner {
         until: Lsn,
         ctx: &RequestContext,
     ) -> anyhow::Result<usize> {
+        use futures::stream::TryStreamExt;
+
         use crate::tenant::vectored_blob_io::{
             BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended,
         };
-        use futures::stream::TryStreamExt;
 
         #[derive(Debug)]
         enum Item {
@@ -1599,23 +1597,21 @@ impl DeltaLayerIterator<'_> {
 pub(crate) mod test {
     use std::collections::BTreeMap;
 
+    use bytes::Bytes;
     use itertools::MinMaxResult;
-    use rand::prelude::{SeedableRng, SliceRandom, StdRng};
+    use pageserver_api::value::Value;
     use rand::RngCore;
+    use rand::prelude::{SeedableRng, SliceRandom, StdRng};
 
     use super::*;
-    use crate::tenant::harness::TIMELINE_ID;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::context::DownloadBehavior;
+    use crate::task_mgr::TaskKind;
+    use crate::tenant::disk_btree::tests::TestDisk;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
     use crate::tenant::storage_layer::{Layer, ResidentLayer};
     use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
     use crate::tenant::{Tenant, Timeline};
-    use crate::{
-        context::DownloadBehavior,
-        task_mgr::TaskKind,
-        tenant::{disk_btree::tests::TestDisk, harness::TenantHarness},
-        DEFAULT_PG_VERSION,
-    };
-    use bytes::Bytes;
-    use pageserver_api::value::Value;
 
     /// Construct an index for a fictional delta layer and and then
     /// traverse in order to plan vectored reads for a query. Finally,
@@ -1,18 +1,14 @@
-use std::{ops::Range, sync::Arc};
+use std::ops::Range;
+use std::sync::Arc;
 
 use anyhow::bail;
-use pageserver_api::{
-    key::Key,
-    keyspace::{KeySpace, SparseKeySpace},
-};
+use pageserver_api::key::Key;
+use pageserver_api::keyspace::{KeySpace, SparseKeySpace};
+use pageserver_api::value::Value;
 use utils::lsn::Lsn;
 
-use pageserver_api::value::Value;
-use super::{
-    merge_iterator::{MergeIterator, MergeIteratorItem},
-    PersistentLayerKey,
-};
+use super::PersistentLayerKey;
+use super::merge_iterator::{MergeIterator, MergeIteratorItem};
 
 /// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
 ///
@@ -98,19 +94,14 @@ impl<'a> FilterIterator<'a> {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
-
     use itertools::Itertools;
     use pageserver_api::key::Key;
     use utils::lsn::Lsn;
 
-    use crate::{
-        tenant::{
-            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::delta_layer::test::produce_delta_layer,
-        },
-        DEFAULT_PG_VERSION,
-    };
+    use super::*;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
+    use crate::tenant::storage_layer::delta_layer::test::produce_delta_layer;
 
     async fn assert_filter_iter_equal(
         filter_iter: &mut FilterIterator<'_>,
@@ -25,6 +25,39 @@
|
|||||||
//! layer, and offsets to the other parts. The "index" is a B-tree,
|
//! layer, and offsets to the other parts. The "index" is a B-tree,
|
||||||
//! mapping from Key to an offset in the "values" part. The
|
//! mapping from Key to an offset in the "values" part. The
|
||||||
//! actual page images are stored in the "values" part.
|
//! actual page images are stored in the "values" part.
|
||||||
|
use std::collections::{HashMap, VecDeque};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::SeekFrom;
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::os::unix::prelude::FileExt;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use anyhow::{Context, Result, bail, ensure};
|
||||||
|
use bytes::Bytes;
|
||||||
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
|
use hex;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use pageserver_api::config::MaxVectoredReadBytes;
|
||||||
|
use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};
|
||||||
|
use pageserver_api::keyspace::KeySpace;
|
||||||
|
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||||
|
use pageserver_api::value::Value;
|
||||||
|
use rand::Rng;
|
||||||
|
use rand::distributions::Alphanumeric;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::OnceCell;
|
||||||
|
use tokio_stream::StreamExt;
|
||||||
|
use tracing::*;
|
||||||
|
use utils::bin_ser::BeSer;
|
||||||
|
use utils::id::{TenantId, TimelineId};
|
||||||
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
|
use super::layer_name::ImageLayerName;
|
||||||
|
use super::{
|
||||||
|
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
||||||
|
ValuesReconstructState,
|
||||||
|
};
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
|
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
|
||||||
use crate::page_cache::{self, FileId, PAGE_SZ};
|
use crate::page_cache::{self, FileId, PAGE_SZ};
|
||||||
@@ -39,43 +72,8 @@ use crate::tenant::vectored_blob_io::{
|
|||||||
VectoredReadPlanner,
|
VectoredReadPlanner,
|
||||||
};
|
};
|
||||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||||
use crate::virtual_file::IoBufferMut;
|
use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||||
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
|
|
||||||
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||||
use anyhow::{bail, ensure, Context, Result};
|
|
||||||
use bytes::Bytes;
|
|
||||||
use camino::{Utf8Path, Utf8PathBuf};
|
|
||||||
use hex;
|
|
||||||
use itertools::Itertools;
|
|
||||||
use pageserver_api::config::MaxVectoredReadBytes;
|
|
||||||
use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE};
|
|
||||||
use pageserver_api::keyspace::KeySpace;
|
|
||||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
|
||||||
use pageserver_api::value::Value;
|
|
||||||
use rand::{distributions::Alphanumeric, Rng};
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use std::collections::{HashMap, VecDeque};
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::SeekFrom;
|
|
||||||
use std::ops::Range;
|
|
||||||
use std::os::unix::prelude::FileExt;
|
|
||||||
use std::str::FromStr;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use tokio::sync::OnceCell;
|
|
||||||
use tokio_stream::StreamExt;
|
|
||||||
use tracing::*;
|
|
||||||
|
|
||||||
use utils::{
|
|
||||||
bin_ser::BeSer,
|
|
||||||
id::{TenantId, TimelineId},
|
|
||||||
lsn::Lsn,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::layer_name::ImageLayerName;
|
|
||||||
use super::{
|
|
||||||
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
|
||||||
ValuesReconstructState,
|
|
||||||
};
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Header stored in the beginning of the file
|
/// Header stored in the beginning of the file
|
||||||
@@ -1135,34 +1133,26 @@ impl ImageLayerIterator<'_> {
 #[cfg(test)]
 mod test {
-    use std::{sync::Arc, time::Duration};
+    use std::sync::Arc;
+    use std::time::Duration;
 
     use bytes::Bytes;
     use itertools::Itertools;
-    use pageserver_api::{
-        key::Key,
-        shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize},
-        value::Value,
-    };
-    use utils::{
-        generation::Generation,
-        id::{TenantId, TimelineId},
-        lsn::Lsn,
-    };
-
-    use crate::{
-        context::RequestContext,
-        tenant::{
-            config::TenantConf,
-            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::{Layer, ResidentLayer},
-            vectored_blob_io::StreamingVectoredReadPlanner,
-            Tenant, Timeline,
-        },
-        DEFAULT_PG_VERSION,
-    };
+    use pageserver_api::key::Key;
+    use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
+    use pageserver_api::value::Value;
+    use utils::generation::Generation;
+    use utils::id::{TenantId, TimelineId};
+    use utils::lsn::Lsn;
 
     use super::{ImageLayerIterator, ImageLayerWriter};
+    use crate::DEFAULT_PG_VERSION;
+    use crate::context::RequestContext;
+    use crate::tenant::config::TenantConf;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
+    use crate::tenant::storage_layer::{Layer, ResidentLayer};
+    use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
+    use crate::tenant::{Tenant, Timeline};
 
     #[tokio::test]
     async fn image_layer_rewrite() {
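Note: most of the import churn in this diff is mechanical. Nested `use` trees are flattened to one import per module (matching rustfmt's `imports_granularity = "Module"` behavior; how the flattening was produced is not shown here), and the 2024 style edition sorts names inside braces case-sensitively, so SCREAMING_CASE and CamelCase names now come before snake_case ones — hence `{TIMELINE_ID, TenantHarness}` above. A rough before/after sketch, with illustrative paths:

    // Old style: one nested tree, case-insensitive ordering inside braces.
    use crate::tenant::{
        harness::{test_img, TenantHarness},
        Timeline,
    };

    // 2024 style edition: flat imports, uppercase-before-lowercase ordering.
    use crate::tenant::Timeline;
    use crate::tenant::harness::{TenantHarness, test_img};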
@@ -1172,10 +1162,10 @@ mod test {
             ..TenantConf::default()
         };
         let tenant_id = TenantId::generate();
-        let mut gen = Generation::new(0xdead0001);
+        let mut gen_ = Generation::new(0xdead0001);
         let mut get_next_gen = || {
-            let ret = gen;
-            gen = gen.next();
+            let ret = gen_;
+            gen_ = gen_.next();
             ret
         };
         // The LSN at which we will create an image layer to filter
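Note: the `gen` → `gen_` rename in this hunk is required, not stylistic. Rust 2024 reserves `gen` as a keyword (for future `gen` blocks), so a binding named `gen` no longer parses under `edition = "2024"`. A minimal sketch of the two possible fixes (renaming, as this diff does, or a raw identifier):

    // let gen = 1;      // error in edition 2024: `gen` is a reserved keyword
    let gen_ = 1;        // fix 1: rename the binding, as this diff does
    let r#gen = gen_;    // fix 2: keep the name via a raw identifier
    assert_eq!(r#gen, 1);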

@@ -4,38 +4,39 @@
 //! held in an ephemeral file, not in memory. The metadata for each page version, i.e.
 //! its position in the file, is kept in memory, though.
 //!
-use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64};
+use std::cmp::Ordering;
+use std::collections::{BTreeMap, HashMap};
+use std::fmt::Write;
+use std::ops::Range;
+use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering};
+use std::sync::{Arc, OnceLock};
+use std::time::Instant;
+
+use anyhow::Result;
+use camino::Utf8PathBuf;
+use pageserver_api::key::{CompactKey, Key};
+use pageserver_api::keyspace::KeySpace;
+use pageserver_api::models::InMemoryLayerInfo;
+use pageserver_api::shard::TenantShardId;
+use tokio::sync::RwLock;
+use tracing::*;
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
+use utils::vec_map::VecMap;
+use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};
+
+use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
+use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64, u64_to_usize};
 use crate::config::PageServerConf;
 use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
+// avoid binding to Write (conflicts with std::io::Write)
+// while being able to use std::fmt::Write's methods
+use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
 use crate::tenant::ephemeral_file::EphemeralFile;
 use crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo};
 use crate::tenant::timeline::GetVectoredError;
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
 use crate::{l0_flush, page_cache};
-use anyhow::Result;
-use camino::Utf8PathBuf;
-use pageserver_api::key::CompactKey;
-use pageserver_api::key::Key;
-use pageserver_api::keyspace::KeySpace;
-use pageserver_api::models::InMemoryLayerInfo;
-use pageserver_api::shard::TenantShardId;
-use std::collections::{BTreeMap, HashMap};
-use std::sync::{Arc, OnceLock};
-use std::time::Instant;
-use tracing::*;
-use utils::{id::TimelineId, lsn::Lsn, vec_map::VecMap};
-use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};
-// avoid binding to Write (conflicts with std::io::Write)
-// while being able to use std::fmt::Write's methods
-use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
-use std::cmp::Ordering;
-use std::fmt::Write;
-use std::ops::Range;
-use std::sync::atomic::Ordering as AtomicOrdering;
-use std::sync::atomic::{AtomicU64, AtomicUsize};
-use tokio::sync::RwLock;
-
-use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
 
 pub(crate) mod vectored_dio_read;
 
@@ -555,7 +556,9 @@ impl InMemoryLayer {
         gate: &utils::sync::gate::Gate,
         ctx: &RequestContext,
     ) -> Result<InMemoryLayer> {
-        trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
+        trace!(
+            "initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"
+        );
 
         let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?;
         let key = InMemoryLayerFileId(file.page_cache_file_id());
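Note: hunks like the one above change no tokens, only line breaks: a macro call whose single string argument overflowed the line limit is wrapped so the literal sits on its own line. The same pattern repeats for `trace!`, `info!`, `warn!`, `debug!`, `panic!`, `bail!`, and `assert!` throughout the rest of this diff, presumably as fallout from reformatting under the 2024 style edition; the shape is always:

    // before: one overlong line
    warn!("some very long message that blows past the configured max_width ...");

    // after: the argument is wrapped onto its own line
    warn!(
        "some very long message that blows past the configured max_width ..."
    );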
@@ -816,8 +819,7 @@ mod tests {
     #[test]
     fn test_index_entry() {
         const MAX_SUPPORTED_POS: usize = IndexEntry::MAX_SUPPORTED_POS;
-        use IndexEntryNewArgs as Args;
-        use IndexEntryUnpacked as Unpacked;
+        use {IndexEntryNewArgs as Args, IndexEntryUnpacked as Unpacked};
 
         let roundtrip = |args, expect: Unpacked| {
             let res = IndexEntry::new(args).expect("this tests expects no errors");
@@ -1,16 +1,13 @@
-use std::{
-    collections::BTreeMap,
-    sync::{Arc, RwLock},
-};
+use std::collections::BTreeMap;
+use std::sync::{Arc, RwLock};
 
 use itertools::Itertools;
 use tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice};
 
-use crate::{
-    assert_u64_eq_usize::{U64IsUsize, UsizeIsU64},
-    context::RequestContext,
-    virtual_file::{owned_buffers_io::io_buf_aligned::IoBufAlignedMut, IoBufferMut},
-};
+use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
+use crate::context::RequestContext;
+use crate::virtual_file::IoBufferMut;
+use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
 
 /// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`].
 pub trait File: Send {
@@ -132,7 +129,9 @@ where
         let req_len = match cur {
             LogicalReadState::NotStarted(buf) => {
                 if buf.len() != 0 {
-                    panic!("The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`");
+                    panic!(
+                        "The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`"
+                    );
                 }
                 // buf.cap() == 0 is ok
 
@@ -141,7 +140,9 @@ where
                 *state = LogicalReadState::Ongoing(buf);
                 req_len
             }
-            x => panic!("must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"),
+            x => panic!(
+                "must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"
+            ),
         };
 
         // plan which chunks we need to read from
@@ -422,15 +423,15 @@ impl Buffer for Vec<u8> {
 #[cfg(test)]
 #[allow(clippy::assertions_on_constants)]
 mod tests {
+    use std::cell::RefCell;
+    use std::collections::VecDeque;
+
     use rand::Rng;
 
-    use crate::{
-        context::DownloadBehavior, task_mgr::TaskKind,
-        virtual_file::owned_buffers_io::slice::SliceMutExt,
-    };
-
     use super::*;
-    use std::{cell::RefCell, collections::VecDeque};
+    use crate::context::DownloadBehavior;
+    use crate::task_mgr::TaskKind;
+    use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
 
     struct InMemoryFile {
         content: Vec<u8>,
|
|||||||
|
use std::ops::Range;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||||
|
use std::sync::{Arc, Weak};
|
||||||
|
use std::time::{Duration, SystemTime};
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use camino::{Utf8Path, Utf8PathBuf};
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
use pageserver_api::keyspace::KeySpace;
|
use pageserver_api::keyspace::KeySpace;
|
||||||
use pageserver_api::models::HistoricLayerInfo;
|
use pageserver_api::models::HistoricLayerInfo;
|
||||||
use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId};
|
use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId};
|
||||||
use std::ops::Range;
|
|
||||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
|
||||||
use std::sync::{Arc, Weak};
|
|
||||||
use std::time::{Duration, SystemTime};
|
|
||||||
use tracing::Instrument;
|
use tracing::Instrument;
|
||||||
|
use utils::generation::Generation;
|
||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
use utils::sync::{gate, heavier_once_cell};
|
use utils::sync::{gate, heavier_once_cell};
|
||||||
|
|
||||||
use crate::config::PageServerConf;
|
|
||||||
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
|
|
||||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
|
||||||
use crate::task_mgr::TaskKind;
|
|
||||||
use crate::tenant::timeline::{CompactionError, GetVectoredError};
|
|
||||||
use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
|
|
||||||
|
|
||||||
use super::delta_layer::{self};
|
use super::delta_layer::{self};
|
||||||
use super::image_layer::{self};
|
use super::image_layer::{self};
|
||||||
use super::{
|
use super::{
|
||||||
AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,
|
AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,
|
||||||
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState,
|
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState,
|
||||||
};
|
};
|
||||||
|
use crate::config::PageServerConf;
|
||||||
use utils::generation::Generation;
|
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
|
||||||
|
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||||
|
use crate::task_mgr::TaskKind;
|
||||||
|
use crate::tenant::Timeline;
|
||||||
|
use crate::tenant::remote_timeline_client::LayerFileMetadata;
|
||||||
|
use crate::tenant::timeline::{CompactionError, GetVectoredError};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
@@ -1873,8 +1873,8 @@ impl ResidentLayer {
|
|||||||
self.owner.record_access(ctx);
|
self.owner.record_access(ctx);
|
||||||
|
|
||||||
let res = match inner {
|
let res = match inner {
|
||||||
Delta(ref d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await,
|
Delta(d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await,
|
||||||
Image(ref i) => image_layer::ImageLayerInner::load_keys(i, ctx).await,
|
Image(i) => image_layer::ImageLayerInner::load_keys(i, ctx).await,
|
||||||
};
|
};
|
||||||
res.with_context(|| format!("Layer index is corrupted for {self}"))
|
res.with_context(|| format!("Layer index is corrupted for {self}"))
|
||||||
}
|
}
|
||||||
@@ -1920,7 +1920,7 @@ impl ResidentLayer {
|
|||||||
let owner = &self.owner.0;
|
let owner = &self.owner.0;
|
||||||
|
|
||||||
match self.downloaded.get(owner, ctx).await? {
|
match self.downloaded.get(owner, ctx).await? {
|
||||||
Delta(ref d) => d
|
Delta(d) => d
|
||||||
.copy_prefix(writer, until, ctx)
|
.copy_prefix(writer, until, ctx)
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("copy_delta_prefix until {until} of {self}")),
|
.with_context(|| format!("copy_delta_prefix until {until} of {self}")),
|
||||||
@@ -1943,7 +1943,7 @@ impl ResidentLayer {
|
|||||||
) -> anyhow::Result<&delta_layer::DeltaLayerInner> {
|
) -> anyhow::Result<&delta_layer::DeltaLayerInner> {
|
||||||
use LayerKind::*;
|
use LayerKind::*;
|
||||||
match self.downloaded.get(&self.owner.0, ctx).await? {
|
match self.downloaded.get(&self.owner.0, ctx).await? {
|
||||||
Delta(ref d) => Ok(d),
|
Delta(d) => Ok(d),
|
||||||
Image(_) => Err(anyhow::anyhow!("image layer")),
|
Image(_) => Err(anyhow::anyhow!("image layer")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1955,7 +1955,7 @@ impl ResidentLayer {
|
|||||||
) -> anyhow::Result<&image_layer::ImageLayerInner> {
|
) -> anyhow::Result<&image_layer::ImageLayerInner> {
|
||||||
use LayerKind::*;
|
use LayerKind::*;
|
||||||
match self.downloaded.get(&self.owner.0, ctx).await? {
|
match self.downloaded.get(&self.owner.0, ctx).await? {
|
||||||
Image(ref d) => Ok(d),
|
Image(d) => Ok(d),
|
||||||
Delta(_) => Err(anyhow::anyhow!("delta layer")),
|
Delta(_) => Err(anyhow::anyhow!("delta layer")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
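Note: the `Delta(ref d)` → `Delta(d)` edits in the four hunks above track an edition 2024 rule change for match ergonomics: once a pattern has matched through a reference (so the default binding mode is already by-reference), an explicit `ref` binding modifier is an error. Dropping `ref` produces the same `&T` binding. A minimal sketch with illustrative types:

    enum LayerKind {
        Delta(String),
        Image(String),
    }

    fn name(kind: &LayerKind) -> &String {
        match kind {
            // Edition 2024 rejects `LayerKind::Delta(ref d)` here: matching
            // through `&LayerKind` already binds by reference, so `d: &String`.
            LayerKind::Delta(d) => d,
            LayerKind::Image(i) => i,
        }
    }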

@@ -1,22 +1,16 @@
 use std::time::UNIX_EPOCH;
 
-use pageserver_api::key::{Key, CONTROLFILE_KEY};
+use pageserver_api::key::{CONTROLFILE_KEY, Key};
 use tokio::task::JoinSet;
-use utils::{
-    completion::{self, Completion},
-    id::TimelineId,
-};
+use utils::completion::{self, Completion};
+use utils::id::TimelineId;
 
 use super::failpoints::{Failpoint, FailpointKind};
 use super::*;
-use crate::{
-    context::DownloadBehavior,
-    tenant::{
-        harness::test_img,
-        storage_layer::{IoConcurrency, LayerVisibilityHint},
-    },
-};
-use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness};
+use crate::context::DownloadBehavior;
+use crate::task_mgr::TaskKind;
+use crate::tenant::harness::{TenantHarness, test_img};
+use crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint};
 
 /// Used in tests to advance a future to wanted await point, and not futher.
 const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600);
@@ -771,10 +765,12 @@ async fn evict_and_wait_does_not_wait_for_download() {
     let (arrival, _download_arrived) = utils::completion::channel();
     layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier));
 
-    let mut download = std::pin::pin!(layer
-        .0
-        .get_or_maybe_download(true, None)
-        .instrument(download_span));
+    let mut download = std::pin::pin!(
+        layer
+            .0
+            .get_or_maybe_download(true, None)
+            .instrument(download_span)
+    );
 
     assert!(
         !layer.is_likely_resident(),
@@ -1,16 +1,15 @@
 use core::fmt::Display;
-use pageserver_api::shard::TenantShardId;
 use std::ops::Range;
-use utils::{id::TimelineId, lsn::Lsn};
 
 use pageserver_api::key::Key;
-use super::{DeltaLayerName, ImageLayerName, LayerName};
+use pageserver_api::shard::TenantShardId;
 
 use serde::{Deserialize, Serialize};
 
 #[cfg(test)]
 use utils::id::TenantId;
+use utils::id::TimelineId;
+use utils::lsn::Lsn;
+
+use super::{DeltaLayerName, ImageLayerName, LayerName};
 
 /// A unique identifier of a persistent layer.
 ///
@@ -1,12 +1,12 @@
 //!
 //! Helper functions for dealing with filenames of the image and delta layer files.
 //!
-use pageserver_api::key::Key;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Range;
 use std::str::FromStr;
 
+use pageserver_api::key::Key;
 use utils::lsn::Lsn;
 
 use super::PersistentLayerDesc;
@@ -305,7 +305,7 @@ impl FromStr for LayerName {
             (None, None) => {
                 return Err(format!(
                     "neither delta nor image layer file name: {value:?}"
-                ))
+                ));
             }
             (Some(delta), None) => Self::Delta(delta),
             (None, Some(image)) => Self::Image(image),
@@ -1,21 +1,16 @@
-use std::{
-    cmp::Ordering,
-    collections::{binary_heap, BinaryHeap},
-    sync::Arc,
-};
+use std::cmp::Ordering;
+use std::collections::{BinaryHeap, binary_heap};
+use std::sync::Arc;
 
 use anyhow::bail;
 use pageserver_api::key::Key;
+use pageserver_api::value::Value;
 use utils::lsn::Lsn;
 
+use super::delta_layer::{DeltaLayerInner, DeltaLayerIterator};
+use super::image_layer::{ImageLayerInner, ImageLayerIterator};
+use super::{PersistentLayerDesc, PersistentLayerKey};
 use crate::context::RequestContext;
-use pageserver_api::value::Value;
-
-use super::{
-    delta_layer::{DeltaLayerInner, DeltaLayerIterator},
-    image_layer::{ImageLayerInner, ImageLayerIterator},
-    PersistentLayerDesc, PersistentLayerKey,
-};
 
 #[derive(Clone, Copy)]
 pub(crate) enum LayerRef<'a> {
@@ -349,24 +344,18 @@ impl<'a> MergeIterator<'a> {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
-
     use itertools::Itertools;
     use pageserver_api::key::Key;
-    use utils::lsn::Lsn;
-
-    use crate::{
-        tenant::{
-            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::delta_layer::test::{produce_delta_layer, sort_delta},
-        },
-        DEFAULT_PG_VERSION,
-    };
-
-    #[cfg(feature = "testing")]
-    use crate::tenant::storage_layer::delta_layer::test::sort_delta_value;
     #[cfg(feature = "testing")]
     use pageserver_api::record::NeonWalRecord;
+    use utils::lsn::Lsn;
+
+    use super::*;
+    use crate::DEFAULT_PG_VERSION;
+    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
+    #[cfg(feature = "testing")]
+    use crate::tenant::storage_layer::delta_layer::test::sort_delta_value;
+    use crate::tenant::storage_layer::delta_layer::test::{produce_delta_layer, sort_delta};
 
     async fn assert_merge_iter_equal(
         merge_iter: &mut MergeIterator<'_>,
@@ -8,24 +8,24 @@ use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use once_cell::sync::Lazy;
+use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD;
 use rand::Rng;
 use scopeguard::defer;
 use tokio::sync::{Semaphore, SemaphorePermit};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 
-use crate::context::{DownloadBehavior, RequestContext};
-use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
-use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS};
-use crate::tenant::throttle::Stats;
-use crate::tenant::timeline::compaction::CompactionOutcome;
-use crate::tenant::timeline::CompactionError;
-use crate::tenant::{Tenant, TenantState};
-use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD;
 use utils::backoff::exponential_backoff_duration;
 use utils::completion::Barrier;
 use utils::pausable_failpoint;
 
+use crate::context::{DownloadBehavior, RequestContext};
+use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
+use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
+use crate::tenant::throttle::Stats;
+use crate::tenant::timeline::CompactionError;
+use crate::tenant::timeline::compaction::CompactionOutcome;
+use crate::tenant::{Tenant, TenantState};
+
 /// Semaphore limiting concurrent background tasks (across all tenants).
 ///
 /// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work.
@@ -287,11 +287,12 @@ fn log_compaction_error(
     sleep_duration: Duration,
     task_cancelled: bool,
 ) {
-    use crate::pgdatadir_mapping::CollectKeySpaceError;
-    use crate::tenant::upload_queue::NotInitialized;
-    use crate::tenant::PageReconstructError;
     use CompactionError::*;
 
+    use crate::pgdatadir_mapping::CollectKeySpaceError;
+    use crate::tenant::PageReconstructError;
+    use crate::tenant::upload_queue::NotInitialized;
+
     let level = match err {
         ShuttingDown => return,
         Offload(_) => Level::ERROR,
@@ -1,10 +1,6 @@
-use std::{
-    sync::{
-        atomic::{AtomicU64, Ordering},
-        Arc,
-    },
-    time::Instant,
-};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::Instant;
 
 use arc_swap::ArcSwap;
 use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter};
@@ -14,55 +14,6 @@ pub mod span;
 pub mod uninit;
 mod walreceiver;
 
-use anyhow::{anyhow, bail, ensure, Context, Result};
-use arc_swap::{ArcSwap, ArcSwapOption};
-use bytes::Bytes;
-use camino::Utf8Path;
-use chrono::{DateTime, Utc};
-use compaction::{CompactionOutcome, GcCompactionCombinedSettings};
-use enumset::EnumSet;
-use fail::fail_point;
-use futures::FutureExt;
-use futures::{stream::FuturesUnordered, StreamExt};
-use handle::ShardTimelineId;
-use layer_manager::Shutdown;
-use offload::OffloadError;
-use once_cell::sync::Lazy;
-use pageserver_api::models::PageTraceEvent;
-use pageserver_api::{
-    key::{
-        KEY_SIZE, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
-        SPARSE_RANGE,
-    },
-    keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning},
-    models::{
-        CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
-        DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
-        InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState,
-    },
-    reltag::BlockNumber,
-    shard::{ShardIdentity, ShardNumber, TenantShardId},
-};
-use rand::Rng;
-use remote_storage::DownloadError;
-use serde_with::serde_as;
-use storage_broker::BrokerClientChannel;
-use tokio::runtime::Handle;
-use tokio::sync::mpsc::Sender;
-use tokio::sync::{oneshot, watch, Notify};
-use tokio_util::sync::CancellationToken;
-use tracing::*;
-use utils::critical;
-use utils::rate_limit::RateLimit;
-use utils::{
-    fs_ext,
-    guard_arc_swap::GuardArcSwap,
-    pausable_failpoint,
-    postgres_client::PostgresClientProtocol,
-    sync::gate::{Gate, GateGuard},
-};
-use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
-
 use std::array;
 use std::cmp::{max, min};
 use std::collections::btree_map::Entry;
@@ -72,74 +23,58 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering};
 use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};
 use std::time::{Duration, Instant, SystemTime};
 
-use crate::l0_flush::{self, L0FlushGlobalState};
-use crate::tenant::storage_layer::ImageLayerName;
-use crate::{
-    aux_file::AuxFileSizeEstimator,
-    page_service::TenantManagerTypes,
-    tenant::{
-        config::AttachmentMode,
-        layer_map::{LayerMap, SearchResult},
-        metadata::TimelineMetadata,
-        storage_layer::{
-            inmemory_layer::IndexEntry, BatchLayerWriter, IoConcurrency, PersistentLayerDesc,
-            ValueReconstructSituation,
-        },
-    },
-    walingest::WalLagCooldown,
-    walredo,
-};
-use crate::{
-    context::{DownloadBehavior, RequestContext},
-    disk_usage_eviction_task::DiskUsageEvictionInfo,
-    pgdatadir_mapping::CollectKeySpaceError,
-};
-use crate::{
-    disk_usage_eviction_task::finite_f32,
-    tenant::storage_layer::{
-        AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
-        LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructState,
-        ValuesReconstructState,
-    },
-};
-use crate::{
-    disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry,
-};
-use crate::{
-    metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize,
-};
-use crate::{
-    pgdatadir_mapping::DirectoryKind,
-    virtual_file::{MaybeFatalIo, VirtualFile},
-};
-use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
-use crate::{pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS, tenant::storage_layer::PersistentLayerKey};
+use anyhow::{Context, Result, anyhow, bail, ensure};
+use arc_swap::{ArcSwap, ArcSwapOption};
+use bytes::Bytes;
+use camino::Utf8Path;
+use chrono::{DateTime, Utc};
+use compaction::{CompactionOutcome, GcCompactionCombinedSettings};
+use enumset::EnumSet;
+use fail::fail_point;
+use futures::stream::FuturesUnordered;
+use futures::{FutureExt, StreamExt};
+use handle::ShardTimelineId;
+use layer_manager::Shutdown;
+use offload::OffloadError;
+use once_cell::sync::Lazy;
 use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
-use crate::config::PageServerConf;
-use crate::keyspace::{KeyPartitioning, KeySpace};
-use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL};
-use crate::pgdatadir_mapping::{CalculateLogicalSizeError, MetricsUpdate};
-use crate::tenant::config::TenantConfOpt;
-use pageserver_api::reltag::RelTag;
-use pageserver_api::shard::ShardIndex;
-
-use postgres_connection::PgConnectionConfig;
-use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE};
-use utils::{
-    completion,
-    generation::Generation,
-    id::TimelineId,
-    lsn::{AtomicLsn, Lsn, RecordLsn},
-    seqwait::SeqWait,
-    simple_rcu::{Rcu, RcuReadGuard},
+use pageserver_api::key::{
+    KEY_SIZE, Key, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
+    SPARSE_RANGE,
 };
-use crate::task_mgr;
-use crate::task_mgr::TaskKind;
-use crate::tenant::gc_result::GcResult;
-use crate::ZERO_PAGE;
-use pageserver_api::key::Key;
+use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};
+use pageserver_api::models::{
+    CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
+    DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
+    InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, TimelineState,
+};
+use pageserver_api::reltag::{BlockNumber, RelTag};
+use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};
+#[cfg(test)]
+use pageserver_api::value::Value;
+use postgres_connection::PgConnectionConfig;
+use postgres_ffi::v14::xlog_utils;
+use postgres_ffi::{WAL_SEGMENT_SIZE, to_pg_timestamp};
+use rand::Rng;
+use remote_storage::DownloadError;
+use serde_with::serde_as;
+use storage_broker::BrokerClientChannel;
+use tokio::runtime::Handle;
+use tokio::sync::mpsc::Sender;
+use tokio::sync::{Notify, oneshot, watch};
+use tokio_util::sync::CancellationToken;
+use tracing::*;
+use utils::generation::Generation;
+use utils::guard_arc_swap::GuardArcSwap;
+use utils::id::TimelineId;
+use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
+use utils::postgres_client::PostgresClientProtocol;
+use utils::rate_limit::RateLimit;
+use utils::seqwait::SeqWait;
+use utils::simple_rcu::{Rcu, RcuReadGuard};
+use utils::sync::gate::{Gate, GateGuard};
+use utils::{completion, critical, fs_ext, pausable_failpoint};
+use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
 
 use self::delete::DeleteTimelineFlow;
 pub(super) use self::eviction_task::EvictionTaskTenantState;
@@ -147,24 +82,48 @@ use self::eviction_task::EvictionTaskTimelineState;
 use self::layer_manager::LayerManager;
 use self::logical_size::LogicalSize;
 use self::walreceiver::{WalReceiver, WalReceiverConf};
-use super::remote_timeline_client::index::GcCompactionState;
+use super::config::TenantConf;
+use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
+use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
+use super::secondary::heatmap::HeatMapLayer;
+use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
+use super::upload_queue::NotInitialized;
 use super::{
-    config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized,
-    MaybeOffloaded,
+    AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,
+    debug_assert_current_span_has_tenant_and_timeline_id,
 };
-use super::{
-    debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, HeatMapTimeline,
+use crate::aux_file::AuxFileSizeEstimator;
+use crate::config::PageServerConf;
+use crate::context::{DownloadBehavior, RequestContext};
+use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};
+use crate::keyspace::{KeyPartitioning, KeySpace};
+use crate::l0_flush::{self, L0FlushGlobalState};
+use crate::metrics::{
+    DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics,
 };
-use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
-use super::{
-    remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
-    storage_layer::ReadableLayer,
+use crate::page_service::TenantManagerTypes;
+use crate::pgdatadir_mapping::{
+    CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp,
+    MAX_AUX_FILE_V2_DELTAS, MetricsUpdate,
 };
-use super::{secondary::heatmap::HeatMapLayer, GcError};
-
-#[cfg(test)]
-use pageserver_api::value::Value;
+use crate::task_mgr::TaskKind;
+use crate::tenant::config::{AttachmentMode, TenantConfOpt};
+use crate::tenant::gc_result::GcResult;
+use crate::tenant::layer_map::{LayerMap, SearchResult};
+use crate::tenant::metadata::TimelineMetadata;
+use crate::tenant::storage_layer::delta_layer::DeltaEntry;
+use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
+use crate::tenant::storage_layer::{
+    AsLayerDesc, BatchLayerWriter, DeltaLayerWriter, EvictionError, ImageLayerName,
+    ImageLayerWriter, InMemoryLayer, IoConcurrency, Layer, LayerAccessStatsReset, LayerName,
+    PersistentLayerDesc, PersistentLayerKey, ResidentLayer, ValueReconstructSituation,
+    ValueReconstructState, ValuesReconstructState,
+};
+use crate::tenant::tasks::BackgroundLoopKind;
+use crate::tenant::timeline::logical_size::CurrentLogicalSize;
+use crate::virtual_file::{MaybeFatalIo, VirtualFile};
+use crate::walingest::WalLagCooldown;
+use crate::{ZERO_PAGE, task_mgr, walredo};
 
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub(crate) enum FlushLoopState {
@@ -1474,13 +1433,22 @@ impl Timeline {
             | TaskKind::WalReceiverConnectionHandler
             | TaskKind::WalReceiverConnectionPoller => {
                 let is_myself = match who_is_waiting {
-                    WaitLsnWaiter::Timeline(waiter) => Weak::ptr_eq(&waiter.myself, &self.myself),
-                    WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService | WaitLsnWaiter::HttpEndpoint => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()),
+                    WaitLsnWaiter::Timeline(waiter) => {
+                        Weak::ptr_eq(&waiter.myself, &self.myself)
+                    }
+                    WaitLsnWaiter::Tenant
+                    | WaitLsnWaiter::PageService
+                    | WaitLsnWaiter::HttpEndpoint => unreachable!(
+                        "tenant or page_service context are not expected to have task kind {:?}",
+                        ctx.task_kind()
+                    ),
                 };
                 if is_myself {
                     if let Err(current) = self.last_record_lsn.would_wait_for(lsn) {
                         // walingest is the only one that can advance last_record_lsn; it should make sure to never reach here
-                        panic!("this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock");
+                        panic!(
+                            "this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock"
+                        );
                     }
                 } else {
                     // if another timeline's is waiting for us, there's no deadlock risk because
@@ -1509,12 +1477,12 @@ impl Timeline {
                 drop(_timer);
                 let walreceiver_status = self.walreceiver_status();
                 Err(WaitLsnError::Timeout(format!(
                     "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
                     lsn,
                     self.get_last_record_lsn(),
                     self.get_disk_consistent_lsn(),
                     walreceiver_status,
                 )))
             }
         }
     }
@@ -1618,10 +1586,18 @@ impl Timeline {
         if init || validate {
             let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn();
             if lsn < *latest_gc_cutoff_lsn {
-                bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
+                bail!(
+                    "tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}",
+                    lsn,
+                    *latest_gc_cutoff_lsn
+                );
             }
             if lsn < planned_cutoff {
-                bail!("tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", lsn, planned_cutoff);
+                bail!(
+                    "tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}",
+                    lsn,
+                    planned_cutoff
+                );
             }
         }
 
@@ -1745,7 +1721,9 @@ impl Timeline {
             // This is not harmful, but it only happens in relatively rare cases where
             // time-based checkpoints are not happening fast enough to keep the amount of
             // ephemeral data within configured limits. It's a sign of stress on the system.
-            tracing::info!("Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure");
+            tracing::info!(
+                "Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure"
+            );
         }
     }
 
@@ -1871,7 +1849,9 @@ impl Timeline {
 
         // Last record Lsn could be zero in case the timeline was just created
         if !last_record_lsn.is_valid() {
-            warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
+            warn!(
+                "Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"
+            );
             return Ok(CompactionOutcome::Skipped);
         }
 
@@ -2033,7 +2013,9 @@ impl Timeline {
             // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
             // we also do a final check here to ensure that the queue is empty.
             if !self.remote_client.no_pending_work() {
-                warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
+                warn!(
+                    "still have pending work in remote upload queue, but continuing shutting down anyways"
+                );
             }
         }
     }
@@ -2042,7 +2024,9 @@ impl Timeline {
         // drain the upload queue
         self.remote_client.shutdown().await;
         if !self.remote_client.no_pending_work() {
-            warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
+            warn!(
+                "still have pending work in remote upload queue, but continuing shutting down anyways"
+            );
         }
     }
 
@@ -2946,8 +2930,9 @@ impl Timeline {
         disk_consistent_lsn: Lsn,
         index_part: IndexPart,
     ) -> anyhow::Result<()> {
-        use init::{Decision::*, Discovered, DismissedLayer};
         use LayerName::*;
+        use init::Decision::*;
+        use init::{Discovered, DismissedLayer};
 
         let mut guard = self.layers.write().await;
 
@@ -3162,11 +3147,15 @@ impl Timeline {
             }
             TimelineState::Loading => {
                 // Import does not return an activated timeline.
                info!(
-                info!("discarding priority boost for logical size calculation because timeline is not yet active");
+                    "discarding priority boost for logical size calculation because timeline is not yet active"
+                );
             }
             TimelineState::Active => {
                 // activation should be setting the once cell
-                warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
+                warn!(
+                    "unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work"
+                );
                 debug_assert!(false);
             }
         }
@@ -4306,10 +4295,14 @@ impl Timeline {
             // This path is only taken for tenants with multiple shards: single sharded tenants should
             // never encounter a gap in the wal.
             let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
-            tracing::debug!("Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}");
+            tracing::debug!(
+                "Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}"
+            );
             if self.set_disk_consistent_lsn(frozen_to_lsn) {
                 if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) {
-                    tracing::warn!("Failed to schedule metadata upload after updating disk_consistent_lsn: {e}");
+                    tracing::warn!(
+                        "Failed to schedule metadata upload after updating disk_consistent_lsn: {e}"
+                    );
                 }
             }
         }
@@ -4534,7 +4527,10 @@ impl Timeline {
     /// This function must only be used from the layer flush task.
     fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool {
         let old_value = self.disk_consistent_lsn.fetch_max(new_value);
-        assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}");
+        assert!(
+            new_value >= old_value,
+            "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"
+        );
 
         self.metrics
             .disk_consistent_lsn_gauge
@@ -4829,7 +4825,9 @@ impl Timeline {
                 // any metadata keys, keys, as that would lead to actual data
                 // loss.
                 if img_key.is_rel_fsm_block_key() || img_key.is_rel_vm_block_key() {
-                    warn!("could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}");
+                    warn!(
+                        "could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}"
+                    );
                     ZERO_PAGE.clone()
                 } else {
                     return Err(CreateImageLayersError::from(err));
@@ -4908,7 +4906,8 @@ impl Timeline {
 
         let trigger_generation = delta_files_accessed as usize >= MAX_AUX_FILE_V2_DELTAS;
         info!(
-            "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", elapsed.as_secs_f64()
+            "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s",
+            elapsed.as_secs_f64()
         );
 
         if !trigger_generation && mode == ImageLayerCreationMode::Try {
@@ -5230,7 +5229,8 @@ impl Timeline {
                     if should_yield {
                         tracing::info!(
                             "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers",
-                            partition.start().unwrap(), partition.end().unwrap()
+                            partition.start().unwrap(),
+                            partition.end().unwrap()
                         );
                         last_partition_processed = Some(partition.clone());
                         all_generated = false;
@@ -5588,7 +5588,9 @@ impl Timeline {
                 // because we have not implemented L0 => L0 compaction.
                 duplicated_layers.insert(l.layer_desc().key());
             } else if LayerMap::is_l0(&l.layer_desc().key_range, l.layer_desc().is_delta) {
-                return Err(CompactionError::Other(anyhow::anyhow!("compaction generates a L0 layer file as output, which will cause infinite compaction.")));
+                return Err(CompactionError::Other(anyhow::anyhow!(
+                    "compaction generates a L0 layer file as output, which will cause infinite compaction."
+                )));
             } else {
                 insert_layers.push(l.clone());
             }
@@ -5712,8 +5714,10 @@ impl Timeline {
             .await
         {
             Ok((index_part, index_generation, _index_mtime)) => {
-                tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
-                    index_part.metadata.latest_gc_cutoff_lsn());
+                tracing::info!(
+                    "GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
+                    index_part.metadata.latest_gc_cutoff_lsn()
+                );
                 Ok(Some(index_part.metadata.latest_gc_cutoff_lsn()))
             }
             Err(DownloadError::NotFound) => {
@@ -6122,9 +6126,7 @@ impl Timeline {
         if let Some((img_lsn, img)) = &data.img {
             trace!(
                 "found page image for key {} at {}, no WAL redo required, req LSN {}",
-                key,
-                img_lsn,
-                request_lsn,
+                key, img_lsn, request_lsn,
             );
             Ok(img.clone())
         } else {
@@ -6153,7 +6155,12 @@ impl Timeline {
                     request_lsn
                 );
             } else {
-                trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn);
+                trace!(
+                    "found {} WAL records that will init the page for {} at {}, performing WAL redo",
+                    data.records.len(),
+                    key,
+                    request_lsn
+                );
             };
             let res = self
                 .walredo_mgr
@@ -6697,7 +6704,9 @@ impl TimelineWriter<'_> {
 
         if let Some(wait_threshold) = wait_threshold {
             if l0_count >= wait_threshold {
-                debug!("layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers");
+                debug!(
+                    "layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers"
+                );
                 self.tl.wait_flush_completion(flush_id).await?;
             }
         }
@@ -6884,17 +6893,15 @@ mod tests {
     use pageserver_api::key::Key;
     use pageserver_api::value::Value;
     use tracing::Instrument;
-    use utils::{id::TimelineId, lsn::Lsn};
-
-    use crate::tenant::{
-        harness::{test_img, TenantHarness},
-        layer_map::LayerMap,
-        storage_layer::{Layer, LayerName, LayerVisibilityHint},
-        timeline::{DeltaLayerTestDesc, EvictionError},
-        PreviousHeatmap, Timeline,
-    };
+    use utils::id::TimelineId;
+    use utils::lsn::Lsn;
 
     use super::HeatMapTimeline;
+    use crate::tenant::harness::{TenantHarness, test_img};
+    use crate::tenant::layer_map::LayerMap;
+    use crate::tenant::storage_layer::{Layer, LayerName, LayerVisibilityHint};
+    use crate::tenant::timeline::{DeltaLayerTestDesc, EvictionError};
+    use crate::tenant::{PreviousHeatmap, Timeline};
 
     fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) {
         assert_eq!(lhs.layers.len(), rhs.layers.len());
Some files were not shown because too many files have changed in this diff.