Update storage components to edition 2024 (#10919)

Updates storage components to edition 2024. We like to stay on the
latest edition if possible. There are no functional changes, but some
code changes were needed to accommodate the edition's breaking
changes.
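
The two breaking changes that drive most of the manual edits in the first commit are visible throughout the diff: edition 2024 reserves `gen` as a keyword, and it requires `unsafe(...)` around attributes such as `export_name`. A minimal sketch of both (types simplified relative to the real code):

```rust
// 1. `gen` is a keyword in edition 2024, so fields, bindings and method
//    calls named `gen` need raw-identifier syntax. serde strips the `r#`
//    prefix when deriving names, so the serialized field is still "gen".
pub struct ValidateRequestTenant {
    pub id: u32, // simplified; the real field is a TenantShardId
    pub r#gen: u32,
}
// Method calls too, e.g. rand's Rng::gen becomes `rng.r#gen::<u32>()`.

// 2. Attributes that can subvert safety (`export_name`, `no_mangle`, ...)
//    must now be wrapped in `unsafe(...)`:
#[allow(non_upper_case_globals)]
#[unsafe(export_name = "malloc_conf")]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
```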

The PR has two commits:

* the first commit updates storage crates to edition 2024 and appeases
`cargo clippy` by changing code. I accidentally ran the formatter
on some files that had other edits.
* the second commit performs a `cargo fmt`

I would recommend a close review of the first commit and a lighter
review of the second (as it just runs `cargo fmt`).
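
For a sense of what that formatting pass does: the 2024 style edition changed import sorting so that uppercase names like `Result` now sort before lowercase ones like `bail`; the splitting of nested `use` trees into one statement per module, and the std / external / crate grouping, additionally assume the corresponding rustfmt settings in the workspace config. A hypothetical before/after:

```rust
// Before (as formatted under the 2021 style edition):
//
//     use anyhow::{bail, Result};
//     use std::{collections::HashMap, str::FromStr};
//
// After `cargo fmt` on edition 2024 (`Result` sorts before `bail`;
// one `use` per module, with std imports grouped first):
use std::collections::HashMap;
use std::str::FromStr;

use anyhow::{Result, bail};
```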

part of https://github.com/neondatabase/neon/issues/10918
Arpad Müller
2025-02-26 00:51:37 +01:00
committed by GitHub
parent dc975d554a
commit 920040e402
221 changed files with 3543 additions and 3611 deletions

View File

@@ -1,7 +1,7 @@
[package]
name = "pageserver_api"
version = "0.1.0"
edition.workspace = true
edition = "2024"
license.workspace = true
[features]

View File

@@ -9,19 +9,18 @@ pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
use std::collections::HashMap;
use std::num::{NonZeroU64, NonZeroUsize};
use std::str::FromStr;
use std::time::Duration;
use postgres_backend::AuthType;
use remote_storage::RemoteStorageConfig;
use serde_with::serde_as;
use std::{
collections::HashMap,
num::{NonZeroU64, NonZeroUsize},
str::FromStr,
time::Duration,
};
use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol};
use utils::logging::LogFormat;
use utils::postgres_client::PostgresClientProtocol;
use crate::models::ImageCompressionAlgorithm;
use crate::models::LsnLease;
use crate::models::{ImageCompressionAlgorithm, LsnLease};
// Certain metadata (e.g. externally-addressable name, AZ) is delivered
// as a separate structure. This information is not needed by the pageserver
@@ -367,10 +366,10 @@ pub struct TenantConfigToml {
}
pub mod defaults {
use crate::models::ImageCompressionAlgorithm;
pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
use crate::models::ImageCompressionAlgorithm;
pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

View File

@@ -9,11 +9,8 @@ use std::time::{Duration, Instant};
use serde::{Deserialize, Serialize};
use utils::id::{NodeId, TenantId};
use crate::models::PageserverUtilization;
use crate::{
models::{ShardParameters, TenantConfig},
shard::{ShardStripeSize, TenantShardId},
};
use crate::models::{PageserverUtilization, ShardParameters, TenantConfig};
use crate::shard::{ShardStripeSize, TenantShardId};
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
@@ -354,7 +351,7 @@ impl FromStr for SkSchedulingPolicy {
_ => {
return Err(anyhow::anyhow!(
"Unknown scheduling policy '{s}', try active,pause,decomissioned"
))
));
}
})
}
@@ -457,9 +454,10 @@ pub struct SafekeeperSchedulingPolicyRequest {
#[cfg(test)]
mod test {
use super::*;
use serde_json;
use super::*;
/// Check stability of PlacementPolicy's serialization
#[test]
fn placement_policy_encoding() -> anyhow::Result<()> {

View File

@@ -1,11 +1,12 @@
use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE};
use std::fmt;
use std::ops::Range;
use anyhow::{Result, bail};
use byteorder::{BE, ByteOrder};
use bytes::Bytes;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::Oid;
use postgres_ffi::RepOriginId;
use postgres_ffi::{Oid, RepOriginId};
use serde::{Deserialize, Serialize};
use std::{fmt, ops::Range};
use utils::const_assert;
use crate::reltag::{BlockNumber, RelTag, SlruKind};
@@ -954,25 +955,22 @@ impl std::str::FromStr for Key {
mod tests {
use std::str::FromStr;
use crate::key::is_metadata_key_slice;
use crate::key::Key;
use rand::Rng;
use rand::SeedableRng;
use rand::{Rng, SeedableRng};
use super::AUX_KEY_PREFIX;
use crate::key::{Key, is_metadata_key_slice};
#[test]
fn display_fromstr_bijection() {
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let key = Key {
field1: rng.gen(),
field2: rng.gen(),
field3: rng.gen(),
field4: rng.gen(),
field5: rng.gen(),
field6: rng.gen(),
field1: rng.r#gen(),
field2: rng.r#gen(),
field3: rng.r#gen(),
field4: rng.r#gen(),
field5: rng.r#gen(),
field6: rng.r#gen(),
};
assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());

View File

@@ -1,11 +1,10 @@
use postgres_ffi::BLCKSZ;
use std::ops::Range;
use crate::{
key::Key,
shard::{ShardCount, ShardIdentity},
};
use itertools::Itertools;
use postgres_ffi::BLCKSZ;
use crate::key::Key;
use crate::shard::{ShardCount, ShardIdentity};
///
/// Represents a set of Keys, in a compact form.
@@ -609,15 +608,13 @@ pub fn singleton_range(key: Key) -> Range<Key> {
#[cfg(test)]
mod tests {
use std::fmt::Write;
use rand::{RngCore, SeedableRng};
use crate::{
models::ShardParameters,
shard::{ShardCount, ShardNumber},
};
use super::*;
use std::fmt::Write;
use crate::models::ShardParameters;
use crate::shard::{ShardCount, ShardNumber};
// Helper function to create a key range.
//

View File

@@ -2,38 +2,30 @@ pub mod detach_ancestor;
pub mod partitioning;
pub mod utilization;
#[cfg(feature = "testing")]
use camino::Utf8PathBuf;
pub use utilization::PageserverUtilization;
use core::ops::Range;
use std::{
collections::HashMap,
fmt::Display,
io::{BufRead, Read},
num::{NonZeroU32, NonZeroU64, NonZeroUsize},
str::FromStr,
time::{Duration, SystemTime},
};
use std::collections::HashMap;
use std::fmt::Display;
use std::io::{BufRead, Read};
use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
use std::str::FromStr;
use std::time::{Duration, SystemTime};
use byteorder::{BigEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
#[cfg(feature = "testing")]
use camino::Utf8PathBuf;
use postgres_ffi::BLCKSZ;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_with::serde_as;
use utils::{
completion,
id::{NodeId, TenantId, TimelineId},
lsn::Lsn,
postgres_client::PostgresClientProtocol,
serde_system_time,
};
pub use utilization::PageserverUtilization;
use utils::id::{NodeId, TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::postgres_client::PostgresClientProtocol;
use utils::{completion, serde_system_time};
use crate::{
key::{CompactKey, Key},
reltag::RelTag,
shard::{ShardCount, ShardStripeSize, TenantShardId},
};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use crate::key::{CompactKey, Key};
use crate::reltag::RelTag;
use crate::shard::{ShardCount, ShardStripeSize, TenantShardId};
/// The state of a tenant in this pageserver.
///
@@ -332,7 +324,8 @@ pub struct ImportPgdataIdempotencyKey(pub String);
impl ImportPgdataIdempotencyKey {
pub fn random() -> Self {
use rand::{distributions::Alphanumeric, Rng};
use rand::Rng;
use rand::distributions::Alphanumeric;
Self(
rand::thread_rng()
.sample_iter(&Alphanumeric)
@@ -2288,9 +2281,10 @@ impl Default for PageTraceEvent {
#[cfg(test)]
mod tests {
use serde_json::json;
use std::str::FromStr;
use serde_json::json;
use super::*;
#[test]

View File

@@ -1,5 +1,7 @@
use std::time::SystemTime;
use utils::{serde_percent::Percent, serde_system_time};
use utils::serde_percent::Percent;
use utils::serde_system_time;
/// Pageserver current utilization and scoring for how good candidate the pageserver would be for
/// the next tenant.
@@ -131,12 +133,12 @@ impl PageserverUtilization {
/// Test helper
pub mod test_utilization {
use super::PageserverUtilization;
use std::time::SystemTime;
use utils::{
serde_percent::Percent,
serde_system_time::{self},
};
use utils::serde_percent::Percent;
use utils::serde_system_time::{self};
use super::PageserverUtilization;
// Parameters of the imaginary node used for test utilization instances
const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024;

View File

@@ -1,7 +1,7 @@
//! This module defines the WAL record format used within the pageserver.
use bytes::Bytes;
use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember};
use postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record};
use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId};
use serde::{Deserialize, Serialize};
use utils::bin_ser::DeserializeError;

View File

@@ -1,10 +1,10 @@
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM};
use postgres_ffi::Oid;
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
use serde::{Deserialize, Serialize};
///
/// Relation data file segment id throughout the Postgres cluster.

View File

@@ -33,12 +33,13 @@
use std::hash::{Hash, Hasher};
use crate::{key::Key, models::ShardParameters};
#[doc(inline)]
pub use ::utils::shard::*;
use postgres_ffi::relfile_utils::INIT_FORKNUM;
use serde::{Deserialize, Serialize};
#[doc(inline)]
pub use ::utils::shard::*;
use crate::key::Key;
use crate::models::ShardParameters;
/// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],
/// and to check whether that [`ShardNumber`] is the same as the current shard.
@@ -337,7 +338,8 @@ pub fn describe(
mod tests {
use std::str::FromStr;
use utils::{id::TenantId, Hex};
use utils::Hex;
use utils::id::TenantId;
use super::*;

View File

@@ -6,9 +6,9 @@
use serde::{Deserialize, Serialize};
use utils::id::NodeId;
use crate::{
controller_api::NodeRegisterRequest, models::LocationConfigMode, shard::TenantShardId,
};
use crate::controller_api::NodeRegisterRequest;
use crate::models::LocationConfigMode;
use crate::shard::TenantShardId;
/// Upcall message sent by the pageserver to the configured `control_plane_api` on
/// startup.
@@ -30,7 +30,7 @@ fn default_mode() -> LocationConfigMode {
pub struct ReAttachResponseTenant {
pub id: TenantShardId,
/// Mandatory if LocationConfigMode is None or set to an Attached* mode
pub gen: Option<u32>,
pub r#gen: Option<u32>,
/// Default value only for backward compat: this field should be set
#[serde(default = "default_mode")]
@@ -44,7 +44,7 @@ pub struct ReAttachResponse {
#[derive(Serialize, Deserialize)]
pub struct ValidateRequestTenant {
pub id: TenantShardId,
pub gen: u32,
pub r#gen: u32,
}
#[derive(Serialize, Deserialize)]

View File

@@ -7,10 +7,11 @@
//! Note that the [`Value`] type is used for the permanent storage format, so any
//! changes to it must be backwards compatible.
use crate::record::NeonWalRecord;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use crate::record::NeonWalRecord;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum Value {
/// An Image value contains a full copy of the value
@@ -83,11 +84,11 @@ impl ValueBytes {
#[cfg(test)]
mod test {
use super::*;
use bytes::Bytes;
use utils::bin_ser::BeSer;
use super::*;
macro_rules! roundtrip {
($orig:expr, $expected:expr) => {{
let orig: Value = $orig;

View File

@@ -1,7 +1,7 @@
[package]
name = "remote_storage"
version = "0.1.0"
edition.workspace = true
edition = "2024"
license.workspace = true
[dependencies]

View File

@@ -2,33 +2,26 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::env;
use std::fmt::Display;
use std::io;
use std::num::NonZeroU32;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::SystemTime;
use std::time::{Duration, SystemTime};
use std::{env, io};
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Context;
use anyhow::Result;
use anyhow::{Context, Result};
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::HttpClient;
use azure_core::TransportOptions;
use azure_core::{Continuable, RetryOptions};
use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions};
use azure_storage::StorageCredentials;
use azure_storage_blobs::blob::CopyStatus;
use azure_storage_blobs::prelude::ClientBuilder;
use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
use azure_storage_blobs::blob::operations::GetBlobBuilder;
use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient};
use bytes::Bytes;
use futures::FutureExt;
use futures::future::Either;
use futures::stream::Stream;
use futures::FutureExt;
use futures_util::StreamExt;
use futures_util::TryStreamExt;
use futures_util::{StreamExt, TryStreamExt};
use http_types::{StatusCode, Url};
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
@@ -36,12 +29,13 @@ use tracing::debug;
use utils::backoff;
use utils::backoff::exponential_backoff_duration_seconds;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::DownloadKind;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use crate::config::AzureConfig;
use crate::error::Cancelled;
use crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests};
use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode,
ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
};
pub struct AzureBlobStorage {

View File

@@ -1,8 +1,10 @@
use std::{fmt::Debug, num::NonZeroUsize, str::FromStr, time::Duration};
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::str::FromStr;
use std::time::Duration;
use aws_sdk_s3::types::StorageClass;
use camino::Utf8PathBuf;
use serde::{Deserialize, Serialize};
use crate::{

View File

@@ -18,40 +18,35 @@ mod s3_bucket;
mod simulate_failures;
mod support;
use std::{
collections::HashMap,
fmt::Debug,
num::NonZeroU32,
ops::Bound,
pin::{pin, Pin},
sync::Arc,
time::SystemTime,
};
use std::collections::HashMap;
use std::fmt::Debug;
use std::num::NonZeroU32;
use std::ops::Bound;
use std::pin::{Pin, pin};
use std::sync::Arc;
use std::time::SystemTime;
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here.
pub use azure_core::Etag;
use bytes::Bytes;
use futures::{stream::Stream, StreamExt};
use camino::{Utf8Path, Utf8PathBuf};
pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
use futures::StreamExt;
use futures::stream::Stream;
use itertools::Itertools as _;
use s3_bucket::RequestKind;
use serde::{Deserialize, Serialize};
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use tracing::info;
pub use self::{
azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
simulate_failures::UnreliableWrapper,
};
use s3_bucket::RequestKind;
pub use self::azure_blob::AzureBlobStorage;
pub use self::local_fs::LocalFs;
pub use self::s3_bucket::S3Bucket;
pub use self::simulate_failures::UnreliableWrapper;
pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};
/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here.
pub use azure_core::Etag;
pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
/// Default concurrency limit for S3 operations
///
/// Currently, sync happens with AWS S3, that has two limits on requests per second:
@@ -640,8 +635,13 @@ impl GenericRemoteStorage {
let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
let access_key_id =
std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
info!(
"Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
s3_config.bucket_name,
s3_config.bucket_region,
s3_config.prefix_in_bucket,
s3_config.endpoint
);
Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?))
}
RemoteStorageKind::AzureContainer(azure_config) => {
@@ -649,8 +649,12 @@ impl GenericRemoteStorage {
.storage_account
.as_deref()
.unwrap_or("<AZURE_STORAGE_ACCOUNT>");
info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
info!(
"Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
azure_config.container_name,
azure_config.container_region,
azure_config.prefix_in_container
);
Self::AzureBlob(Arc::new(AzureBlobStorage::new(
azure_config,
timeout,

View File

@@ -4,31 +4,26 @@
//! This storage used in tests, but can also be used in cases when a certain persistent
//! volume is mounted to the local FS.
use std::{
collections::HashSet,
io::ErrorKind,
num::NonZeroU32,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use std::collections::HashSet;
use std::io::ErrorKind;
use std::num::NonZeroU32;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use anyhow::{bail, ensure, Context};
use anyhow::{Context, bail, ensure};
use bytes::Bytes;
use camino::{Utf8Path, Utf8PathBuf};
use futures::stream::Stream;
use tokio::{
fs,
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
};
use tokio_util::{io::ReaderStream, sync::CancellationToken};
use tokio::fs;
use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
use tokio_util::io::ReaderStream;
use tokio_util::sync::CancellationToken;
use utils::crashsafe::path_with_suffix_extension;
use crate::{
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};
use super::{RemoteStorage, StorageMetadata};
use crate::Etag;
use crate::{
Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject,
REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel,
};
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
@@ -91,7 +86,8 @@ impl LocalFs {
#[cfg(test)]
async fn list_all(&self) -> anyhow::Result<Vec<RemotePath>> {
use std::{future::Future, pin::Pin};
use std::future::Future;
use std::pin::Pin;
fn get_all_files<'a, P>(
directory_path: P,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Utf8PathBuf>>> + Send + Sync + 'a>>
@@ -284,7 +280,9 @@ impl LocalFs {
})?;
if bytes_read < from_size_bytes {
bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes");
bail!(
"Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"
);
}
// Check if there is any extra data after the given size.
let mut from = buffer_to_read.into_inner();
@@ -642,10 +640,13 @@ fn mock_etag(meta: &std::fs::Metadata) -> Etag {
#[cfg(test)]
mod fs_tests {
use super::*;
use std::collections::HashMap;
use std::io::Write;
use std::ops::Bound;
use camino_tempfile::tempdir;
use std::{collections::HashMap, io::Write, ops::Bound};
use super::*;
async fn read_and_check_metadata(
storage: &LocalFs,
@@ -736,9 +737,14 @@ mod fs_tests {
);
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
match storage
.download(&non_existing_path, &DownloadOpts::default(), &cancel)
.await
{
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
other => panic!(
"Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"
),
}
Ok(())
}

View File

@@ -1,5 +1,5 @@
use metrics::{
register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, IntCounter,
Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec,
};
use once_cell::sync::Lazy;
@@ -16,8 +16,8 @@ pub(crate) enum RequestKind {
Head = 6,
}
use scopeguard::ScopeGuard;
use RequestKind::*;
use scopeguard::ScopeGuard;
impl RequestKind {
const fn as_str(&self) -> &'static str {

View File

@@ -4,56 +4,50 @@
//! allowing multiple api users to independently work with the same S3 bucket, if
//! their bucket prefixes are both specified and different.
use std::{
borrow::Cow,
collections::HashMap,
num::NonZeroU32,
pin::Pin,
sync::Arc,
task::{Context, Poll},
time::{Duration, SystemTime},
};
use std::borrow::Cow;
use std::collections::HashMap;
use std::num::NonZeroU32;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, SystemTime};
use anyhow::{anyhow, Context as _};
use aws_config::{
default_provider::credentials::DefaultCredentialsChain,
retry::{RetryConfigBuilder, RetryMode},
BehaviorVersion,
};
use aws_sdk_s3::{
config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep},
error::SdkError,
operation::{get_object::GetObjectError, head_object::HeadObjectError},
types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass},
Client,
};
use anyhow::{Context as _, anyhow};
use aws_config::BehaviorVersion;
use aws_config::default_provider::credentials::DefaultCredentialsChain;
use aws_config::retry::{RetryConfigBuilder, RetryMode};
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep};
use aws_sdk_s3::error::SdkError;
use aws_sdk_s3::operation::get_object::GetObjectError;
use aws_sdk_s3::operation::head_object::HeadObjectError;
use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass};
use aws_smithy_async::rt::sleep::TokioSleep;
use http_body_util::StreamBody;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
use aws_smithy_types::DateTime;
use aws_smithy_types::body::SdkBody;
use aws_smithy_types::byte_stream::ByteStream;
use aws_smithy_types::date_time::ConversionError;
use bytes::Bytes;
use futures::stream::Stream;
use futures_util::StreamExt;
use http_body_util::StreamBody;
use http_types::StatusCode;
use hyper::body::Frame;
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
use utils::backoff;
use super::StorageMetadata;
use crate::{
config::S3Config,
error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE_S3,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
use crate::metrics::AttemptOutcome;
use crate::config::S3Config;
use crate::error::Cancelled;
pub(super) use crate::metrics::RequestKind;
use crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests};
use crate::support::PermitCarrying;
use crate::{
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage,
TimeTravelError, TimeoutOrCancel,
};
/// AWS S3 storage.
pub struct S3Bucket {
@@ -958,8 +952,10 @@ impl RemoteStorage for S3Bucket {
version_id, key, ..
} = &vd;
if version_id == "null" {
return Err(TimeTravelError::Other(anyhow!("Received ListVersions response for key={key} with version_id='null', \
indicating either disabled versioning, or legacy objects with null version id values")));
return Err(TimeTravelError::Other(anyhow!(
"Received ListVersions response for key={key} with version_id='null', \
indicating either disabled versioning, or legacy objects with null version id values"
)));
}
tracing::trace!(
"Parsing version key={key} version_id={version_id} kind={:?}",
@@ -1126,9 +1122,10 @@ impl VerOrDelete {
#[cfg(test)]
mod tests {
use camino::Utf8Path;
use std::num::NonZeroUsize;
use camino::Utf8Path;
use crate::{RemotePath, S3Bucket, S3Config};
#[tokio::test]

View File

@@ -1,14 +1,15 @@
//! This module provides a wrapper around a real RemoteStorage implementation that
//! causes the first N attempts at each upload or download operation to fail. For
//! testing purposes.
use bytes::Bytes;
use futures::stream::Stream;
use futures::StreamExt;
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::num::NonZeroU32;
use std::sync::Mutex;
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use std::{collections::hash_map::Entry, sync::Arc};
use bytes::Bytes;
use futures::StreamExt;
use futures::stream::Stream;
use tokio_util::sync::CancellationToken;
use crate::{

View File

@@ -1,9 +1,7 @@
use std::{
future::Future,
pin::Pin,
task::{Context, Poll},
time::Duration,
};
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::Duration;
use bytes::Bytes;
use futures_util::Stream;
@@ -114,9 +112,10 @@ pub(crate) fn cancel_or_timeout(
#[cfg(test)]
mod tests {
use futures::stream::StreamExt;
use super::*;
use crate::DownloadError;
use futures::stream::StreamExt;
#[tokio::test(start_paused = true)]
async fn cancelled_download_stream() {

View File

@@ -1,19 +1,20 @@
use std::collections::HashSet;
use std::num::NonZeroU32;
use std::ops::Bound;
use std::sync::Arc;
use anyhow::Context;
use camino::Utf8Path;
use futures::StreamExt;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use std::ops::Bound;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use crate::common::{download_to_vec, upload_stream, wrap_stream};
use super::{
MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, MaybeEnabledStorageWithTestBlobs,
};
use crate::common::{download_to_vec, upload_stream, wrap_stream};
/// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries.
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
@@ -62,7 +63,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
root_remote_prefixes,
HashSet::from([base_prefix.clone()]),
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
);
@@ -84,7 +86,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
.difference(&nested_remote_prefixes)
.collect::<HashSet<_>>();
assert_eq!(
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
remote_only_prefixes.len() + missing_uploaded_prefixes.len(),
0,
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
);
@@ -119,7 +122,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
.difference(&nested_remote_prefixes_combined)
.collect::<HashSet<_>>();
assert_eq!(
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
remote_only_prefixes.len() + missing_uploaded_prefixes.len(),
0,
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
);

View File

@@ -1,9 +1,9 @@
use std::collections::HashSet;
use std::env;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::sync::Arc;
use std::time::UNIX_EPOCH;
use std::{collections::HashSet, time::Duration};
use std::time::{Duration, UNIX_EPOCH};
use anyhow::Context;
use remote_storage::{
@@ -208,7 +208,7 @@ async fn create_azure_client(
.as_millis();
// because nanos can be the same for two threads so can millis, add randomness
let random = rand::thread_rng().gen::<u32>();
let random = rand::thread_rng().r#gen::<u32>();
let remote_storage_config = RemoteStorageConfig {
storage: RemoteStorageKind::AzureContainer(AzureConfig {

View File

@@ -1,13 +1,12 @@
use std::collections::HashSet;
use std::env;
use std::fmt::{Debug, Display};
use std::future::Future;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
use std::{collections::HashSet, time::SystemTime};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::common::{download_to_vec, upload_stream};
use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
@@ -15,12 +14,13 @@ use remote_storage::{
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageConfig, RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
use test_context::{AsyncTestContext, test_context};
use tokio::io::AsyncBufReadExt;
use tokio_util::sync::CancellationToken;
use tracing::info;
use crate::common::{download_to_vec, upload_stream};
mod common;
#[path = "common/tests.rs"]
@@ -128,8 +128,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t0_hwt = t0 + half_wt;
let t1_hwt = t1 - half_wt;
if !(t0_hwt..=t1_hwt).contains(&last_modified) {
panic!("last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
This likely means a large lock discrepancy between S3 and the local clock.");
panic!(
"last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
This likely means a large lock discrepancy between S3 and the local clock."
);
}
}
@@ -383,7 +385,7 @@ async fn create_s3_client(
.as_millis();
// because nanos can be the same for two threads so can millis, add randomness
let random = rand::thread_rng().gen::<u32>();
let random = rand::thread_rng().r#gen::<u32>();
let remote_storage_config = RemoteStorageConfig {
storage: RemoteStorageKind::AwsS3(S3Config {

View File

@@ -1,7 +1,7 @@
[package]
name = "safekeeper_api"
version = "0.1.0"
edition.workspace = true
edition = "2024"
license.workspace = true
[dependencies]

View File

@@ -2,7 +2,8 @@
//! rfcs/035-safekeeper-dynamic-membership-change.md
//! for details.
use std::{collections::HashSet, fmt::Display};
use std::collections::HashSet;
use std::fmt::Display;
use anyhow;
use anyhow::bail;
@@ -148,9 +149,10 @@ impl Display for Configuration {
#[cfg(test)]
mod tests {
use super::{MemberSet, SafekeeperId};
use utils::id::NodeId;
use super::{MemberSet, SafekeeperId};
#[test]
fn test_member_set() {
let mut members = MemberSet::empty();

View File

@@ -1,18 +1,17 @@
//! Types used in safekeeper http API. Many of them are also reused internally.
use std::net::SocketAddr;
use pageserver_api::shard::ShardIdentity;
use postgres_ffi::TimestampTz;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use tokio::time::Instant;
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
use utils::lsn::Lsn;
use utils::pageserver_feedback::PageserverFeedback;
use utils::{
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
lsn::Lsn,
pageserver_feedback::PageserverFeedback,
};
use crate::{membership::Configuration, ServerInfo, Term};
use crate::membership::Configuration;
use crate::{ServerInfo, Term};
#[derive(Debug, Serialize)]
pub struct SafekeeperStatus {

View File

@@ -1,7 +1,7 @@
[package]
name = "pageserver"
version = "0.1.0"
edition.workspace = true
edition = "2024"
license.workspace = true
[features]

View File

@@ -1,22 +1,20 @@
use std::{env, num::NonZeroUsize};
use std::env;
use std::num::NonZeroUsize;
use bytes::Bytes;
use camino::Utf8PathBuf;
use criterion::{criterion_group, criterion_main, Criterion};
use pageserver::{
config::PageServerConf,
context::{DownloadBehavior, RequestContext},
l0_flush::{L0FlushConfig, L0FlushGlobalState},
page_cache,
task_mgr::TaskKind,
tenant::storage_layer::InMemoryLayer,
virtual_file,
};
use pageserver_api::{key::Key, shard::TenantShardId, value::Value};
use utils::{
bin_ser::BeSer,
id::{TenantId, TimelineId},
};
use criterion::{Criterion, criterion_group, criterion_main};
use pageserver::config::PageServerConf;
use pageserver::context::{DownloadBehavior, RequestContext};
use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};
use pageserver::task_mgr::TaskKind;
use pageserver::tenant::storage_layer::InMemoryLayer;
use pageserver::{page_cache, virtual_file};
use pageserver_api::key::Key;
use pageserver_api::shard::TenantShardId;
use pageserver_api::value::Value;
use utils::bin_ser::BeSer;
use utils::id::{TenantId, TimelineId};
use wal_decoder::serialized_batch::SerializedValueBatch;
// A very cheap hash for generating non-sequential keys.

View File

@@ -1,23 +1,21 @@
use criterion::measurement::WallTime;
use pageserver::keyspace::{KeyPartitioning, KeySpace};
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::storage_layer::PersistentLayerDesc;
use pageserver_api::key::Key;
use pageserver_api::shard::TenantShardId;
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
use std::cmp::{max, min};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::str::FromStr;
use std::time::Instant;
use criterion::measurement::WallTime;
use criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main};
use pageserver::keyspace::{KeyPartitioning, KeySpace};
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc};
use pageserver_api::key::Key;
use pageserver_api::shard::TenantShardId;
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion};
fn fixture_path(relative: &str) -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative)
}

View File

@@ -56,20 +56,23 @@
//! medium/128 time: [10.412 ms 10.574 ms 10.718 ms]
//! ```
use std::future::Future;
use std::sync::Arc;
use std::time::{Duration, Instant};
use anyhow::Context;
use bytes::{Buf, Bytes};
use criterion::{BenchmarkId, Criterion};
use once_cell::sync::Lazy;
use pageserver::{config::PageServerConf, walredo::PostgresRedoManager};
use pageserver::config::PageServerConf;
use pageserver::walredo::PostgresRedoManager;
use pageserver_api::key::Key;
use pageserver_api::record::NeonWalRecord;
use pageserver_api::{key::Key, shard::TenantShardId};
use std::{
future::Future,
sync::Arc,
time::{Duration, Instant},
};
use tokio::{sync::Barrier, task::JoinSet};
use utils::{id::TenantId, lsn::Lsn};
use pageserver_api::shard::TenantShardId;
use tokio::sync::Barrier;
use tokio::task::JoinSet;
use utils::id::TenantId;
use utils::lsn::Lsn;
fn bench(c: &mut Criterion) {
macro_rules! bench_group {

View File

@@ -1,15 +1,15 @@
//! Upload queue benchmarks.
use std::str::FromStr as _;
use std::sync::atomic::AtomicU32;
use std::sync::Arc;
use std::sync::atomic::AtomicU32;
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
use criterion::{Bencher, Criterion, criterion_group, criterion_main};
use pageserver::tenant::IndexPart;
use pageserver::tenant::metadata::TimelineMetadata;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask};
use pageserver::tenant::IndexPart;
use pprof::criterion::{Output, PProfProfiler};
use utils::generation::Generation;
use utils::shard::{ShardCount, ShardIndex, ShardNumber};

View File

@@ -221,12 +221,12 @@ where
// performed implicitly when `top` is dropped).
if let Some(mut top) = this.heap.peek_mut() {
match top.deref_mut() {
LazyLoadLayer::Unloaded(ref mut l) => {
LazyLoadLayer::Unloaded(l) => {
let fut = l.load_keys(this.ctx);
this.load_future.set(Some(Box::pin(fut)));
continue;
}
LazyLoadLayer::Loaded(ref mut entries) => {
LazyLoadLayer::Loaded(entries) => {
let result = entries.pop_front().unwrap();
if entries.is_empty() {
std::collections::binary_heap::PeekMut::pop(top);

View File

@@ -40,9 +40,7 @@ impl Stats {
}
}
pub(crate) fn add(&mut self, other: &Self) {
let Self {
ref mut latency_histo,
} = self;
let Self { latency_histo } = self;
latency_histo.add(&other.latency_histo).unwrap();
}
}

View File

@@ -2,7 +2,9 @@
pub(crate) const _ASSERT_U64_EQ_USIZE: () = {
if std::mem::size_of::<usize>() != std::mem::size_of::<u64>() {
panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information");
panic!(
"the traits defined in this module assume that usize and u64 can be converted to each other without loss of information"
);
}
};

View File

@@ -2,7 +2,7 @@ use std::sync::Arc;
use ::metrics::IntGauge;
use bytes::{Buf, BufMut, Bytes};
use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
use pageserver_api::key::{AUX_KEY_PREFIX, Key, METADATA_KEY_SIZE};
use tracing::warn;
// BEGIN Copyright (c) 2017 Servo Contributors

View File

@@ -10,33 +10,31 @@
//! This module is responsible for creation of such tarball
//! from data stored in object storage.
//!
use anyhow::{anyhow, Context};
use bytes::{BufMut, Bytes, BytesMut};
use fail::fail_point;
use pageserver_api::key::{rel_block_to_key, Key};
use postgres_ffi::pg_constants;
use std::fmt::Write as FmtWrite;
use std::time::{Instant, SystemTime};
use anyhow::{Context, anyhow};
use bytes::{BufMut, Bytes, BytesMut};
use fail::fail_point;
use pageserver_api::key::{Key, rel_block_to_key};
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants::{
DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID, PG_HBA, PGDATA_SPECIAL_FILES,
};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
use postgres_ffi::{
BLCKSZ, PG_TLI, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName, dispatch_pgversion, pg_constants,
};
use tokio::io;
use tokio::io::AsyncWrite;
use tracing::*;
use tokio_tar::{Builder, EntryType, Header};
use tracing::*;
use utils::lsn::Lsn;
use crate::context::RequestContext;
use crate::pgdatadir_mapping::Version;
use crate::tenant::storage_layer::IoConcurrency;
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
use postgres_ffi::XLogFileName;
use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
use utils::lsn::Lsn;
use crate::tenant::storage_layer::IoConcurrency;
#[derive(Debug, thiserror::Error)]
pub enum BasebackupError {

View File

@@ -3,49 +3,41 @@
//! Main entry point for the Page Server executable.
use std::env;
use std::env::{var, VarError};
use std::env::{VarError, var};
use std::io::Read;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use camino::Utf8Path;
use clap::{Arg, ArgAction, Command};
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
use pageserver::config::PageserverIdentity;
use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric};
use metrics::set_build_info_metric;
use pageserver::config::{PageServerConf, PageserverIdentity};
use pageserver::controller_upcall_client::ControllerUpcallClient;
use pageserver::deletion_queue::DeletionQueue;
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
use pageserver::tenant::{secondary, TenantSharedResources};
use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener};
use pageserver::task_mgr::{
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
};
use pageserver::tenant::{TenantSharedResources, mgr, secondary};
use pageserver::{
CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service,
task_mgr, virtual_file,
};
use postgres_backend::AuthType;
use remote_storage::GenericRemoteStorage;
use tokio::signal::unix::SignalKind;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::*;
use metrics::set_build_info_metric;
use pageserver::{
config::PageServerConf,
deletion_queue::DeletionQueue,
http, page_cache, page_service, task_mgr,
task_mgr::{BACKGROUND_RUNTIME, MGMT_REQUEST_RUNTIME},
tenant::mgr,
virtual_file,
};
use postgres_backend::AuthType;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::crashsafe::syncfs;
use utils::failpoint_support;
use utils::logging::TracingErrorLayerEnablement;
use utils::{
auth::{JwtAuth, SwappableJwtAuth},
logging, project_build_tag, project_git_version,
sentry_init::init_sentry,
tcp_listener,
};
use utils::sentry_init::init_sentry;
use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener};
project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);
@@ -57,7 +49,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
/// This adds roughly 3% overhead for allocations on average, which is acceptable considering
/// performance-sensitive code will avoid allocations as far as possible anyway.
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
#[unsafe(export_name = "malloc_conf")]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
const PID_FILE_NAME: &str = "pageserver.pid";
@@ -85,6 +77,9 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}
// Initialize up failpoints support
let scenario = failpoint_support::init();
let workdir = arg_matches
.get_one::<String>("workdir")
.map(Utf8Path::new)
@@ -178,9 +173,6 @@ fn main() -> anyhow::Result<()> {
}
}
// Initialize up failpoints support
let scenario = failpoint_support::init();
// Basic initialization of things that don't change after startup
tracing::info!("Initializing virtual_file...");
virtual_file::init(
@@ -217,7 +209,9 @@ fn initialize_config(
Ok(mut f) => {
let md = f.metadata().context("stat config file")?;
if !md.is_file() {
anyhow::bail!("Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ...");
anyhow::bail!(
"Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..."
);
}
let mut s = String::new();
@@ -225,7 +219,9 @@ fn initialize_config(
toml_edit::de::from_str::<PageserverIdentity>(&s)?
}
Err(e) => {
anyhow::bail!("Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ...");
anyhow::bail!(
"Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..."
);
}
};
@@ -401,11 +397,9 @@ fn start_pageserver(
Err(VarError::NotPresent) => {
info!("No JWT token for authentication with Safekeeper detected");
}
Err(e) => {
return Err(e).with_context(|| {
"Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable"
})
}
Err(e) => return Err(e).with_context(
|| "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable",
),
};
// Top-level cancellation token for the process
@@ -711,7 +705,9 @@ async fn create_remote_storage_client(
// wrapper that simulates failures.
if conf.test_remote_failures > 0 {
if !cfg!(feature = "testing") {
anyhow::bail!("test_remote_failures option is not available because pageserver was compiled without the 'testing' feature");
anyhow::bail!(
"test_remote_failures option is not available because pageserver was compiled without the 'testing' feature"
);
}
info!(
"Simulating remote failures for first {} attempts of each op",

View File

@@ -1,14 +1,10 @@
use std::{
io::{stdin, stdout, Read, Write},
time::Duration,
};
use std::io::{Read, Write, stdin, stdout};
use std::time::Duration;
use clap::Parser;
use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
#[derive(clap::Parser)]
struct Args {

View File

@@ -4,36 +4,29 @@
//! file, or on the command line.
//! See also `settings.md` for better description on every parameter.
use anyhow::{bail, ensure, Context};
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::{
config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes},
shard::TenantShardId,
};
use remote_storage::{RemotePath, RemoteStorageConfig};
use std::env;
use storage_broker::Uri;
use utils::logging::SecretString;
use utils::postgres_client::PostgresClientProtocol;
use once_cell::sync::OnceCell;
use reqwest::Url;
use std::num::NonZeroUsize;
use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, bail, ensure};
use camino::{Utf8Path, Utf8PathBuf};
use once_cell::sync::OnceCell;
use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::shard::TenantShardId;
use postgres_backend::AuthType;
use utils::{
id::{NodeId, TimelineId},
logging::LogFormat,
};
use remote_storage::{RemotePath, RemoteStorageConfig};
use reqwest::Url;
use storage_broker::Uri;
use utils::id::{NodeId, TimelineId};
use utils::logging::{LogFormat, SecretString};
use utils::postgres_client::PostgresClientProtocol;
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use crate::virtual_file;
use crate::virtual_file::io_engine;
use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME};
use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};
/// Global state of pageserver.
///
@@ -440,7 +433,9 @@ impl PageServerConf {
io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise
io_engine::FeatureTestResult::Worse { engine, remark } => {
// TODO: bubble this up to the caller so we can tracing::warn! it.
eprintln!("auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}");
eprintln!(
"auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"
);
engine
}
},

View File

@@ -1,13 +1,9 @@
//! Periodically collect consumption metrics for all active tenants
//! and push them to a HTTP endpoint.
use crate::config::PageServerConf;
use crate::consumption_metrics::metrics::MetricsKey;
use crate::consumption_metrics::upload::KeyGen as _;
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::size::CalculateSyntheticSizeError;
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use camino::Utf8PathBuf;
use consumption_metrics::EventType;
use itertools::Itertools as _;
@@ -15,14 +11,21 @@ use pageserver_api::models::TenantState;
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
use reqwest::Url;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::id::NodeId;
use crate::config::PageServerConf;
use crate::consumption_metrics::metrics::MetricsKey;
use crate::consumption_metrics::upload::KeyGen as _;
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
use crate::tenant::mgr::TenantManager;
use crate::tenant::size::CalculateSyntheticSizeError;
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
mod disk_cache;
mod metrics;
mod upload;

View File

@@ -1,10 +1,10 @@
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use std::sync::Arc;
use crate::consumption_metrics::NewMetricsRefRoot;
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use super::{NewMetricsRoot, NewRawMetric, RawMetric};
use crate::consumption_metrics::NewMetricsRefRoot;
pub(super) fn read_metrics_from_serde_value(
json_value: serde_json::Value,

View File

@@ -1,15 +1,16 @@
use crate::tenant::mgr::TenantManager;
use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
use std::sync::Arc;
use std::time::SystemTime;
use chrono::{DateTime, Utc};
use consumption_metrics::EventType;
use futures::stream::StreamExt;
use std::{sync::Arc, time::SystemTime};
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use super::{Cache, NewRawMetric};
use crate::context::RequestContext;
use crate::tenant::mgr::TenantManager;
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
/// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events`
/// instead of static str.

View File

@@ -1,7 +1,7 @@
use crate::consumption_metrics::RawMetric;
use std::collections::HashMap;
use super::*;
use std::collections::HashMap;
use crate::consumption_metrics::RawMetric;
#[test]
fn startup_collected_timeline_metrics_before_advancing() {

View File

@@ -2,15 +2,16 @@ use std::error::Error as _;
use std::time::SystemTime;
use chrono::{DateTime, Utc};
use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE};
use consumption_metrics::{CHUNK_SIZE, Event, EventChunk, IdempotencyKey};
use remote_storage::{GenericRemoteStorage, RemotePath};
use tokio::io::AsyncWriteExt;
use tokio_util::sync::CancellationToken;
use tracing::Instrument;
use super::{metrics::Name, Cache, MetricsKey, NewRawMetric, RawMetric};
use utils::id::{TenantId, TimelineId};
use super::metrics::Name;
use super::{Cache, MetricsKey, NewRawMetric, RawMetric};
/// How the metrics from pageserver are identified.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)]
struct Ids {
@@ -438,14 +439,13 @@ async fn upload(
#[cfg(test)]
mod tests {
use crate::consumption_metrics::{
disk_cache::read_metrics_from_serde_value, NewMetricsRefRoot,
};
use super::*;
use chrono::{DateTime, Utc};
use once_cell::sync::Lazy;
use super::*;
use crate::consumption_metrics::NewMetricsRefRoot;
use crate::consumption_metrics::disk_cache::read_metrics_from_serde_value;
#[test]
fn chunked_serialization() {
let examples = metric_samples();

View File

@@ -1,21 +1,23 @@
use std::collections::HashMap;
use futures::Future;
use pageserver_api::{
controller_api::{AvailabilityZone, NodeRegisterRequest},
shard::TenantShardId,
upcall_api::{
use pageserver_api::config::NodeMetadata;
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
use pageserver_api::shard::TenantShardId;
use pageserver_api::upcall_api::{
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
ValidateRequestTenant, ValidateResponse,
},
};
use serde::{de::DeserializeOwned, Serialize};
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio_util::sync::CancellationToken;
use url::Url;
use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
use utils::generation::Generation;
use utils::id::NodeId;
use utils::{backoff, failpoint_support};
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
use pageserver_api::config::NodeMetadata;
use crate::config::PageServerConf;
use crate::virtual_file::on_fatal_io_error;
/// The Pageserver's client for using the storage controller upcall API: this is a small API
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
@@ -157,14 +159,18 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
match az_id_from_metadata {
Some(az_id) => Some(AvailabilityZone(az_id)),
None => {
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
tracing::warn!(
"metadata.json does not contain an 'availability_zone_id' field"
);
conf.availability_zone.clone().map(AvailabilityZone)
}
}
};
if az_id.is_none() {
panic!("Availablity zone id could not be inferred from metadata.json or pageserver config");
panic!(
"Availablity zone id could not be inferred from metadata.json or pageserver config"
);
}
Some(NodeRegisterRequest {
@@ -236,7 +242,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
.iter()
.map(|(id, generation)| ValidateRequestTenant {
id: *id,
gen: (*generation).into().expect(
r#gen: (*generation).into().expect(
"Generation should always be valid for a Tenant doing deletions",
),
})

View File

@@ -6,38 +6,31 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
use crate::metrics;
use crate::tenant::remote_timeline_client::remote_timeline_path;
use crate::tenant::remote_timeline_client::LayerFileMetadata;
use crate::virtual_file::MaybeFatalIo;
use crate::virtual_file::VirtualFile;
use anyhow::Context;
use camino::Utf8PathBuf;
use deleter::DeleterMessage;
use list_writer::ListWriterQueueMessage;
use pageserver_api::shard::TenantShardId;
use remote_storage::{GenericRemoteStorage, RemotePath};
use serde::Deserialize;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio_util::sync::CancellationToken;
use tracing::Instrument;
use tracing::{debug, error};
use tracing::{Instrument, debug, error};
use utils::crashsafe::path_with_suffix_extension;
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::lsn::AtomicLsn;
use utils::lsn::Lsn;
use self::deleter::Deleter;
use self::list_writer::DeletionOp;
use self::list_writer::ListWriter;
use self::list_writer::RecoverOp;
use self::validator::Validator;
use deleter::DeleterMessage;
use list_writer::ListWriterQueueMessage;
use utils::lsn::{AtomicLsn, Lsn};
use validator::ValidatorQueueMessage;
use crate::{config::PageServerConf, tenant::storage_layer::LayerName};
use self::deleter::Deleter;
use self::list_writer::{DeletionOp, ListWriter, RecoverOp};
use self::validator::Validator;
use crate::config::PageServerConf;
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
use crate::metrics;
use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path};
use crate::tenant::storage_layer::LayerName;
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
// TODO: configurable for how long to wait before executing deletions
@@ -664,21 +657,22 @@ impl DeletionQueue {
#[cfg(test)]
mod test {
use std::io::ErrorKind;
use std::time::Duration;
use camino::Utf8Path;
use hex_literal::hex;
use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant};
use std::{io::ErrorKind, time::Duration};
use tracing::info;
use pageserver_api::key::Key;
use pageserver_api::shard::ShardIndex;
use pageserver_api::upcall_api::ReAttachResponseTenant;
use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
use tokio::task::JoinHandle;
use crate::{
controller_upcall_client::RetryForeverError,
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
};
use tracing::info;
use super::*;
use crate::controller_upcall_client::RetryForeverError;
use crate::tenant::harness::TenantHarness;
use crate::tenant::storage_layer::DeltaLayerName;
pub const TIMELINE_ID: TimelineId =
TimelineId::from_array(hex!("11223344556677881122334455667788"));
@@ -724,26 +718,26 @@ mod test {
.expect("Failed to join workers for previous deletion queue");
}
fn set_latest_generation(&self, gen: Generation) {
fn set_latest_generation(&self, gen_: Generation) {
let tenant_shard_id = self.harness.tenant_shard_id;
self.mock_control_plane
.latest_generation
.lock()
.unwrap()
.insert(tenant_shard_id, gen);
.insert(tenant_shard_id, gen_);
}
/// Returns remote layer file name, suitable for use in assert_remote_files
fn write_remote_layer(
&self,
file_name: LayerName,
gen: Generation,
gen_: Generation,
) -> anyhow::Result<String> {
let tenant_shard_id = self.harness.tenant_shard_id;
let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());
std::fs::create_dir_all(&remote_timeline_path)?;
let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix());
let remote_layer_file_name = format!("{}{}", file_name, gen_.get_suffix());
let content: Vec<u8> = format!("placeholder contents of {file_name}").into();
@@ -1098,11 +1092,12 @@ mod test {
/// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it.
#[cfg(test)]
pub(crate) mod mock {
use std::sync::atomic::{AtomicUsize, Ordering};
use tracing::info;
use super::*;
use crate::tenant::remote_timeline_client::remote_layer_path;
use std::sync::atomic::{AtomicUsize, Ordering};
pub struct ConsumerState {
rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,


@@ -6,21 +6,16 @@
//! number of full-sized DeleteObjects requests, rather than a larger number of
//! smaller requests.
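// A hypothetical sketch of the batching idea described above (not the actual
// deleter code): accumulate keys and emit one bulk request per full batch,
// so N objects cost ceil(N / MAX_KEYS_PER_DELETE) round trips.
const MAX_KEYS_PER_DELETE: usize = 1000; // assumed S3-style bulk delete limit

fn into_batches(keys: Vec<String>) -> Vec<Vec<String>> {
    let mut batches = Vec::new();
    let mut current = Vec::with_capacity(MAX_KEYS_PER_DELETE);
    for key in keys {
        current.push(key);
        if current.len() == MAX_KEYS_PER_DELETE {
            batches.push(std::mem::take(&mut current));
        }
    }
    if !current.is_empty() {
        batches.push(current);
    }
    batches
}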
use remote_storage::GenericRemoteStorage;
use remote_storage::RemotePath;
use remote_storage::TimeoutOrCancel;
use std::time::Duration;
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use tokio_util::sync::CancellationToken;
use tracing::info;
use tracing::warn;
use utils::backoff;
use utils::pausable_failpoint;
use tracing::{info, warn};
use utils::{backoff, pausable_failpoint};
use super::{DeletionQueueError, FlushOp};
use crate::metrics;
use super::DeletionQueueError;
use super::FlushOp;
const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);
pub(super) enum DeleterMessage {


@@ -10,11 +10,6 @@
//!
//! DeletionLists are passed onwards to the Validator.
use super::DeletionHeader;
use super::DeletionList;
use super::FlushOp;
use super::ValidatorQueueMessage;
use std::collections::HashMap;
use std::fs::create_dir_all;
use std::time::Duration;
@@ -23,20 +18,17 @@ use pageserver_api::shard::TenantShardId;
use regex::Regex;
use remote_storage::RemotePath;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use tracing::info;
use tracing::warn;
use tracing::{debug, info, warn};
use utils::generation::Generation;
use utils::id::TimelineId;
use super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage};
use crate::config::PageServerConf;
use crate::deletion_queue::TEMP_SUFFIX;
use crate::metrics;
use crate::tenant::remote_timeline_client::remote_layer_path;
use crate::tenant::remote_timeline_client::LayerFileMetadata;
use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path};
use crate::tenant::storage_layer::LayerName;
use crate::virtual_file::on_fatal_io_error;
use crate::virtual_file::MaybeFatalIo;
use crate::virtual_file::{MaybeFatalIo, on_fatal_io_error};
// The number of keys in a DeletionList before we will proactively persist it
// (without reaching a flush deadline). This aims to deliver objects of the order


@@ -20,22 +20,14 @@ use std::time::Duration;
use camino::Utf8PathBuf;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use tracing::info;
use tracing::warn;
use crate::config::PageServerConf;
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
use crate::controller_upcall_client::RetryForeverError;
use crate::metrics;
use crate::virtual_file::MaybeFatalIo;
use tracing::{debug, info, warn};
use super::deleter::DeleterMessage;
use super::DeletionHeader;
use super::DeletionList;
use super::DeletionQueueError;
use super::FlushOp;
use super::VisibleLsnUpdates;
use super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates};
use crate::config::PageServerConf;
use crate::controller_upcall_client::{ControlPlaneGenerationsApi, RetryForeverError};
use crate::metrics;
use crate::virtual_file::MaybeFatalIo;
// After this length of time, do any validation work that is pending,
// even if we haven't accumulated many keys to delete.
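// A hypothetical sketch of the rule stated above (the real validator logic
// differs): flush pending work once a batch fills up or once the oldest
// entry has waited past the deadline.
use std::time::{Duration, Instant};

fn should_flush(
    pending: usize,
    oldest: Option<Instant>,
    batch_size: usize,
    deadline: Duration,
) -> bool {
    pending >= batch_size || oldest.is_some_and(|t| t.elapsed() >= deadline)
}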
@@ -190,7 +182,10 @@ where
}
} else {
// If we failed validation, then do not apply any of the projected updates
info!("Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", tenant_lsn_state.generation);
info!(
"Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}",
tenant_lsn_state.generation
);
metrics::DELETION_QUEUE.dropped_lsn_updates.inc();
}
}
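// The rewrapped log calls above appear to come from the 2024 style edition:
// rustfmt used to leave a macro call untouched when its string literal pushed
// the line past max_width, while the new style breaks the arguments onto
// their own lines. A minimal sketch of the resulting shape:
fn log_dropped_updates(tenant_id: u64, generation: i32) {
    tracing::info!(
        "Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}",
        generation
    );
}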


@@ -41,30 +41,31 @@
// - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
// reading these fields. We use the Debug impl for semi-structured logging, though.
use std::{sync::Arc, time::SystemTime};
use std::sync::Arc;
use std::time::SystemTime;
use anyhow::Context;
use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId};
use pageserver_api::config::DiskUsageEvictionTaskConfig;
use pageserver_api::shard::TenantShardId;
use remote_storage::GenericRemoteStorage;
use serde::Serialize;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, instrument, warn, Instrument};
use utils::{completion, id::TimelineId};
use tracing::{Instrument, debug, error, info, instrument, warn};
use utils::completion;
use utils::id::TimelineId;
use crate::{
config::PageServerConf,
metrics::disk_usage_based_eviction::METRICS,
task_mgr::{self, BACKGROUND_RUNTIME},
tenant::{
mgr::TenantManager,
remote_timeline_client::LayerFileMetadata,
secondary::SecondaryTenant,
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint},
tasks::sleep_random,
},
CancellableTask, DiskUsageEvictionTask,
use crate::config::PageServerConf;
use crate::metrics::disk_usage_based_eviction::METRICS;
use crate::task_mgr::{self, BACKGROUND_RUNTIME};
use crate::tenant::mgr::TenantManager;
use crate::tenant::remote_timeline_client::LayerFileMetadata;
use crate::tenant::secondary::SecondaryTenant;
use crate::tenant::storage_layer::{
AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint,
};
use crate::tenant::tasks::sleep_random;
use crate::{CancellableTask, DiskUsageEvictionTask};
/// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
/// partitioning.
@@ -1007,10 +1008,14 @@ async fn collect_eviction_candidates(
}
}
debug_assert!(EvictionPartition::Above < EvictionPartition::Below,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
debug_assert!(EvictionPartition::EvictNow < EvictionPartition::Above,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
debug_assert!(
EvictionPartition::Above < EvictionPartition::Below,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"
);
debug_assert!(
EvictionPartition::EvictNow < EvictionPartition::Above,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"
);
eviction_order.sort(&mut candidates);
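// A minimal sketch of why the debug_asserts above hold: deriving Ord on an
// enum orders variants by declaration order, so listing EvictNow before
// Above before Below makes a plain sort put the most evictable candidates
// first (a hypothetical stand-in for the real EvictionPartition):
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Partition {
    EvictNow,
    Above,
    Below,
}

fn sorted_example() -> Vec<Partition> {
    let mut candidates = vec![Partition::Below, Partition::EvictNow, Partition::Above];
    candidates.sort();
    candidates // [EvictNow, Above, Below]
}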
@@ -1157,9 +1162,8 @@ mod filesystem_level_usage {
use anyhow::Context;
use camino::Utf8Path;
use crate::statvfs::Statvfs;
use super::DiskUsageEvictionTaskConfig;
use crate::statvfs::Statvfs;
#[derive(Debug, Clone, Copy)]
pub struct Usage<'a> {
@@ -1224,10 +1228,12 @@ mod filesystem_level_usage {
#[test]
fn max_usage_pct_pressure() {
use super::Usage as _;
use std::time::Duration;
use utils::serde_percent::Percent;
use super::Usage as _;
let mut usage = Usage {
config: &DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(85).unwrap(),


@@ -2,125 +2,83 @@
//! Management HTTP API
//!
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::collections::HashMap;
use std::collections::{BinaryHeap, HashMap};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use anyhow::{anyhow, Context, Result};
use anyhow::{Context, Result, anyhow};
use enumset::EnumSet;
use futures::future::join_all;
use futures::StreamExt;
use futures::TryFutureExt;
use futures::{StreamExt, TryFutureExt};
use http_utils::endpoint::{
profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
self, attach_openapi_ui, auth_middleware, check_permission_with, profile_cpu_handler,
profile_heap_handler, prometheus_metrics_handler, request_span,
};
use http_utils::error::{ApiError, HttpErrorBody};
use http_utils::failpoints::failpoints_handler;
use http_utils::request::must_parse_query_param;
use http_utils::request::{get_request_param, must_get_query_param, parse_query_param};
use http_utils::json::{json_request, json_request_maybe, json_response};
use http_utils::request::{
get_request_param, must_get_query_param, must_parse_query_param, parse_query_param,
parse_request_param,
};
use http_utils::{RequestExt, RouterBuilder};
use humantime::format_rfc3339;
use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use hyper::{Body, Request, Response, StatusCode, Uri, header};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
use pageserver_api::models::IngestAuxFilesRequest;
use pageserver_api::models::ListAuxFilesRequest;
use pageserver_api::models::LocationConfig;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::LocationConfigMode;
use pageserver_api::models::LsnLease;
use pageserver_api::models::LsnLeaseRequest;
use pageserver_api::models::OffloadedTimelineInfo;
use pageserver_api::models::PageTraceEvent;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantConfigPatchRequest;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigRequest;
use pageserver_api::models::TenantLocationConfigResponse;
use pageserver_api::models::TenantScanRemoteStorageResponse;
use pageserver_api::models::TenantScanRemoteStorageShard;
use pageserver_api::models::TenantShardLocation;
use pageserver_api::models::TenantShardSplitRequest;
use pageserver_api::models::TenantShardSplitResponse;
use pageserver_api::models::TenantSorting;
use pageserver_api::models::TenantState;
use pageserver_api::models::TenantWaitLsnRequest;
use pageserver_api::models::TimelineArchivalConfigRequest;
use pageserver_api::models::TimelineCreateRequestMode;
use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
use pageserver_api::models::TimelinesInfoAndOffloaded;
use pageserver_api::models::TopTenantShardItem;
use pageserver_api::models::TopTenantShardsRequest;
use pageserver_api::models::TopTenantShardsResponse;
use pageserver_api::shard::ShardCount;
use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeTravelError;
use pageserver_api::models::{
DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest,
LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest,
OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,
TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,
TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,
TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest,
TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode,
TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo,
TimelinesInfoAndOffloaded, TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse,
};
use pageserver_api::shard::{ShardCount, TenantShardId};
use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};
use scopeguard::defer;
use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel};
use tenant_size_model::svg::SvgBranchKind;
use tenant_size_model::{SizeResult, StorageModel};
use tokio::time::Instant;
use tokio_util::io::StreamReader;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::SwappableJwtAuth;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use crate::config::PageServerConf;
use crate::context::RequestContextBuilder;
use crate::context::{DownloadBehavior, RequestContext};
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
use crate::deletion_queue::DeletionQueueClient;
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::task_mgr::TaskKind;
use crate::tenant::config::{LocationConf, TenantConfOpt};
use crate::tenant::mgr::GetActiveTenantError;
use crate::tenant::mgr::{
GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError,
TenantSlotUpsertError, TenantStateError,
GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError,
TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError,
};
use crate::tenant::remote_timeline_client::{
download_index_part, list_remote_tenant_shards, list_remote_timelines,
};
use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
use crate::tenant::remote_timeline_client;
use crate::tenant::remote_timeline_client::download_index_part;
use crate::tenant::remote_timeline_client::list_remote_tenant_shards;
use crate::tenant::remote_timeline_client::list_remote_timelines;
use crate::tenant::secondary::SecondaryController;
use crate::tenant::size::ModelInputs;
use crate::tenant::storage_layer::IoConcurrency;
use crate::tenant::storage_layer::LayerAccessStatsReset;
use crate::tenant::storage_layer::LayerName;
use crate::tenant::timeline::import_pgdata;
use crate::tenant::timeline::offload::offload_timeline;
use crate::tenant::timeline::offload::OffloadError;
use crate::tenant::timeline::CompactFlags;
use crate::tenant::timeline::CompactOptions;
use crate::tenant::timeline::CompactRequest;
use crate::tenant::timeline::CompactionError;
use crate::tenant::timeline::Timeline;
use crate::tenant::timeline::WaitLsnTimeout;
use crate::tenant::timeline::WaitLsnWaiter;
use crate::tenant::GetTimelineError;
use crate::tenant::OffloadedTimeline;
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
use crate::DEFAULT_PG_VERSION;
use crate::{disk_usage_eviction_task, tenant};
use http_utils::{
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with},
error::{ApiError, HttpErrorBody},
json::{json_request, json_request_maybe, json_response},
request::parse_request_param,
RequestExt, RouterBuilder,
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
use crate::tenant::timeline::{
CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
WaitLsnWaiter, import_pgdata,
};
use pageserver_api::models::{
StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
TimelineInfo,
};
use utils::{
auth::SwappableJwtAuth,
generation::Generation,
id::{TenantId, TimelineId},
lsn::Lsn,
use crate::tenant::{
GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError,
remote_timeline_client,
};
use crate::{DEFAULT_PG_VERSION, disk_usage_eviction_task, tenant};
// For APIs that require an Active tenant, how long should we block waiting for that state?
// This is not functionally necessary (clients will retry), but avoids generating a lot of
@@ -1128,12 +1086,12 @@ async fn tenant_list_handler(
ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
})?
.iter()
.map(|(id, state, gen)| TenantInfo {
.map(|(id, state, gen_)| TenantInfo {
id: *id,
state: state.clone(),
current_physical_size: None,
attachment_status: state.attachment_status(),
generation: (*gen)
generation: (*gen_)
.into()
.expect("Tenants are always attached with a generation"),
gc_blocking: None,
@@ -1670,9 +1628,8 @@ async fn block_or_unblock_gc(
request: Request<Body>,
block: bool,
) -> Result<Response<Body>, ApiError> {
use crate::tenant::{
remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized,
};
use crate::tenant::remote_timeline_client::WaitCompletionError;
use crate::tenant::upload_queue::NotInitialized;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -2058,7 +2015,9 @@ async fn tenant_time_travel_remote_storage_handler(
)));
}
tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}");
tracing::info!(
"Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}"
);
remote_timeline_client::upload::time_travel_recover_tenant(
&state.remote_storage,
@@ -2459,9 +2418,10 @@ async fn timeline_detach_ancestor_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
use crate::tenant::timeline::detach_ancestor;
use pageserver_api::models::detach_ancestor::AncestorDetached;
use crate::tenant::timeline::detach_ancestor;
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -2806,14 +2766,19 @@ async fn tenant_scan_remote_handler(
.await
{
Ok((index_part, index_generation, _index_mtime)) => {
tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn());
tracing::info!(
"Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
index_part.layer_metadata.len(),
index_part.metadata.disk_consistent_lsn()
);
generation = std::cmp::max(generation, index_generation);
}
Err(DownloadError::NotFound) => {
// This is normal for tenants that were created with multiple shards: they have an unsharded path
// containing the timeline's initdb tarball but no index. Otherwise it is a bit strange.
tracing::info!("Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping");
tracing::info!(
"Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping"
);
continue;
}
Err(e) => {
@@ -3432,7 +3397,9 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow
anyhow::bail!("unexpected non-zero bytes after the tar archive");
}
if trailing_bytes % 512 != 0 {
anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive");
anyhow::bail!(
"unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"
);
}
Ok(())
}


@@ -4,14 +4,22 @@
//!
use std::path::{Path, PathBuf};
use anyhow::{bail, ensure, Context, Result};
use anyhow::{Context, Result, bail, ensure};
use bytes::Bytes;
use camino::Utf8Path;
use futures::StreamExt;
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::WalStreamDecoder;
use postgres_ffi::{
BLCKSZ, ControlFileData, DBState_DB_SHUTDOWNED, Oid, WAL_SEGMENT_SIZE, XLogFileName,
pg_constants,
};
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio_tar::Archive;
use tracing::*;
use utils::lsn::Lsn;
use wal_decoder::models::InterpretedWalRecord;
use walkdir::WalkDir;
@@ -20,16 +28,6 @@ use crate::metrics::WAL_INGEST;
use crate::pgdatadir_mapping::*;
use crate::tenant::Timeline;
use crate::walingest::WalIngest;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::WalStreamDecoder;
use postgres_ffi::ControlFileData;
use postgres_ffi::DBState_DB_SHUTDOWNED;
use postgres_ffi::Oid;
use postgres_ffi::XLogFileName;
use postgres_ffi::{BLCKSZ, WAL_SEGMENT_SIZE};
use utils::lsn::Lsn;
// Returns checkpoint LSN from controlfile
pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {


@@ -1,4 +1,5 @@
use std::{num::NonZeroUsize, sync::Arc};
use std::num::NonZeroUsize;
use std::sync::Arc;
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum L0FlushConfig {


@@ -15,7 +15,8 @@ pub mod l0_flush;
extern crate hyper0 as hyper;
use futures::{stream::FuturesUnordered, StreamExt};
use futures::StreamExt;
use futures::stream::FuturesUnordered;
pub use pageserver_api::keyspace;
use tokio_util::sync::CancellationToken;
mod assert_u64_eq_usize;
@@ -35,10 +36,8 @@ pub mod walredo;
use camino::Utf8Path;
use deletion_queue::DeletionQueue;
use tenant::{
mgr::{BackgroundPurges, TenantManager},
secondary,
};
use tenant::mgr::{BackgroundPurges, TenantManager};
use tenant::secondary;
use tracing::{info, info_span};
/// Current storage format version
@@ -350,9 +349,10 @@ async fn timed_after_cancellation<Fut: std::future::Future>(
#[cfg(test)]
mod timed_tests {
use super::timed;
use std::time::Duration;
use super::timed;
#[tokio::test]
async fn timed_completes_when_inner_future_completes() {
// A future that completes on time should have its result returned


@@ -10,11 +10,11 @@ use std::time::{Duration, Instant};
use enum_map::{Enum as _, EnumMap};
use futures::Future;
use metrics::{
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
};
use once_cell::sync::Lazy;
use pageserver_api::config::{
@@ -24,9 +24,8 @@ use pageserver_api::config::{
use pageserver_api::models::InMemoryLayerInfo;
use pageserver_api::shard::TenantShardId;
use pin_project_lite::pin_project;
use postgres_backend::{is_expected_io_error, QueryError};
use postgres_backend::{QueryError, is_expected_io_error};
use pq_proto::framed::ConnectionError;
use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
use strum_macros::{IntoStaticStr, VariantNames};
use utils::id::TimelineId;
@@ -35,12 +34,12 @@ use crate::config::PageServerConf;
use crate::context::{PageContentKind, RequestContext};
use crate::pgdatadir_mapping::DatadirModificationStats;
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
use crate::tenant::layer_map::LayerMap;
use crate::tenant::mgr::TenantSlot;
use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::throttle::ThrottleResult;
use crate::tenant::Timeline;
/// Prometheus histogram buckets (in seconds) for operations in the critical
/// path. In other words, operations that directly affect that latency of user
@@ -363,7 +362,7 @@ pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
pub(crate) mod page_cache_eviction_metrics {
use std::num::NonZeroUsize;
use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};
use once_cell::sync::Lazy;
#[derive(Clone, Copy)]
@@ -722,7 +721,7 @@ pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
});
pub(crate) mod initial_logical_size {
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
use once_cell::sync::Lazy;
pub(crate) struct StartCalculation(IntCounterVec);
@@ -1105,12 +1104,17 @@ impl EvictionsWithLowResidenceDuration {
// - future "drop panick => abort"
//
// so just nag: (the error has the labels)
tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
tracing::warn!(
"failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"
);
}
Ok(()) => {
// to help identify cases where we double-remove the same values, let's log all
// deletions?
tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
tracing::info!(
"removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}",
self.data_source
);
}
}
}
@@ -3574,12 +3578,10 @@ impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
}
pub mod tokio_epoll_uring {
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};
use once_cell::sync::Lazy;
/// Shared storage for tokio-epoll-uring thread local metrics.
@@ -3588,7 +3590,9 @@ pub mod tokio_epoll_uring {
let slots_submission_queue_depth = register_histogram!(
"pageserver_tokio_epoll_uring_slots_submission_queue_depth",
"The slots waiters queue depth of each tokio_epoll_uring system",
vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
vec![
1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
],
)
.expect("failed to define a metric");
ThreadLocalMetricsStorage {
@@ -3765,7 +3769,7 @@ pub mod tokio_epoll_uring {
}
pub(crate) mod tenant_throttling {
use metrics::{register_int_counter_vec, IntCounter};
use metrics::{IntCounter, register_int_counter_vec};
use once_cell::sync::Lazy;
use utils::shard::TenantShardId;


@@ -67,23 +67,18 @@
//! mapping is automatically removed and the slot is marked free.
//!
use std::{
collections::{hash_map::Entry, HashMap},
sync::{
atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering},
Arc, Weak,
},
time::Duration,
};
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::sync::atomic::{AtomicU8, AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Weak};
use std::time::Duration;
use anyhow::Context;
use once_cell::sync::OnceCell;
use crate::{
context::RequestContext,
metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
virtual_file::{IoBufferMut, IoPageSlice},
};
use crate::context::RequestContext;
use crate::metrics::{PageCacheSizeMetrics, page_cache_eviction_metrics};
use crate::virtual_file::{IoBufferMut, IoPageSlice};
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
const TEST_PAGE_CACHE_SIZE: usize = 50;
@@ -168,11 +163,7 @@ impl Slot {
let count_res =
self.usage_count
.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {
if val == 0 {
None
} else {
Some(val - 1)
}
if val == 0 { None } else { Some(val - 1) }
});
match count_res {
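// A minimal sketch of the decrement-if-nonzero pattern above: fetch_update
// retries a compare-and-swap until the closure's result sticks, returning
// Err with the current value when the closure yields None (here, at zero).
use std::sync::atomic::{AtomicU8, Ordering};

fn try_decrement(usage_count: &AtomicU8) -> bool {
    usage_count
        .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| {
            if val == 0 { None } else { Some(val - 1) }
        })
        .is_ok()
}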


@@ -1,7 +1,15 @@
//! The Page Service listens for client connections and serves their GetPage@LSN
//! requests.
use anyhow::{bail, Context};
use std::borrow::Cow;
use std::num::NonZeroUsize;
use std::os::fd::AsRawFd;
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime};
use std::{io, str};
use anyhow::{Context, bail};
use async_compression::tokio::write::GzipEncoder;
use bytes::Buf;
use futures::FutureExt;
@@ -11,72 +19,57 @@ use pageserver_api::config::{
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
PageServiceProtocolPipelinedExecutionStrategy,
};
use pageserver_api::models::{self, TenantState};
use pageserver_api::key::rel_block_to_key;
use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
PagestreamProtocolVersion, PagestreamRequest,
PagestreamProtocolVersion, PagestreamRequest, TenantState,
};
use pageserver_api::reltag::SlruKind;
use pageserver_api::shard::TenantShardId;
use postgres_backend::{
is_expected_io_error, AuthType, PostgresBackend, PostgresBackendReader, QueryError,
AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
};
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use pq_proto::framed::ConnectionError;
use pq_proto::FeStartupPacket;
use pq_proto::{BeMessage, FeMessage, RowDescriptor};
use std::borrow::Cow;
use std::io;
use std::num::NonZeroUsize;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use std::time::SystemTime;
use std::time::{Duration, Instant};
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
use strum_macros::IntoStaticStr;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::{Claims, Scope, SwappableJwtAuth};
use utils::failpoint_support;
use utils::id::{TenantId, TimelineId};
use utils::logging::log_slow;
use utils::lsn::Lsn;
use utils::simple_rcu::RcuReadGuard;
use utils::sync::gate::{Gate, GateGuard};
use utils::sync::spsc_fold;
use utils::{
auth::{Claims, Scope, SwappableJwtAuth},
failpoint_support,
id::{TenantId, TimelineId},
lsn::Lsn,
simple_rcu::RcuReadGuard,
};
use crate::auth::check_permission;
use crate::basebackup::BasebackupError;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::{self, SmgrOpTimer};
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
use crate::metrics::{
self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer,
};
use crate::pgdatadir_mapping::Version;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
use crate::task_mgr::TaskKind;
use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME};
use crate::tenant::mgr::ShardSelector;
use crate::tenant::mgr::TenantManager;
use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult};
use crate::span::{
debug_assert_current_span_has_tenant_and_timeline_id,
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
};
use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind};
use crate::tenant::mgr::{
GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager,
};
use crate::tenant::storage_layer::IoConcurrency;
use crate::tenant::timeline::{self, WaitLsnError};
use crate::tenant::GetTimelineError;
use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
use crate::{basebackup, timed_after_cancellation};
use pageserver_api::key::rel_block_to_key;
use pageserver_api::models::PageTraceEvent;
use pageserver_api::reltag::SlruKind;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
use std::os::fd::AsRawFd;
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which
/// is not yet in state [`TenantState::Active`].
@@ -986,7 +979,7 @@ impl PageServerHandler {
Ok(BatchedFeMessage::GetPage {
span: _,
shard: accum_shard,
pages: ref mut accum_pages,
pages: accum_pages,
effective_request_lsn: accum_lsn,
}),
BatchedFeMessage::GetPage {
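// Dropping the explicit `ref mut` above is an edition 2024 requirement: once
// match ergonomics has already shifted the default binding mode to
// by-reference (the scrutinee sits behind &mut here), spelling out
// `ref`/`ref mut` is rejected. A minimal sketch with hypothetical types:
fn push_page(slot: &mut Option<Vec<u32>>, page: u32) {
    match slot {
        // Editions <= 2021 also accepted `Some(ref mut pages)`; 2024 does not.
        Some(pages) => pages.push(page), // pages: &mut Vec<u32> via ergonomics
        None => *slot = Some(vec![page]),
    }
}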
@@ -1236,12 +1229,13 @@ impl PageServerHandler {
} => {
fail::fail_point!("ps::handle-pagerequest-message::exists");
(
vec![self
.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
vec![
self.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
.instrument(span.clone())
.await
.map(|msg| (msg, timer))
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
.map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
],
span,
)
}
@@ -1253,12 +1247,13 @@ impl PageServerHandler {
} => {
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
(
vec![self
.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
vec![
self.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
.instrument(span.clone())
.await
.map(|msg| (msg, timer))
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
.map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
],
span,
)
}
@@ -1297,12 +1292,13 @@ impl PageServerHandler {
} => {
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
(
vec![self
.handle_db_size_request(&*shard.upgrade()?, &req, ctx)
vec![
self.handle_db_size_request(&*shard.upgrade()?, &req, ctx)
.instrument(span.clone())
.await
.map(|msg| (msg, timer))
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
.map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
],
span,
)
}
@@ -1314,12 +1310,13 @@ impl PageServerHandler {
} => {
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
(
vec![self
.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
vec![
self.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
.instrument(span.clone())
.await
.map(|msg| (msg, timer))
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
.map_err(|err| BatchedPageStreamError { err, req: req.hdr }),
],
span,
)
}
@@ -2112,7 +2109,9 @@ impl PageServerHandler {
set_tracing_field_shard_id(&timeline);
if timeline.is_archived() == Some(true) {
tracing::info!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it.");
tracing::info!(
"timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it."
);
return Err(QueryError::NotFound("timeline is archived".into()));
}


@@ -6,6 +6,36 @@
//! walingest.rs handles a few things like implicit relation creation and extension.
//! Clarify that)
//!
use std::collections::{BTreeMap, HashMap, HashSet, hash_map};
use std::ops::{ControlFlow, Range};
use anyhow::{Context, ensure};
use bytes::{Buf, Bytes, BytesMut};
use enum_map::Enum;
use itertools::Itertools;
use pageserver_api::key::{
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists,
TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range,
rel_size_to_key, rel_tag_sparse_key, rel_tag_sparse_key_range, relmap_file_key,
repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
};
use pageserver_api::keyspace::SparseKeySpace;
use pageserver_api::record::NeonWalRecord;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::shard::ShardIdentity;
use pageserver_api::value::Value;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId};
use serde::{Deserialize, Serialize};
use strum::IntoEnumIterator;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, trace, warn};
use utils::bin_ser::{BeSer, DeserializeError};
use utils::lsn::Lsn;
use utils::pausable_failpoint;
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
use super::tenant::{PageReconstructError, Timeline};
use crate::aux_file;
use crate::context::RequestContext;
@@ -19,37 +49,6 @@ use crate::span::{
};
use crate::tenant::storage_layer::IoConcurrency;
use crate::tenant::timeline::GetVectoredError;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes, BytesMut};
use enum_map::Enum;
use itertools::Itertools;
use pageserver_api::key::{
dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
rel_tag_sparse_key_range, relmap_file_key, repl_origin_key, repl_origin_key_range,
slru_block_to_key, slru_dir_to_key, slru_segment_key_range, slru_segment_size_to_key,
twophase_file_key, twophase_key_range, CompactKey, RelDirExists, AUX_FILES_KEY, CHECKPOINT_KEY,
CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
};
use pageserver_api::key::{rel_tag_sparse_key, Key};
use pageserver_api::keyspace::SparseKeySpace;
use pageserver_api::record::NeonWalRecord;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::shard::ShardIdentity;
use pageserver_api::value::Value;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
use serde::{Deserialize, Serialize};
use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
use std::ops::ControlFlow;
use std::ops::Range;
use strum::IntoEnumIterator;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, trace, warn};
use utils::bin_ser::DeserializeError;
use utils::pausable_failpoint;
use utils::{bin_ser::BeSer, lsn::Lsn};
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
/// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached.
pub const MAX_AUX_FILE_DELTAS: usize = 1024;
@@ -327,16 +326,16 @@ impl Timeline {
let clone = match &res {
Ok(buf) => Ok(buf.clone()),
Err(err) => Err(match err {
PageReconstructError::Cancelled => {
PageReconstructError::Cancelled
}
PageReconstructError::Cancelled => PageReconstructError::Cancelled,
x @ PageReconstructError::Other(_) |
x @ PageReconstructError::AncestorLsnTimeout(_) |
x @ PageReconstructError::WalRedo(_) |
x @ PageReconstructError::MissingKey(_) => {
PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}"))
},
x @ PageReconstructError::Other(_)
| x @ PageReconstructError::AncestorLsnTimeout(_)
| x @ PageReconstructError::WalRedo(_)
| x @ PageReconstructError::MissingKey(_) => {
PageReconstructError::Other(anyhow::anyhow!(
"there was more than one request for this key in the batch, error logged once: {x:?}"
))
}
}),
};
@@ -355,23 +354,23 @@ impl Timeline {
// this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`
// but without taking ownership of the GetVectoredError
let err = match &err {
GetVectoredError::Cancelled => {
Err(PageReconstructError::Cancelled)
}
GetVectoredError::Cancelled => Err(PageReconstructError::Cancelled),
// TODO: restructure get_vectored API to make this error per-key
GetVectoredError::MissingKey(err) => {
Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}")))
Err(PageReconstructError::Other(anyhow::anyhow!(
"whole vectored get request failed because one or more of the requested keys were missing: {err:?}"
)))
}
// TODO: restructure get_vectored API to make this error per-key
GetVectoredError::GetReadyAncestorError(err) => {
Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}")))
Err(PageReconstructError::Other(anyhow::anyhow!(
"whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"
)))
}
// TODO: restructure get_vectored API to make this error per-key
GetVectoredError::Other(err) => {
Err(PageReconstructError::Other(
GetVectoredError::Other(err) => Err(PageReconstructError::Other(
anyhow::anyhow!("whole vectored get request failed: {err:?}"),
))
}
)),
// TODO: we can prevent this error class by moving this check into the type system
GetVectoredError::InvalidLsn(e) => {
Err(anyhow::anyhow!("invalid LSN: {e:?}").into())
@@ -379,10 +378,7 @@ impl Timeline {
// NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
// TODO: we can prevent this error class by moving this check into the type system
GetVectoredError::Oversized(err) => {
Err(anyhow::anyhow!(
"batching oversized: {err:?}"
)
.into())
Err(anyhow::anyhow!("batching oversized: {err:?}").into())
}
};
@@ -715,7 +711,10 @@ impl Timeline {
{
Ok(res) => res,
Err(PageReconstructError::MissingKey(e)) => {
warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e);
warn!(
"Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}",
e
);
// Return that we didn't find any requests smaller than the LSN, and logging the error.
return Ok(LsnForTimestamp::Past(min_lsn));
}
@@ -2464,10 +2463,12 @@ impl DatadirModification<'_> {
// modifications before ingesting DB create operations, which are the only kind that reads
// data pages during ingest.
if cfg!(debug_assertions) {
assert!(!self
assert!(
!self
.pending_data_batch
.as_ref()
.is_some_and(|b| b.updates_key(&key)));
.is_some_and(|b| b.updates_key(&key))
);
}
}
@@ -2666,15 +2667,14 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
#[cfg(test)]
mod tests {
use hex_literal::hex;
use pageserver_api::{models::ShardParameters, shard::ShardStripeSize};
use utils::{
id::TimelineId,
shard::{ShardCount, ShardNumber},
};
use pageserver_api::models::ShardParameters;
use pageserver_api::shard::ShardStripeSize;
use utils::id::TimelineId;
use utils::shard::{ShardCount, ShardNumber};
use super::*;
use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION};
use crate::DEFAULT_PG_VERSION;
use crate::tenant::harness::TenantHarness;
/// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline
#[tokio::test]


@@ -73,11 +73,10 @@ impl Statvfs {
pub mod mock {
use camino::Utf8Path;
pub use pageserver_api::config::statvfs::mock::Behavior;
use regex::Regex;
use tracing::log::info;
pub use pageserver_api::config::statvfs::mock::Behavior;
pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result<Statvfs> {
info!("running mocked statvfs");
@@ -85,7 +84,7 @@ pub mod mock {
Behavior::Success {
blocksize,
total_blocks,
ref name_filter,
name_filter,
} => {
let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap();
@@ -134,7 +133,7 @@ pub mod mock {
}
Err(e) => {
return Err(anyhow::Error::new(e)
.context(format!("get metadata of {:?}", entry.path())))
.context(format!("get metadata of {:?}", entry.path())));
}
};
total += m.len();


@@ -40,15 +40,12 @@ use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use futures::FutureExt;
use once_cell::sync::Lazy;
use pageserver_api::shard::TenantShardId;
use tokio::task::JoinHandle;
use tokio::task_local;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
use once_cell::sync::Lazy;
use utils::env;
use utils::id::TimelineId;


@@ -12,150 +12,99 @@
//! parent timeline, and the last LSN that has been written to disk.
//!
use anyhow::{bail, Context};
use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::{Debug, Display};
use std::fs::File;
use std::future::Future;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex, Weak};
use std::time::{Duration, Instant, SystemTime};
use std::{fmt, fs};
use anyhow::{Context, bail};
use arc_swap::ArcSwap;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use camino::{Utf8Path, Utf8PathBuf};
use chrono::NaiveDateTime;
use enumset::EnumSet;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use futures::stream::FuturesUnordered;
use itertools::Itertools as _;
use once_cell::sync::Lazy;
use pageserver_api::models;
use pageserver_api::models::CompactInfoResponse;
use pageserver_api::models::LsnLease;
use pageserver_api::models::TimelineArchivalState;
use pageserver_api::models::TimelineState;
use pageserver_api::models::TopTenantShardItem;
use pageserver_api::models::WalRedoManagerStatus;
use pageserver_api::shard::ShardIdentity;
use pageserver_api::shard::ShardStripeSize;
use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeoutOrCancel;
pub use pageserver_api::models::TenantState;
use pageserver_api::models::{
CompactInfoResponse, LsnLease, TimelineArchivalState, TimelineState, TopTenantShardItem,
WalRedoManagerStatus,
};
use pageserver_api::shard::{ShardIdentity, ShardStripeSize, TenantShardId};
use remote_storage::{DownloadError, GenericRemoteStorage, TimeoutOrCancel};
use remote_timeline_client::index::GcCompactionState;
use remote_timeline_client::manifest::{
OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION,
LATEST_TENANT_MANIFEST_VERSION, OffloadedTimelineManifest, TenantManifest,
};
use remote_timeline_client::UploadQueueNotReadyError;
use remote_timeline_client::FAILED_REMOTE_OP_RETRIES;
use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD;
use secondary::heatmap::HeatMapTenant;
use secondary::heatmap::HeatMapTimeline;
use std::collections::BTreeMap;
use std::fmt;
use std::future::Future;
use std::sync::atomic::AtomicBool;
use std::sync::Weak;
use std::time::SystemTime;
use remote_timeline_client::{
FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError,
};
use secondary::heatmap::{HeatMapTenant, HeatMapTimeline};
use storage_broker::BrokerClientChannel;
use timeline::compaction::CompactionOutcome;
use timeline::compaction::GcCompactionQueue;
use timeline::import_pgdata;
use timeline::offload::offload_timeline;
use timeline::offload::OffloadError;
use timeline::CompactFlags;
use timeline::CompactOptions;
use timeline::CompactionError;
use timeline::PreviousHeatmap;
use timeline::ShutdownMode;
use timeline::compaction::{CompactionOutcome, GcCompactionQueue};
use timeline::offload::{OffloadError, offload_timeline};
use timeline::{
CompactFlags, CompactOptions, CompactionError, PreviousHeatmap, ShutdownMode, import_pgdata,
};
use tokio::io::BufReader;
use tokio::sync::watch;
use tokio::sync::Notify;
use tokio::sync::{Notify, Semaphore, watch};
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use tracing::*;
use upload_queue::NotInitialized;
use utils::backoff;
use utils::circuit_breaker::CircuitBreaker;
use utils::completion;
use utils::crashsafe::path_with_suffix_extension;
use utils::failpoint_support;
use utils::fs_ext;
use utils::pausable_failpoint;
use utils::sync::gate::Gate;
use utils::sync::gate::GateGuard;
use utils::timeout::timeout_cancellable;
use utils::timeout::TimeoutCancellableError;
use utils::sync::gate::{Gate, GateGuard};
use utils::timeout::{TimeoutCancellableError, timeout_cancellable};
use utils::try_rcu::ArcSwapExt;
use utils::zstd::create_zst_tarball;
use utils::zstd::extract_zst_tarball;
use utils::zstd::{create_zst_tarball, extract_zst_tarball};
use utils::{backoff, completion, failpoint_support, fs_ext, pausable_failpoint};
use self::config::AttachedLocationConfig;
use self::config::AttachmentMode;
use self::config::LocationConf;
use self::config::TenantConf;
use self::config::{AttachedLocationConfig, AttachmentMode, LocationConf, TenantConf};
use self::metadata::TimelineMetadata;
use self::mgr::GetActiveTenantError;
use self::mgr::GetTenantError;
use self::mgr::{GetActiveTenantError, GetTenantError};
use self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest};
use self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
use self::timeline::uninit::TimelineCreateGuard;
use self::timeline::uninit::TimelineExclusionError;
use self::timeline::uninit::UninitializedTimeline;
use self::timeline::EvictionTaskTenantState;
use self::timeline::GcCutoffs;
use self::timeline::TimelineDeleteProgress;
use self::timeline::TimelineResources;
use self::timeline::WaitLsnError;
use self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, UninitializedTimeline};
use self::timeline::{
EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError,
};
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueueClient;
use crate::deletion_queue::DeletionQueueError;
use crate::import_datadir;
use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
use crate::l0_flush::L0FlushGlobalState;
use crate::metrics::CONCURRENT_INITDBS;
use crate::metrics::INITDB_RUN_TIME;
use crate::metrics::INITDB_SEMAPHORE_ACQUISITION_TIME;
use crate::metrics::TENANT;
use crate::metrics::{
remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN,
TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC,
BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS,
INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_STATE_METRIC,
TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics,
};
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::config::LocationMode;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::config::{LocationMode, TenantConfOpt};
use crate::tenant::gc_result::GcResult;
pub use crate::tenant::remote_timeline_client::index::IndexPart;
use crate::tenant::remote_timeline_client::remote_initdb_archive_path;
use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
use crate::tenant::remote_timeline_client::INITDB_PATH;
use crate::tenant::storage_layer::DeltaLayer;
use crate::tenant::storage_layer::ImageLayer;
use crate::walingest::WalLagCooldown;
use crate::walredo;
use crate::InitializationOrder;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt::Debug;
use std::fmt::Display;
use std::fs;
use std::fs::File;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::sync::Mutex;
use std::time::{Duration, Instant};
use crate::span;
use crate::tenant::remote_timeline_client::{
INITDB_PATH, MaybeDeletedIndexPart, remote_initdb_archive_path,
};
use crate::tenant::storage_layer::{DeltaLayer, ImageLayer};
use crate::tenant::timeline::delete::DeleteTimelineFlow;
use crate::tenant::timeline::uninit::cleanup_timeline_directory;
use crate::virtual_file::VirtualFile;
use crate::walingest::WalLagCooldown;
use crate::walredo::PostgresRedoManager;
use crate::TEMP_FILE_SUFFIX;
use once_cell::sync::Lazy;
pub use pageserver_api::models::TenantState;
use tokio::sync::Semaphore;
use crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mgr, walredo};
static INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));
use utils::{
crashsafe,
generation::Generation,
id::TimelineId,
lsn::{Lsn, RecordLsn},
};
use utils::crashsafe;
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::lsn::{Lsn, RecordLsn};
pub mod blob_io;
pub mod block_io;
@@ -184,9 +133,9 @@ mod gc_block;
mod gc_result;
pub(crate) mod throttle;
pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
pub(crate) use timeline::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
// re-export for use in walreceiver
pub use crate::tenant::timeline::WalReceiverInfo;
@@ -251,7 +200,9 @@ impl AttachedTenantConf {
Ok(Self::new(location_conf.tenant_conf, *attach_conf))
}
LocationMode::Secondary(_) => {
anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode")
anyhow::bail!(
"Attempted to construct AttachedTenantConf from a LocationConf in secondary mode"
)
}
}
}
@@ -465,7 +416,9 @@ impl WalredoManagerId {
static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
if id == 0 {
panic!("WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique");
panic!(
"WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique"
);
}
Self(id)
}
@@ -1229,7 +1182,9 @@ impl Tenant {
match cause {
LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (),
LoadTimelineCause::ImportPgdata { .. } => {
unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3")
unreachable!(
"ImportPgdata should not be reloading timeline import is done and persisted as such in s3"
)
}
}
let mut guard = self.timelines_creating.lock().unwrap();
@@ -1657,7 +1612,9 @@ impl Tenant {
failpoint_support::sleep_millis_async!("before-attaching-tenant");
let Some(preload) = preload else {
anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624");
anyhow::bail!(
"local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624"
);
};
let mut offloaded_timeline_ids = HashSet::new();
@@ -2041,7 +1998,7 @@ impl Tenant {
remote_storage: GenericRemoteStorage,
previous_heatmap: Option<PreviousHeatmap>,
cancel: CancellationToken,
) -> impl Future<Output = TimelinePreload> {
) -> impl Future<Output = TimelinePreload> + use<> {
let client = self.build_timeline_client(timeline_id, remote_storage);
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
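// The added `+ use<>` above opts out of Rust 2024's new capture defaults: in
// edition 2024, return-position impl Trait captures every in-scope lifetime
// (including &self's), while `use<>` declares that the returned future
// captures nothing. A minimal sketch of the pattern:
struct Loader {
    id: u64,
}

impl Loader {
    // Without `use<>`, the 2024 default would tie the future to `&self`.
    fn preload(&self) -> impl std::future::Future<Output = u64> + use<> {
        let id = self.id; // copy out what the future needs first
        async move { id }
    }
}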
@@ -2736,7 +2693,9 @@ impl Tenant {
timeline
}
CreateTimelineResult::ImportSpawned(timeline) => {
info!("import task spawned, timeline will become visible and activated once the import is done");
info!(
"import task spawned, timeline will become visible and activated once the import is done"
);
timeline
}
};
@@ -2782,7 +2741,7 @@ impl Tenant {
{
StartCreatingTimelineResult::CreateGuard(guard) => guard,
StartCreatingTimelineResult::Idempotent(timeline) => {
return Ok(CreateTimelineResult::Idempotent(timeline))
return Ok(CreateTimelineResult::Idempotent(timeline));
}
};
@@ -2916,7 +2875,9 @@ impl Tenant {
let index_part = match index_part {
MaybeDeletedIndexPart::Deleted(_) => {
// likely concurrent delete call, cplane should prevent this
anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but")
anyhow::bail!(
"index part says deleted but we are not done creating yet, this should not happen but"
)
}
MaybeDeletedIndexPart::IndexPart(p) => p,
};
@@ -3907,7 +3868,9 @@ where
if !later.is_empty() {
for (missing_id, orphan_ids) in later {
for (orphan_id, _) in orphan_ids {
error!("could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded");
error!(
"could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded"
);
}
}
bail!("could not load tenant because some timelines are missing ancestors");
@@ -4827,7 +4790,10 @@ impl Tenant {
let gc_info = src_timeline.gc_info.read().unwrap();
let planned_cutoff = gc_info.min_cutoff();
if gc_info.lsn_covered_by_lease(start_lsn) {
tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *applied_gc_cutoff_lsn);
tracing::info!(
"skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease",
*applied_gc_cutoff_lsn
);
} else {
src_timeline
.check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn)
@@ -4973,7 +4939,9 @@ impl Tenant {
}
// Idempotent <=> CreateTimelineIdempotency is identical
(x, y) if x == y => {
info!("timeline already exists and idempotency matches, succeeding request");
info!(
"timeline already exists and idempotency matches, succeeding request"
);
// fallthrough
}
(_, _) => {
@@ -5055,7 +5023,7 @@ impl Tenant {
{
StartCreatingTimelineResult::CreateGuard(guard) => guard,
StartCreatingTimelineResult::Idempotent(timeline) => {
return Ok(CreateTimelineResult::Idempotent(timeline))
return Ok(CreateTimelineResult::Idempotent(timeline));
}
};
@@ -5260,7 +5228,9 @@ impl Tenant {
.create_timeline_files(&create_guard.timeline_path)
.await
{
error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}");
error!(
"Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}"
);
cleanup_timeline_directory(create_guard);
return Err(e);
}
@@ -5625,20 +5595,19 @@ pub async fn dump_layerfile_from_path(
#[cfg(test)]
pub(crate) mod harness {
use bytes::{Bytes, BytesMut};
use hex_literal::hex;
use once_cell::sync::OnceCell;
use pageserver_api::key::Key;
use pageserver_api::models::ShardParameters;
use pageserver_api::record::NeonWalRecord;
use pageserver_api::shard::ShardIndex;
use utils::id::TenantId;
use utils::logging;
use super::*;
use crate::deletion_queue::mock::MockDeletionQueue;
use crate::l0_flush::L0FlushConfig;
use crate::walredo::apply_neon;
use pageserver_api::key::Key;
use pageserver_api::record::NeonWalRecord;
use super::*;
use hex_literal::hex;
use utils::id::TenantId;
pub const TIMELINE_ID: TimelineId =
TimelineId::from_array(hex!("11223344556677881122334455667788"));
@@ -5919,34 +5888,34 @@ pub(crate) mod harness {
mod tests {
use std::collections::{BTreeMap, BTreeSet};
use super::*;
use crate::keyspace::KeySpaceAccum;
use crate::tenant::harness::*;
use crate::tenant::timeline::CompactFlags;
use crate::DEFAULT_PG_VERSION;
use bytes::{Bytes, BytesMut};
use hex_literal::hex;
use itertools::Itertools;
use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
#[cfg(feature = "testing")]
use models::CompactLsnRange;
use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
#[cfg(feature = "testing")]
use pageserver_api::record::NeonWalRecord;
use pageserver_api::value::Value;
use pageserver_compaction::helpers::overlaps_with;
use rand::{thread_rng, Rng};
use rand::{Rng, thread_rng};
use storage_layer::{IoConcurrency, PersistentLayerKey};
use tests::storage_layer::ValuesReconstructState;
use tests::timeline::{GetVectoredError, ShutdownMode};
#[cfg(feature = "testing")]
use timeline::GcInfo;
#[cfg(feature = "testing")]
use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
use timeline::{CompactOptions, DeltaLayerTestDesc};
use utils::id::TenantId;
#[cfg(feature = "testing")]
use models::CompactLsnRange;
#[cfg(feature = "testing")]
use pageserver_api::record::NeonWalRecord;
#[cfg(feature = "testing")]
use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
#[cfg(feature = "testing")]
use timeline::GcInfo;
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::keyspace::KeySpaceAccum;
use crate::tenant::harness::*;
use crate::tenant::timeline::CompactFlags;
static TEST_KEY: Lazy<Key> =
Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));
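The reordered import blocks above (and throughout the commit) all follow one scheme: std imports first, then external crates, then `crate`/`super` paths, with names inside braces sorted uppercase before lowercase (`{TIMELINE_ID, TenantHarness}`). The grouping presumably comes from rustfmt's unstable `group_imports = "StdExternalCrate"` option in the repo's rustfmt.toml, combined with the 2024 style edition's new sort order; treat this sketch as illustrative of the shape, not as the project's actual imports:

#![allow(unused_imports)]
// 1. standard library
use std::collections::HashMap;
use std::sync::Arc;

// 2. external crates; inside braces, uppercase sorts before lowercase
use rand::{Rng, thread_rng};

// 3. crate-local paths
use crate::util::helper;

mod util {
    pub fn helper() {}
}

fn main() {}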
@@ -6196,11 +6165,12 @@ mod tests {
panic!("wrong error type")
};
assert!(err.to_string().contains("invalid branch start lsn"));
assert!(err
.source()
assert!(
err.source()
.unwrap()
.to_string()
.contains("we might've already garbage collected needed data"))
.contains("we might've already garbage collected needed data")
)
}
}
@@ -6229,11 +6199,12 @@ mod tests {
panic!("wrong error type");
};
assert!(&err.to_string().contains("invalid branch start lsn"));
assert!(&err
.source()
assert!(
&err.source()
.unwrap()
.to_string()
.contains("is earlier than latest GC cutoff"));
.contains("is earlier than latest GC cutoff")
);
}
}
@@ -7542,10 +7513,12 @@ mod tests {
}
}
assert!(!harness
assert!(
!harness
.conf
.timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID)
.exists());
.exists()
);
Ok(())
}
@@ -7746,7 +7719,10 @@ mod tests {
let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();
assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}");
assert!(
after_num_l0_delta_files < before_num_l0_delta_files,
"after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}"
);
assert_eq!(
tline.get(test_key, lsn, &ctx).await?,
@@ -7913,7 +7889,10 @@ mod tests {
let (_, after_delta_file_accessed) =
scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
.await?;
assert!(after_delta_file_accessed < before_delta_file_accessed, "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}");
assert!(
after_delta_file_accessed < before_delta_file_accessed,
"after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}"
);
// Given that we already produced an image layer, there should be no delta layer needed for the scan, but still setting a low threshold there for unforeseen circumstances.
assert!(
after_delta_file_accessed <= 2,
@@ -7967,10 +7946,12 @@ mod tests {
get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?,
Some(test_img("data key 1"))
);
assert!(get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx)
assert!(
get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx)
.await
.unwrap_err()
.is_missing_key_error());
.is_missing_key_error()
);
assert!(
get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx)
.await


@@ -14,6 +14,9 @@
//! len < 128: 0XXXXXXX
//! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX
//!
use std::cmp::min;
use std::io::{Error, ErrorKind};
use async_compression::Level;
use bytes::{BufMut, BytesMut};
use pageserver_api::models::ImageCompressionAlgorithm;
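The module header above documents the blob framing: lengths under 128 fit in a single byte, while larger blobs get a 4-byte big-endian word whose top bit is set and whose next three bits (CCC) carry compression flags. A sketch of that encoding follows; the function name and exact masks are illustrative, not the crate's:

/// len < 128  => one byte 0XXXXXXX
/// len >= 128 => four bytes 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX
fn encode_len_header(len: u32, compression_bits: u8) -> Vec<u8> {
    if len < 0x80 {
        vec![len as u8]
    } else {
        assert!(len < (1 << 28), "length must fit in 28 bits");
        let word = 0x8000_0000u32 | (u32::from(compression_bits & 0x7) << 28) | len;
        word.to_be_bytes().to_vec()
    }
}

fn main() {
    assert_eq!(encode_len_header(42, 0), vec![42]);
    assert_eq!(encode_len_header(300, 0), vec![0x80, 0x00, 0x01, 0x2C]);
}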
@@ -24,10 +27,8 @@ use tracing::warn;
use crate::context::RequestContext;
use crate::page_cache::PAGE_SZ;
use crate::tenant::block_io::BlockCursor;
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
use crate::virtual_file::VirtualFile;
use std::cmp::min;
use std::io::{Error, ErrorKind};
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
#[derive(Copy, Clone, Debug)]
pub struct CompressionInfo {
@@ -414,12 +415,15 @@ impl BlobWriter<false> {
#[cfg(test)]
pub(crate) mod tests {
use super::*;
use crate::{context::DownloadBehavior, task_mgr::TaskKind, tenant::block_io::BlockReaderRef};
use camino::Utf8PathBuf;
use camino_tempfile::Utf8TempDir;
use rand::{Rng, SeedableRng};
use super::*;
use crate::context::DownloadBehavior;
use crate::task_mgr::TaskKind;
use crate::tenant::block_io::BlockReaderRef;
async fn round_trip_test<const BUFFERED: bool>(blobs: &[Vec<u8>]) -> Result<(), Error> {
round_trip_test_compressed::<BUFFERED>(blobs, false).await
}
@@ -486,7 +490,7 @@ pub(crate) mod tests {
pub(crate) fn random_array(len: usize) -> Vec<u8> {
let mut rng = rand::thread_rng();
(0..len).map(|_| rng.gen()).collect::<_>()
(0..len).map(|_| rng.r#gen()).collect::<_>()
}
#[tokio::test]
@@ -544,9 +548,9 @@ pub(crate) mod tests {
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let blobs = (0..1024)
.map(|_| {
let mut sz: u16 = rng.gen();
let mut sz: u16 = rng.r#gen();
// Make 50% of the arrays small
if rng.gen() {
if rng.r#gen() {
sz &= 63;
}
random_array(sz.into())
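These `rng.gen()` to `rng.r#gen()` rewrites are a genuine edition-2024 breaking change rather than formatting: `gen` is a reserved keyword in Rust 2024, so calling rand's `Rng::gen` now requires raw-identifier syntax. A minimal sketch:

use rand::Rng;

fn main() {
    let mut rng = rand::thread_rng();
    // `let b: u8 = rng.gen();` is an error in edition 2024: `gen` is a keyword.
    let b: u8 = rng.r#gen(); // the raw identifier resolves to the same trait method
    println!("{b}");
}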


@@ -2,14 +2,16 @@
//! Low-level Block-oriented I/O functions
//!
use std::ops::Deref;
use bytes::Bytes;
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
use crate::context::RequestContext;
use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ};
use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult};
#[cfg(test)]
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::VirtualFile;
use bytes::Bytes;
use std::ops::Deref;
/// This is implemented by anything that can read 8 kB (PAGE_SZ)
/// blocks, using the page cache


@@ -8,16 +8,17 @@
//! We cannot use global or default config instead, because wrong settings
//! may lead to a data loss.
//!
use std::num::NonZeroU64;
use std::time::Duration;
pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf;
use pageserver_api::models::CompactionAlgorithmSettings;
use pageserver_api::models::EvictionPolicy;
use pageserver_api::models::{self, TenantConfigPatch};
use pageserver_api::models::{
self, CompactionAlgorithmSettings, EvictionPolicy, TenantConfigPatch,
};
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
use serde::de::IntoDeserializer;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::num::NonZeroU64;
use std::time::Duration;
use utils::generation::Generation;
use utils::postgres_client::PostgresClientProtocol;
@@ -739,9 +740,10 @@ impl From<TenantConfOpt> for models::TenantConfig {
#[cfg(test)]
mod tests {
use super::*;
use models::TenantConfig;
use super::*;
#[test]
fn de_serializing_pageserver_config_omits_empty_values() {
let small_conf = TenantConfOpt {


@@ -18,27 +18,23 @@
//! - An Iterator interface would be more convenient for the callers than the
//! 'visit' function
//!
use std::cmp::Ordering;
use std::iter::Rev;
use std::ops::{Range, RangeInclusive};
use std::{io, result};
use async_stream::try_stream;
use byteorder::{ReadBytesExt, BE};
use byteorder::{BE, ReadBytesExt};
use bytes::{BufMut, Bytes, BytesMut};
use either::Either;
use futures::{Stream, StreamExt};
use hex;
use std::{
cmp::Ordering,
io,
iter::Rev,
ops::{Range, RangeInclusive},
result,
};
use thiserror::Error;
use tracing::error;
use crate::{
context::{DownloadBehavior, RequestContext},
task_mgr::TaskKind,
tenant::block_io::{BlockReader, BlockWriter},
};
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::TaskKind;
use crate::tenant::block_io::{BlockReader, BlockWriter};
// The maximum size of a value stored in the B-tree. 5 bytes is enough currently.
pub const VALUE_SZ: usize = 5;
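`VALUE_SZ` caps B-tree values at 5 bytes (40 bits). As a rough sketch of packing a value into such a fixed width; the crate's real bit layout is not shown here, so treat this as illustrative only:

const VALUE_SZ: usize = 5;

fn pack(v: u64) -> [u8; VALUE_SZ] {
    assert!(v < (1 << 40), "value must fit in 40 bits");
    let bytes = v.to_be_bytes(); // 8 bytes, big-endian
    bytes[3..8].try_into().unwrap() // keep the low 5 bytes
}

fn unpack(b: [u8; VALUE_SZ]) -> u64 {
    let mut bytes = [0u8; 8];
    bytes[3..8].copy_from_slice(&b);
    u64::from_be_bytes(bytes)
}

fn main() {
    let v = 0xAB_CDEF_0123u64;
    assert_eq!(unpack(pack(v)), v);
}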
@@ -833,12 +829,14 @@ impl<const L: usize> BuildNode<L> {
#[cfg(test)]
pub(crate) mod tests {
use super::*;
use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef};
use rand::Rng;
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use rand::Rng;
use super::*;
use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef};
#[derive(Clone, Default)]
pub(crate) struct TestDisk {
blocks: Vec<Bytes>,
@@ -1115,7 +1113,7 @@ pub(crate) mod tests {
// Test get() operations on random keys, most of which will not exist
for _ in 0..100000 {
let key_int = rand::thread_rng().gen::<u128>();
let key_int = rand::thread_rng().r#gen::<u128>();
let search_key = u128::to_be_bytes(key_int);
assert!(reader.get(&search_key, &ctx).await? == all_data.get(&key_int).cloned());
}


@@ -1,6 +1,17 @@
//! Implementation of append-only file data structure
//! used to keep in-memory layers spilled on disk.
use std::io;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use camino::Utf8PathBuf;
use num_traits::Num;
use pageserver_api::shard::TenantShardId;
use tokio_epoll_uring::{BoundedBuf, Slice};
use tracing::error;
use utils::id::TimelineId;
use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
use crate::config::PageServerConf;
use crate::context::RequestContext;
@@ -9,17 +20,7 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
use crate::virtual_file::owned_buffers_io::write::Buffer;
use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile};
use camino::Utf8PathBuf;
use num_traits::Num;
use pageserver_api::shard::TenantShardId;
use tokio_epoll_uring::{BoundedBuf, Slice};
use tracing::error;
use std::io;
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use utils::id::TimelineId;
use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};
pub struct EphemeralFile {
_tenant_shard_id: TenantShardId,
@@ -319,13 +320,14 @@ pub fn is_ephemeral_file(filename: &str) -> bool {
#[cfg(test)]
mod tests {
use std::fs;
use std::str::FromStr;
use rand::Rng;
use super::*;
use crate::context::DownloadBehavior;
use crate::task_mgr::TaskKind;
use std::fs;
use std::str::FromStr;
fn harness(
test_name: &str,


@@ -1,4 +1,5 @@
use std::{collections::HashMap, sync::Arc};
use std::collections::HashMap;
use std::sync::Arc;
use utils::id::TimelineId;


@@ -1,8 +1,9 @@
use anyhow::Result;
use serde::Serialize;
use std::ops::AddAssign;
use std::time::Duration;
use anyhow::Result;
use serde::Serialize;
///
/// Result of performing GC
///


@@ -46,24 +46,24 @@
mod historic_layer_coverage;
mod layer_coverage;
use crate::context::RequestContext;
use crate::keyspace::KeyPartitioning;
use crate::tenant::storage_layer::InMemoryLayer;
use anyhow::Result;
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpace, KeySpaceAccum};
use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze};
use std::collections::{HashMap, VecDeque};
use std::iter::Peekable;
use std::ops::Range;
use std::sync::Arc;
use anyhow::Result;
use historic_layer_coverage::BufferedHistoricLayerCoverage;
pub use historic_layer_coverage::LayerKey;
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpace, KeySpaceAccum};
use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze};
use tokio::sync::watch;
use utils::lsn::Lsn;
use historic_layer_coverage::BufferedHistoricLayerCoverage;
pub use historic_layer_coverage::LayerKey;
use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc};
use crate::context::RequestContext;
use crate::keyspace::KeyPartitioning;
use crate::tenant::storage_layer::InMemoryLayer;
///
/// LayerMap tracks what layers exist on a timeline.
@@ -1066,18 +1066,17 @@ impl LayerMap {
#[cfg(test)]
mod tests {
use crate::tenant::{storage_layer::LayerName, IndexPart};
use pageserver_api::{
key::DBDIR_KEY,
keyspace::{KeySpace, KeySpaceRandomAccum},
};
use std::{collections::HashMap, path::PathBuf};
use utils::{
id::{TenantId, TimelineId},
shard::TenantShardId,
};
use std::collections::HashMap;
use std::path::PathBuf;
use pageserver_api::key::DBDIR_KEY;
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
use utils::id::{TenantId, TimelineId};
use utils::shard::TenantShardId;
use super::*;
use crate::tenant::IndexPart;
use crate::tenant::storage_layer::LayerName;
#[derive(Clone)]
struct LayerDesc {
@@ -1417,9 +1416,11 @@ mod tests {
assert!(!shadow.ranges.is_empty());
// At least some layers should be marked covered
assert!(layer_visibilities
assert!(
layer_visibilities
.iter()
.any(|i| matches!(i.1, LayerVisibilityHint::Covered)));
.any(|i| matches!(i.1, LayerVisibilityHint::Covered))
);
let layer_visibilities = layer_visibilities.into_iter().collect::<HashMap<_, _>>();


@@ -3,9 +3,8 @@ use std::ops::Range;
use tracing::info;
use crate::tenant::storage_layer::PersistentLayerDesc;
use super::layer_coverage::LayerCoverageTuple;
use crate::tenant::storage_layer::PersistentLayerDesc;
/// Layers in this module are identified and indexed by this data.
///


@@ -19,8 +19,9 @@
use anyhow::ensure;
use serde::{Deserialize, Serialize};
use utils::bin_ser::SerializeError;
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
use utils::bin_ser::{BeSer, SerializeError};
use utils::id::TimelineId;
use utils::lsn::Lsn;
/// Use special format number to enable backward compatibility.
const METADATA_FORMAT_VERSION: u16 = 4;
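A hedged sketch of what the format number enables: the reader checks a leading version and can branch to accept older encodings. The layout and endianness below are assumptions for illustration, not taken from the crate:

const METADATA_FORMAT_VERSION: u16 = 4;

fn check_version(buf: &[u8]) -> Result<u16, String> {
    let raw = buf.get(..2).ok_or_else(|| "metadata too short".to_string())?;
    let version = u16::from_be_bytes(raw.try_into().unwrap());
    if version > METADATA_FORMAT_VERSION {
        return Err(format!("unsupported metadata format {version}"));
    }
    Ok(version) // the caller branches here to keep reading older encodings
}

fn main() {
    assert_eq!(check_version(&[0, 4, 0xde, 0xad]), Ok(4));
}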
@@ -345,9 +346,10 @@ impl TimelineMetadata {
}
pub(crate) mod modern_serde {
use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};
use serde::{Deserialize, Serialize};
use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};
pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<TimelineMetadata, D::Error>
where
D: serde::de::Deserializer<'de>,


@@ -1,34 +1,42 @@
//! This module acts as a switchboard to access different repositories managed by this
//! page server.
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
use pageserver_api::key::Key;
use pageserver_api::models::LocationConfigMode;
use pageserver_api::shard::{
ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
};
use pageserver_api::upcall_api::ReAttachResponseTenant;
use rand::{distributions::Alphanumeric, Rng};
use remote_storage::TimeoutOrCancel;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::Deref;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::SystemExt;
use tokio::fs;
use anyhow::Context;
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
use once_cell::sync::Lazy;
use pageserver_api::key::Key;
use pageserver_api::models::LocationConfigMode;
use pageserver_api::shard::{
ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
};
use pageserver_api::upcall_api::ReAttachResponseTenant;
use rand::Rng;
use rand::distributions::Alphanumeric;
use remote_storage::TimeoutOrCancel;
use sysinfo::SystemExt;
use tokio::fs;
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::crashsafe::path_with_suffix_extension;
use utils::fs_ext::PathExt;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use utils::{backoff, completion, crashsafe};
use super::remote_timeline_client::remote_tenant_path;
use super::secondary::SecondaryTenant;
use super::timeline::detach_ancestor::{self, PreparedTimelineDetach};
use super::{GlobalShutDown, TenantSharedResources};
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::controller_upcall_client::{
@@ -37,7 +45,7 @@ use crate::controller_upcall_client::{
use crate::deletion_queue::DeletionQueueClient;
use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind};
use crate::tenant::config::{
AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
};
@@ -48,16 +56,6 @@ use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Ten
use crate::virtual_file::MaybeFatalIo;
use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
use utils::crashsafe::path_with_suffix_extension;
use utils::fs_ext::PathExt;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use super::remote_timeline_client::remote_tenant_path;
use super::secondary::SecondaryTenant;
use super::timeline::detach_ancestor::{self, PreparedTimelineDetach};
use super::{GlobalShutDown, TenantSharedResources};
/// For a tenant that appears in TenantsMap, it may either be
/// - `Attached`: has a full Tenant object, is eligible to service
/// reads and ingest WAL.
@@ -140,7 +138,7 @@ impl TenantStartupMode {
/// If this returns None, the re-attach struct is in an invalid state and
/// should be ignored in the response.
fn from_reattach_tenant(rart: ReAttachResponseTenant) -> Option<Self> {
match (rart.mode, rart.gen) {
match (rart.mode, rart.r#gen) {
(LocationConfigMode::Detached, _) => None,
(LocationConfigMode::Secondary, _) => Some(Self::Secondary),
(LocationConfigMode::AttachedMulti, Some(g)) => {
@@ -376,7 +374,7 @@ async fn init_load_generations(
TenantStartupMode::Attached((_mode, generation)) => Some(generation),
TenantStartupMode::Secondary => None,
}
.map(|gen| (*id, *gen))
.map(|gen_| (*id, *gen_))
})
.collect();
resources.deletion_queue_client.recover(attached_tenants)?;
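The `|gen| ...` to `|gen_| ...` rename, like the `rart.r#gen` field access above, is forced by the same edition-2024 keyword reservation: local bindings must be renamed, while fields named `gen` in crates still on older editions stay reachable through raw identifiers. A sketch:

struct Tenant {
    r#gen: u32, // a field conceptually named `gen`, declared via raw identifier
}

fn main() {
    let tenants = vec![Tenant { r#gen: 1 }, Tenant { r#gen: 2 }];
    let gens: Vec<u32> = tenants.iter().map(|t| t.r#gen).collect();
    // `.map(|gen| gen * 2)` no longer parses in edition 2024; rename the binding:
    let doubled: Vec<u32> = gens.iter().map(|gen_| gen_ * 2).collect();
    println!("{doubled:?}");
}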
@@ -502,7 +500,9 @@ pub async fn init_tenant_mgr(
.total_memory();
let max_ephemeral_layer_bytes =
conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024);
tracing::info!("Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory");
tracing::info!(
"Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory"
);
inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store(
max_ephemeral_layer_bytes,
std::sync::atomic::Ordering::Relaxed,
@@ -700,10 +700,11 @@ fn tenant_spawn(
// to avoid impacting prod runtime performance.
assert!(!crate::is_temporary(tenant_path));
debug_assert!(tenant_path.is_dir());
debug_assert!(conf
.tenant_location_config_path(&tenant_shard_id)
debug_assert!(
conf.tenant_location_config_path(&tenant_shard_id)
.try_exists()
.unwrap());
.unwrap()
);
Tenant::spawn(
conf,
@@ -791,7 +792,9 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
(total_in_progress, total_attached)
}
TenantsMap::ShuttingDown(_) => {
error!("already shutting down, this function isn't supposed to be called more than once");
error!(
"already shutting down, this function isn't supposed to be called more than once"
);
return;
}
}
@@ -1194,7 +1197,9 @@ impl TenantManager {
}
TenantSlot::Attached(tenant) => {
let (_guard, progress) = utils::completion::channel();
info!("Shutting down just-spawned tenant, because tenant manager is shut down");
info!(
"Shutting down just-spawned tenant, because tenant manager is shut down"
);
match tenant.shutdown(progress, ShutdownMode::Hard).await {
Ok(()) => {
info!("Finished shutting down just-spawned tenant");
@@ -1784,7 +1789,7 @@ impl TenantManager {
_ => {
return Err(anyhow::anyhow!(e).context(format!(
"Hard linking {relative_layer} into {child_prefix}"
)))
)));
}
}
}
@@ -2025,8 +2030,8 @@ impl TenantManager {
.wait_to_become_active(std::time::Duration::from_secs(9999))
.await
.map_err(|e| {
use pageserver_api::models::TenantState;
use GetActiveTenantError::{Cancelled, WillNotBecomeActive};
use pageserver_api::models::TenantState;
match e {
Cancelled | WillNotBecomeActive(TenantState::Stopping { .. }) => {
Error::ShuttingDown
@@ -2089,7 +2094,7 @@ impl TenantManager {
match selector {
ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
return ShardResolveResult::Found(tenant.clone())
return ShardResolveResult::Found(tenant.clone());
}
ShardSelector::Page(key) => {
// First slot we see for this tenant, calculate the expected shard number
@@ -2486,7 +2491,7 @@ impl SlotGuard {
TenantsMap::Initializing => {
return Err(TenantSlotUpsertError::MapState(
TenantMapError::StillInitializing,
))
));
}
TenantsMap::ShuttingDown(_) => {
return Err(TenantSlotUpsertError::ShuttingDown((
@@ -2815,21 +2820,22 @@ where
}
}
use {
crate::tenant::gc_result::GcResult, http_utils::error::ApiError,
pageserver_api::models::TimelineGcRequest,
};
use http_utils::error::ApiError;
use pageserver_api::models::TimelineGcRequest;
use crate::tenant::gc_result::GcResult;
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use std::sync::Arc;
use tracing::Instrument;
use super::super::harness::TenantHarness;
use super::TenantsMap;
use crate::tenant::mgr::TenantSlot;
use super::{super::harness::TenantHarness, TenantsMap};
#[tokio::test(start_paused = true)]
async fn shutdown_awaits_in_progress_tenant() {
// Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully


@@ -179,78 +179,64 @@ pub mod index;
pub mod manifest;
pub(crate) mod upload;
use anyhow::Context;
use camino::Utf8Path;
use chrono::{NaiveDateTime, Utc};
pub(crate) use download::download_initdb_tar_zst;
use index::GcCompactionState;
use pageserver_api::models::TimelineArchivalState;
use pageserver_api::shard::{ShardIndex, TenantShardId};
use regex::Regex;
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
use utils::backoff::{
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
use utils::pausable_failpoint;
use utils::shard::ShardNumber;
use std::collections::{HashMap, HashSet, VecDeque};
use std::ops::DerefMut;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::Duration;
use anyhow::Context;
use camino::Utf8Path;
use chrono::{NaiveDateTime, Utc};
pub(crate) use download::{
download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file,
list_remote_tenant_shards, list_remote_timelines,
};
use index::GcCompactionState;
pub(crate) use index::LayerFileMetadata;
use pageserver_api::models::TimelineArchivalState;
use pageserver_api::shard::{ShardIndex, TenantShardId};
use regex::Regex;
use remote_storage::{
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel,
};
use std::ops::DerefMut;
use tracing::{debug, error, info, instrument, warn};
use tracing::{info_span, Instrument};
use utils::lsn::Lsn;
use crate::context::RequestContext;
use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
use crate::metrics::{
MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
pub(crate) use upload::upload_initdb_dir;
use utils::backoff::{
self, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff,
};
use crate::task_mgr::shutdown_token;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::remote_timeline_client::download::download_retry;
use crate::tenant::storage_layer::AsLayerDesc;
use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable};
use crate::tenant::TIMELINES_SEGMENT_NAME;
use crate::{
config::PageServerConf,
task_mgr,
task_mgr::TaskKind,
task_mgr::BACKGROUND_RUNTIME,
tenant::metadata::TimelineMetadata,
tenant::upload_queue::{
UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, UploadTask,
},
TENANT_HEATMAP_BASENAME,
};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::pausable_failpoint;
use utils::shard::ShardNumber;
use self::index::IndexPart;
use super::config::AttachedLocationConfig;
use super::metadata::MetadataUpdate;
use super::storage_layer::{Layer, LayerName, ResidentLayer};
use super::timeline::import_pgdata;
use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
use super::{DeleteTimelineError, Generation};
pub(crate) use download::{
download_index_part, download_tenant_manifest, is_temp_download_file,
list_remote_tenant_shards, list_remote_timelines,
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
use crate::metrics::{
MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
RemoteTimelineClientMetricsCallTrackSize,
};
pub(crate) use index::LayerFileMetadata;
pub(crate) use upload::upload_initdb_dir;
use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token};
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::remote_timeline_client::download::download_retry;
use crate::tenant::storage_layer::AsLayerDesc;
use crate::tenant::upload_queue::{
Delete, OpType, UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped,
UploadQueueStoppedDeletable, UploadTask,
};
use crate::tenant::{TIMELINES_SEGMENT_NAME, debug_assert_current_span_has_tenant_and_timeline_id};
use crate::{TENANT_HEATMAP_BASENAME, task_mgr};
// Occasional network issues and such can cause remote operations to fail, and
// that's expected. If a download fails, we log it at info-level, and retry.
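A small sketch of the retry delay policy this comment describes; the helper below is hypothetical and not the `utils::backoff` API:

use std::time::Duration;

fn backoff_delay(attempt: u32, base_secs: f64, max_secs: f64) -> Duration {
    // exponential growth, capped so retries never wait longer than `max_secs`
    Duration::from_secs_f64((base_secs * 2f64.powi(attempt as i32)).min(max_secs))
}

fn main() {
    for attempt in 0..5 {
        println!("attempt {attempt}: sleep {:?}", backoff_delay(attempt, 0.1, 10.0));
    }
}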
@@ -1091,7 +1077,11 @@ impl RemoteTimelineClient {
if !wanted(x) && wanted(y) {
// this could be avoided by having external in-memory synchronization, like
// timeline detach ancestor
warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason");
warn!(
?reason,
op = "insert",
"unexpected: two racing processes to enable and disable a gc blocking reason"
);
}
// at this point, the metadata must always show that there is a parent
@@ -1145,7 +1135,11 @@ impl RemoteTimelineClient {
(x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
(x, y) => {
if !wanted(x) && wanted(y) {
warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)");
warn!(
?reason,
op = "remove",
"unexpected: two racing processes to enable and disable a gc blocking reason (remove)"
);
}
upload_queue.dirty.gc_blocking =
@@ -1287,12 +1281,14 @@ impl RemoteTimelineClient {
#[cfg(feature = "testing")]
for (name, metadata) in &with_metadata {
let gen = metadata.generation;
if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) {
if unexpected == gen {
let gen_ = metadata.generation;
if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen_) {
if unexpected == gen_ {
tracing::error!("{name} was unlinked twice with same generation");
} else {
tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}");
tracing::error!(
"{name} was unlinked twice with different generations {gen_:?} and {unexpected:?}"
);
}
}
}
@@ -1354,11 +1350,11 @@ impl RemoteTimelineClient {
#[cfg(feature = "testing")]
for (name, meta) in &with_metadata {
let gen = meta.generation;
let gen_ = meta.generation;
match upload_queue.dangling_files.remove(name) {
Some(same) if same == gen => { /* expected */ }
Some(same) if same == gen_ => { /* expected */ }
Some(other) => {
tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}");
tracing::error!("{name} was unlinked with {other:?} but deleted with {gen_:?}");
}
None => {
tracing::error!("{name} was unlinked but was not dangling");
@@ -1455,7 +1451,9 @@ impl RemoteTimelineClient {
// proper stop is yet to be called. On cancel the original or some later task must call
// `stop` or `shutdown`.
let sg = scopeguard::guard((), |_| {
tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error")
tracing::error!(
"RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error"
)
});
let fut = {
@@ -1471,7 +1469,7 @@ impl RemoteTimelineClient {
scopeguard::ScopeGuard::into_inner(sg);
return;
}
UploadQueue::Initialized(ref mut init) => init,
UploadQueue::Initialized(init) => init,
};
// if the queue is already stuck due to a shutdown operation which was cancelled, then
@@ -1831,7 +1829,9 @@ impl RemoteTimelineClient {
.map(|n| n.starts_with(IndexPart::FILE_NAME))
.unwrap_or(false)
})
.filter_map(|o| parse_remote_index_path(o.key.clone()).map(|gen| (o.key.clone(), gen)))
.filter_map(|o| {
parse_remote_index_path(o.key.clone()).map(|gen_| (o.key.clone(), gen_))
})
.max_by_key(|i| i.1)
.map(|i| i.0.clone())
.unwrap_or(
@@ -2023,7 +2023,7 @@ impl RemoteTimelineClient {
}
let upload_result: anyhow::Result<()> = match &task.op {
UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
UploadOp::UploadLayer(layer, layer_metadata, mode) => {
// TODO: check if this mechanism can be removed now that can_bypass() performs
// conflict checks during scheduling.
if let Some(OpType::FlushDeletion) = mode {
@@ -2113,7 +2113,7 @@ impl RemoteTimelineClient {
)
.await
}
UploadOp::UploadMetadata { ref uploaded } => {
UploadOp::UploadMetadata { uploaded } => {
let res = upload::upload_index_part(
&self.storage_impl,
&self.tenant_shard_id,
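Dropping `ref` and `ref mut` from these patterns is also edition-2024 driven: the new match-ergonomics rules reject explicit binding modifiers when the default binding mode is already by-reference, as it is when matching on `&task.op`. A sketch:

enum Op {
    Upload(String),
}

fn main() {
    let op = Op::Upload("layer".into());
    match &op {
        // `Op::Upload(ref name)` is an error in edition 2024: the binding is
        // already by-reference because the scrutinee is `&op`.
        Op::Upload(name) => println!("{name}"), // name: &String
    }
}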
@@ -2229,11 +2229,11 @@ impl RemoteTimelineClient {
let lsn_update = {
let mut upload_queue_guard = self.upload_queue.lock().unwrap();
let upload_queue = match upload_queue_guard.deref_mut() {
UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"),
UploadQueue::Stopped(_stopped) => {
None
},
UploadQueue::Initialized(qi) => { Some(qi) }
UploadQueue::Uninitialized => panic!(
"callers are responsible for ensuring this is only called on an initialized queue"
),
UploadQueue::Stopped(_stopped) => None,
UploadQueue::Initialized(qi) => Some(qi),
};
let upload_queue = match upload_queue {
@@ -2255,7 +2255,11 @@ impl RemoteTimelineClient {
let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id);
let monotone = is_later || last_updater.is_none();
assert!(monotone, "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", task.task_id);
assert!(
monotone,
"no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}",
task.task_id
);
// not taking ownership is wasteful
upload_queue.clean.0.clone_from(uploaded);
@@ -2654,20 +2658,16 @@ pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation>
#[cfg(test)]
mod tests {
use super::*;
use crate::{
context::RequestContext,
tenant::{
config::AttachmentMode,
harness::{TenantHarness, TIMELINE_ID},
storage_layer::layer::local_layer_path,
Tenant, Timeline,
},
DEFAULT_PG_VERSION,
};
use std::collections::HashSet;
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::context::RequestContext;
use crate::tenant::config::AttachmentMode;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
use crate::tenant::storage_layer::layer::local_layer_path;
use crate::tenant::{Tenant, Timeline};
pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
format!("contents for {name}").into()
}


@@ -8,41 +8,39 @@ use std::future::Future;
use std::str::FromStr;
use std::time::SystemTime;
use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::shard::TenantShardId;
use remote_storage::{
DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
};
use tokio::fs::{self, File, OpenOptions};
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio_util::io::StreamReader;
use tokio_util::sync::CancellationToken;
use tracing::warn;
use utils::backoff;
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::{backoff, pausable_failpoint};
use super::index::{IndexPart, LayerFileMetadata};
use super::manifest::TenantManifest;
use super::{
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path,
parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
remote_tenant_manifest_prefix, remote_tenant_path,
};
use crate::TEMP_FILE_SUFFIX;
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::span::{
debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id,
};
use crate::tenant::Generation;
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
use crate::tenant::storage_layer::LayerName;
use crate::tenant::Generation;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use remote_storage::{
DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
use super::index::{IndexPart, LayerFileMetadata};
use super::manifest::TenantManifest;
use super::{
parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path,
remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
};
use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
///
/// If 'metadata' is given, we will validate that the downloaded file's size matches that
@@ -207,9 +205,9 @@ async fn download_object(
}
#[cfg(target_os = "linux")]
crate::virtual_file::io_engine::IoEngine::TokioEpollUring => {
use crate::virtual_file::owned_buffers_io;
use crate::virtual_file::IoBufferMut;
use std::sync::Arc;
use crate::virtual_file::{IoBufferMut, owned_buffers_io};
async {
let destination_file = Arc::new(
VirtualFile::create(dst_path, ctx)


@@ -7,16 +7,16 @@ use std::collections::HashMap;
use chrono::NaiveDateTime;
use pageserver_api::models::AuxFilePolicy;
use pageserver_api::shard::ShardIndex;
use serde::{Deserialize, Serialize};
use utils::id::TimelineId;
use utils::lsn::Lsn;
use super::is_same_remote_layer_path;
use crate::tenant::Generation;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::LayerName;
use crate::tenant::timeline::import_pgdata;
use crate::tenant::Generation;
use pageserver_api::shard::ShardIndex;
use utils::id::TimelineId;
use utils::lsn::Lsn;
/// In-memory representation of an `index_part.json` file
///
@@ -435,10 +435,12 @@ impl GcBlocking {
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
use utils::id::TimelineId;
use super::*;
#[test]
fn v1_indexpart_is_parsed() {
let example = r#"{


@@ -1,6 +1,7 @@
use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};
use utils::{id::TimelineId, lsn::Lsn};
use utils::id::TimelineId;
use utils::lsn::Lsn;
/// Tenant-shard scoped manifest
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)]


@@ -1,28 +1,28 @@
//! Helper functions to upload files to remote storage with a RemoteStorage
use anyhow::{bail, Context};
use std::io::{ErrorKind, SeekFrom};
use std::time::SystemTime;
use anyhow::{Context, bail};
use bytes::Bytes;
use camino::Utf8Path;
use fail::fail_point;
use pageserver_api::shard::TenantShardId;
use std::io::{ErrorKind, SeekFrom};
use std::time::SystemTime;
use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
use tokio::fs::{self, File};
use tokio::io::AsyncSeekExt;
use tokio_util::sync::CancellationToken;
use tracing::info;
use utils::id::{TenantId, TimelineId};
use utils::{backoff, pausable_failpoint};
use super::Generation;
use super::index::IndexPart;
use super::manifest::TenantManifest;
use super::Generation;
use crate::tenant::remote_timeline_client::{
remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,
remote_tenant_manifest_path,
};
use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
use utils::id::{TenantId, TimelineId};
use tracing::info;
/// Serializes and uploads the given index part data to the remote storage.
pub(crate) async fn upload_index_part(
@@ -134,7 +134,9 @@ pub(super) async fn upload_timeline_layer<'a>(
.len();
if metadata_size != fs_size {
bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
bail!(
"File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"
);
}
let fs_size = usize::try_from(fs_size)


@@ -3,40 +3,31 @@ pub mod heatmap;
mod heatmap_uploader;
mod scheduler;
use std::{sync::Arc, time::SystemTime};
use std::sync::Arc;
use std::time::SystemTime;
use crate::{
context::RequestContext,
disk_usage_eviction_task::DiskUsageEvictionInfo,
metrics::SECONDARY_HEATMAP_TOTAL_SIZE,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
};
use self::{
downloader::{downloader_task, SecondaryDetail},
heatmap_uploader::heatmap_uploader_task,
};
use super::{
config::{SecondaryLocationConfig, TenantConfOpt},
mgr::TenantManager,
span::debug_assert_current_span_has_tenant_id,
storage_layer::LayerName,
GetTenantError,
};
use crate::metrics::SECONDARY_RESIDENT_PHYSICAL_SIZE;
use metrics::UIntGauge;
use pageserver_api::{
models,
shard::{ShardIdentity, TenantShardId},
};
use pageserver_api::models;
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use remote_storage::GenericRemoteStorage;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::instrument;
use utils::{completion::Barrier, id::TimelineId, sync::gate::Gate};
use utils::completion::Barrier;
use utils::id::TimelineId;
use utils::sync::gate::Gate;
use self::downloader::{SecondaryDetail, downloader_task};
use self::heatmap_uploader::heatmap_uploader_task;
use super::GetTenantError;
use super::config::{SecondaryLocationConfig, TenantConfOpt};
use super::mgr::TenantManager;
use super::span::debug_assert_current_span_has_tenant_id;
use super::storage_layer::LayerName;
use crate::context::RequestContext;
use crate::disk_usage_eviction_task::DiskUsageEvictionInfo;
use crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE};
use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
enum DownloadCommand {
Download(TenantShardId),

View File

@@ -1,47 +1,8 @@
use std::{
collections::{HashMap, HashSet},
pin::Pin,
str::FromStr,
sync::Arc,
time::{Duration, Instant, SystemTime},
};
use crate::{
config::PageServerConf,
context::RequestContext,
disk_usage_eviction_task::{
finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
},
metrics::SECONDARY_MODE,
tenant::{
config::SecondaryLocationConfig,
debug_assert_current_span_has_tenant_and_timeline_id,
ephemeral_file::is_ephemeral_file,
remote_timeline_client::{
index::LayerFileMetadata, is_temp_download_file, FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
},
span::debug_assert_current_span_has_tenant_id,
storage_layer::{layer::local_layer_path, LayerName, LayerVisibilityHint},
tasks::{warn_when_period_overrun, BackgroundLoopKind},
},
virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile},
TEMP_FILE_SUFFIX,
};
use super::{
heatmap::HeatMapLayer,
scheduler::{
self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult,
TenantBackgroundJobs,
},
GetTenantError, SecondaryTenant, SecondaryTenantError,
};
use crate::tenant::{
mgr::TenantManager,
remote_timeline_client::{download::download_layer_file, remote_heatmap_path},
};
use std::collections::{HashMap, HashSet};
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime};
use camino::Utf8PathBuf;
use chrono::format::{DelayedFormat, StrftimeItems};
@@ -50,18 +11,43 @@ use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument};
use utils::{
backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext,
id::TimelineId, pausable_failpoint, serde_system_time,
};
use tracing::{Instrument, info_span, instrument, warn};
use utils::completion::Barrier;
use utils::crashsafe::path_with_suffix_extension;
use utils::id::TimelineId;
use utils::{backoff, failpoint_support, fs_ext, pausable_failpoint, serde_system_time};
use super::{
heatmap::{HeatMapTenant, HeatMapTimeline},
CommandRequest, DownloadCommand,
use super::heatmap::{HeatMapLayer, HeatMapTenant, HeatMapTimeline};
use super::scheduler::{
self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, period_jitter,
period_warmup,
};
use super::{
CommandRequest, DownloadCommand, GetTenantError, SecondaryTenant, SecondaryTenantError,
};
use crate::TEMP_FILE_SUFFIX;
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::disk_usage_eviction_task::{
DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32,
};
use crate::metrics::SECONDARY_MODE;
use crate::tenant::config::SecondaryLocationConfig;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::ephemeral_file::is_ephemeral_file;
use crate::tenant::mgr::TenantManager;
use crate::tenant::remote_timeline_client::download::download_layer_file;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::remote_timeline_client::{
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, is_temp_download_file,
remote_heatmap_path,
};
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
use crate::tenant::storage_layer::layer::local_layer_path;
use crate::tenant::storage_layer::{LayerName, LayerVisibilityHint};
use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};
use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
/// For each tenant, default period for how long must have passed since the last download_tenant call before
/// calling it again. This default is replaced with the value of [`HeatMapTenant::upload_period_ms`] after first


@@ -1,11 +1,13 @@
use std::{collections::HashMap, time::SystemTime};
use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName};
use std::collections::HashMap;
use std::time::SystemTime;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
use serde_with::{DisplayFromStr, TimestampSeconds, serde_as};
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::{generation::Generation, id::TimelineId};
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::storage_layer::LayerName;
#[derive(Serialize, Deserialize)]
pub(crate) struct HeatMapTenant {


@@ -1,42 +1,33 @@
use std::{
collections::HashMap,
pin::Pin,
sync::{Arc, Weak},
time::{Duration, Instant},
};
use crate::{
metrics::SECONDARY_MODE,
tenant::{
config::AttachmentMode,
mgr::{GetTenantError, TenantManager},
remote_timeline_client::remote_heatmap_path,
span::debug_assert_current_span_has_tenant_id,
tasks::{warn_when_period_overrun, BackgroundLoopKind},
Tenant,
},
virtual_file::VirtualFile,
TEMP_FILE_SUFFIX,
};
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::{Arc, Weak};
use std::time::{Duration, Instant};
use futures::Future;
use pageserver_api::shard::TenantShardId;
use remote_storage::{GenericRemoteStorage, TimeoutOrCancel};
use super::{
heatmap::HeatMapTenant,
scheduler::{
self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult,
TenantBackgroundJobs,
},
CommandRequest, SecondaryTenantError, UploadCommand,
};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, Instrument};
use utils::{
backoff, completion::Barrier, crashsafe::path_with_suffix_extension,
yielding_loop::yielding_loop,
use tracing::{Instrument, info_span, instrument};
use utils::backoff;
use utils::completion::Barrier;
use utils::crashsafe::path_with_suffix_extension;
use utils::yielding_loop::yielding_loop;
use super::heatmap::HeatMapTenant;
use super::scheduler::{
self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, period_jitter,
period_warmup,
};
use super::{CommandRequest, SecondaryTenantError, UploadCommand};
use crate::TEMP_FILE_SUFFIX;
use crate::metrics::SECONDARY_MODE;
use crate::tenant::Tenant;
use crate::tenant::config::AttachmentMode;
use crate::tenant::mgr::{GetTenantError, TenantManager};
use crate::tenant::remote_timeline_client::remote_heatmap_path;
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun};
use crate::virtual_file::VirtualFile;
pub(super) async fn heatmap_uploader_task(
tenant_manager: Arc<TenantManager>,


@@ -1,16 +1,15 @@
use futures::Future;
use rand::Rng;
use std::{
collections::HashMap,
marker::PhantomData,
pin::Pin,
time::{Duration, Instant},
};
use std::collections::HashMap;
use std::marker::PhantomData;
use std::pin::Pin;
use std::time::{Duration, Instant};
use futures::Future;
use pageserver_api::shard::TenantShardId;
use rand::Rng;
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use utils::{completion::Barrier, yielding_loop::yielding_loop};
use utils::completion::Barrier;
use utils::yielding_loop::yielding_loop;
use super::{CommandRequest, CommandResponse, SecondaryTenantError};


@@ -4,21 +4,18 @@ use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tenant_size_model::svg::SvgBranchKind;
use tokio::sync::oneshot::error::RecvError;
use tenant_size_model::{Segment, StorageModel};
use tokio::sync::Semaphore;
use tokio::sync::oneshot::error::RecvError;
use tokio_util::sync::CancellationToken;
use crate::context::RequestContext;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use super::{GcError, LogicalSizeCalculationCause, Tenant};
use crate::tenant::{MaybeOffloaded, Timeline};
use tracing::*;
use utils::id::TimelineId;
use utils::lsn::Lsn;
use tracing::*;
use tenant_size_model::{Segment, StorageModel};
use super::{GcError, LogicalSizeCalculationCause, Tenant};
use crate::context::RequestContext;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::tenant::{MaybeOffloaded, Timeline};
/// Inputs to the actual tenant sizing model
///
@@ -498,7 +495,9 @@ async fn fill_logical_sizes(
}
Err(join_error) => {
// cannot really do anything, as this panic is likely a bug
error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}");
error!(
"task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}"
);
have_any_error = Some(CalculateSyntheticSizeError::Fatal(
anyhow::anyhow!(join_error)


@@ -10,42 +10,39 @@ mod layer_desc;
mod layer_name;
pub mod merge_iterator;
use crate::config::PageServerConf;
use crate::context::{AccessStatsBehavior, RequestContext};
use bytes::Bytes;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
use pageserver_api::record::NeonWalRecord;
use pageserver_api::value::Value;
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::{BinaryHeap, HashMap};
use std::future::Future;
use std::ops::Range;
use std::pin::Pin;
use std::sync::atomic::AtomicUsize;
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tracing::{trace, Instrument};
use utils::sync::gate::GateGuard;
use utils::lsn::Lsn;
pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter};
use bytes::Bytes;
pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
use futures::StreamExt;
use futures::stream::FuturesUnordered;
pub use image_layer::{ImageLayer, ImageLayerWriter};
pub use inmemory_layer::InMemoryLayer;
pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
use pageserver_api::record::NeonWalRecord;
use pageserver_api::value::Value;
use tracing::{Instrument, trace};
use utils::lsn::Lsn;
use utils::sync::gate::GateGuard;
use self::inmemory_layer::InMemoryLayerFileId;
use super::timeline::{GetVectoredError, ReadPath};
use super::PageReconstructError;
use super::timeline::{GetVectoredError, ReadPath};
use crate::config::PageServerConf;
use crate::context::{AccessStatsBehavior, RequestContext};
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
where
@@ -510,6 +507,7 @@ impl IoConcurrency {
#[cfg(test)]
pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut<Target = Self> {
use std::ops::{Deref, DerefMut};
use tracing::info;
use utils::sync::gate::Gate;


@@ -1,17 +1,22 @@
use std::{future::Future, ops::Range, sync::Arc};
use std::future::Future;
use std::ops::Range;
use std::sync::Arc;
use bytes::Bytes;
use pageserver_api::key::{Key, KEY_SIZE};
use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId};
use crate::tenant::storage_layer::Layer;
use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline};
use pageserver_api::key::{KEY_SIZE, Key};
use pageserver_api::value::Value;
use utils::id::TimelineId;
use utils::lsn::Lsn;
use utils::shard::TenantShardId;
use super::layer::S3_UPLOAD_LIMIT;
use super::{
DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer,
};
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::tenant::Timeline;
use crate::tenant::storage_layer::Layer;
pub(crate) enum BatchWriterResult {
Produced(ResidentLayer),
@@ -423,15 +428,10 @@ mod tests {
use itertools::Itertools;
use rand::{RngCore, SeedableRng};
use crate::{
tenant::{
harness::{TenantHarness, TIMELINE_ID},
storage_layer::AsLayerDesc,
},
DEFAULT_PG_VERSION,
};
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
use crate::tenant::storage_layer::AsLayerDesc;
fn get_key(id: u32) -> Key {
let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();


@@ -27,6 +27,38 @@
//! "values" part. The actual page images and WAL records are stored in the
//! "values" part.
//!
use std::collections::{HashMap, VecDeque};
use std::fs::File;
use std::io::SeekFrom;
use std::ops::Range;
use std::os::unix::fs::FileExt;
use std::str::FromStr;
use std::sync::Arc;
use anyhow::{Context, Result, bail, ensure};
use camino::{Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::shard::TenantShardId;
use pageserver_api::value::Value;
use rand::Rng;
use rand::distributions::Alphanumeric;
use serde::{Deserialize, Serialize};
use tokio::sync::OnceCell;
use tokio_epoll_uring::IoBuf;
use tracing::*;
use utils::bin_ser::BeSer;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use super::{
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
ValuesReconstructState,
};
use crate::config::PageServerConf;
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
use crate::page_cache::{self, FileId, PAGE_SZ};
@@ -42,43 +74,8 @@ use crate::tenant::vectored_blob_io::{
VectoredReadPlanner,
};
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX;
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{bail, ensure, Context, Result};
use camino::{Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::shard::TenantShardId;
use pageserver_api::value::Value;
use rand::{distributions::Alphanumeric, Rng};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::fs::File;
use std::io::SeekFrom;
use std::ops::Range;
use std::os::unix::fs::FileExt;
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::OnceCell;
use tokio_epoll_uring::IoBuf;
use tracing::*;
use utils::{
bin_ser::BeSer,
id::{TenantId, TimelineId},
lsn::Lsn,
};
use super::{
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
ValuesReconstructState,
};
use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
///
/// Header stored in the beginning of the file
@@ -1130,10 +1127,11 @@ impl DeltaLayerInner {
until: Lsn,
ctx: &RequestContext,
) -> anyhow::Result<usize> {
use futures::stream::TryStreamExt;
use crate::tenant::vectored_blob_io::{
BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended,
};
use futures::stream::TryStreamExt;
#[derive(Debug)]
enum Item {
@@ -1599,23 +1597,21 @@ impl DeltaLayerIterator<'_> {
pub(crate) mod test {
use std::collections::BTreeMap;
use bytes::Bytes;
use itertools::MinMaxResult;
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
use pageserver_api::value::Value;
use rand::RngCore;
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
use super::*;
use crate::tenant::harness::TIMELINE_ID;
use crate::DEFAULT_PG_VERSION;
use crate::context::DownloadBehavior;
use crate::task_mgr::TaskKind;
use crate::tenant::disk_btree::tests::TestDisk;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
use crate::tenant::storage_layer::{Layer, ResidentLayer};
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
use crate::tenant::{Tenant, Timeline};
use crate::{
context::DownloadBehavior,
task_mgr::TaskKind,
tenant::{disk_btree::tests::TestDisk, harness::TenantHarness},
DEFAULT_PG_VERSION,
};
use bytes::Bytes;
use pageserver_api::value::Value;
/// Construct an index for a fictional delta layer and then
/// traverse in order to plan vectored reads for a query. Finally,


@@ -1,18 +1,14 @@
use std::{ops::Range, sync::Arc};
use std::ops::Range;
use std::sync::Arc;
use anyhow::bail;
use pageserver_api::{
key::Key,
keyspace::{KeySpace, SparseKeySpace},
};
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpace, SparseKeySpace};
use pageserver_api::value::Value;
use utils::lsn::Lsn;
use pageserver_api::value::Value;
use super::{
merge_iterator::{MergeIterator, MergeIteratorItem},
PersistentLayerKey,
};
use super::PersistentLayerKey;
use super::merge_iterator::{MergeIterator, MergeIteratorItem};
/// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
///
@@ -98,19 +94,14 @@ impl<'a> FilterIterator<'a> {
#[cfg(test)]
mod tests {
use super::*;
use itertools::Itertools;
use pageserver_api::key::Key;
use utils::lsn::Lsn;
use crate::{
tenant::{
harness::{TenantHarness, TIMELINE_ID},
storage_layer::delta_layer::test::produce_delta_layer,
},
DEFAULT_PG_VERSION,
};
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
use crate::tenant::storage_layer::delta_layer::test::produce_delta_layer;
async fn assert_filter_iter_equal(
filter_iter: &mut FilterIterator<'_>,


@@ -25,6 +25,39 @@
//! layer, and offsets to the other parts. The "index" is a B-tree,
//! mapping from Key to an offset in the "values" part. The
//! actual page images are stored in the "values" part.
use std::collections::{HashMap, VecDeque};
use std::fs::File;
use std::io::SeekFrom;
use std::ops::Range;
use std::os::unix::prelude::FileExt;
use std::str::FromStr;
use std::sync::Arc;
use anyhow::{Context, Result, bail, ensure};
use bytes::Bytes;
use camino::{Utf8Path, Utf8PathBuf};
use hex;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use pageserver_api::value::Value;
use rand::Rng;
use rand::distributions::Alphanumeric;
use serde::{Deserialize, Serialize};
use tokio::sync::OnceCell;
use tokio_stream::StreamExt;
use tracing::*;
use utils::bin_ser::BeSer;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use super::layer_name::ImageLayerName;
use super::{
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
ValuesReconstructState,
};
use crate::config::PageServerConf;
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
use crate::page_cache::{self, FileId, PAGE_SZ};
@@ -39,43 +72,8 @@ use crate::tenant::vectored_blob_io::{
VectoredReadPlanner,
};
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
use camino::{Utf8Path, Utf8PathBuf};
use hex;
use itertools::Itertools;
use pageserver_api::config::MaxVectoredReadBytes;
use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use pageserver_api::value::Value;
use rand::{distributions::Alphanumeric, Rng};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::fs::File;
use std::io::SeekFrom;
use std::ops::Range;
use std::os::unix::prelude::FileExt;
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::OnceCell;
use tokio_stream::StreamExt;
use tracing::*;
use utils::{
bin_ser::BeSer,
id::{TenantId, TimelineId},
lsn::Lsn,
};
use super::layer_name::ImageLayerName;
use super::{
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
ValuesReconstructState,
};
///
/// Header stored in the beginning of the file
@@ -1135,34 +1133,26 @@ impl ImageLayerIterator<'_> {
#[cfg(test)]
mod test {
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use std::time::Duration;
use bytes::Bytes;
use itertools::Itertools;
use pageserver_api::{
key::Key,
shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize},
value::Value,
};
use utils::{
generation::Generation,
id::{TenantId, TimelineId},
lsn::Lsn,
};
use crate::{
context::RequestContext,
tenant::{
config::TenantConf,
harness::{TenantHarness, TIMELINE_ID},
storage_layer::{Layer, ResidentLayer},
vectored_blob_io::StreamingVectoredReadPlanner,
Tenant, Timeline,
},
DEFAULT_PG_VERSION,
};
use pageserver_api::key::Key;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
use pageserver_api::value::Value;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use super::{ImageLayerIterator, ImageLayerWriter};
use crate::DEFAULT_PG_VERSION;
use crate::context::RequestContext;
use crate::tenant::config::TenantConf;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
use crate::tenant::storage_layer::{Layer, ResidentLayer};
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
use crate::tenant::{Tenant, Timeline};
#[tokio::test]
async fn image_layer_rewrite() {
@@ -1172,10 +1162,10 @@ mod test {
..TenantConf::default()
};
let tenant_id = TenantId::generate();
let mut gen = Generation::new(0xdead0001);
let mut gen_ = Generation::new(0xdead0001);
let mut get_next_gen = || {
let ret = gen;
gen = gen.next();
let ret = gen_;
gen_ = gen_.next();
ret
};
// The LSN at which we will create an image layer to filter

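The `gen` to `gen_` rename in the hunk above is a genuine edition break, not formatting: edition 2024 reserves `gen` as a keyword for future `gen` blocks, so it can no longer be used as a plain identifier. Renaming is one fix; a raw identifier also works, as in this minimal standalone sketch:

fn main() {
    // `gen` itself would be rejected as an identifier under edition
    // 2024; the r# escape keeps the old name where renaming is
    // undesirable.
    let r#gen = 0xdead0001u32;
    println!("{:#x}", r#gen);
}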

@@ -4,38 +4,39 @@
//! held in an ephemeral file, not in memory. The metadata for each page version, i.e.
//! its position in the file, is kept in memory, though.
//!
use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Write;
use std::ops::Range;
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering};
use std::sync::{Arc, OnceLock};
use std::time::Instant;
use anyhow::Result;
use camino::Utf8PathBuf;
use pageserver_api::key::{CompactKey, Key};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::InMemoryLayerInfo;
use pageserver_api::shard::TenantShardId;
use tokio::sync::RwLock;
use tracing::*;
use utils::id::TimelineId;
use utils::lsn::Lsn;
use utils::vec_map::VecMap;
use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};
use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64, u64_to_usize};
use crate::config::PageServerConf;
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
// avoid binding to Write (conflicts with std::io::Write)
// while being able to use std::fmt::Write's methods
use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
use crate::tenant::ephemeral_file::EphemeralFile;
use crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo};
use crate::tenant::timeline::GetVectoredError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::{l0_flush, page_cache};
use anyhow::Result;
use camino::Utf8PathBuf;
use pageserver_api::key::CompactKey;
use pageserver_api::key::Key;
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::InMemoryLayerInfo;
use pageserver_api::shard::TenantShardId;
use std::collections::{BTreeMap, HashMap};
use std::sync::{Arc, OnceLock};
use std::time::Instant;
use tracing::*;
use utils::{id::TimelineId, lsn::Lsn, vec_map::VecMap};
use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};
// avoid binding to Write (conflicts with std::io::Write)
// while being able to use std::fmt::Write's methods
use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
use std::cmp::Ordering;
use std::fmt::Write;
use std::ops::Range;
use std::sync::atomic::Ordering as AtomicOrdering;
use std::sync::atomic::{AtomicU64, AtomicUsize};
use tokio::sync::RwLock;
use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
pub(crate) mod vectored_dio_read;
@@ -555,7 +556,9 @@ impl InMemoryLayer {
gate: &utils::sync::gate::Gate,
ctx: &RequestContext,
) -> Result<InMemoryLayer> {
trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
trace!(
"initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"
);
let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?;
let key = InMemoryLayerFileId(file.page_cache_file_id());
@@ -816,8 +819,7 @@ mod tests {
#[test]
fn test_index_entry() {
const MAX_SUPPORTED_POS: usize = IndexEntry::MAX_SUPPORTED_POS;
use IndexEntryNewArgs as Args;
use IndexEntryUnpacked as Unpacked;
use {IndexEntryNewArgs as Args, IndexEntryUnpacked as Unpacked};
let roundtrip = |args, expect: Unpacked| {
let res = IndexEntry::new(args).expect("this tests expects no errors");

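The comment in the hunk above about avoiding a binding to `Write` alludes to a common trick: importing a trait anonymously so its methods are in scope without the name itself, which would otherwise collide with `std::io::Write`. A minimal standalone sketch of that technique (illustrative, not this module's actual code):

use std::fmt::Write as _; // trait methods in scope, no `Write` name bound

fn render(n: u32) -> String {
    let mut s = String::new();
    // write! resolves through the anonymously imported std::fmt::Write.
    write!(s, "n={n}").expect("fmt::Write on a String never fails");
    s
}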

@@ -1,16 +1,13 @@
use std::{
collections::BTreeMap,
sync::{Arc, RwLock},
};
use std::collections::BTreeMap;
use std::sync::{Arc, RwLock};
use itertools::Itertools;
use tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice};
use crate::{
assert_u64_eq_usize::{U64IsUsize, UsizeIsU64},
context::RequestContext,
virtual_file::{owned_buffers_io::io_buf_aligned::IoBufAlignedMut, IoBufferMut},
};
use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
use crate::context::RequestContext;
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
/// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`].
pub trait File: Send {
@@ -132,7 +129,9 @@ where
let req_len = match cur {
LogicalReadState::NotStarted(buf) => {
if buf.len() != 0 {
panic!("The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`");
panic!(
"The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`"
);
}
// buf.cap() == 0 is ok
@@ -141,7 +140,9 @@ where
*state = LogicalReadState::Ongoing(buf);
req_len
}
x => panic!("must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"),
x => panic!(
"must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"
),
};
// plan which chunks we need to read from
@@ -422,15 +423,15 @@ impl Buffer for Vec<u8> {
#[cfg(test)]
#[allow(clippy::assertions_on_constants)]
mod tests {
use std::cell::RefCell;
use std::collections::VecDeque;
use rand::Rng;
use crate::{
context::DownloadBehavior, task_mgr::TaskKind,
virtual_file::owned_buffers_io::slice::SliceMutExt,
};
use super::*;
use std::{cell::RefCell, collections::VecDeque};
use crate::context::DownloadBehavior;
use crate::task_mgr::TaskKind;
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
struct InMemoryFile {
content: Vec<u8>,


@@ -1,32 +1,32 @@
use std::ops::Range;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Weak};
use std::time::{Duration, SystemTime};
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::HistoricLayerInfo;
use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId};
use std::ops::Range;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Weak};
use std::time::{Duration, SystemTime};
use tracing::Instrument;
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::lsn::Lsn;
use utils::sync::{gate, heavier_once_cell};
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::task_mgr::TaskKind;
use crate::tenant::timeline::{CompactionError, GetVectoredError};
use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
use super::delta_layer::{self};
use super::image_layer::{self};
use super::{
AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState,
};
use utils::generation::Generation;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
use crate::tenant::remote_timeline_client::LayerFileMetadata;
use crate::tenant::timeline::{CompactionError, GetVectoredError};
#[cfg(test)]
mod tests;
@@ -1873,8 +1873,8 @@ impl ResidentLayer {
self.owner.record_access(ctx);
let res = match inner {
Delta(ref d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await,
Image(ref i) => image_layer::ImageLayerInner::load_keys(i, ctx).await,
Delta(d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await,
Image(i) => image_layer::ImageLayerInner::load_keys(i, ctx).await,
};
res.with_context(|| format!("Layer index is corrupted for {self}"))
}
@@ -1920,7 +1920,7 @@ impl ResidentLayer {
let owner = &self.owner.0;
match self.downloaded.get(owner, ctx).await? {
Delta(ref d) => d
Delta(d) => d
.copy_prefix(writer, until, ctx)
.await
.with_context(|| format!("copy_delta_prefix until {until} of {self}")),
@@ -1943,7 +1943,7 @@ impl ResidentLayer {
) -> anyhow::Result<&delta_layer::DeltaLayerInner> {
use LayerKind::*;
match self.downloaded.get(&self.owner.0, ctx).await? {
Delta(ref d) => Ok(d),
Delta(d) => Ok(d),
Image(_) => Err(anyhow::anyhow!("image layer")),
}
}
@@ -1955,7 +1955,7 @@ impl ResidentLayer {
) -> anyhow::Result<&image_layer::ImageLayerInner> {
use LayerKind::*;
match self.downloaded.get(&self.owner.0, ctx).await? {
Image(ref d) => Ok(d),
Image(d) => Ok(d),
Delta(_) => Err(anyhow::anyhow!("delta layer")),
}
}

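The `Delta(ref d)` to `Delta(d)` changes above are another edition accommodation: under edition 2024's match-ergonomics rules, an explicit `ref` modifier is rejected when the default binding mode is already by-reference, and since these matches are on references, `d` binds as a reference with or without it. A minimal sketch of the rule (the enum is simplified; the real `LayerKind` variants hold layer-inner types):

enum LayerKind {
    Delta(String),
    Image(String),
}

fn name(kind: &LayerKind) -> &str {
    match kind {
        // Edition 2024 would reject `LayerKind::Delta(ref d)` here:
        // the scrutinee is a reference, so the default binding mode is
        // already by-reference and `d` is a `&String` without `ref`.
        LayerKind::Delta(d) => d,
        LayerKind::Image(i) => i,
    }
}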

@@ -1,22 +1,16 @@
use std::time::UNIX_EPOCH;
use pageserver_api::key::{Key, CONTROLFILE_KEY};
use pageserver_api::key::{CONTROLFILE_KEY, Key};
use tokio::task::JoinSet;
use utils::{
completion::{self, Completion},
id::TimelineId,
};
use utils::completion::{self, Completion};
use utils::id::TimelineId;
use super::failpoints::{Failpoint, FailpointKind};
use super::*;
use crate::{
context::DownloadBehavior,
tenant::{
harness::test_img,
storage_layer::{IoConcurrency, LayerVisibilityHint},
},
};
use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness};
use crate::context::DownloadBehavior;
use crate::task_mgr::TaskKind;
use crate::tenant::harness::{TenantHarness, test_img};
use crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint};
/// Used in tests to advance a future to the wanted await point, and not further.
const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600);
@@ -771,10 +765,12 @@ async fn evict_and_wait_does_not_wait_for_download() {
let (arrival, _download_arrived) = utils::completion::channel();
layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier));
let mut download = std::pin::pin!(layer
let mut download = std::pin::pin!(
layer
.0
.get_or_maybe_download(true, None)
.instrument(download_span));
.instrument(download_span)
);
assert!(
!layer.is_likely_resident(),


@@ -1,16 +1,15 @@
use core::fmt::Display;
use pageserver_api::shard::TenantShardId;
use std::ops::Range;
use utils::{id::TimelineId, lsn::Lsn};
use pageserver_api::key::Key;
use super::{DeltaLayerName, ImageLayerName, LayerName};
use pageserver_api::shard::TenantShardId;
use serde::{Deserialize, Serialize};
#[cfg(test)]
use utils::id::TenantId;
use utils::id::TimelineId;
use utils::lsn::Lsn;
use super::{DeltaLayerName, ImageLayerName, LayerName};
/// A unique identifier of a persistent layer.
///


@@ -1,12 +1,12 @@
//!
//! Helper functions for dealing with filenames of the image and delta layer files.
//!
use pageserver_api::key::Key;
use std::cmp::Ordering;
use std::fmt;
use std::ops::Range;
use std::str::FromStr;
use pageserver_api::key::Key;
use utils::lsn::Lsn;
use super::PersistentLayerDesc;
@@ -305,7 +305,7 @@ impl FromStr for LayerName {
(None, None) => {
return Err(format!(
"neither delta nor image layer file name: {value:?}"
))
));
}
(Some(delta), None) => Self::Delta(delta),
(None, Some(image)) => Self::Image(image),


@@ -1,21 +1,16 @@
use std::{
cmp::Ordering,
collections::{binary_heap, BinaryHeap},
sync::Arc,
};
use std::cmp::Ordering;
use std::collections::{BinaryHeap, binary_heap};
use std::sync::Arc;
use anyhow::bail;
use pageserver_api::key::Key;
use pageserver_api::value::Value;
use utils::lsn::Lsn;
use super::delta_layer::{DeltaLayerInner, DeltaLayerIterator};
use super::image_layer::{ImageLayerInner, ImageLayerIterator};
use super::{PersistentLayerDesc, PersistentLayerKey};
use crate::context::RequestContext;
use pageserver_api::value::Value;
use super::{
delta_layer::{DeltaLayerInner, DeltaLayerIterator},
image_layer::{ImageLayerInner, ImageLayerIterator},
PersistentLayerDesc, PersistentLayerKey,
};
#[derive(Clone, Copy)]
pub(crate) enum LayerRef<'a> {
@@ -349,24 +344,18 @@ impl<'a> MergeIterator<'a> {
#[cfg(test)]
mod tests {
use super::*;
use itertools::Itertools;
use pageserver_api::key::Key;
use utils::lsn::Lsn;
use crate::{
tenant::{
harness::{TenantHarness, TIMELINE_ID},
storage_layer::delta_layer::test::{produce_delta_layer, sort_delta},
},
DEFAULT_PG_VERSION,
};
#[cfg(feature = "testing")]
use crate::tenant::storage_layer::delta_layer::test::sort_delta_value;
#[cfg(feature = "testing")]
use pageserver_api::record::NeonWalRecord;
use utils::lsn::Lsn;
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
#[cfg(feature = "testing")]
use crate::tenant::storage_layer::delta_layer::test::sort_delta_value;
use crate::tenant::storage_layer::delta_layer::test::{produce_delta_layer, sort_delta};
async fn assert_merge_iter_equal(
merge_iter: &mut MergeIterator<'_>,


@@ -8,24 +8,24 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use once_cell::sync::Lazy;
use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD;
use rand::Rng;
use scopeguard::defer;
use tokio::sync::{Semaphore, SemaphorePermit};
use tokio_util::sync::CancellationToken;
use tracing::*;
use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS};
use crate::tenant::throttle::Stats;
use crate::tenant::timeline::compaction::CompactionOutcome;
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD;
use utils::backoff::exponential_backoff_duration;
use utils::completion::Barrier;
use utils::pausable_failpoint;
use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
use crate::tenant::throttle::Stats;
use crate::tenant::timeline::CompactionError;
use crate::tenant::timeline::compaction::CompactionOutcome;
use crate::tenant::{Tenant, TenantState};
/// Semaphore limiting concurrent background tasks (across all tenants).
///
/// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work.
@@ -287,11 +287,12 @@ fn log_compaction_error(
sleep_duration: Duration,
task_cancelled: bool,
) {
use crate::pgdatadir_mapping::CollectKeySpaceError;
use crate::tenant::upload_queue::NotInitialized;
use crate::tenant::PageReconstructError;
use CompactionError::*;
use crate::pgdatadir_mapping::CollectKeySpaceError;
use crate::tenant::PageReconstructError;
use crate::tenant::upload_queue::NotInitialized;
let level = match err {
ShuttingDown => return,
Offload(_) => Level::ERROR,


@@ -1,10 +1,6 @@
use std::{
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
time::Instant,
};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;
use arc_swap::ArcSwap;
use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter};


@@ -14,55 +14,6 @@ pub mod span;
pub mod uninit;
mod walreceiver;
use anyhow::{anyhow, bail, ensure, Context, Result};
use arc_swap::{ArcSwap, ArcSwapOption};
use bytes::Bytes;
use camino::Utf8Path;
use chrono::{DateTime, Utc};
use compaction::{CompactionOutcome, GcCompactionCombinedSettings};
use enumset::EnumSet;
use fail::fail_point;
use futures::FutureExt;
use futures::{stream::FuturesUnordered, StreamExt};
use handle::ShardTimelineId;
use layer_manager::Shutdown;
use offload::OffloadError;
use once_cell::sync::Lazy;
use pageserver_api::models::PageTraceEvent;
use pageserver_api::{
key::{
KEY_SIZE, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
SPARSE_RANGE,
},
keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning},
models::{
CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState,
},
reltag::BlockNumber,
shard::{ShardIdentity, ShardNumber, TenantShardId},
};
use rand::Rng;
use remote_storage::DownloadError;
use serde_with::serde_as;
use storage_broker::BrokerClientChannel;
use tokio::runtime::Handle;
use tokio::sync::mpsc::Sender;
use tokio::sync::{oneshot, watch, Notify};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::critical;
use utils::rate_limit::RateLimit;
use utils::{
fs_ext,
guard_arc_swap::GuardArcSwap,
pausable_failpoint,
postgres_client::PostgresClientProtocol,
sync::gate::{Gate, GateGuard},
};
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
use std::array;
use std::cmp::{max, min};
use std::collections::btree_map::Entry;
@@ -72,74 +23,58 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering};
use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};
use std::time::{Duration, Instant, SystemTime};
use crate::l0_flush::{self, L0FlushGlobalState};
use crate::tenant::storage_layer::ImageLayerName;
use crate::{
aux_file::AuxFileSizeEstimator,
page_service::TenantManagerTypes,
tenant::{
config::AttachmentMode,
layer_map::{LayerMap, SearchResult},
metadata::TimelineMetadata,
storage_layer::{
inmemory_layer::IndexEntry, BatchLayerWriter, IoConcurrency, PersistentLayerDesc,
ValueReconstructSituation,
},
},
walingest::WalLagCooldown,
walredo,
};
use crate::{
context::{DownloadBehavior, RequestContext},
disk_usage_eviction_task::DiskUsageEvictionInfo,
pgdatadir_mapping::CollectKeySpaceError,
};
use crate::{
disk_usage_eviction_task::finite_f32,
tenant::storage_layer::{
AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructState,
ValuesReconstructState,
},
};
use crate::{
disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry,
};
use crate::{
metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize,
};
use crate::{
pgdatadir_mapping::DirectoryKind,
virtual_file::{MaybeFatalIo, VirtualFile},
};
use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
use crate::{pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS, tenant::storage_layer::PersistentLayerKey};
use anyhow::{Context, Result, anyhow, bail, ensure};
use arc_swap::{ArcSwap, ArcSwapOption};
use bytes::Bytes;
use camino::Utf8Path;
use chrono::{DateTime, Utc};
use compaction::{CompactionOutcome, GcCompactionCombinedSettings};
use enumset::EnumSet;
use fail::fail_point;
use futures::stream::FuturesUnordered;
use futures::{FutureExt, StreamExt};
use handle::ShardTimelineId;
use layer_manager::Shutdown;
use offload::OffloadError;
use once_cell::sync::Lazy;
use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL};
use crate::pgdatadir_mapping::{CalculateLogicalSizeError, MetricsUpdate};
use crate::tenant::config::TenantConfOpt;
use pageserver_api::reltag::RelTag;
use pageserver_api::shard::ShardIndex;
use postgres_connection::PgConnectionConfig;
use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE};
use utils::{
completion,
generation::Generation,
id::TimelineId,
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
simple_rcu::{Rcu, RcuReadGuard},
use pageserver_api::key::{
KEY_SIZE, Key, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
SPARSE_RANGE,
};
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::gc_result::GcResult;
use crate::ZERO_PAGE;
use pageserver_api::key::Key;
use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};
use pageserver_api::models::{
CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, TimelineState,
};
use pageserver_api::reltag::{BlockNumber, RelTag};
use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};
#[cfg(test)]
use pageserver_api::value::Value;
use postgres_connection::PgConnectionConfig;
use postgres_ffi::v14::xlog_utils;
use postgres_ffi::{WAL_SEGMENT_SIZE, to_pg_timestamp};
use rand::Rng;
use remote_storage::DownloadError;
use serde_with::serde_as;
use storage_broker::BrokerClientChannel;
use tokio::runtime::Handle;
use tokio::sync::mpsc::Sender;
use tokio::sync::{Notify, oneshot, watch};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::generation::Generation;
use utils::guard_arc_swap::GuardArcSwap;
use utils::id::TimelineId;
use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
use utils::postgres_client::PostgresClientProtocol;
use utils::rate_limit::RateLimit;
use utils::seqwait::SeqWait;
use utils::simple_rcu::{Rcu, RcuReadGuard};
use utils::sync::gate::{Gate, GateGuard};
use utils::{completion, critical, fs_ext, pausable_failpoint};
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
use self::delete::DeleteTimelineFlow;
pub(super) use self::eviction_task::EvictionTaskTenantState;
@@ -147,24 +82,48 @@ use self::eviction_task::EvictionTaskTimelineState;
use self::layer_manager::LayerManager;
use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::remote_timeline_client::index::GcCompactionState;
use super::config::TenantConf;
use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
use super::secondary::heatmap::HeatMapLayer;
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
use super::upload_queue::NotInitialized;
use super::{
config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized,
MaybeOffloaded,
AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,
debug_assert_current_span_has_tenant_and_timeline_id,
};
use super::{
debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, HeatMapTimeline,
use crate::aux_file::AuxFileSizeEstimator;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::l0_flush::{self, L0FlushGlobalState};
use crate::metrics::{
DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics,
};
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
use super::{
remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
storage_layer::ReadableLayer,
use crate::page_service::TenantManagerTypes;
use crate::pgdatadir_mapping::{
CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp,
MAX_AUX_FILE_V2_DELTAS, MetricsUpdate,
};
use super::{secondary::heatmap::HeatMapLayer, GcError};
#[cfg(test)]
use pageserver_api::value::Value;
use crate::task_mgr::TaskKind;
use crate::tenant::config::{AttachmentMode, TenantConfOpt};
use crate::tenant::gc_result::GcResult;
use crate::tenant::layer_map::{LayerMap, SearchResult};
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::delta_layer::DeltaEntry;
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
use crate::tenant::storage_layer::{
AsLayerDesc, BatchLayerWriter, DeltaLayerWriter, EvictionError, ImageLayerName,
ImageLayerWriter, InMemoryLayer, IoConcurrency, Layer, LayerAccessStatsReset, LayerName,
PersistentLayerDesc, PersistentLayerKey, ResidentLayer, ValueReconstructSituation,
ValueReconstructState, ValuesReconstructState,
};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
use crate::walingest::WalLagCooldown;
use crate::{ZERO_PAGE, task_mgr, walredo};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(crate) enum FlushLoopState {
@@ -1474,13 +1433,22 @@ impl Timeline {
| TaskKind::WalReceiverConnectionHandler
| TaskKind::WalReceiverConnectionPoller => {
let is_myself = match who_is_waiting {
WaitLsnWaiter::Timeline(waiter) => Weak::ptr_eq(&waiter.myself, &self.myself),
WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService | WaitLsnWaiter::HttpEndpoint => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()),
WaitLsnWaiter::Timeline(waiter) => {
Weak::ptr_eq(&waiter.myself, &self.myself)
}
WaitLsnWaiter::Tenant
| WaitLsnWaiter::PageService
| WaitLsnWaiter::HttpEndpoint => unreachable!(
"tenant or page_service context are not expected to have task kind {:?}",
ctx.task_kind()
),
};
if is_myself {
if let Err(current) = self.last_record_lsn.would_wait_for(lsn) {
// walingest is the only one that can advance last_record_lsn; it should make sure to never reach here
panic!("this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock");
panic!(
"this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock"
);
}
} else {
// if another timeline's is waiting for us, there's no deadlock risk because
@@ -1618,10 +1586,18 @@ impl Timeline {
if init || validate {
let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn();
if lsn < *latest_gc_cutoff_lsn {
bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
bail!(
"tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}",
lsn,
*latest_gc_cutoff_lsn
);
}
if lsn < planned_cutoff {
bail!("tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", lsn, planned_cutoff);
bail!(
"tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}",
lsn,
planned_cutoff
);
}
}
@@ -1745,7 +1721,9 @@ impl Timeline {
// This is not harmful, but it only happens in relatively rare cases where
// time-based checkpoints are not happening fast enough to keep the amount of
// ephemeral data within configured limits. It's a sign of stress on the system.
tracing::info!("Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure");
tracing::info!(
"Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure"
);
}
}
@@ -1871,7 +1849,9 @@ impl Timeline {
// Last record Lsn could be zero in case the timeline was just created
if !last_record_lsn.is_valid() {
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
warn!(
"Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"
);
return Ok(CompactionOutcome::Skipped);
}
@@ -2033,7 +2013,9 @@ impl Timeline {
// `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but
// we also do a final check here to ensure that the queue is empty.
if !self.remote_client.no_pending_work() {
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
warn!(
"still have pending work in remote upload queue, but continuing shutting down anyways"
);
}
}
}
@@ -2042,7 +2024,9 @@ impl Timeline {
// drain the upload queue
self.remote_client.shutdown().await;
if !self.remote_client.no_pending_work() {
warn!("still have pending work in remote upload queue, but continuing shutting down anyways");
warn!(
"still have pending work in remote upload queue, but continuing shutting down anyways"
);
}
}
@@ -2946,8 +2930,9 @@ impl Timeline {
disk_consistent_lsn: Lsn,
index_part: IndexPart,
) -> anyhow::Result<()> {
use init::{Decision::*, Discovered, DismissedLayer};
use LayerName::*;
use init::Decision::*;
use init::{Discovered, DismissedLayer};
let mut guard = self.layers.write().await;
@@ -3162,11 +3147,15 @@ impl Timeline {
}
TimelineState::Loading => {
// Import does not return an activated timeline.
info!("discarding priority boost for logical size calculation because timeline is not yet active");
info!(
"discarding priority boost for logical size calculation because timeline is not yet active"
);
}
TimelineState::Active => {
// activation should be setting the once cell
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
warn!(
"unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work"
);
debug_assert!(false);
}
}
@@ -4306,10 +4295,14 @@ impl Timeline {
// This path is only taken for tenants with multiple shards: single sharded tenants should
// never encounter a gap in the wal.
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
tracing::debug!("Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}");
tracing::debug!(
"Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}"
);
if self.set_disk_consistent_lsn(frozen_to_lsn) {
if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) {
tracing::warn!("Failed to schedule metadata upload after updating disk_consistent_lsn: {e}");
tracing::warn!(
"Failed to schedule metadata upload after updating disk_consistent_lsn: {e}"
);
}
}
}
@@ -4534,7 +4527,10 @@ impl Timeline {
/// This function must only be used from the layer flush task.
fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool {
let old_value = self.disk_consistent_lsn.fetch_max(new_value);
assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}");
assert!(
new_value >= old_value,
"disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"
);
self.metrics
.disk_consistent_lsn_gauge
@@ -4829,7 +4825,9 @@ impl Timeline {
// any metadata keys, keys, as that would lead to actual data
// loss.
if img_key.is_rel_fsm_block_key() || img_key.is_rel_vm_block_key() {
warn!("could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}");
warn!(
"could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}"
);
ZERO_PAGE.clone()
} else {
return Err(CreateImageLayersError::from(err));
@@ -4908,7 +4906,8 @@ impl Timeline {
let trigger_generation = delta_files_accessed as usize >= MAX_AUX_FILE_V2_DELTAS;
info!(
"metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", elapsed.as_secs_f64()
"metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s",
elapsed.as_secs_f64()
);
if !trigger_generation && mode == ImageLayerCreationMode::Try {
@@ -5230,7 +5229,8 @@ impl Timeline {
if should_yield {
tracing::info!(
"preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers",
partition.start().unwrap(), partition.end().unwrap()
partition.start().unwrap(),
partition.end().unwrap()
);
last_partition_processed = Some(partition.clone());
all_generated = false;
@@ -5588,7 +5588,9 @@ impl Timeline {
// because we have not implemented L0 => L0 compaction.
duplicated_layers.insert(l.layer_desc().key());
} else if LayerMap::is_l0(&l.layer_desc().key_range, l.layer_desc().is_delta) {
return Err(CompactionError::Other(anyhow::anyhow!("compaction generates a L0 layer file as output, which will cause infinite compaction.")));
return Err(CompactionError::Other(anyhow::anyhow!(
"compaction generates a L0 layer file as output, which will cause infinite compaction."
)));
} else {
insert_layers.push(l.clone());
}
@@ -5712,8 +5714,10 @@ impl Timeline {
.await
{
Ok((index_part, index_generation, _index_mtime)) => {
tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
index_part.metadata.latest_gc_cutoff_lsn());
tracing::info!(
"GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
index_part.metadata.latest_gc_cutoff_lsn()
);
Ok(Some(index_part.metadata.latest_gc_cutoff_lsn()))
}
Err(DownloadError::NotFound) => {
@@ -6122,9 +6126,7 @@ impl Timeline {
if let Some((img_lsn, img)) = &data.img {
trace!(
"found page image for key {} at {}, no WAL redo required, req LSN {}",
key,
img_lsn,
request_lsn,
key, img_lsn, request_lsn,
);
Ok(img.clone())
} else {
@@ -6153,7 +6155,12 @@ impl Timeline {
request_lsn
);
} else {
trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn);
trace!(
"found {} WAL records that will init the page for {} at {}, performing WAL redo",
data.records.len(),
key,
request_lsn
);
};
let res = self
.walredo_mgr
@@ -6697,7 +6704,9 @@ impl TimelineWriter<'_> {
if let Some(wait_threshold) = wait_threshold {
if l0_count >= wait_threshold {
debug!("layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers");
debug!(
"layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers"
);
self.tl.wait_flush_completion(flush_id).await?;
}
}
@@ -6884,17 +6893,15 @@ mod tests {
use pageserver_api::key::Key;
use pageserver_api::value::Value;
use tracing::Instrument;
use utils::{id::TimelineId, lsn::Lsn};
use crate::tenant::{
harness::{test_img, TenantHarness},
layer_map::LayerMap,
storage_layer::{Layer, LayerName, LayerVisibilityHint},
timeline::{DeltaLayerTestDesc, EvictionError},
PreviousHeatmap, Timeline,
};
use utils::id::TimelineId;
use utils::lsn::Lsn;
use super::HeatMapTimeline;
use crate::tenant::harness::{TenantHarness, test_img};
use crate::tenant::layer_map::LayerMap;
use crate::tenant::storage_layer::{Layer, LayerName, LayerVisibilityHint};
use crate::tenant::timeline::{DeltaLayerTestDesc, EvictionError};
use crate::tenant::{PreviousHeatmap, Timeline};
fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) {
assert_eq!(lhs.layers.len(), rhs.layers.len());


@@ -1,4 +1,5 @@
use std::{collections::BTreeSet, ops::Range};
use std::collections::BTreeSet;
use std::ops::Range;
use utils::lsn::Lsn;
