mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 21:10:38 +00:00
Compare commits
22 Commits
last_activ
...
skyzh/uplo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c32c26d741 | ||
|
|
6da5c189b5 | ||
|
|
66c01e46ca | ||
|
|
94fe9d00f3 | ||
|
|
dc19960cbf | ||
|
|
d3b654f6db | ||
|
|
4a898cfa2c | ||
|
|
ad0c5fdae7 | ||
|
|
2b041964b3 | ||
|
|
d4c059a884 | ||
|
|
2c56c46d48 | ||
|
|
d1728a6bcd | ||
|
|
0a27973584 | ||
|
|
07c2411f6b | ||
|
|
5819938c93 | ||
|
|
b7548de814 | ||
|
|
9794f386f4 | ||
|
|
79083de61c | ||
|
|
ec9079f483 | ||
|
|
b9b25e13a0 | ||
|
|
cf2e695f49 | ||
|
|
fc233794f6 |
@@ -113,8 +113,6 @@ runs:
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
|
||||
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
|
||||
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
|
||||
RERUN_FAILED: ${{ inputs.rerun_failed }}
|
||||
PG_VERSION: ${{ inputs.pg_version }}
|
||||
SANITIZERS: ${{ inputs.sanitizers }}
|
||||
|
||||
12
.github/workflows/_build-and-test-locally.yml
vendored
12
.github/workflows/_build-and-test-locally.yml
vendored
@@ -272,10 +272,13 @@ jobs:
|
||||
# run pageserver tests with different settings
|
||||
for get_vectored_concurrent_io in sequential sidecar-task; do
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
||||
${cov_prefix} \
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
for io_mode in buffered direct direct-rw ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=$io_mode \
|
||||
${cov_prefix} \
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
@@ -392,6 +395,7 @@ jobs:
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
|
||||
2
.github/workflows/build_and_test.yml
vendored
2
.github/workflows/build_and_test.yml
vendored
@@ -323,6 +323,8 @@ jobs:
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||
SYNC_BETWEEN_TESTS: true
|
||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
4
.github/workflows/pg-clients.yml
vendored
4
.github/workflows/pg-clients.yml
vendored
@@ -30,7 +30,7 @@ permissions:
|
||||
statuses: write # require for posting a status update
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 16
|
||||
DEFAULT_PG_VERSION: 17
|
||||
PLATFORM: neon-captest-new
|
||||
AWS_DEFAULT_REGION: eu-central-1
|
||||
|
||||
@@ -42,6 +42,8 @@ jobs:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
|
||||
build-build-tools-image:
|
||||
permissions:
|
||||
packages: write
|
||||
needs: [ check-permissions ]
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
secrets: inherit
|
||||
|
||||
15
Cargo.lock
generated
15
Cargo.lock
generated
@@ -4251,6 +4251,7 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"async-compression",
|
||||
"async-stream",
|
||||
"base64 0.13.1",
|
||||
"bincode",
|
||||
"bit_field",
|
||||
"byteorder",
|
||||
@@ -4298,6 +4299,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"range-set-blaze",
|
||||
"regex",
|
||||
"remote_keys",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"rpds",
|
||||
@@ -4352,6 +4354,7 @@ dependencies = [
|
||||
"humantime-serde",
|
||||
"itertools 0.10.5",
|
||||
"nix 0.27.1",
|
||||
"once_cell",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"rand 0.8.5",
|
||||
@@ -5502,6 +5505,16 @@ version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca"
|
||||
|
||||
[[package]]
|
||||
name = "remote_keys"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"rand 0.8.5",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "remote_storage"
|
||||
version = "0.1.0"
|
||||
@@ -5517,6 +5530,7 @@ dependencies = [
|
||||
"azure_identity",
|
||||
"azure_storage",
|
||||
"azure_storage_blobs",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
@@ -5527,6 +5541,7 @@ dependencies = [
|
||||
"humantime-serde",
|
||||
"hyper 1.4.1",
|
||||
"itertools 0.10.5",
|
||||
"md5",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
|
||||
@@ -30,6 +30,7 @@ members = [
|
||||
"libs/tenant_size_model",
|
||||
"libs/metrics",
|
||||
"libs/postgres_connection",
|
||||
"libs/remote_keys",
|
||||
"libs/remote_storage",
|
||||
"libs/tracing-utils",
|
||||
"libs/postgres_ffi/wal_craft",
|
||||
@@ -255,6 +256,7 @@ postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
|
||||
postgres_initdb = { path = "./libs/postgres_initdb" }
|
||||
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
|
||||
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
|
||||
remote_keys = { version = "0.1", path = "./libs/remote_keys/" }
|
||||
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
|
||||
safekeeper_client = { path = "./safekeeper/client" }
|
||||
desim = { version = "0.1", path = "./libs/desim" }
|
||||
|
||||
@@ -270,7 +270,7 @@ By default, this runs both debug and release modes, and all supported postgres v
|
||||
testing locally, it is convenient to run just one set of permutations, like this:
|
||||
|
||||
```sh
|
||||
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
|
||||
DEFAULT_PG_VERSION=17 BUILD_TYPE=release ./scripts/pytest
|
||||
```
|
||||
|
||||
## Flamegraphs
|
||||
|
||||
@@ -12,3 +12,5 @@ disallowed-macros = [
|
||||
# cannot disallow this, because clippy finds used from tokio macros
|
||||
#"tokio::pin",
|
||||
]
|
||||
|
||||
allow-unwrap-in-tests = true
|
||||
|
||||
@@ -22,7 +22,7 @@ commands:
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -22,7 +22,7 @@ commands:
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -57,24 +57,13 @@ use tracing::{error, info};
|
||||
use url::Url;
|
||||
use utils::failpoint_support;
|
||||
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
if arg.starts_with("http") {
|
||||
Ok(arg.trim_end_matches('/').to_string())
|
||||
} else {
|
||||
Ok("http://pg-ext-s3-gateway".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||
pub pgbin: String,
|
||||
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
||||
#[arg(short = 'r', long)]
|
||||
pub remote_ext_config: Option<String>,
|
||||
|
||||
/// The port to bind the external listening HTTP server to. Clients running
|
||||
@@ -116,9 +105,7 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
// TODO(tristan957): remove alias after compatibility tests are no longer
|
||||
// an issue
|
||||
#[arg(short = 'c', long, alias = "spec-path")]
|
||||
#[arg(short = 'c', long)]
|
||||
pub config: Option<OsString>,
|
||||
|
||||
#[arg(short = 'i', long, group = "compute-id")]
|
||||
|
||||
@@ -63,7 +63,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
|
||||
const DEFAULT_BRANCH_NAME: &str = "main";
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
const DEFAULT_PG_VERSION: u32 = 16;
|
||||
const DEFAULT_PG_VERSION: u32 = 17;
|
||||
|
||||
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
|
||||
|
||||
|
||||
@@ -766,10 +766,6 @@ impl Endpoint {
|
||||
}
|
||||
};
|
||||
|
||||
// TODO(tristan957): Remove the write to spec.json after compatibility
|
||||
// tests work themselves out
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&config.spec)?)?;
|
||||
let config_path = self.endpoint_path().join("config.json");
|
||||
std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;
|
||||
|
||||
@@ -779,16 +775,6 @@ impl Endpoint {
|
||||
.append(true)
|
||||
.open(self.endpoint_path().join("compute.log"))?;
|
||||
|
||||
// TODO(tristan957): Remove when compatibility tests are no longer an
|
||||
// issue
|
||||
let old_compute_ctl = {
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
let help_output = cmd.arg("--help").output()?;
|
||||
let help_output = String::from_utf8_lossy(&help_output.stdout);
|
||||
|
||||
!help_output.contains("--config")
|
||||
};
|
||||
|
||||
// Launch compute_ctl
|
||||
let conn_str = self.connstr("cloud_admin", "postgres");
|
||||
println!("Starting postgres node at '{}'", conn_str);
|
||||
@@ -807,19 +793,8 @@ impl Endpoint {
|
||||
])
|
||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||
.args(["--connstr", &conn_str])
|
||||
// TODO(tristan957): Change this to --config when compatibility tests
|
||||
// are no longer an issue
|
||||
.args([
|
||||
"--spec-path",
|
||||
self.endpoint_path()
|
||||
.join(if old_compute_ctl {
|
||||
"spec.json"
|
||||
} else {
|
||||
"config.json"
|
||||
})
|
||||
.to_str()
|
||||
.unwrap(),
|
||||
])
|
||||
.arg("--config")
|
||||
.arg(self.endpoint_path().join("config.json").as_os_str())
|
||||
.args([
|
||||
"--pgbin",
|
||||
self.env
|
||||
|
||||
@@ -23,7 +23,7 @@ use crate::object_storage::{OBJECT_STORAGE_REMOTE_STORAGE_DIR, ObjectStorage};
|
||||
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||
|
||||
//
|
||||
// This data structures represents neon_local CLI config
|
||||
|
||||
@@ -81,19 +81,9 @@ sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}
|
||||
|
||||
cat ${CONFIG_FILE}
|
||||
|
||||
# TODO(tristan957): Remove these workarounds for backwards compatibility after
|
||||
# the next compute release. That includes these next few lines and the
|
||||
# --spec-path in the compute_ctl invocation.
|
||||
if compute_ctl --help | grep --quiet -- '--config'; then
|
||||
SPEC_PATH="$CONFIG_FILE"
|
||||
else
|
||||
jq '.spec' < "$CONFIG_FILE" > /tmp/spec.json
|
||||
SPEC_PATH=/tmp/spec.json
|
||||
fi
|
||||
|
||||
echo "Start compute node"
|
||||
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
||||
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
||||
-b /usr/local/bin/postgres \
|
||||
--compute-id "compute-$RANDOM" \
|
||||
--spec-path "$SPEC_PATH"
|
||||
--config "$CONFIG_FILE"
|
||||
|
||||
@@ -35,6 +35,7 @@ nix = {workspace = true, optional = true}
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
bincode.workspace = true
|
||||
|
||||
@@ -1817,8 +1817,34 @@ pub mod virtual_file {
|
||||
}
|
||||
|
||||
impl IoMode {
|
||||
pub const fn preferred() -> Self {
|
||||
Self::Buffered
|
||||
pub fn preferred() -> Self {
|
||||
// The default behavior when running Rust unit tests without any further
|
||||
// flags is to use the newest behavior if available on the platform (Direct).
|
||||
// The CI uses the following environment variable to unit tests for all
|
||||
// different modes.
|
||||
// NB: the Python regression & perf tests have their own defaults management
|
||||
// that writes pageserver.toml; they do not use this variable.
|
||||
if cfg!(test) {
|
||||
use once_cell::sync::Lazy;
|
||||
static CACHED: Lazy<IoMode> = Lazy::new(|| {
|
||||
utils::env::var_serde_json_string(
|
||||
"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
|
||||
)
|
||||
.unwrap_or({
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
IoMode::Direct
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
IoMode::Buffered
|
||||
}
|
||||
})
|
||||
});
|
||||
*CACHED
|
||||
} else {
|
||||
IoMode::Buffered
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
13
libs/remote_keys/Cargo.toml
Normal file
13
libs/remote_keys/Cargo.toml
Normal file
@@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "remote_keys"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
rand.workspace = true
|
||||
42
libs/remote_keys/src/lib.rs
Normal file
42
libs/remote_keys/src/lib.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
//! A module that provides a KMS implementation that generates and unwraps the keys.
|
||||
//!
|
||||
|
||||
/// A KMS implementation that does static wrapping and unwrapping of the keys.
|
||||
pub struct NaiveKms {
|
||||
account_id: String,
|
||||
}
|
||||
|
||||
impl NaiveKms {
|
||||
pub fn new(account_id: String) -> Self {
|
||||
Self { account_id }
|
||||
}
|
||||
|
||||
pub fn encrypt(&self, plain: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||
let wrapped = [self.account_id.as_bytes(), "-wrapped-".as_bytes(), plain].concat();
|
||||
Ok(wrapped)
|
||||
}
|
||||
|
||||
pub fn decrypt(&self, wrapped: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||
let Some(wrapped) = wrapped.strip_prefix(self.account_id.as_bytes()) else {
|
||||
return Err(anyhow::anyhow!("invalid key"));
|
||||
};
|
||||
let Some(plain) = wrapped.strip_prefix(b"-wrapped-") else {
|
||||
return Err(anyhow::anyhow!("invalid key"));
|
||||
};
|
||||
Ok(plain.to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_generate_key() {
|
||||
let kms = NaiveKms::new("test-tenant".to_string());
|
||||
let data = rand::random::<[u8; 32]>().to_vec();
|
||||
let encrypted = kms.encrypt(&data).unwrap();
|
||||
let decrypted = kms.decrypt(&encrypted).unwrap();
|
||||
assert_eq!(data, decrypted);
|
||||
}
|
||||
}
|
||||
@@ -13,6 +13,7 @@ aws-smithy-async.workspace = true
|
||||
aws-smithy-types.workspace = true
|
||||
aws-config.workspace = true
|
||||
aws-sdk-s3.workspace = true
|
||||
base64.workspace = true
|
||||
bytes.workspace = true
|
||||
camino = { workspace = true, features = ["serde1"] }
|
||||
humantime-serde.workspace = true
|
||||
@@ -27,6 +28,7 @@ tokio-util = { workspace = true, features = ["compat"] }
|
||||
toml_edit.workspace = true
|
||||
tracing.workspace = true
|
||||
scopeguard.workspace = true
|
||||
md5.workspace = true
|
||||
metrics.workspace = true
|
||||
utils = { path = "../utils", default-features = false }
|
||||
pin-project-lite.workspace = true
|
||||
|
||||
@@ -550,6 +550,19 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
self.download_for_builder(builder, timeout, cancel).await
|
||||
}
|
||||
|
||||
#[allow(unused_variables)]
|
||||
async fn upload_with_encryption(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
|
||||
self.delete_objects(std::array::from_ref(path), cancel)
|
||||
.await
|
||||
|
||||
@@ -190,6 +190,8 @@ pub struct DownloadOpts {
|
||||
/// timeouts: for something like an index/manifest/heatmap, we should time out faster than
|
||||
/// for layer files
|
||||
pub kind: DownloadKind,
|
||||
/// The encryption key to use for the download.
|
||||
pub encryption_key: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
pub enum DownloadKind {
|
||||
@@ -204,6 +206,7 @@ impl Default for DownloadOpts {
|
||||
byte_start: Bound::Unbounded,
|
||||
byte_end: Bound::Unbounded,
|
||||
kind: DownloadKind::Large,
|
||||
encryption_key: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,6 +244,15 @@ impl DownloadOpts {
|
||||
None => format!("bytes={start}-"),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn with_encryption_key(mut self, encryption_key: Option<impl AsRef<[u8]>>) -> Self {
|
||||
self.encryption_key = encryption_key.map(|k| k.as_ref().to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn encryption_key(&self) -> Option<&[u8]> {
|
||||
self.encryption_key.as_deref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage (potentially remote) API to manage its state.
|
||||
@@ -331,6 +343,19 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError>;
|
||||
|
||||
/// Same as upload, but with remote encryption if the backend supports it (e.g. SSE-C on AWS).
|
||||
async fn upload_with_encryption(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
// S3 PUT request requires the content length to be specified,
|
||||
// otherwise it starts to fail with the concurrent connection count increasing.
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
/// Delete a single path from remote storage.
|
||||
///
|
||||
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
|
||||
@@ -615,6 +640,63 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn upload_with_encryption(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
match self {
|
||||
Self::LocalFs(s) => {
|
||||
s.upload_with_encryption(
|
||||
from,
|
||||
data_size_bytes,
|
||||
to,
|
||||
metadata,
|
||||
encryption_key,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Self::AwsS3(s) => {
|
||||
s.upload_with_encryption(
|
||||
from,
|
||||
data_size_bytes,
|
||||
to,
|
||||
metadata,
|
||||
encryption_key,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Self::AzureBlob(s) => {
|
||||
s.upload_with_encryption(
|
||||
from,
|
||||
data_size_bytes,
|
||||
to,
|
||||
metadata,
|
||||
encryption_key,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Self::Unreliable(s) => {
|
||||
s.upload_with_encryption(
|
||||
from,
|
||||
data_size_bytes,
|
||||
to,
|
||||
metadata,
|
||||
encryption_key,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl GenericRemoteStorage {
|
||||
|
||||
@@ -198,6 +198,10 @@ impl LocalFs {
|
||||
let mut entries = cur_folder.read_dir_utf8()?;
|
||||
while let Some(Ok(entry)) = entries.next() {
|
||||
let file_name = entry.file_name();
|
||||
if file_name.ends_with(".metadata") || file_name.ends_with(".enc") {
|
||||
// ignore metadata and encryption key files
|
||||
continue;
|
||||
}
|
||||
let full_file_name = cur_folder.join(file_name);
|
||||
if full_file_name.as_str().starts_with(prefix) {
|
||||
let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());
|
||||
@@ -218,6 +222,7 @@ impl LocalFs {
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
enctyption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let target_file_path = to.with_base(&self.storage_root);
|
||||
@@ -306,6 +311,8 @@ impl LocalFs {
|
||||
)
|
||||
})?;
|
||||
|
||||
// TODO: we might need to make the following writes atomic with the file write operation above
|
||||
|
||||
if let Some(storage_metadata) = metadata {
|
||||
// FIXME: we must not be using metadata much, since this would forget the old metadata
|
||||
// for new writes? or perhaps metadata is sticky; could consider removing if it's never
|
||||
@@ -324,6 +331,15 @@ impl LocalFs {
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(encryption_key) = enctyption_key {
|
||||
let encryption_key_path = storage_encryption_key_path(&target_file_path);
|
||||
fs::write(&encryption_key_path, encryption_key).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to write encryption key to the local storage at '{encryption_key_path}'",
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -450,6 +466,7 @@ impl RemoteStorage for LocalFs {
|
||||
key: &RemotePath,
|
||||
_cancel: &CancellationToken,
|
||||
) -> Result<ListingObject, DownloadError> {
|
||||
// TODO: check encryption key
|
||||
let target_file_path = key.with_base(&self.storage_root);
|
||||
let metadata = file_metadata(&target_file_path).await?;
|
||||
Ok(ListingObject {
|
||||
@@ -461,34 +478,14 @@ impl RemoteStorage for LocalFs {
|
||||
|
||||
async fn upload(
|
||||
&self,
|
||||
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
|
||||
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let cancel = cancel.child_token();
|
||||
|
||||
let op = self.upload0(data, data_size_bytes, to, metadata, &cancel);
|
||||
let mut op = std::pin::pin!(op);
|
||||
|
||||
// race the upload0 to the timeout; if it goes over, do a graceful shutdown
|
||||
let (res, timeout) = tokio::select! {
|
||||
res = &mut op => (res, false),
|
||||
_ = tokio::time::sleep(self.timeout) => {
|
||||
cancel.cancel();
|
||||
(op.await, true)
|
||||
}
|
||||
};
|
||||
|
||||
match res {
|
||||
Err(e) if timeout && TimeoutOrCancel::caused_by_cancel(&e) => {
|
||||
// we caused this cancel (or they happened simultaneously) -- swap it out to
|
||||
// Timeout
|
||||
Err(TimeoutOrCancel::Timeout.into())
|
||||
}
|
||||
res => res,
|
||||
}
|
||||
self.upload_with_encryption(data, data_size_bytes, to, metadata, None, cancel)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn download(
|
||||
@@ -506,6 +503,22 @@ impl RemoteStorage for LocalFs {
|
||||
return Err(DownloadError::Unmodified);
|
||||
}
|
||||
|
||||
let key = match fs::read(storage_encryption_key_path(&target_path)).await {
|
||||
Ok(key) => Some(key),
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => None,
|
||||
Err(e) => {
|
||||
return Err(DownloadError::Other(
|
||||
anyhow::anyhow!(e).context("cannot read encryption key"),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
if key != opts.encryption_key {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!(
|
||||
"encryption key mismatch"
|
||||
)));
|
||||
}
|
||||
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
@@ -551,12 +564,53 @@ impl RemoteStorage for LocalFs {
|
||||
async fn delete(&self, path: &RemotePath, _cancel: &CancellationToken) -> anyhow::Result<()> {
|
||||
let file_path = path.with_base(&self.storage_root);
|
||||
match fs::remove_file(&file_path).await {
|
||||
Ok(()) => Ok(()),
|
||||
Ok(()) => {}
|
||||
// The file doesn't exist. This shouldn't yield an error to mirror S3's behaviour.
|
||||
// See https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html
|
||||
// > If there isn't a null version, Amazon S3 does not remove any objects but will still respond that the command was successful.
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => Ok(()),
|
||||
Err(e) => Err(anyhow::anyhow!(e)),
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => {}
|
||||
Err(e) => return Err(anyhow::anyhow!(e)),
|
||||
};
|
||||
fs::remove_file(&storage_metadata_path(&file_path))
|
||||
.await
|
||||
.ok();
|
||||
fs::remove_file(&storage_encryption_key_path(&file_path))
|
||||
.await
|
||||
.ok();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(unused_variables)]
|
||||
async fn upload_with_encryption(
|
||||
&self,
|
||||
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let cancel = cancel.child_token();
|
||||
|
||||
let op = self.upload0(data, data_size_bytes, to, metadata, encryption_key, &cancel);
|
||||
let mut op = std::pin::pin!(op);
|
||||
|
||||
// race the upload0 to the timeout; if it goes over, do a graceful shutdown
|
||||
let (res, timeout) = tokio::select! {
|
||||
res = &mut op => (res, false),
|
||||
_ = tokio::time::sleep(self.timeout) => {
|
||||
cancel.cancel();
|
||||
(op.await, true)
|
||||
}
|
||||
};
|
||||
|
||||
match res {
|
||||
Err(e) if timeout && TimeoutOrCancel::caused_by_cancel(&e) => {
|
||||
// we caused this cancel (or they happened simultaneously) -- swap it out to
|
||||
// Timeout
|
||||
Err(TimeoutOrCancel::Timeout.into())
|
||||
}
|
||||
res => res,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -591,6 +645,7 @@ impl RemoteStorage for LocalFs {
|
||||
to_path = to_path
|
||||
)
|
||||
})?;
|
||||
// TODO: copy metadata and encryption key
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -609,6 +664,10 @@ fn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {
|
||||
path_with_suffix_extension(original_path, "metadata")
|
||||
}
|
||||
|
||||
fn storage_encryption_key_path(original_path: &Utf8Path) -> Utf8PathBuf {
|
||||
path_with_suffix_extension(original_path, "enc")
|
||||
}
|
||||
|
||||
async fn create_target_directory(target_file_path: &Utf8Path) -> anyhow::Result<()> {
|
||||
let target_dir = match target_file_path.parent() {
|
||||
Some(parent_dir) => parent_dir,
|
||||
|
||||
@@ -66,7 +66,10 @@ struct GetObjectRequest {
|
||||
key: String,
|
||||
etag: Option<String>,
|
||||
range: Option<String>,
|
||||
/// Base64 encoded SSE-C key for server-side encryption.
|
||||
sse_c_key: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl S3Bucket {
|
||||
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
|
||||
pub async fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
|
||||
@@ -257,6 +260,13 @@ impl S3Bucket {
|
||||
builder = builder.if_none_match(etag);
|
||||
}
|
||||
|
||||
if let Some(encryption_key) = request.sse_c_key {
|
||||
builder = builder.sse_customer_algorithm("AES256");
|
||||
builder = builder.sse_customer_key(base64::encode(&encryption_key));
|
||||
builder = builder
|
||||
.sse_customer_key_md5(base64::encode(md5::compute(&encryption_key).as_slice()));
|
||||
}
|
||||
|
||||
let get_object = builder.send();
|
||||
|
||||
let get_object = tokio::select! {
|
||||
@@ -693,12 +703,13 @@ impl RemoteStorage for S3Bucket {
|
||||
})
|
||||
}
|
||||
|
||||
async fn upload(
|
||||
async fn upload_with_encryption(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
from_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Put;
|
||||
@@ -709,7 +720,7 @@ impl RemoteStorage for S3Bucket {
|
||||
let body = StreamBody::new(from.map(|x| x.map(Frame::data)));
|
||||
let bytes_stream = ByteStream::new(SdkBody::from_body_1_x(body));
|
||||
|
||||
let upload = self
|
||||
let mut upload = self
|
||||
.client
|
||||
.put_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
@@ -717,8 +728,17 @@ impl RemoteStorage for S3Bucket {
|
||||
.set_metadata(metadata.map(|m| m.0))
|
||||
.set_storage_class(self.upload_storage_class.clone())
|
||||
.content_length(from_size_bytes.try_into()?)
|
||||
.body(bytes_stream)
|
||||
.send();
|
||||
.body(bytes_stream);
|
||||
|
||||
if let Some(encryption_key) = encryption_key {
|
||||
upload = upload.sse_customer_algorithm("AES256");
|
||||
let base64_key = base64::encode(encryption_key);
|
||||
upload = upload.sse_customer_key(&base64_key);
|
||||
upload = upload
|
||||
.sse_customer_key_md5(base64::encode(md5::compute(encryption_key).as_slice()));
|
||||
}
|
||||
|
||||
let upload = upload.send();
|
||||
|
||||
let upload = tokio::time::timeout(self.timeout, upload);
|
||||
|
||||
@@ -742,6 +762,18 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
}
|
||||
|
||||
async fn upload(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
self.upload_with_encryption(from, data_size_bytes, to, metadata, None, cancel)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn copy(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
@@ -801,6 +833,7 @@ impl RemoteStorage for S3Bucket {
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
etag: opts.etag.as_ref().map(|e| e.to_string()),
|
||||
range: opts.byte_range_header(),
|
||||
sse_c_key: opts.encryption_key.clone(),
|
||||
},
|
||||
cancel,
|
||||
)
|
||||
|
||||
@@ -178,6 +178,19 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
self.inner.download(from, opts, cancel).await
|
||||
}
|
||||
|
||||
#[allow(unused_variables)]
|
||||
async fn upload_with_encryption(
|
||||
&self,
|
||||
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
metadata: Option<StorageMetadata>,
|
||||
encryption_key: Option<&[u8]>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
|
||||
self.delete_inner(path, true, cancel).await
|
||||
}
|
||||
|
||||
@@ -421,7 +421,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
|
||||
let len = upload_large_enough_file(&ctx.client, &path, &cancel, None).await;
|
||||
|
||||
let timeout = std::time::Duration::from_secs(5);
|
||||
|
||||
@@ -500,7 +500,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let file_len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
|
||||
let file_len = upload_large_enough_file(&ctx.client, &path, &cancel, None).await;
|
||||
|
||||
{
|
||||
let stream = ctx
|
||||
@@ -555,6 +555,7 @@ async fn upload_large_enough_file(
|
||||
client: &GenericRemoteStorage,
|
||||
path: &RemotePath,
|
||||
cancel: &CancellationToken,
|
||||
encryption_key: Option<&[u8]>,
|
||||
) -> usize {
|
||||
let header = bytes::Bytes::from_static("remote blob data content".as_bytes());
|
||||
let body = bytes::Bytes::from(vec![0u8; 1024]);
|
||||
@@ -565,9 +566,54 @@ async fn upload_large_enough_file(
|
||||
let contents = futures::stream::iter(contents.map(std::io::Result::Ok));
|
||||
|
||||
client
|
||||
.upload(contents, len, path, None, cancel)
|
||||
.upload_with_encryption(contents, len, path, None, encryption_key, cancel)
|
||||
.await
|
||||
.expect("upload succeeds");
|
||||
|
||||
len
|
||||
}
|
||||
|
||||
#[test_context(MaybeEnabledStorage)]
|
||||
#[tokio::test]
|
||||
async fn encryption_works(ctx: &mut MaybeEnabledStorage) {
|
||||
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
|
||||
return;
|
||||
};
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let path = RemotePath::new(Utf8Path::new(
|
||||
format!("{}/file_to_copy", ctx.base_prefix).as_str(),
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let key = rand::random::<[u8; 32]>();
|
||||
let file_len = upload_large_enough_file(&ctx.client, &path, &cancel, Some(&key)).await;
|
||||
|
||||
{
|
||||
let download = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts::default().with_encryption_key(Some(&key)),
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
.expect("should succeed");
|
||||
let vec = download_to_vec(download).await.expect("should succeed");
|
||||
assert_eq!(vec.len(), file_len);
|
||||
}
|
||||
|
||||
{
|
||||
// Download without encryption key should fail
|
||||
let download = ctx
|
||||
.client
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.await;
|
||||
assert!(download.is_err());
|
||||
}
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
ctx.client.delete_objects(&[path], &cancel).await.unwrap();
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ anyhow.workspace = true
|
||||
arc-swap.workspace = true
|
||||
async-compression.workspace = true
|
||||
async-stream.workspace = true
|
||||
base64.workspace = true
|
||||
bit_field.workspace = true
|
||||
bincode.workspace = true
|
||||
byteorder.workspace = true
|
||||
@@ -82,6 +83,7 @@ postgres_connection.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
pq_proto.workspace = true
|
||||
remote_storage.workspace = true
|
||||
remote_keys.workspace = true
|
||||
storage_broker.workspace = true
|
||||
tenant_size_model.workspace = true
|
||||
http-utils.workspace = true
|
||||
|
||||
@@ -45,6 +45,7 @@ fn bench_upload_queue_next_ready(c: &mut Criterion) {
|
||||
shard: ShardIndex::new(ShardNumber(1), ShardCount(2)),
|
||||
generation: Generation::Valid(1),
|
||||
file_size: 0,
|
||||
encryption_key: None,
|
||||
};
|
||||
|
||||
// Construct the (initial and uploaded) index with layer0.
|
||||
|
||||
@@ -118,13 +118,13 @@ pub struct PageServerConf {
|
||||
/// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
|
||||
pub concurrent_tenant_warmup: ConfigurableSemaphore,
|
||||
|
||||
/// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
|
||||
/// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
|
||||
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
||||
/// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
|
||||
/// See the comment in `eviction_task` for details.
|
||||
///
|
||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
||||
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||
pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
|
||||
|
||||
// How often to collect metrics and send them to the metrics endpoint.
|
||||
@@ -588,10 +588,10 @@ impl ConfigurableSemaphore {
|
||||
/// Initializse using a non-zero amount of permits.
|
||||
///
|
||||
/// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
|
||||
/// feature such as [`Tenant::gather_size_inputs`]. Otherwise any semaphore using future will
|
||||
/// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
|
||||
/// behave like [`futures::future::pending`], just waiting until new permits are added.
|
||||
///
|
||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
||||
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||
pub fn new(initial_permits: NonZeroUsize) -> Self {
|
||||
ConfigurableSemaphore {
|
||||
initial_permits,
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
|
||||
|
||||
mod disk_cache;
|
||||
mod metrics;
|
||||
@@ -428,7 +428,7 @@ async fn calculate_synthetic_size_worker(
|
||||
}
|
||||
}
|
||||
|
||||
async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
async fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
const CAUSE: LogicalSizeCalculationCause =
|
||||
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
|
||||
|
||||
|
||||
@@ -175,9 +175,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`Tenant::remote_size`]
|
||||
/// [`TenantShard::remote_size`]
|
||||
///
|
||||
/// [`Tenant::remote_size`]: crate::tenant::Tenant::remote_size
|
||||
/// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
|
||||
const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
@@ -199,9 +199,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`Tenant::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
///
|
||||
/// [`Tenant::cached_synthetic_size`]: crate::tenant::Tenant::cached_synthetic_size
|
||||
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||
/// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
|
||||
const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
@@ -254,7 +254,7 @@ pub(super) async fn collect_all_metrics(
|
||||
|
||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
||||
where
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,
|
||||
{
|
||||
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
||||
|
||||
@@ -308,7 +308,7 @@ impl TenantSnapshot {
|
||||
///
|
||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||
/// cannot just list timelines here.
|
||||
fn collect(t: &Arc<crate::tenant::Tenant>, resident_size: u64) -> Self {
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||
TenantSnapshot {
|
||||
resident_size,
|
||||
remote_size: t.remote_size(),
|
||||
|
||||
@@ -1873,7 +1873,7 @@ async fn update_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
@@ -1917,7 +1917,7 @@ async fn patch_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ use tracing::{info, info_span};
|
||||
/// backwards-compatible changes to the metadata format.
|
||||
pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||
|
||||
// Magic constants used to identify different kinds of files
|
||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||
|
||||
@@ -1086,7 +1086,7 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
.expect("Failed to register metric")
|
||||
});
|
||||
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
|
||||
/// like how long it took to load.
|
||||
///
|
||||
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
|
||||
|
||||
@@ -76,7 +76,7 @@ use crate::tenant::timeline::{self, WaitLsnError};
|
||||
use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
|
||||
use crate::{basebackup, timed_after_cancellation};
|
||||
|
||||
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which
|
||||
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::TenantShard`] which
|
||||
/// is not yet in state [`TenantState::Active`].
|
||||
///
|
||||
/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
|
||||
|
||||
@@ -158,7 +158,7 @@ pub struct TenantSharedResources {
|
||||
pub l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
|
||||
/// A [`Tenant`] is really an _attached_ tenant. The configuration
|
||||
/// A [`TenantShard`] is really an _attached_ tenant. The configuration
|
||||
/// for an attached tenant is a subset of the [`LocationConf`], represented
|
||||
/// in this struct.
|
||||
#[derive(Clone)]
|
||||
@@ -245,7 +245,7 @@ pub(crate) enum SpawnMode {
|
||||
///
|
||||
/// Tenant consists of multiple timelines. Keep them in a hash table.
|
||||
///
|
||||
pub struct Tenant {
|
||||
pub struct TenantShard {
|
||||
// Global pageserver config parameters
|
||||
pub conf: &'static PageServerConf,
|
||||
|
||||
@@ -267,7 +267,7 @@ pub struct Tenant {
|
||||
shard_identity: ShardIdentity,
|
||||
|
||||
/// The remote storage generation, used to protect S3 objects from split-brain.
|
||||
/// Does not change over the lifetime of the [`Tenant`] object.
|
||||
/// Does not change over the lifetime of the [`TenantShard`] object.
|
||||
///
|
||||
/// This duplicates the generation stored in LocationConf, but that structure is mutable:
|
||||
/// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
|
||||
@@ -309,7 +309,7 @@ pub struct Tenant {
|
||||
// Access to global deletion queue for when this tenant wants to schedule a deletion
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
|
||||
/// Cached logical sizes updated updated on each [`Tenant::gather_size_inputs`].
|
||||
/// Cached logical sizes updated updated on each [`TenantShard::gather_size_inputs`].
|
||||
cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
|
||||
cached_synthetic_tenant_size: Arc<AtomicU64>,
|
||||
|
||||
@@ -337,12 +337,12 @@ pub struct Tenant {
|
||||
// Timelines' cancellation token.
|
||||
pub(crate) cancel: CancellationToken,
|
||||
|
||||
// Users of the Tenant such as the page service must take this Gate to avoid
|
||||
// trying to use a Tenant which is shutting down.
|
||||
// Users of the TenantShard such as the page service must take this Gate to avoid
|
||||
// trying to use a TenantShard which is shutting down.
|
||||
pub(crate) gate: Gate,
|
||||
|
||||
/// Throttle applied at the top of [`Timeline::get`].
|
||||
/// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
|
||||
/// All [`TenantShard::timelines`] of a given [`TenantShard`] instance share the same [`throttle::Throttle`] instance.
|
||||
pub(crate) pagestream_throttle: Arc<throttle::Throttle>,
|
||||
|
||||
pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
|
||||
@@ -362,7 +362,7 @@ pub struct Tenant {
|
||||
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
impl std::fmt::Debug for Tenant {
|
||||
impl std::fmt::Debug for TenantShard {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
||||
}
|
||||
@@ -841,7 +841,7 @@ impl Debug for SetStoppingError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Arguments to [`Tenant::create_timeline`].
|
||||
/// Arguments to [`TenantShard::create_timeline`].
|
||||
///
|
||||
/// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
|
||||
/// is `None`, the result of the timeline create call is not deterministic.
|
||||
@@ -876,7 +876,7 @@ pub(crate) struct CreateTimelineParamsImportPgdata {
|
||||
pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`] in [`Tenant::start_creating_timeline`].
|
||||
/// What is used to determine idempotency of a [`TenantShard::create_timeline`] call in [`TenantShard::start_creating_timeline`] in [`TenantShard::start_creating_timeline`].
|
||||
///
|
||||
/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
|
||||
///
|
||||
@@ -914,7 +914,7 @@ pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is returned by [`Tenant::start_creating_timeline`].
|
||||
/// What is returned by [`TenantShard::start_creating_timeline`].
|
||||
#[must_use]
|
||||
enum StartCreatingTimelineResult {
|
||||
CreateGuard(TimelineCreateGuard),
|
||||
@@ -943,13 +943,13 @@ struct TimelineInitAndSyncNeedsSpawnImportPgdata {
|
||||
guard: TimelineCreateGuard,
|
||||
}
|
||||
|
||||
/// What is returned by [`Tenant::create_timeline`].
|
||||
/// What is returned by [`TenantShard::create_timeline`].
|
||||
enum CreateTimelineResult {
|
||||
Created(Arc<Timeline>),
|
||||
Idempotent(Arc<Timeline>),
|
||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
|
||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`TenantShard::timelines`] when
|
||||
/// we return this result, nor will this concrete object ever be added there.
|
||||
/// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
|
||||
/// Cf method comment on [`TenantShard::create_timeline_import_pgdata`].
|
||||
ImportSpawned(Arc<Timeline>),
|
||||
}
|
||||
|
||||
@@ -1082,7 +1082,7 @@ pub(crate) enum LoadConfigError {
|
||||
NotFound(Utf8PathBuf),
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
impl TenantShard {
|
||||
/// Yet another helper for timeline initialization.
|
||||
///
|
||||
/// - Initializes the Timeline struct and inserts it into the tenant's hash map
|
||||
@@ -1303,7 +1303,7 @@ impl Tenant {
|
||||
init_order: Option<InitializationOrder>,
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
||||
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||
let wal_redo_manager =
|
||||
WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;
|
||||
|
||||
@@ -1317,7 +1317,7 @@ impl Tenant {
|
||||
let attach_mode = attached_conf.location.attach_mode;
|
||||
let generation = attached_conf.location.generation;
|
||||
|
||||
let tenant = Arc::new(Tenant::new(
|
||||
let tenant = Arc::new(TenantShard::new(
|
||||
TenantState::Attaching,
|
||||
conf,
|
||||
attached_conf,
|
||||
@@ -1334,7 +1334,7 @@ impl Tenant {
|
||||
let attach_gate_guard = tenant
|
||||
.gate
|
||||
.enter()
|
||||
.expect("We just created the Tenant: nothing else can have shut it down yet");
|
||||
.expect("We just created the TenantShard: nothing else can have shut it down yet");
|
||||
|
||||
// Do all the hard work in the background
|
||||
let tenant_clone = Arc::clone(&tenant);
|
||||
@@ -1362,7 +1362,7 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
fn make_broken_or_stopping(t: &Tenant, err: anyhow::Error) {
|
||||
fn make_broken_or_stopping(t: &TenantShard, err: anyhow::Error) {
|
||||
t.state.send_modify(|state| match state {
|
||||
// TODO: the old code alluded to DeleteTenantFlow sometimes setting
|
||||
// TenantState::Stopping before we get here, but this may be outdated.
|
||||
@@ -1627,7 +1627,7 @@ impl Tenant {
|
||||
/// No background tasks are started as part of this routine.
|
||||
///
|
||||
async fn attach(
|
||||
self: &Arc<Tenant>,
|
||||
self: &Arc<TenantShard>,
|
||||
preload: Option<TenantPreload>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -1957,7 +1957,7 @@ impl Tenant {
|
||||
}
|
||||
|
||||
async fn load_timelines_metadata(
|
||||
self: &Arc<Tenant>,
|
||||
self: &Arc<TenantShard>,
|
||||
timeline_ids: HashSet<TimelineId>,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
heatmap: Option<(HeatMapTenant, std::time::Instant)>,
|
||||
@@ -2028,7 +2028,7 @@ impl Tenant {
|
||||
}
|
||||
|
||||
fn load_timeline_metadata(
|
||||
self: &Arc<Tenant>,
|
||||
self: &Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
previous_heatmap: Option<PreviousHeatmap>,
|
||||
@@ -2429,14 +2429,14 @@ impl Tenant {
|
||||
/// This is used by tests & import-from-basebackup.
|
||||
///
|
||||
/// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in
|
||||
/// a state that will fail [`Tenant::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
||||
/// a state that will fail [`TenantShard::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
||||
///
|
||||
/// The caller is responsible for getting the timeline into a state that will be accepted
|
||||
/// by [`Tenant::load_remote_timeline`] / [`Tenant::attach`].
|
||||
/// by [`TenantShard::load_remote_timeline`] / [`TenantShard::attach`].
|
||||
/// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline
|
||||
/// to the [`Tenant::timelines`].
|
||||
/// to the [`TenantShard::timelines`].
|
||||
///
|
||||
/// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
|
||||
/// Tests should use `TenantShard::create_test_timeline` to set up the minimum required metadata keys.
|
||||
pub(crate) async fn create_empty_timeline(
|
||||
self: &Arc<Self>,
|
||||
new_timeline_id: TimelineId,
|
||||
@@ -2584,7 +2584,7 @@ impl Tenant {
|
||||
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn create_timeline(
|
||||
self: &Arc<Tenant>,
|
||||
self: &Arc<TenantShard>,
|
||||
params: CreateTimelineParams,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
@@ -2751,13 +2751,13 @@ impl Tenant {
|
||||
Ok(activated_timeline)
|
||||
}
|
||||
|
||||
/// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
|
||||
/// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import
|
||||
/// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
|
||||
/// [`Tenant::timelines`] map when the import completes.
|
||||
/// [`TenantShard::timelines`] map when the import completes.
|
||||
/// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
|
||||
/// for the response.
|
||||
async fn create_timeline_import_pgdata(
|
||||
self: &Arc<Tenant>,
|
||||
self: &Arc<Self>,
|
||||
params: CreateTimelineParamsImportPgdata,
|
||||
activate: ActivateTimelineArgs,
|
||||
ctx: &RequestContext,
|
||||
@@ -2854,7 +2854,7 @@ impl Tenant {
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
|
||||
async fn create_timeline_import_pgdata_task(
|
||||
self: Arc<Tenant>,
|
||||
self: Arc<TenantShard>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
@@ -2882,7 +2882,7 @@ impl Tenant {
|
||||
}
|
||||
|
||||
async fn create_timeline_import_pgdata_task_impl(
|
||||
self: Arc<Tenant>,
|
||||
self: Arc<TenantShard>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
@@ -2899,10 +2899,10 @@ impl Tenant {
|
||||
// Reload timeline from remote.
|
||||
// This proves that the remote state is attachable, and it reuses the code.
|
||||
//
|
||||
// TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
|
||||
// TODO: think about whether this is safe to do with concurrent TenantShard::shutdown.
|
||||
// timeline_create_guard hols the tenant gate open, so, shutdown cannot _complete_ until we exit.
|
||||
// But our activate() call might launch new background tasks after Tenant::shutdown
|
||||
// already went past shutting down the Tenant::timelines, which this timeline here is no part of.
|
||||
// But our activate() call might launch new background tasks after TenantShard::shutdown
|
||||
// already went past shutting down the TenantShard::timelines, which this timeline here is no part of.
|
||||
// I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
|
||||
// down while bootstrapping/branching + activating), but, the race condition is much more likely
|
||||
// to manifest because of the long runtime of this import task.
|
||||
@@ -2917,7 +2917,7 @@ impl Tenant {
|
||||
// };
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
// load from object storage like Tenant::attach does
|
||||
// load from object storage like TenantShard::attach does
|
||||
let resources = self.build_timeline_resources(timeline_id);
|
||||
let index_part = resources
|
||||
.remote_client
|
||||
@@ -3938,7 +3938,7 @@ enum ActivateTimelineArgs {
|
||||
No,
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
impl TenantShard {
|
||||
pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {
|
||||
self.tenant_conf.load().tenant_conf.clone()
|
||||
}
|
||||
@@ -4096,7 +4096,7 @@ impl Tenant {
|
||||
update: F,
|
||||
) -> anyhow::Result<pageserver_api::models::TenantConfig> {
|
||||
// Use read-copy-update in order to avoid overwriting the location config
|
||||
// state if this races with [`Tenant::set_new_location_config`]. Note that
|
||||
// state if this races with [`TenantShard::set_new_location_config`]. Note that
|
||||
// this race is not possible if both request types come from the storage
|
||||
// controller (as they should!) because an exclusive op lock is required
|
||||
// on the storage controller side.
|
||||
@@ -4219,7 +4219,7 @@ impl Tenant {
|
||||
Ok((timeline, timeline_ctx))
|
||||
}
|
||||
|
||||
/// [`Tenant::shutdown`] must be called before dropping the returned [`Tenant`] object
|
||||
/// [`TenantShard::shutdown`] must be called before dropping the returned [`TenantShard`] object
|
||||
/// to ensure proper cleanup of background tasks and metrics.
|
||||
//
|
||||
// Allow too_many_arguments because a constructor's argument list naturally grows with the
|
||||
@@ -4235,7 +4235,7 @@ impl Tenant {
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
) -> Tenant {
|
||||
) -> TenantShard {
|
||||
debug_assert!(
|
||||
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
|
||||
);
|
||||
@@ -4295,7 +4295,7 @@ impl Tenant {
|
||||
}
|
||||
});
|
||||
|
||||
Tenant {
|
||||
TenantShard {
|
||||
tenant_shard_id,
|
||||
shard_identity,
|
||||
generation: attached_conf.location.generation,
|
||||
@@ -4330,7 +4330,7 @@ impl Tenant {
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::default(),
|
||||
pagestream_throttle: Arc::new(throttle::Throttle::new(
|
||||
Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
TenantShard::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
)),
|
||||
pagestream_throttle_metrics: Arc::new(
|
||||
crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),
|
||||
@@ -4466,11 +4466,11 @@ impl Tenant {
|
||||
|
||||
// Perform GC for each timeline.
|
||||
//
|
||||
// Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the
|
||||
// Note that we don't hold the `TenantShard::gc_cs` lock here because we don't want to delay the
|
||||
// branch creation task, which requires the GC lock. A GC iteration can run concurrently
|
||||
// with branch creation.
|
||||
//
|
||||
// See comments in [`Tenant::branch_timeline`] for more information about why branch
|
||||
// See comments in [`TenantShard::branch_timeline`] for more information about why branch
|
||||
// creation task can run concurrently with timeline's GC iteration.
|
||||
for timeline in gc_timelines {
|
||||
if cancel.is_cancelled() {
|
||||
@@ -4500,7 +4500,7 @@ impl Tenant {
|
||||
|
||||
/// Refreshes the Timeline::gc_info for all timelines, returning the
|
||||
/// vector of timelines which have [`Timeline::get_last_record_lsn`] past
|
||||
/// [`Tenant::get_gc_horizon`].
|
||||
/// [`TenantShard::get_gc_horizon`].
|
||||
///
|
||||
/// This is usually executed as part of periodic gc, but can now be triggered more often.
|
||||
pub(crate) async fn refresh_gc_info(
|
||||
@@ -5499,7 +5499,7 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
// The flushes we did above were just writes, but the Tenant might have had
|
||||
// The flushes we did above were just writes, but the TenantShard might have had
|
||||
// pending deletions as well from recent compaction/gc: we want to flush those
|
||||
// as well. This requires flushing the global delete queue. This is cheap
|
||||
// because it's typically a no-op.
|
||||
@@ -5517,7 +5517,7 @@ impl Tenant {
|
||||
|
||||
/// How much local storage would this tenant like to have? It can cope with
|
||||
/// less than this (via eviction and on-demand downloads), but this function enables
|
||||
/// the Tenant to advertise how much storage it would prefer to have to provide fast I/O
|
||||
/// the TenantShard to advertise how much storage it would prefer to have to provide fast I/O
|
||||
/// by keeping important things on local disk.
|
||||
///
|
||||
/// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less
|
||||
@@ -5540,11 +5540,11 @@ impl Tenant {
|
||||
/// manifest in `Self::remote_tenant_manifest`.
|
||||
///
|
||||
/// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
|
||||
/// changing any `Tenant` state that's included in the manifest, consider making the manifest
|
||||
/// changing any `TenantShard` state that's included in the manifest, consider making the manifest
|
||||
/// the authoritative source of data with an API that automatically uploads on changes. Revisit
|
||||
/// this when the manifest is more widely used and we have a better idea of the data model.
|
||||
pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
|
||||
// Multiple tasks may call this function concurrently after mutating the Tenant runtime
|
||||
// Multiple tasks may call this function concurrently after mutating the TenantShard runtime
|
||||
// state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
|
||||
// to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
|
||||
// simple coalescing mechanism.
|
||||
@@ -5812,7 +5812,7 @@ pub(crate) mod harness {
|
||||
info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
|
||||
}
|
||||
|
||||
pub(crate) async fn load(&self) -> (Arc<Tenant>, RequestContext) {
|
||||
pub(crate) async fn load(&self) -> (Arc<TenantShard>, RequestContext) {
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
|
||||
.with_scope_unit_test();
|
||||
(
|
||||
@@ -5827,10 +5827,10 @@ pub(crate) mod harness {
|
||||
pub(crate) async fn do_try_load(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
) -> anyhow::Result<Arc<TenantShard>> {
|
||||
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
||||
|
||||
let tenant = Arc::new(Tenant::new(
|
||||
let tenant = Arc::new(TenantShard::new(
|
||||
TenantState::Attaching,
|
||||
self.conf,
|
||||
AttachedTenantConf::try_from(LocationConf::attached_single(
|
||||
@@ -6046,7 +6046,7 @@ mod tests {
|
||||
#[cfg(feature = "testing")]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn randomize_timeline(
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
new_timeline_id: TimelineId,
|
||||
pg_version: u32,
|
||||
spec: TestTimelineSpecification,
|
||||
@@ -6936,7 +6936,7 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn bulk_insert_compact_gc(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
lsn: Lsn,
|
||||
@@ -6948,7 +6948,7 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn bulk_insert_maybe_compact_gc(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
mut lsn: Lsn,
|
||||
@@ -7858,7 +7858,7 @@ mod tests {
|
||||
let (tline, _ctx) = tenant
|
||||
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
// Leave the timeline ID in [`Tenant::timelines_creating`] to exclude attempting to create it again
|
||||
// Leave the timeline ID in [`TenantShard::timelines_creating`] to exclude attempting to create it again
|
||||
let raw_tline = tline.raw_timeline().unwrap();
|
||||
raw_tline
|
||||
.shutdown(super::timeline::ShutdownMode::Hard)
|
||||
|
||||
@@ -564,8 +564,9 @@ mod tests {
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
// Any version will do here, so use the default
|
||||
crate::DEFAULT_PG_VERSION,
|
||||
// Updating this version to 17 will cause the test to fail at the
|
||||
// next assert_eq!().
|
||||
16,
|
||||
);
|
||||
let expected_bytes = vec![
|
||||
/* TimelineMetadataHeader */
|
||||
|
||||
@@ -52,7 +52,9 @@ use crate::tenant::config::{
|
||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::inmemory_layer;
|
||||
use crate::tenant::timeline::ShutdownMode;
|
||||
use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Tenant, TenantState};
|
||||
use crate::tenant::{
|
||||
AttachedTenantConf, GcError, LoadConfigError, SpawnMode, TenantShard, TenantState,
|
||||
};
|
||||
use crate::virtual_file::MaybeFatalIo;
|
||||
use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
||||
|
||||
@@ -67,7 +69,7 @@ use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
||||
/// having a properly acquired generation (Secondary doesn't need a generation)
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum TenantSlot {
|
||||
Attached(Arc<Tenant>),
|
||||
Attached(Arc<TenantShard>),
|
||||
Secondary(Arc<SecondaryTenant>),
|
||||
/// In this state, other administrative operations acting on the TenantId should
|
||||
/// block, or return a retry indicator equivalent to HTTP 503.
|
||||
@@ -86,7 +88,7 @@ impl std::fmt::Debug for TenantSlot {
|
||||
|
||||
impl TenantSlot {
|
||||
/// Return the `Tenant` in this slot if attached, else None
|
||||
fn get_attached(&self) -> Option<&Arc<Tenant>> {
|
||||
fn get_attached(&self) -> Option<&Arc<TenantShard>> {
|
||||
match self {
|
||||
Self::Attached(t) => Some(t),
|
||||
Self::Secondary(_) => None,
|
||||
@@ -164,7 +166,7 @@ impl TenantStartupMode {
|
||||
/// Result type for looking up a TenantId to a specific shard
|
||||
pub(crate) enum ShardResolveResult {
|
||||
NotFound,
|
||||
Found(Arc<Tenant>),
|
||||
Found(Arc<TenantShard>),
|
||||
// Wait for this barrrier, then query again
|
||||
InProgress(utils::completion::Barrier),
|
||||
}
|
||||
@@ -173,7 +175,7 @@ impl TenantsMap {
|
||||
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
|
||||
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
||||
/// None is returned.
|
||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
|
||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<TenantShard>> {
|
||||
match self {
|
||||
TenantsMap::Initializing => None,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
@@ -410,7 +412,7 @@ fn load_tenant_config(
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Tenant::load_tenant_config(conf, &tenant_shard_id))
|
||||
Some(TenantShard::load_tenant_config(conf, &tenant_shard_id))
|
||||
}
|
||||
|
||||
/// Initial stage of load: walk the local tenants directory, clean up any temp files,
|
||||
@@ -606,7 +608,8 @@ pub async fn init_tenant_mgr(
|
||||
// Presence of a generation number implies attachment: attach the tenant
|
||||
// if it wasn't already, and apply the generation number.
|
||||
config_write_futs.push(async move {
|
||||
let r = Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
||||
let r =
|
||||
TenantShard::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
||||
(tenant_shard_id, location_conf, r)
|
||||
});
|
||||
}
|
||||
@@ -694,7 +697,7 @@ fn tenant_spawn(
|
||||
init_order: Option<InitializationOrder>,
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
||||
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||
// All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed
|
||||
// path, and contains a configuration file. Assertions that do synchronous I/O are limited to debug mode
|
||||
// to avoid impacting prod runtime performance.
|
||||
@@ -706,7 +709,7 @@ fn tenant_spawn(
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
Tenant::spawn(
|
||||
TenantShard::spawn(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
resources,
|
||||
@@ -883,12 +886,12 @@ impl TenantManager {
|
||||
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently
|
||||
/// undergoing a state change (i.e. slot is InProgress).
|
||||
///
|
||||
/// The return Tenant is not guaranteed to be active: check its status after obtaing it, or
|
||||
/// use [`Tenant::wait_to_become_active`] before using it if you will do I/O on it.
|
||||
/// The return TenantShard is not guaranteed to be active: check its status after obtaing it, or
|
||||
/// use [`TenantShard::wait_to_become_active`] before using it if you will do I/O on it.
|
||||
pub(crate) fn get_attached_tenant_shard(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
) -> Result<Arc<Tenant>, GetTenantError> {
|
||||
) -> Result<Arc<TenantShard>, GetTenantError> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
|
||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
|
||||
@@ -937,12 +940,12 @@ impl TenantManager {
|
||||
flush: Option<Duration>,
|
||||
mut spawn_mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<Arc<Tenant>>, UpsertLocationError> {
|
||||
) -> Result<Option<Arc<TenantShard>>, UpsertLocationError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
info!("configuring tenant location to state {new_location_config:?}");
|
||||
|
||||
enum FastPathModified {
|
||||
Attached(Arc<Tenant>),
|
||||
Attached(Arc<TenantShard>),
|
||||
Secondary(Arc<SecondaryTenant>),
|
||||
}
|
||||
|
||||
@@ -999,9 +1002,13 @@ impl TenantManager {
|
||||
// phase of writing config and/or waiting for flush, before returning.
|
||||
match fast_path_taken {
|
||||
Some(FastPathModified::Attached(tenant)) => {
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
TenantShard::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
// Transition to AttachedStale means we may well hold a valid generation
|
||||
// still, and have been requested to go stale as part of a migration. If
|
||||
@@ -1030,9 +1037,13 @@ impl TenantManager {
|
||||
return Ok(Some(tenant));
|
||||
}
|
||||
Some(FastPathModified::Secondary(_secondary_tenant)) => {
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
TenantShard::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
@@ -1122,7 +1133,7 @@ impl TenantManager {
|
||||
// Before activating either secondary or attached mode, persist the
|
||||
// configuration, so that on restart we will re-attach (or re-start
|
||||
// secondary) on the tenant.
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
TenantShard::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
@@ -1262,7 +1273,7 @@ impl TenantManager {
|
||||
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||
let timelines_path = self.conf.timelines_path(&tenant_shard_id);
|
||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;
|
||||
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)?;
|
||||
|
||||
if drop_cache {
|
||||
tracing::info!("Dropping local file cache");
|
||||
@@ -1297,7 +1308,7 @@ impl TenantManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
|
||||
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<TenantShard>> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
match &*locked {
|
||||
TenantsMap::Initializing => Vec::new(),
|
||||
@@ -1446,7 +1457,7 @@ impl TenantManager {
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
|
||||
pub(crate) async fn shard_split(
|
||||
&self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
new_shard_count: ShardCount,
|
||||
new_stripe_size: Option<ShardStripeSize>,
|
||||
ctx: &RequestContext,
|
||||
@@ -1476,7 +1487,7 @@ impl TenantManager {
|
||||
|
||||
pub(crate) async fn do_shard_split(
|
||||
&self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
new_shard_count: ShardCount,
|
||||
new_stripe_size: Option<ShardStripeSize>,
|
||||
ctx: &RequestContext,
|
||||
@@ -1703,7 +1714,7 @@ impl TenantManager {
|
||||
/// For each resident layer in the parent shard, we will hard link it into all of the child shards.
|
||||
async fn shard_split_hardlink(
|
||||
&self,
|
||||
parent_shard: &Tenant,
|
||||
parent_shard: &TenantShard,
|
||||
child_shards: Vec<TenantShardId>,
|
||||
) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
@@ -1988,7 +1999,7 @@ impl TenantManager {
|
||||
}
|
||||
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)
|
||||
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)
|
||||
.map_err(|e| Error::DetachReparent(e.into()))?;
|
||||
|
||||
let shard_identity = config.shard;
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
//! - Initiate upload queue with that [`IndexPart`].
|
||||
//! - Reschedule all lost operations by comparing the local filesystem state
|
||||
//! and remote state as per [`IndexPart`]. This is done in
|
||||
//! [`Tenant::timeline_init_and_sync`].
|
||||
//! [`TenantShard::timeline_init_and_sync`].
|
||||
//!
|
||||
//! Note that if we crash during file deletion between the index update
|
||||
//! that removes the file from the list of files, and deleting the remote file,
|
||||
@@ -171,7 +171,7 @@
|
||||
//! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is
|
||||
//! not created and the uploads are skipped.
|
||||
//!
|
||||
//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
|
||||
//! [`TenantShard::timeline_init_and_sync`]: super::TenantShard::timeline_init_and_sync
|
||||
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
|
||||
|
||||
pub(crate) mod download;
|
||||
@@ -192,11 +192,12 @@ pub(crate) use download::{
|
||||
download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file,
|
||||
list_remote_tenant_shards, list_remote_timelines,
|
||||
};
|
||||
use index::GcCompactionState;
|
||||
pub(crate) use index::LayerFileMetadata;
|
||||
use index::{EncryptionKey, EncryptionKeyId, EncryptionKeyPair, GcCompactionState, KeyVersion};
|
||||
use pageserver_api::models::{RelSizeMigration, TimelineArchivalState, TimelineVisibilityState};
|
||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
||||
use regex::Regex;
|
||||
use remote_keys::NaiveKms;
|
||||
use remote_storage::{
|
||||
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel,
|
||||
};
|
||||
@@ -367,6 +368,10 @@ pub(crate) struct RemoteTimelineClient {
|
||||
config: std::sync::RwLock<RemoteTimelineClientConfig>,
|
||||
|
||||
cancel: CancellationToken,
|
||||
|
||||
kms_impl: Option<NaiveKms>,
|
||||
|
||||
key_repo: std::sync::Mutex<HashMap<EncryptionKeyId, EncryptionKeyPair>>,
|
||||
}
|
||||
|
||||
impl Drop for RemoteTimelineClient {
|
||||
@@ -411,6 +416,9 @@ impl RemoteTimelineClient {
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
// TODO: make this configurable
|
||||
kms_impl: Some(NaiveKms::new(tenant_shard_id.tenant_id.to_string())),
|
||||
key_repo: std::sync::Mutex::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -727,9 +735,43 @@ impl RemoteTimelineClient {
|
||||
reason: "no need for a downloads gauge",
|
||||
},
|
||||
);
|
||||
let key_pair = if let Some(ref key_id) = layer_metadata.encryption_key {
|
||||
let wrapped_key = {
|
||||
let mut queue = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = queue.initialized_mut().unwrap();
|
||||
let encryption_key_pair =
|
||||
upload_queue.dirty.keys.iter().find(|key| &key.id == key_id);
|
||||
if let Some(encryption_key_pair) = encryption_key_pair {
|
||||
// TODO: also check if we have uploaded the key yet; we should never use a key that is not persisted
|
||||
encryption_key_pair.clone()
|
||||
} else {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!(
|
||||
"Encryption key pair not found in index_part.json"
|
||||
)));
|
||||
}
|
||||
};
|
||||
let Some(kms) = self.kms_impl.as_ref() else {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!(
|
||||
"KMS not configured when downloading encrypted layer file"
|
||||
)));
|
||||
};
|
||||
let plain_key = kms
|
||||
.decrypt(&wrapped_key.key)
|
||||
.context("failed to decrypt encryption key")
|
||||
.map_err(DownloadError::Other)?;
|
||||
Some(EncryptionKeyPair::new(
|
||||
wrapped_key.id,
|
||||
plain_key,
|
||||
wrapped_key.key,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
download::download_layer_file(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
key_pair.as_ref(),
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
layer_file_name,
|
||||
@@ -1250,6 +1292,14 @@ impl RemoteTimelineClient {
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
layer: ResidentLayer,
|
||||
) {
|
||||
let key_pair = {
|
||||
if let Some(key_id) = layer.metadata().encryption_key {
|
||||
let guard = self.key_repo.lock().unwrap();
|
||||
Some(guard.get(&key_id).cloned().unwrap())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
let metadata = layer.metadata();
|
||||
|
||||
upload_queue
|
||||
@@ -1264,7 +1314,7 @@ impl RemoteTimelineClient {
|
||||
"scheduled layer file upload {layer}",
|
||||
);
|
||||
|
||||
let op = UploadOp::UploadLayer(layer, metadata, None);
|
||||
let op = UploadOp::UploadLayer(layer, metadata, key_pair, None);
|
||||
self.metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
}
|
||||
@@ -1446,6 +1496,58 @@ impl RemoteTimelineClient {
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn is_kms_enabled(&self) -> bool {
|
||||
self.kms_impl.is_some()
|
||||
}
|
||||
|
||||
pub(crate) fn schedule_generate_encryption_key(
|
||||
self: &Arc<Self>,
|
||||
) -> Result<Option<EncryptionKeyPair>, NotInitialized> {
|
||||
let Some(kms_impl) = self.kms_impl.as_ref() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let plain_key = rand::random::<[u8; 32]>().to_vec(); // StdRng is cryptographically secure (?)
|
||||
let wrapped_key = kms_impl.encrypt(&plain_key).unwrap();
|
||||
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
let last_key = upload_queue.dirty.keys.last();
|
||||
let this_key_version = if let Some(last_key) = last_key {
|
||||
let key_version = EncryptionKeyId {
|
||||
version: last_key.id.version.next(),
|
||||
generation: self.generation,
|
||||
};
|
||||
assert!(key_version > last_key.id); // ensure key version is strictly increasing; no dup key versions
|
||||
key_version
|
||||
} else {
|
||||
EncryptionKeyId {
|
||||
version: KeyVersion(1),
|
||||
generation: self.generation,
|
||||
}
|
||||
};
|
||||
|
||||
let key_pair = EncryptionKeyPair {
|
||||
id: this_key_version.clone(),
|
||||
plain_key: plain_key.clone(),
|
||||
wrapped_key,
|
||||
};
|
||||
|
||||
upload_queue.dirty.keys.push(EncryptionKey {
|
||||
key: plain_key,
|
||||
id: this_key_version,
|
||||
created_at: Utc::now().naive_utc(),
|
||||
});
|
||||
|
||||
self.key_repo.lock().unwrap().insert(this_key_version, key_pair);
|
||||
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
Ok(Some(key_pair))
|
||||
}
|
||||
|
||||
/// Schedules a compaction update to the remote `index_part.json`.
|
||||
///
|
||||
/// `compacted_from` represent the L0 names which have been `compacted_to` L1 layers.
|
||||
@@ -1454,6 +1556,7 @@ impl RemoteTimelineClient {
|
||||
compacted_from: &[Layer],
|
||||
compacted_to: &[ResidentLayer],
|
||||
) -> Result<(), NotInitialized> {
|
||||
// Use the same key for all layers in a single compaction job
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
@@ -1715,6 +1818,7 @@ impl RemoteTimelineClient {
|
||||
uploaded.local_path(),
|
||||
&remote_path,
|
||||
uploaded.metadata().file_size,
|
||||
None, // TODO(chi): support encryption for those layer files uploaded using this interface
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
@@ -1757,6 +1861,8 @@ impl RemoteTimelineClient {
|
||||
adopted_as.metadata().generation,
|
||||
);
|
||||
|
||||
// TODO: support encryption for those layer files uploaded using this interface
|
||||
|
||||
backoff::retry(
|
||||
|| async {
|
||||
upload::copy_timeline_layer(
|
||||
@@ -1977,7 +2083,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
// Prepare upload.
|
||||
match &mut next_op {
|
||||
UploadOp::UploadLayer(layer, meta, mode) => {
|
||||
UploadOp::UploadLayer(layer, meta, _, mode) => {
|
||||
if upload_queue
|
||||
.recently_deleted
|
||||
.remove(&(layer.layer_desc().layer_name().clone(), meta.generation))
|
||||
@@ -2071,7 +2177,7 @@ impl RemoteTimelineClient {
|
||||
// Assert that we don't modify a layer that's referenced by the current index.
|
||||
if cfg!(debug_assertions) {
|
||||
let modified = match &task.op {
|
||||
UploadOp::UploadLayer(layer, layer_metadata, _) => {
|
||||
UploadOp::UploadLayer(layer, layer_metadata, _, _) => {
|
||||
vec![(layer.layer_desc().layer_name(), layer_metadata)]
|
||||
}
|
||||
UploadOp::Delete(delete) => {
|
||||
@@ -2093,7 +2199,7 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(layer, layer_metadata, mode) => {
|
||||
UploadOp::UploadLayer(layer, layer_metadata, encryption_key_pair, mode) => {
|
||||
// TODO: check if this mechanism can be removed now that can_bypass() performs
|
||||
// conflict checks during scheduling.
|
||||
if let Some(OpType::FlushDeletion) = mode {
|
||||
@@ -2174,6 +2280,7 @@ impl RemoteTimelineClient {
|
||||
local_path,
|
||||
&remote_path,
|
||||
layer_metadata.file_size,
|
||||
encryption_key_pair.clone(),
|
||||
&self.cancel,
|
||||
)
|
||||
.measure_remote_op(
|
||||
@@ -2324,7 +2431,7 @@ impl RemoteTimelineClient {
|
||||
upload_queue.inprogress_tasks.remove(&task.task_id);
|
||||
|
||||
let lsn_update = match task.op {
|
||||
UploadOp::UploadLayer(_, _, _) => None,
|
||||
UploadOp::UploadLayer(_, _, _, _) => None,
|
||||
UploadOp::UploadMetadata { ref uploaded } => {
|
||||
// the task id is reused as a monotonicity check for storing the "clean"
|
||||
// IndexPart.
|
||||
@@ -2403,7 +2510,7 @@ impl RemoteTimelineClient {
|
||||
)> {
|
||||
use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;
|
||||
let res = match op {
|
||||
UploadOp::UploadLayer(_, m, _) => (
|
||||
UploadOp::UploadLayer(_, m, _, _) => (
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
|
||||
@@ -2743,7 +2850,7 @@ mod tests {
|
||||
use crate::tenant::config::AttachmentMode;
|
||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::tenant::{TenantShard, Timeline};
|
||||
|
||||
pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
|
||||
format!("contents for {name}").into()
|
||||
@@ -2787,6 +2894,10 @@ mod tests {
|
||||
for entry in std::fs::read_dir(remote_path).unwrap().flatten() {
|
||||
let entry_name = entry.file_name();
|
||||
let fname = entry_name.to_str().unwrap();
|
||||
if fname.ends_with(".metadata") || fname.ends_with(".enc") {
|
||||
// ignore metadata and encryption key files; should use local_fs APIs instead in the future
|
||||
continue;
|
||||
}
|
||||
found.push(String::from(fname));
|
||||
}
|
||||
found.sort();
|
||||
@@ -2796,7 +2907,7 @@ mod tests {
|
||||
|
||||
struct TestSetup {
|
||||
harness: TenantHarness,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline: Arc<Timeline>,
|
||||
tenant_ctx: RequestContext,
|
||||
}
|
||||
@@ -2840,6 +2951,8 @@ mod tests {
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(&location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
kms_impl: None,
|
||||
key_repo: std::sync::Mutex::new(HashMap::new()),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::{backoff, pausable_failpoint};
|
||||
|
||||
use super::index::{IndexPart, LayerFileMetadata};
|
||||
use super::index::{EncryptionKeyPair, IndexPart, LayerFileMetadata};
|
||||
use super::manifest::TenantManifest;
|
||||
use super::{
|
||||
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path,
|
||||
@@ -51,6 +51,7 @@ use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
|
||||
pub async fn download_layer_file<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
key_pair: Option<&'a EncryptionKeyPair>,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
layer_file_name: &'a LayerName,
|
||||
@@ -86,7 +87,16 @@ pub async fn download_layer_file<'a>(
|
||||
|
||||
let bytes_amount = download_retry(
|
||||
|| async {
|
||||
download_object(storage, &remote_path, &temp_file_path, gate, cancel, ctx).await
|
||||
download_object(
|
||||
storage,
|
||||
key_pair,
|
||||
&remote_path,
|
||||
&temp_file_path,
|
||||
gate,
|
||||
cancel,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
},
|
||||
&format!("download {remote_path:?}"),
|
||||
cancel,
|
||||
@@ -145,6 +155,7 @@ pub async fn download_layer_file<'a>(
|
||||
/// The unlinking has _not_ been made durable.
|
||||
async fn download_object(
|
||||
storage: &GenericRemoteStorage,
|
||||
encryption_key_pair: Option<&EncryptionKeyPair>,
|
||||
src_path: &RemotePath,
|
||||
dst_path: &Utf8PathBuf,
|
||||
#[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate,
|
||||
@@ -160,9 +171,12 @@ async fn download_object(
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
let mut opts = DownloadOpts::default();
|
||||
if let Some(encryption_key_pair) = encryption_key_pair {
|
||||
opts.encryption_key = Some(encryption_key_pair.plain_key.to_vec());
|
||||
}
|
||||
|
||||
let download = storage.download(src_path, &opts, cancel).await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
@@ -452,7 +466,7 @@ async fn do_download_index_part(
|
||||
/// generation (normal case when migrating/restarting). Only if both of these return 404 do we fall back
|
||||
/// to listing objects.
|
||||
///
|
||||
/// * `my_generation`: the value of `[crate::tenant::Tenant::generation]`
|
||||
/// * `my_generation`: the value of `[crate::tenant::TenantShard::generation]`
|
||||
/// * `what`: for logging, what object are we downloading
|
||||
/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation)
|
||||
/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless
|
||||
|
||||
@@ -10,6 +10,8 @@ use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::models::RelSizeMigration;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::base64::Base64;
|
||||
use serde_with::serde_as;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -114,6 +116,70 @@ pub struct IndexPart {
|
||||
/// The timestamp when the timeline was marked invisible in synthetic size calculations.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) marked_invisible_at: Option<NaiveDateTime>,
|
||||
|
||||
/// The encryption key used to encrypt the timeline layer files.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty", default)]
|
||||
pub(crate) keys: Vec<EncryptionKey>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Ord, PartialOrd, Hash)]
|
||||
pub struct KeyVersion(pub u32);
|
||||
|
||||
impl KeyVersion {
|
||||
pub fn next(&self) -> Self {
|
||||
Self(self.0 + 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// An identifier for an encryption key. The scope of the key is the timeline (TBD).
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Ord, PartialOrd, Hash)]
|
||||
pub struct EncryptionKeyId {
|
||||
pub version: KeyVersion,
|
||||
pub generation: Generation,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct EncryptionKeyPair {
|
||||
pub id: EncryptionKeyId,
|
||||
pub plain_key: Vec<u8>,
|
||||
pub wrapped_key: Vec<u8>,
|
||||
}
|
||||
|
||||
impl EncryptionKeyPair {
|
||||
pub fn new(id: EncryptionKeyId, plain_key: Vec<u8>, wrapped_key: Vec<u8>) -> Self {
|
||||
Self {
|
||||
id,
|
||||
plain_key,
|
||||
wrapped_key,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for EncryptionKeyPair {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let display =
|
||||
base64::display::Base64Display::with_config(&self.wrapped_key, base64::STANDARD);
|
||||
struct DisplayAsDebug<T: std::fmt::Display>(T);
|
||||
impl<T: std::fmt::Display> std::fmt::Debug for DisplayAsDebug<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
f.debug_struct("EncryptionKeyPair")
|
||||
.field("id", &self.id)
|
||||
.field("plain_key", &"<REDACTED>")
|
||||
.field("wrapped_key", &DisplayAsDebug(&display))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
pub struct EncryptionKey {
|
||||
#[serde_as(as = "Base64")]
|
||||
pub key: Vec<u8>,
|
||||
pub id: EncryptionKeyId,
|
||||
pub created_at: NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
@@ -142,10 +208,12 @@ impl IndexPart {
|
||||
/// - 12: +l2_lsn
|
||||
/// - 13: +gc_compaction
|
||||
/// - 14: +marked_invisible_at
|
||||
const LATEST_VERSION: usize = 14;
|
||||
/// - 15: +keys and encryption_key in layer_metadata
|
||||
const LATEST_VERSION: usize = 15;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] =
|
||||
&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
@@ -165,6 +233,7 @@ impl IndexPart {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,14 +274,16 @@ impl IndexPart {
|
||||
/// Check for invariants in the index: this is useful when uploading an index to ensure that if
|
||||
/// we encounter a bug, we do not persist buggy metadata.
|
||||
pub(crate) fn validate(&self) -> Result<(), String> {
|
||||
if self.import_pgdata.is_none()
|
||||
&& self.metadata.ancestor_timeline().is_none()
|
||||
&& self.layer_metadata.is_empty()
|
||||
{
|
||||
// Unless we're in the middle of a raw pgdata import, or this is a child timeline,the index must
|
||||
// always have at least one layer.
|
||||
return Err("Index has no ancestor and no layers".to_string());
|
||||
}
|
||||
// We have to disable this check: we might need to upload an empty index part with new keys, or new `reldirv2` flag.
|
||||
|
||||
// if self.import_pgdata.is_none()
|
||||
// && self.metadata.ancestor_timeline().is_none()
|
||||
// && self.layer_metadata.is_empty()
|
||||
// {
|
||||
// // Unless we're in the middle of a raw pgdata import, or this is a child timeline,the index must
|
||||
// // always have at least one layer.
|
||||
// return Err("Index has no ancestor and no layers".to_string());
|
||||
// }
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -222,7 +293,7 @@ impl IndexPart {
|
||||
///
|
||||
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
|
||||
/// might have less or more metadata depending if upgrading or rolling back an upgrade.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct LayerFileMetadata {
|
||||
pub file_size: u64,
|
||||
|
||||
@@ -233,6 +304,9 @@ pub struct LayerFileMetadata {
|
||||
#[serde(default = "ShardIndex::unsharded")]
|
||||
#[serde(skip_serializing_if = "ShardIndex::is_unsharded")]
|
||||
pub shard: ShardIndex,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub encryption_key: Option<EncryptionKeyId>,
|
||||
}
|
||||
|
||||
impl LayerFileMetadata {
|
||||
@@ -241,6 +315,7 @@ impl LayerFileMetadata {
|
||||
file_size,
|
||||
generation,
|
||||
shard,
|
||||
encryption_key: None,
|
||||
}
|
||||
}
|
||||
/// Helper to get both generation and file size in a tuple
|
||||
@@ -453,14 +528,16 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -475,6 +552,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -502,14 +580,16 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -524,6 +604,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -552,14 +633,16 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -574,6 +657,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -627,6 +711,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -653,14 +738,16 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -675,6 +762,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -703,11 +791,13 @@ mod tests {
|
||||
file_size: 23289856,
|
||||
generation: Generation::new(1),
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 1015808,
|
||||
generation: Generation::new(1),
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: Lsn::from_str("0/15A7618").unwrap(),
|
||||
@@ -726,6 +816,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -756,14 +847,16 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -782,6 +875,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -815,12 +909,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -843,6 +939,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -877,12 +974,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -905,6 +1004,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -941,12 +1041,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -972,6 +1074,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1017,12 +1120,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -1052,6 +1157,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1098,12 +1204,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -1133,6 +1241,7 @@ mod tests {
|
||||
l2_lsn: None,
|
||||
gc_compaction: None,
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1183,12 +1292,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -1220,6 +1331,7 @@ mod tests {
|
||||
last_completed_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
}),
|
||||
marked_invisible_at: None,
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1271,12 +1383,14 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: None,
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
@@ -1308,6 +1422,139 @@ mod tests {
|
||||
last_completed_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
}),
|
||||
marked_invisible_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
keys: Vec::new(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v15_keys_are_parsed() {
|
||||
let example = r#"{
|
||||
"version": 15,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000, "encryption_key": { "version": 1, "generation": 5 } },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001, "encryption_key": { "version": 2, "generation": 6 } }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rel_size_migration": "legacy",
|
||||
"l2_lsn": "0/16960E8",
|
||||
"gc_compaction": {
|
||||
"last_completed_lsn": "0/16960E8"
|
||||
},
|
||||
"marked_invisible_at": "2023-07-31T09:00:00.123",
|
||||
"keys": [
|
||||
{
|
||||
"key": "dGVzdF9rZXk=",
|
||||
"id": {
|
||||
"version": 1,
|
||||
"generation": 5
|
||||
},
|
||||
"created_at": "2024-07-19T09:00:00.123"
|
||||
},
|
||||
{
|
||||
"key": "dGVzdF9rZXlfMg==",
|
||||
"id": {
|
||||
"version": 2,
|
||||
"generation": 6
|
||||
},
|
||||
"created_at": "2024-07-19T10:00:00.123"
|
||||
}
|
||||
]
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 15,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: Some(EncryptionKeyId {
|
||||
version: KeyVersion(1),
|
||||
generation: Generation::Valid(5),
|
||||
}),
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded(),
|
||||
encryption_key: Some(EncryptionKeyId {
|
||||
version: KeyVersion(2),
|
||||
generation: Generation::Valid(6),
|
||||
}),
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
}))),
|
||||
rel_size_migration: Some(RelSizeMigration::Legacy),
|
||||
l2_lsn: Some("0/16960E8".parse::<Lsn>().unwrap()),
|
||||
gc_compaction: Some(GcCompactionState {
|
||||
last_completed_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
}),
|
||||
marked_invisible_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
keys: vec![
|
||||
EncryptionKey {
|
||||
key: "test_key".as_bytes().to_vec(),
|
||||
id: EncryptionKeyId {
|
||||
version: KeyVersion(1),
|
||||
generation: Generation::Valid(5),
|
||||
},
|
||||
created_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
},
|
||||
EncryptionKey {
|
||||
key: "test_key_2".as_bytes().to_vec(),
|
||||
id: EncryptionKeyId {
|
||||
version: KeyVersion(2),
|
||||
generation: Generation::Valid(6),
|
||||
},
|
||||
created_at: parse_naive_datetime("2024-07-19T10:00:00.123000000"),
|
||||
}
|
||||
],
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
|
||||
@@ -17,7 +17,7 @@ use utils::id::{TenantId, TimelineId};
|
||||
use utils::{backoff, pausable_failpoint};
|
||||
|
||||
use super::Generation;
|
||||
use super::index::IndexPart;
|
||||
use super::index::{EncryptionKeyPair, IndexPart};
|
||||
use super::manifest::TenantManifest;
|
||||
use crate::tenant::remote_timeline_client::{
|
||||
remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,
|
||||
@@ -101,6 +101,7 @@ pub(super) async fn upload_timeline_layer<'a>(
|
||||
local_path: &'a Utf8Path,
|
||||
remote_path: &'a RemotePath,
|
||||
metadata_size: u64,
|
||||
encryption_key_pair: Option<EncryptionKeyPair>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
fail_point!("before-upload-layer", |_| {
|
||||
@@ -144,7 +145,14 @@ pub(super) async fn upload_timeline_layer<'a>(
|
||||
let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);
|
||||
|
||||
storage
|
||||
.upload(reader, fs_size, remote_path, None, cancel)
|
||||
.upload_with_encryption(
|
||||
reader,
|
||||
fs_size,
|
||||
remote_path,
|
||||
None,
|
||||
encryption_key_pair.as_ref().map(|k| k.plain_key.as_slice()),
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("upload layer from local path '{local_path}'"))
|
||||
}
|
||||
|
||||
@@ -1310,6 +1310,7 @@ impl<'a> TenantDownloader<'a> {
|
||||
let downloaded_bytes = download_layer_file(
|
||||
self.conf,
|
||||
self.remote_storage,
|
||||
None, // TODO: add encryption key pair
|
||||
*tenant_shard_id,
|
||||
*timeline_id,
|
||||
&layer.name,
|
||||
|
||||
@@ -21,7 +21,7 @@ use super::scheduler::{
|
||||
use super::{CommandRequest, SecondaryTenantError, UploadCommand};
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
use crate::metrics::SECONDARY_MODE;
|
||||
use crate::tenant::Tenant;
|
||||
use crate::tenant::TenantShard;
|
||||
use crate::tenant::config::AttachmentMode;
|
||||
use crate::tenant::mgr::{GetTenantError, TenantManager};
|
||||
use crate::tenant::remote_timeline_client::remote_heatmap_path;
|
||||
@@ -74,7 +74,7 @@ impl RunningJob for WriteInProgress {
|
||||
}
|
||||
|
||||
struct UploadPending {
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
last_upload: Option<LastUploadState>,
|
||||
target_time: Option<Instant>,
|
||||
period: Option<Duration>,
|
||||
@@ -106,7 +106,7 @@ impl scheduler::Completion for WriteComplete {
|
||||
struct UploaderTenantState {
|
||||
// This Weak only exists to enable culling idle instances of this type
|
||||
// when the Tenant has been deallocated.
|
||||
tenant: Weak<Tenant>,
|
||||
tenant: Weak<TenantShard>,
|
||||
|
||||
/// Digest of the serialized heatmap that we last successfully uploaded
|
||||
last_upload_state: Option<LastUploadState>,
|
||||
@@ -357,7 +357,7 @@ struct LastUploadState {
|
||||
/// of the object we would have uploaded.
|
||||
async fn upload_tenant_heatmap(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
last_upload: Option<LastUploadState>,
|
||||
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
@@ -360,7 +360,7 @@ where
|
||||
|
||||
/// Periodic execution phase: inspect all attached tenants and schedule any work they require.
|
||||
///
|
||||
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::Tenant`] or [`crate::tenant::secondary::SecondaryTenant`]
|
||||
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::TenantShard`] or [`crate::tenant::secondary::SecondaryTenant`]
|
||||
///
|
||||
/// This function resets the pending list: it is assumed that the caller may change their mind about
|
||||
/// which tenants need work between calls to schedule_iteration.
|
||||
|
||||
@@ -12,7 +12,7 @@ use tracing::*;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::{GcError, LogicalSizeCalculationCause, Tenant};
|
||||
use super::{GcError, LogicalSizeCalculationCause, TenantShard};
|
||||
use crate::context::RequestContext;
|
||||
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
||||
use crate::tenant::{MaybeOffloaded, Timeline};
|
||||
@@ -156,7 +156,7 @@ pub struct TimelineInputs {
|
||||
/// initdb_lsn branchpoints* next_pitr_cutoff latest
|
||||
/// ```
|
||||
pub(super) async fn gather_inputs(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
limit: &Arc<Semaphore>,
|
||||
max_retention_period: Option<u64>,
|
||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||
|
||||
@@ -1620,7 +1620,7 @@ pub(crate) mod test {
|
||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
||||
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::tenant::{TenantShard, Timeline};
|
||||
|
||||
/// Construct an index for a fictional delta layer and and then
|
||||
/// traverse in order to plan vectored reads for a query. Finally,
|
||||
@@ -2209,7 +2209,7 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
pub(crate) async fn produce_delta_layer(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
tline: &Arc<Timeline>,
|
||||
mut deltas: Vec<(Key, Lsn, Value)>,
|
||||
ctx: &RequestContext,
|
||||
|
||||
@@ -1228,7 +1228,7 @@ mod test {
|
||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
||||
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::tenant::{TenantShard, Timeline};
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_layer_rewrite() {
|
||||
@@ -1410,7 +1410,7 @@ mod test {
|
||||
}
|
||||
|
||||
async fn produce_image_layer(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
tline: &Arc<Timeline>,
|
||||
mut images: Vec<(Key, Bytes)>,
|
||||
lsn: Lsn,
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
|
||||
use crate::tenant::throttle::Stats;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::compaction::CompactionOutcome;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use crate::tenant::{TenantShard, TenantState};
|
||||
|
||||
/// Semaphore limiting concurrent background tasks (across all tenants).
|
||||
///
|
||||
@@ -117,7 +117,7 @@ pub(crate) async fn acquire_concurrency_permit(
|
||||
}
|
||||
|
||||
/// Start per tenant background loops: compaction, GC, and ingest housekeeping.
|
||||
pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>) {
|
||||
pub fn start_background_loops(tenant: &Arc<TenantShard>, can_start: Option<&Barrier>) {
|
||||
let tenant_shard_id = tenant.tenant_shard_id;
|
||||
|
||||
task_mgr::spawn(
|
||||
@@ -198,7 +198,7 @@ pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>)
|
||||
}
|
||||
|
||||
/// Compaction task's main loop.
|
||||
async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn compaction_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
const BASE_BACKOFF_SECS: f64 = 1.0;
|
||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||
const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10);
|
||||
@@ -348,7 +348,7 @@ pub(crate) fn log_compaction_error(
|
||||
}
|
||||
|
||||
/// GC task's main loop.
|
||||
async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||
let mut error_run = 0; // consecutive errors
|
||||
|
||||
@@ -432,7 +432,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
}
|
||||
|
||||
/// Tenant housekeeping's main loop.
|
||||
async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn tenant_housekeeping_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
let mut last_throttle_flag_reset_at = Instant::now();
|
||||
loop {
|
||||
if wait_for_active_tenant(&tenant, &cancel).await.is_break() {
|
||||
@@ -483,7 +483,7 @@ async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
|
||||
|
||||
/// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down.
|
||||
async fn wait_for_active_tenant(
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
cancel: &CancellationToken,
|
||||
) -> ControlFlow<()> {
|
||||
if tenant.current_state() == TenantState::Active {
|
||||
|
||||
@@ -412,7 +412,7 @@ pub struct Timeline {
|
||||
/// Timeline deletion will acquire both compaction and gc locks in whatever order.
|
||||
gc_lock: tokio::sync::Mutex<()>,
|
||||
|
||||
/// Cloned from [`super::Tenant::pagestream_throttle`] on construction.
|
||||
/// Cloned from [`super::TenantShard::pagestream_throttle`] on construction.
|
||||
pub(crate) pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,
|
||||
|
||||
/// Size estimator for aux file v2
|
||||
@@ -2065,7 +2065,7 @@ impl Timeline {
|
||||
|
||||
pub(crate) fn activate(
|
||||
self: &Arc<Self>,
|
||||
parent: Arc<crate::tenant::Tenant>,
|
||||
parent: Arc<crate::tenant::TenantShard>,
|
||||
broker_client: BrokerClientChannel,
|
||||
background_jobs_can_start: Option<&completion::Barrier>,
|
||||
ctx: &RequestContext,
|
||||
@@ -3325,7 +3325,7 @@ impl Timeline {
|
||||
// (1) and (4)
|
||||
// TODO: this is basically a no-op now, should we remove it?
|
||||
self.remote_client.schedule_barrier()?;
|
||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
||||
// TenantShard::create_timeline will wait for these uploads to happen before returning, or
|
||||
// on retry.
|
||||
|
||||
// Now that we have the full layer map, we may calculate the visibility of layers within it (a global scan)
|
||||
@@ -4864,6 +4864,7 @@ impl Timeline {
|
||||
else {
|
||||
panic!("delta layer cannot be empty if no filter is applied");
|
||||
};
|
||||
|
||||
(
|
||||
// FIXME: even though we have a single image and single delta layer assumption
|
||||
// we push them to vec
|
||||
@@ -5754,7 +5755,7 @@ impl Timeline {
|
||||
/// from our ancestor to be branches of this timeline.
|
||||
pub(crate) async fn prepare_to_detach_from_ancestor(
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
tenant: &crate::tenant::TenantShard,
|
||||
options: detach_ancestor::Options,
|
||||
behavior: DetachBehavior,
|
||||
ctx: &RequestContext,
|
||||
@@ -5773,7 +5774,7 @@ impl Timeline {
|
||||
/// resetting the tenant.
|
||||
pub(crate) async fn detach_from_ancestor_and_reparent(
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
tenant: &crate::tenant::TenantShard,
|
||||
prepared: detach_ancestor::PreparedTimelineDetach,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
@@ -5797,7 +5798,7 @@ impl Timeline {
|
||||
/// The tenant must've been reset if ancestry was modified previously (in tenant manager).
|
||||
pub(crate) async fn complete_detaching_timeline_ancestor(
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
tenant: &crate::tenant::TenantShard,
|
||||
attempt: detach_ancestor::Attempt,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), detach_ancestor::Error> {
|
||||
@@ -6859,14 +6860,14 @@ impl Timeline {
|
||||
/// Persistently blocks gc for `Manual` reason.
|
||||
///
|
||||
/// Returns true if no such block existed before, false otherwise.
|
||||
pub(crate) async fn block_gc(&self, tenant: &super::Tenant) -> anyhow::Result<bool> {
|
||||
pub(crate) async fn block_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<bool> {
|
||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||
tenant.gc_block.insert(self, GcBlockingReason::Manual).await
|
||||
}
|
||||
|
||||
/// Persistently unblocks gc for `Manual` reason.
|
||||
pub(crate) async fn unblock_gc(&self, tenant: &super::Tenant) -> anyhow::Result<()> {
|
||||
pub(crate) async fn unblock_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<()> {
|
||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||
tenant.gc_block.remove(self, GcBlockingReason::Manual).await
|
||||
@@ -6884,8 +6885,8 @@ impl Timeline {
|
||||
|
||||
/// Force create an image layer and place it into the layer map.
|
||||
///
|
||||
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
|
||||
/// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
|
||||
/// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
|
||||
/// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
|
||||
/// placed into the layer map in one run AND be validated.
|
||||
#[cfg(test)]
|
||||
pub(super) async fn force_create_image_layer(
|
||||
@@ -6932,17 +6933,15 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// Update remote_timeline_client state to reflect existence of this layer
|
||||
self.remote_client
|
||||
.schedule_layer_file_upload(image_layer)
|
||||
.unwrap();
|
||||
self.remote_client.schedule_layer_file_upload(image_layer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Force create a delta layer and place it into the layer map.
|
||||
///
|
||||
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
|
||||
/// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
|
||||
/// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
|
||||
/// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
|
||||
/// placed into the layer map in one run AND be validated.
|
||||
#[cfg(test)]
|
||||
pub(super) async fn force_create_delta_layer(
|
||||
@@ -6995,9 +6994,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// Update remote_timeline_client state to reflect existence of this layer
|
||||
self.remote_client
|
||||
.schedule_layer_file_upload(delta_layer)
|
||||
.unwrap();
|
||||
self.remote_client.schedule_layer_file_upload(delta_layer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
|
||||
use std::ops::{Deref, Range};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use super::layer_manager::LayerManager;
|
||||
use super::{
|
||||
@@ -77,7 +77,7 @@ const COMPACTION_DELTA_THRESHOLD: usize = 5;
|
||||
/// shard split, which gets expensive for large tenants.
|
||||
const ANCESTOR_COMPACTION_REWRITE_THRESHOLD: f64 = 0.3;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
||||
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize)]
|
||||
pub struct GcCompactionJobId(pub usize);
|
||||
|
||||
impl std::fmt::Display for GcCompactionJobId {
|
||||
@@ -105,6 +105,43 @@ pub enum GcCompactionQueueItem {
|
||||
Notify(GcCompactionJobId, Option<Lsn>),
|
||||
}
|
||||
|
||||
/// Statistics for gc-compaction meta jobs, which contains several sub compaction jobs.
|
||||
#[derive(Debug, Clone, Serialize, Default)]
|
||||
pub struct GcCompactionMetaStatistics {
|
||||
/// The total number of sub compaction jobs.
|
||||
pub total_sub_compaction_jobs: usize,
|
||||
/// The total number of sub compaction jobs that failed.
|
||||
pub failed_sub_compaction_jobs: usize,
|
||||
/// The total number of sub compaction jobs that succeeded.
|
||||
pub succeeded_sub_compaction_jobs: usize,
|
||||
/// The layer size before compaction.
|
||||
pub before_compaction_layer_size: u64,
|
||||
/// The layer size after compaction.
|
||||
pub after_compaction_layer_size: u64,
|
||||
/// The start time of the meta job.
|
||||
pub start_time: Option<SystemTime>,
|
||||
/// The end time of the meta job.
|
||||
pub end_time: Option<SystemTime>,
|
||||
/// The duration of the meta job.
|
||||
pub duration_secs: f64,
|
||||
/// The id of the meta job.
|
||||
pub meta_job_id: GcCompactionJobId,
|
||||
/// The LSN below which the layers are compacted, used to compute the statistics.
|
||||
pub below_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl GcCompactionMetaStatistics {
|
||||
fn finalize(&mut self) {
|
||||
let end_time = SystemTime::now();
|
||||
if let Some(start_time) = self.start_time {
|
||||
if let Ok(duration) = end_time.duration_since(start_time) {
|
||||
self.duration_secs = duration.as_secs_f64();
|
||||
}
|
||||
}
|
||||
self.end_time = Some(end_time);
|
||||
}
|
||||
}
|
||||
|
||||
impl GcCompactionQueueItem {
|
||||
pub fn into_compact_info_resp(
|
||||
self,
|
||||
@@ -142,6 +179,7 @@ struct GcCompactionQueueInner {
|
||||
queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,
|
||||
guards: HashMap<GcCompactionJobId, GcCompactionGuardItems>,
|
||||
last_id: GcCompactionJobId,
|
||||
meta_statistics: Option<GcCompactionMetaStatistics>,
|
||||
}
|
||||
|
||||
impl GcCompactionQueueInner {
|
||||
@@ -173,6 +211,7 @@ impl GcCompactionQueue {
|
||||
queued: VecDeque::new(),
|
||||
guards: HashMap::new(),
|
||||
last_id: GcCompactionJobId(0),
|
||||
meta_statistics: None,
|
||||
}),
|
||||
consumer_lock: tokio::sync::Mutex::new(()),
|
||||
}
|
||||
@@ -357,6 +396,23 @@ impl GcCompactionQueue {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_layer_below_lsn(
|
||||
&self,
|
||||
timeline: &Arc<Timeline>,
|
||||
lsn: Lsn,
|
||||
) -> Result<u64, CompactionError> {
|
||||
let guard = timeline.layers.read().await;
|
||||
let layer_map = guard.layer_map()?;
|
||||
let layers = layer_map.iter_historic_layers().collect_vec();
|
||||
let mut size = 0;
|
||||
for layer in layers {
|
||||
if layer.lsn_range.start <= lsn {
|
||||
size += layer.file_size();
|
||||
}
|
||||
}
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
/// Notify the caller the job has finished and unblock GC.
|
||||
fn notify_and_unblock(&self, id: GcCompactionJobId) {
|
||||
info!("compaction job id={} finished", id);
|
||||
@@ -366,6 +422,16 @@ impl GcCompactionQueue {
|
||||
let _ = tx.send(());
|
||||
}
|
||||
}
|
||||
if let Some(ref meta_statistics) = guard.meta_statistics {
|
||||
if meta_statistics.meta_job_id == id {
|
||||
if let Ok(stats) = serde_json::to_string(&meta_statistics) {
|
||||
info!(
|
||||
"gc-compaction meta statistics for job id = {}: {}",
|
||||
id, stats
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn clear_running_job(&self) {
|
||||
@@ -405,7 +471,11 @@ impl GcCompactionQueue {
|
||||
let mut pending_tasks = Vec::new();
|
||||
// gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate.
|
||||
// And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN.
|
||||
let expected_l2_lsn = jobs.iter().map(|job| job.compact_lsn_range.end).max();
|
||||
let expected_l2_lsn = jobs
|
||||
.iter()
|
||||
.map(|job| job.compact_lsn_range.end)
|
||||
.max()
|
||||
.unwrap();
|
||||
for job in jobs {
|
||||
// Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions`
|
||||
// until we do further refactors to allow directly call `compact_with_gc`.
|
||||
@@ -430,9 +500,13 @@ impl GcCompactionQueue {
|
||||
if !auto {
|
||||
pending_tasks.push(GcCompactionQueueItem::Notify(id, None));
|
||||
} else {
|
||||
pending_tasks.push(GcCompactionQueueItem::Notify(id, expected_l2_lsn));
|
||||
pending_tasks.push(GcCompactionQueueItem::Notify(id, Some(expected_l2_lsn)));
|
||||
}
|
||||
|
||||
let layer_size = self
|
||||
.collect_layer_below_lsn(timeline, expected_l2_lsn)
|
||||
.await?;
|
||||
|
||||
{
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
let mut tasks = Vec::new();
|
||||
@@ -444,7 +518,16 @@ impl GcCompactionQueue {
|
||||
for item in tasks {
|
||||
guard.queued.push_front(item);
|
||||
}
|
||||
guard.meta_statistics = Some(GcCompactionMetaStatistics {
|
||||
meta_job_id: id,
|
||||
start_time: Some(SystemTime::now()),
|
||||
before_compaction_layer_size: layer_size,
|
||||
below_lsn: expected_l2_lsn,
|
||||
total_sub_compaction_jobs: jobs_len,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
info!(
|
||||
"scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs",
|
||||
jobs_len
|
||||
@@ -573,6 +656,10 @@ impl GcCompactionQueue {
|
||||
Err(err) => {
|
||||
warn!(%err, "failed to run gc-compaction subcompaction job");
|
||||
self.clear_running_job();
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||
meta_statistics.failed_sub_compaction_jobs += 1;
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
@@ -582,8 +669,34 @@ impl GcCompactionQueue {
|
||||
// we need to clean things up before returning from the function.
|
||||
yield_for_l0 = true;
|
||||
}
|
||||
{
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||
meta_statistics.succeeded_sub_compaction_jobs += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
GcCompactionQueueItem::Notify(id, l2_lsn) => {
|
||||
let below_lsn = {
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||
meta_statistics.below_lsn
|
||||
} else {
|
||||
Lsn::INVALID
|
||||
}
|
||||
};
|
||||
let layer_size = if below_lsn != Lsn::INVALID {
|
||||
self.collect_layer_below_lsn(timeline, below_lsn).await?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
{
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||
meta_statistics.after_compaction_layer_size = layer_size;
|
||||
meta_statistics.finalize();
|
||||
}
|
||||
}
|
||||
self.notify_and_unblock(id);
|
||||
if let Some(l2_lsn) = l2_lsn {
|
||||
let current_l2_lsn = timeline
|
||||
@@ -1178,6 +1291,7 @@ impl Timeline {
|
||||
.parts
|
||||
.extend(sparse_partitioning.into_dense().parts);
|
||||
|
||||
|
||||
// 3. Create new image layers for partitions that have been modified "enough".
|
||||
let (image_layers, outcome) = self
|
||||
.create_image_layers(
|
||||
|
||||
@@ -18,8 +18,8 @@ use crate::tenant::remote_timeline_client::{
|
||||
PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
|
||||
};
|
||||
use crate::tenant::{
|
||||
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, TenantManifestError,
|
||||
Timeline, TimelineOrOffloaded,
|
||||
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, TenantManifestError,
|
||||
TenantShard, Timeline, TimelineOrOffloaded,
|
||||
};
|
||||
use crate::virtual_file::MaybeFatalIo;
|
||||
|
||||
@@ -113,7 +113,7 @@ pub(super) async fn delete_local_timeline_directory(
|
||||
/// It is important that this gets called when DeletionGuard is being held.
|
||||
/// For more context see comments in [`make_timeline_delete_guard`]
|
||||
async fn remove_maybe_offloaded_timeline_from_tenant(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
_: &DeletionGuard, // using it as a witness
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -192,7 +192,7 @@ impl DeleteTimelineFlow {
|
||||
// error out if some of the shutdown tasks have already been completed!
|
||||
#[instrument(skip_all)]
|
||||
pub async fn run(
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
super::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -288,7 +288,7 @@ impl DeleteTimelineFlow {
|
||||
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
||||
#[instrument(skip_all, fields(%timeline_id))]
|
||||
pub(crate) async fn resume_deletion(
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
local_metadata: &TimelineMetadata,
|
||||
remote_client: RemoteTimelineClient,
|
||||
@@ -338,7 +338,7 @@ impl DeleteTimelineFlow {
|
||||
fn schedule_background(
|
||||
guard: DeletionGuard,
|
||||
conf: &'static PageServerConf,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline: TimelineOrOffloaded,
|
||||
remote_client: Arc<RemoteTimelineClient>,
|
||||
) {
|
||||
@@ -381,7 +381,7 @@ impl DeleteTimelineFlow {
|
||||
async fn background(
|
||||
mut guard: DeletionGuard,
|
||||
conf: &PageServerConf,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
remote_client: Arc<RemoteTimelineClient>,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
@@ -435,7 +435,7 @@ pub(super) enum TimelineDeleteGuardKind {
|
||||
}
|
||||
|
||||
pub(super) fn make_timeline_delete_guard(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
guard_kind: TimelineDeleteGuardKind,
|
||||
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
||||
|
||||
@@ -23,7 +23,7 @@ use super::layer_manager::LayerManager;
|
||||
use super::{FlushLayerError, Timeline};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::Tenant;
|
||||
use crate::tenant::TenantShard;
|
||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor;
|
||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||
use crate::tenant::storage_layer::{
|
||||
@@ -265,7 +265,7 @@ async fn generate_tombstone_image_layer(
|
||||
/// See [`Timeline::prepare_to_detach_from_ancestor`]
|
||||
pub(super) async fn prepare(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
behavior: DetachBehavior,
|
||||
options: Options,
|
||||
ctx: &RequestContext,
|
||||
@@ -590,7 +590,7 @@ pub(super) async fn prepare(
|
||||
|
||||
async fn start_new_attempt(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
@@ -611,7 +611,7 @@ async fn start_new_attempt(
|
||||
|
||||
async fn continue_with_blocked_gc(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
@@ -622,7 +622,7 @@ async fn continue_with_blocked_gc(
|
||||
|
||||
fn obtain_exclusive_attempt(
|
||||
detached: &Timeline,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Result<Attempt, Error> {
|
||||
@@ -655,7 +655,7 @@ fn obtain_exclusive_attempt(
|
||||
|
||||
fn reparented_direct_children(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
) -> Result<HashSet<TimelineId>, Error> {
|
||||
let mut all_direct_children = tenant
|
||||
.timelines
|
||||
@@ -950,7 +950,7 @@ impl DetachingAndReparenting {
|
||||
/// See [`Timeline::detach_from_ancestor_and_reparent`].
|
||||
pub(super) async fn detach_and_reparent(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
prepared: PreparedTimelineDetach,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
@@ -1184,7 +1184,7 @@ pub(super) async fn detach_and_reparent(
|
||||
|
||||
pub(super) async fn complete(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
mut attempt: Attempt,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<(), Error> {
|
||||
@@ -1258,7 +1258,7 @@ where
|
||||
}
|
||||
|
||||
fn check_no_archived_children_of_ancestor(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
detached: &Arc<Timeline>,
|
||||
ancestor: &Arc<Timeline>,
|
||||
ancestor_lsn: Lsn,
|
||||
|
||||
@@ -33,7 +33,7 @@ use crate::tenant::size::CalculateSyntheticSizeError;
|
||||
use crate::tenant::storage_layer::LayerVisibilityHint;
|
||||
use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random};
|
||||
use crate::tenant::timeline::EvictionError;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct EvictionTaskTimelineState {
|
||||
@@ -48,7 +48,7 @@ pub struct EvictionTaskTenantState {
|
||||
impl Timeline {
|
||||
pub(super) fn launch_eviction_task(
|
||||
self: &Arc<Self>,
|
||||
parent: Arc<Tenant>,
|
||||
parent: Arc<TenantShard>,
|
||||
background_tasks_can_start: Option<&completion::Barrier>,
|
||||
) {
|
||||
let self_clone = Arc::clone(self);
|
||||
@@ -75,7 +75,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||
async fn eviction_task(self: Arc<Self>, tenant: Arc<Tenant>) {
|
||||
async fn eviction_task(self: Arc<Self>, tenant: Arc<TenantShard>) {
|
||||
// acquire the gate guard only once within a useful span
|
||||
let Ok(guard) = self.gate.enter() else {
|
||||
return;
|
||||
@@ -118,7 +118,7 @@ impl Timeline {
|
||||
#[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
|
||||
async fn eviction_iteration(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
policy: &EvictionPolicy,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -175,7 +175,7 @@ impl Timeline {
|
||||
|
||||
async fn eviction_iteration_threshold(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -309,7 +309,7 @@ impl Timeline {
|
||||
/// disk usage based eviction task.
|
||||
async fn imitiate_only(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -363,7 +363,7 @@ impl Timeline {
|
||||
#[instrument(skip_all)]
|
||||
async fn imitate_layer_accesses(
|
||||
&self,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -499,7 +499,7 @@ impl Timeline {
|
||||
#[instrument(skip_all)]
|
||||
async fn imitate_synthetic_size_calculation_worker(
|
||||
&self,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
|
||||
@@ -244,7 +244,8 @@ impl RemoteStorageWrapper {
|
||||
kind: DownloadKind::Large,
|
||||
etag: None,
|
||||
byte_start: Bound::Included(start_inclusive),
|
||||
byte_end: Bound::Excluded(end_exclusive)
|
||||
byte_end: Bound::Excluded(end_exclusive),
|
||||
encryption_key: None,
|
||||
},
|
||||
&self.cancel)
|
||||
.await?;
|
||||
|
||||
@@ -8,7 +8,7 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
|
||||
use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
|
||||
use crate::tenant::{
|
||||
DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded,
|
||||
DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,
|
||||
};
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
@@ -33,7 +33,7 @@ impl From<TenantManifestError> for OffloadError {
|
||||
}
|
||||
|
||||
pub(crate) async fn offload_timeline(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<(), OffloadError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -123,7 +123,7 @@ pub(crate) async fn offload_timeline(
|
||||
///
|
||||
/// Returns the strong count of the timeline `Arc`
|
||||
fn remove_timeline_from_tenant(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Timeline,
|
||||
_: &DeletionGuard, // using it as a witness
|
||||
) -> usize {
|
||||
|
||||
@@ -15,17 +15,19 @@ use super::Timeline;
|
||||
use crate::context::RequestContext;
|
||||
use crate::import_datadir;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded};
|
||||
use crate::tenant::{
|
||||
CreateTimelineError, CreateTimelineIdempotency, TenantShard, TimelineOrOffloaded,
|
||||
};
|
||||
|
||||
/// A timeline with some of its files on disk, being initialized.
|
||||
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
|
||||
/// its local files are removed. If we crash while this class exists, then the timeline's local
|
||||
/// state is cleaned up during [`Tenant::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
||||
/// state is cleaned up during [`TenantShard::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
||||
///
|
||||
/// The caller is responsible for proper timeline data filling before the final init.
|
||||
#[must_use]
|
||||
pub struct UninitializedTimeline<'t> {
|
||||
pub(crate) owning_tenant: &'t Tenant,
|
||||
pub(crate) owning_tenant: &'t TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
/// Whether we spawned the inner Timeline's tasks such that we must later shut it down
|
||||
@@ -35,7 +37,7 @@ pub struct UninitializedTimeline<'t> {
|
||||
|
||||
impl<'t> UninitializedTimeline<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &'t Tenant,
|
||||
owning_tenant: &'t TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
) -> Self {
|
||||
@@ -156,7 +158,7 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
/// Prepares timeline data by loading it from the basebackup archive.
|
||||
pub(crate) async fn import_basebackup_from_tar(
|
||||
mut self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
||||
base_lsn: Lsn,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
@@ -227,17 +229,17 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
|
||||
error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
|
||||
}
|
||||
}
|
||||
// Having cleaned up, we can release this TimelineId in `[Tenant::timelines_creating]` to allow other
|
||||
// Having cleaned up, we can release this TimelineId in `[TenantShard::timelines_creating]` to allow other
|
||||
// timeline creation attempts under this TimelineId to proceed
|
||||
drop(create_guard);
|
||||
}
|
||||
|
||||
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
||||
/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
/// is kept in `[TenantShard::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
#[must_use]
|
||||
pub(crate) struct TimelineCreateGuard {
|
||||
pub(crate) _tenant_gate_guard: GateGuard,
|
||||
pub(crate) owning_tenant: Arc<Tenant>,
|
||||
pub(crate) owning_tenant: Arc<TenantShard>,
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
pub(crate) timeline_path: Utf8PathBuf,
|
||||
pub(crate) idempotency: CreateTimelineIdempotency,
|
||||
@@ -263,7 +265,7 @@ pub(crate) enum TimelineExclusionError {
|
||||
|
||||
impl TimelineCreateGuard {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &Arc<Tenant>,
|
||||
owning_tenant: &Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
timeline_path: Utf8PathBuf,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
|
||||
@@ -9,6 +9,7 @@ use tracing::info;
|
||||
use utils::generation::Generation;
|
||||
use utils::lsn::{AtomicLsn, Lsn};
|
||||
|
||||
use super::remote_timeline_client::index::EncryptionKeyPair;
|
||||
use super::remote_timeline_client::is_same_remote_layer_path;
|
||||
use super::storage_layer::{AsLayerDesc as _, LayerName, ResidentLayer};
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
@@ -245,7 +246,7 @@ impl UploadQueueInitialized {
|
||||
pub(crate) fn num_inprogress_layer_uploads(&self) -> usize {
|
||||
self.inprogress_tasks
|
||||
.iter()
|
||||
.filter(|(_, t)| matches!(t.op, UploadOp::UploadLayer(_, _, _)))
|
||||
.filter(|(_, t)| matches!(t.op, UploadOp::UploadLayer(_, _, _, _)))
|
||||
.count()
|
||||
}
|
||||
|
||||
@@ -461,7 +462,12 @@ pub struct Delete {
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum UploadOp {
|
||||
/// Upload a layer file. The last field indicates the last operation for thie file.
|
||||
UploadLayer(ResidentLayer, LayerFileMetadata, Option<OpType>),
|
||||
UploadLayer(
|
||||
ResidentLayer,
|
||||
LayerFileMetadata,
|
||||
Option<EncryptionKeyPair>,
|
||||
Option<OpType>,
|
||||
),
|
||||
|
||||
/// Upload a index_part.json file
|
||||
UploadMetadata {
|
||||
@@ -483,7 +489,7 @@ pub enum UploadOp {
|
||||
impl std::fmt::Display for UploadOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
UploadOp::UploadLayer(layer, metadata, mode) => {
|
||||
UploadOp::UploadLayer(layer, metadata, _, mode) => {
|
||||
write!(
|
||||
f,
|
||||
"UploadLayer({}, size={:?}, gen={:?}, mode={:?})",
|
||||
@@ -517,13 +523,13 @@ impl UploadOp {
|
||||
(UploadOp::Shutdown, _) | (_, UploadOp::Shutdown) => false,
|
||||
|
||||
// Uploads and deletes can bypass each other unless they're for the same file.
|
||||
(UploadOp::UploadLayer(a, ameta, _), UploadOp::UploadLayer(b, bmeta, _)) => {
|
||||
(UploadOp::UploadLayer(a, ameta, _, _), UploadOp::UploadLayer(b, bmeta, _, _)) => {
|
||||
let aname = &a.layer_desc().layer_name();
|
||||
let bname = &b.layer_desc().layer_name();
|
||||
!is_same_remote_layer_path(aname, ameta, bname, bmeta)
|
||||
}
|
||||
(UploadOp::UploadLayer(u, umeta, _), UploadOp::Delete(d))
|
||||
| (UploadOp::Delete(d), UploadOp::UploadLayer(u, umeta, _)) => {
|
||||
(UploadOp::UploadLayer(u, umeta, _, _), UploadOp::Delete(d))
|
||||
| (UploadOp::Delete(d), UploadOp::UploadLayer(u, umeta, _, _)) => {
|
||||
d.layers.iter().all(|(dname, dmeta)| {
|
||||
!is_same_remote_layer_path(&u.layer_desc().layer_name(), umeta, dname, dmeta)
|
||||
})
|
||||
@@ -539,8 +545,8 @@ impl UploadOp {
|
||||
// Similarly, index uploads can bypass uploads and deletes as long as neither the
|
||||
// uploaded index nor the active index references the file (the latter would be
|
||||
// incorrect use by the caller).
|
||||
(UploadOp::UploadLayer(u, umeta, _), UploadOp::UploadMetadata { uploaded: i })
|
||||
| (UploadOp::UploadMetadata { uploaded: i }, UploadOp::UploadLayer(u, umeta, _)) => {
|
||||
(UploadOp::UploadLayer(u, umeta, _, _), UploadOp::UploadMetadata { uploaded: i })
|
||||
| (UploadOp::UploadMetadata { uploaded: i }, UploadOp::UploadLayer(u, umeta, _, _)) => {
|
||||
let uname = u.layer_desc().layer_name();
|
||||
!i.references(&uname, umeta) && !index.references(&uname, umeta)
|
||||
}
|
||||
@@ -577,7 +583,7 @@ mod tests {
|
||||
fn assert_same_op(a: &UploadOp, b: &UploadOp) {
|
||||
use UploadOp::*;
|
||||
match (a, b) {
|
||||
(UploadLayer(a, ameta, atype), UploadLayer(b, bmeta, btype)) => {
|
||||
(UploadLayer(a, ameta, _, atype), UploadLayer(b, bmeta, _, btype)) => {
|
||||
assert_eq!(a.layer_desc().layer_name(), b.layer_desc().layer_name());
|
||||
assert_eq!(ameta, bmeta);
|
||||
assert_eq!(atype, btype);
|
||||
@@ -641,6 +647,7 @@ mod tests {
|
||||
generation: timeline.generation,
|
||||
shard: timeline.get_shard_index(),
|
||||
file_size: size as u64,
|
||||
encryption_key: None,
|
||||
};
|
||||
make_layer_with_metadata(timeline, name, metadata)
|
||||
}
|
||||
@@ -710,7 +717,7 @@ mod tests {
|
||||
|
||||
// Enqueue non-conflicting upload, delete, and index before and after a barrier.
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer1.layer_desc().layer_name(), layer1.metadata())],
|
||||
}),
|
||||
@@ -718,7 +725,7 @@ mod tests {
|
||||
uploaded: index.clone(),
|
||||
},
|
||||
UploadOp::Barrier(barrier),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],
|
||||
}),
|
||||
@@ -843,9 +850,9 @@ mod tests {
|
||||
);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0a.clone(), layer0a.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0b.clone(), layer0b.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0c.clone(), layer0c.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0a.clone(), layer0a.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer0b.clone(), layer0b.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer0c.clone(), layer0c.metadata(), None, None),
|
||||
];
|
||||
|
||||
queue.queued_operations.extend(ops.clone());
|
||||
@@ -882,14 +889,14 @@ mod tests {
|
||||
);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![
|
||||
(layer0.layer_desc().layer_name(), layer0.metadata()),
|
||||
(layer1.layer_desc().layer_name(), layer1.metadata()),
|
||||
],
|
||||
}),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None, None),
|
||||
];
|
||||
|
||||
queue.queued_operations.extend(ops.clone());
|
||||
@@ -938,15 +945,15 @@ mod tests {
|
||||
);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![
|
||||
(layer0.layer_desc().layer_name(), layer0.metadata()),
|
||||
(layer1.layer_desc().layer_name(), layer1.metadata()),
|
||||
],
|
||||
}),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],
|
||||
}),
|
||||
@@ -984,9 +991,9 @@ mod tests {
|
||||
);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
];
|
||||
|
||||
queue.queued_operations.extend(ops.clone());
|
||||
@@ -1061,15 +1068,15 @@ mod tests {
|
||||
let index2 = index_with(&index1, &layer2);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index0.clone(),
|
||||
},
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index1.clone(),
|
||||
},
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index2.clone(),
|
||||
},
|
||||
@@ -1128,7 +1135,7 @@ mod tests {
|
||||
|
||||
let ops = [
|
||||
// Initial upload, with a barrier to prevent index coalescing.
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index_upload.clone(),
|
||||
},
|
||||
@@ -1177,7 +1184,7 @@ mod tests {
|
||||
|
||||
let ops = [
|
||||
// Initial upload, with a barrier to prevent index coalescing.
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index_upload.clone(),
|
||||
},
|
||||
@@ -1187,7 +1194,7 @@ mod tests {
|
||||
uploaded: index_deref.clone(),
|
||||
},
|
||||
// Replace and reference the layer.
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None, None),
|
||||
UploadOp::UploadMetadata {
|
||||
uploaded: index_ref.clone(),
|
||||
},
|
||||
@@ -1235,7 +1242,7 @@ mod tests {
|
||||
|
||||
// Enqueue non-conflicting upload, delete, and index before and after a shutdown.
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer1.layer_desc().layer_name(), layer1.metadata())],
|
||||
}),
|
||||
@@ -1243,7 +1250,7 @@ mod tests {
|
||||
uploaded: index.clone(),
|
||||
},
|
||||
UploadOp::Shutdown,
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer3.layer_desc().layer_name(), layer3.metadata())],
|
||||
}),
|
||||
@@ -1305,10 +1312,10 @@ mod tests {
|
||||
);
|
||||
|
||||
let ops = [
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None),
|
||||
UploadOp::UploadLayer(layer3.clone(), layer3.metadata(), None),
|
||||
UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer1.clone(), layer1.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer2.clone(), layer2.metadata(), None, None),
|
||||
UploadOp::UploadLayer(layer3.clone(), layer3.metadata(), None, None),
|
||||
];
|
||||
|
||||
queue.queued_operations.extend(ops.clone());
|
||||
@@ -1359,7 +1366,7 @@ mod tests {
|
||||
.layer_metadata
|
||||
.insert(layer.layer_desc().layer_name(), layer.metadata());
|
||||
vec![
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None),
|
||||
UploadOp::UploadLayer(layer.clone(), layer.metadata(), None, None),
|
||||
UploadOp::Delete(Delete {
|
||||
layers: vec![(layer.layer_desc().layer_name(), layer.metadata())],
|
||||
}),
|
||||
@@ -1378,6 +1385,7 @@ mod tests {
|
||||
shard,
|
||||
generation: Generation::Valid(generation),
|
||||
file_size: 0,
|
||||
encryption_key: None,
|
||||
};
|
||||
make_layer_with_metadata(&tli, name, metadata)
|
||||
};
|
||||
|
||||
@@ -1366,7 +1366,8 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
|
||||
pub(crate) type IoPageSlice<'a> =
|
||||
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
|
||||
|
||||
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
|
||||
static IO_MODE: once_cell::sync::Lazy<AtomicU8> =
|
||||
once_cell::sync::Lazy::new(|| AtomicU8::new(IoMode::preferred() as u8));
|
||||
|
||||
pub(crate) fn set_io_mode(mode: IoMode) {
|
||||
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
@@ -776,7 +776,6 @@ impl From<&jose_jwk::Key> for KeyType {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::future::IntoFuture;
|
||||
use std::net::SocketAddr;
|
||||
|
||||
@@ -253,7 +253,6 @@ fn project_name_valid(name: &str) -> bool {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use ComputeUserInfoParseError::*;
|
||||
use serde_json::json;
|
||||
|
||||
@@ -258,7 +258,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
"unexpected startup packet, rejecting connection"
|
||||
);
|
||||
stream
|
||||
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User)
|
||||
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User, None)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
|
||||
1
proxy/src/cache/endpoints.rs
vendored
1
proxy/src/cache/endpoints.rs
vendored
@@ -259,7 +259,6 @@ impl EndpointsCache {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
1
proxy/src/cache/project_info.rs
vendored
1
proxy/src/cache/project_info.rs
vendored
@@ -585,7 +585,6 @@ impl Cache for ProjectInfoCacheImpl {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::scram::ServerSecret;
|
||||
|
||||
@@ -222,7 +222,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
{
|
||||
Ok(auth_result) => auth_result,
|
||||
Err(e) => {
|
||||
return stream.throw_error(e).await?;
|
||||
return stream.throw_error(e, Some(ctx)).await?;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -238,7 +238,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
config.wake_compute_retry_config,
|
||||
&config.connect_to_compute,
|
||||
)
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.or_else(|e| stream.throw_error(e, Some(ctx)))
|
||||
.await?;
|
||||
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
|
||||
@@ -63,7 +63,7 @@ struct RequestContextInner {
|
||||
success: bool,
|
||||
pub(crate) cold_start_info: ColdStartInfo,
|
||||
pg_options: Option<StartupMessageParams>,
|
||||
testodrome_query_id: Option<String>,
|
||||
testodrome_query_id: Option<SmolStr>,
|
||||
|
||||
// extra
|
||||
// This sender is here to keep the request monitoring channel open while requests are taking place.
|
||||
@@ -219,7 +219,7 @@ impl RequestContext {
|
||||
for option in options_str.split_whitespace() {
|
||||
if option.starts_with("neon_query_id:") {
|
||||
if let Some(value) = option.strip_prefix("neon_query_id:") {
|
||||
this.set_testodrome_id(value.to_string());
|
||||
this.set_testodrome_id(value.into());
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -272,7 +272,7 @@ impl RequestContext {
|
||||
.set_user_agent(user_agent);
|
||||
}
|
||||
|
||||
pub(crate) fn set_testodrome_id(&self, query_id: String) {
|
||||
pub(crate) fn set_testodrome_id(&self, query_id: SmolStr) {
|
||||
self.0
|
||||
.try_lock()
|
||||
.expect("should not deadlock")
|
||||
@@ -378,7 +378,7 @@ impl RequestContext {
|
||||
.accumulated()
|
||||
}
|
||||
|
||||
pub(crate) fn get_testodrome_id(&self) -> Option<String> {
|
||||
pub(crate) fn get_testodrome_id(&self) -> Option<SmolStr> {
|
||||
self.0
|
||||
.try_lock()
|
||||
.expect("should not deadlock")
|
||||
@@ -447,7 +447,7 @@ impl RequestContextInner {
|
||||
self.user = Some(user);
|
||||
}
|
||||
|
||||
fn set_testodrome_id(&mut self, query_id: String) {
|
||||
fn set_testodrome_id(&mut self, query_id: SmolStr) {
|
||||
self.testodrome_query_id = Some(query_id);
|
||||
}
|
||||
|
||||
|
||||
@@ -416,7 +416,6 @@ async fn upload_parquet(
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::net::Ipv4Addr;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
@@ -227,7 +227,6 @@ impl From<AccountId> for AccountIdInt {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::sync::OnceLock;
|
||||
|
||||
|
||||
@@ -1032,7 +1032,6 @@ impl<const F: usize> serde::ser::Serialize for ExtractedSpanFields<'_, F> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
|
||||
@@ -400,7 +400,6 @@ impl NetworkEndianIpv6 {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
|
||||
@@ -262,7 +262,6 @@ impl CopyBuffer {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
|
||||
@@ -196,7 +196,11 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
// OR we didn't provide it at all (for dev purposes).
|
||||
if tls.is_some() {
|
||||
return stream
|
||||
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User)
|
||||
.throw_error_str(
|
||||
ERR_INSECURE_CONNECTION,
|
||||
crate::error::ErrorKind::User,
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
|
||||
@@ -329,7 +329,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
|
||||
let user_info = match result {
|
||||
Ok(user_info) => user_info,
|
||||
Err(e) => stream.throw_error(e).await?,
|
||||
Err(e) => stream.throw_error(e, Some(ctx)).await?,
|
||||
};
|
||||
|
||||
let user = user_info.get_user().to_owned();
|
||||
@@ -349,7 +349,10 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let app = params.get("application_name");
|
||||
let params_span = tracing::info_span!("", ?user, ?db, ?app);
|
||||
|
||||
return stream.throw_error(e).instrument(params_span).await?;
|
||||
return stream
|
||||
.throw_error(e, Some(ctx))
|
||||
.instrument(params_span)
|
||||
.await?;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -374,7 +377,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
config.wake_compute_retry_config,
|
||||
&config.connect_to_compute,
|
||||
)
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.or_else(|e| stream.throw_error(e, Some(ctx)))
|
||||
.await?;
|
||||
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//! A group of high-level tests for connection establishing logic and auth.
|
||||
#![allow(clippy::unimplemented, clippy::unwrap_used)]
|
||||
#![allow(clippy::unimplemented)]
|
||||
|
||||
mod mitm;
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ impl From<LeakyBucketConfig> for utils::leaky_bucket::LeakyBucketConfig {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::float_cmp, clippy::unwrap_used)]
|
||||
#[allow(clippy::float_cmp)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
|
||||
@@ -63,7 +63,6 @@ impl LimitAlgorithm for Aimd {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
|
||||
@@ -259,7 +259,6 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::hash::BuildHasherDefault;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -51,7 +51,6 @@ impl<'a> ServerMessage<&'a str> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -185,7 +185,6 @@ impl fmt::Debug for OwnedServerFirstMessage {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -57,7 +57,6 @@ fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::threadpool::ThreadPool;
|
||||
use super::{Exchange, ServerSecret};
|
||||
|
||||
@@ -72,7 +72,6 @@ impl ServerSecret {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -561,8 +561,10 @@ impl ConnectMechanism for TokioMechanism {
|
||||
.dbname(&self.conn_info.dbname)
|
||||
.connect_timeout(compute_config.timeout);
|
||||
|
||||
let mk_tls =
|
||||
crate::tls::postgres_rustls::MakeRustlsConnect::new(compute_config.tls.clone());
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
let res = config.connect(postgres_client::NoTls).await;
|
||||
let res = config.connect(mk_tls).await;
|
||||
drop(pause);
|
||||
let (client, connection) = permit.release_result(res)?;
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::task::{Poll, ready};
|
||||
use futures::Future;
|
||||
use futures::future::poll_fn;
|
||||
use postgres_client::AsyncMessage;
|
||||
use postgres_client::tls::NoTlsStream;
|
||||
use postgres_client::tls::MakeTlsConnect;
|
||||
use smallvec::SmallVec;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::Instant;
|
||||
@@ -26,6 +26,9 @@ use super::conn_pool_lib::{
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::messages::MetricsAuxInfo;
|
||||
use crate::metrics::Metrics;
|
||||
use crate::tls::postgres_rustls::MakeRustlsConnect;
|
||||
|
||||
type TlsStream = <MakeRustlsConnect as MakeTlsConnect<TcpStream>>::Stream;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ConnInfoWithAuth {
|
||||
@@ -58,7 +61,7 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
client: C,
|
||||
mut connection: postgres_client::Connection<TcpStream, NoTlsStream>,
|
||||
mut connection: postgres_client::Connection<TcpStream, TlsStream>,
|
||||
conn_id: uuid::Uuid,
|
||||
aux: MetricsAuxInfo,
|
||||
) -> Client<C> {
|
||||
@@ -186,7 +189,6 @@ impl ClientDataRemote {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
|
||||
@@ -256,7 +256,6 @@ fn pg_array_parse_inner(
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use serde_json::json;
|
||||
|
||||
|
||||
@@ -367,7 +367,6 @@ fn sign_jwt(sk: &SigningKey, payload: &[u8]) -> String {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use ed25519_dalek::SigningKey;
|
||||
use typed_json::json;
|
||||
|
||||
@@ -434,17 +434,6 @@ async fn request_handler(
|
||||
.map(Into::into),
|
||||
);
|
||||
|
||||
let testodrome_id = request
|
||||
.headers()
|
||||
.get("X-Neon-Query-ID")
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
if let Some(query_id) = testodrome_id {
|
||||
info!(parent: &ctx.span(), "testodrome query ID: {query_id}");
|
||||
ctx.set_testodrome_id(query_id);
|
||||
}
|
||||
|
||||
let span = ctx.span();
|
||||
info!(parent: &span, "performing websocket upgrade");
|
||||
|
||||
@@ -491,7 +480,7 @@ async fn request_handler(
|
||||
|
||||
if let Some(query_id) = testodrome_id {
|
||||
info!(parent: &ctx.span(), "testodrome query ID: {query_id}");
|
||||
ctx.set_testodrome_id(query_id);
|
||||
ctx.set_testodrome_id(query_id.into());
|
||||
}
|
||||
|
||||
sql_over_http::handle(config, ctx, request, backend, http_cancellation_token)
|
||||
|
||||
@@ -1209,7 +1209,6 @@ impl Discard<'_> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -157,7 +157,6 @@ pub(crate) async fn serve_websocket(
|
||||
|
||||
match res {
|
||||
Err(e) => {
|
||||
// todo: log and push to ctx the error kind
|
||||
ctx.set_error_kind(e.get_error_kind());
|
||||
Err(e.into())
|
||||
}
|
||||
@@ -178,7 +177,6 @@ pub(crate) async fn serve_websocket(
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::pin::pin;
|
||||
|
||||
|
||||
@@ -6,11 +6,13 @@ use bytes::BytesMut;
|
||||
use pq_proto::framed::{ConnectionError, Framed};
|
||||
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, ProtocolError};
|
||||
use rustls::ServerConfig;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
|
||||
use tokio_rustls::server::TlsStream;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::control_plane::messages::ColdStartInfo;
|
||||
use crate::error::{ErrorKind, ReportableError, UserFacingError};
|
||||
use crate::metrics::Metrics;
|
||||
use crate::tls::TlsServerEndPoint;
|
||||
@@ -100,6 +102,44 @@ impl ReportableError for ReportedError {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
enum ErrorTag {
|
||||
#[serde(rename = "proxy")]
|
||||
Proxy,
|
||||
#[serde(rename = "compute")]
|
||||
Compute,
|
||||
#[serde(rename = "client")]
|
||||
Client,
|
||||
#[serde(rename = "controlplane")]
|
||||
ControlPlane,
|
||||
#[serde(rename = "other")]
|
||||
Other,
|
||||
}
|
||||
|
||||
impl From<ErrorKind> for ErrorTag {
|
||||
fn from(error_kind: ErrorKind) -> Self {
|
||||
match error_kind {
|
||||
ErrorKind::User => Self::Client,
|
||||
ErrorKind::ClientDisconnect => Self::Client,
|
||||
ErrorKind::RateLimit => Self::Proxy,
|
||||
ErrorKind::ServiceRateLimit => Self::Proxy, // considering rate limit as proxy error for SLI
|
||||
ErrorKind::Quota => Self::Proxy,
|
||||
ErrorKind::Service => Self::Proxy,
|
||||
ErrorKind::ControlPlane => Self::ControlPlane,
|
||||
ErrorKind::Postgres => Self::Other,
|
||||
ErrorKind::Compute => Self::Compute,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
struct ProbeErrorData {
|
||||
tag: ErrorTag,
|
||||
msg: String,
|
||||
cold_start_info: Option<ColdStartInfo>,
|
||||
}
|
||||
|
||||
impl<S: AsyncWrite + Unpin> PqStream<S> {
|
||||
/// Write the message into an internal buffer, but don't flush the underlying stream.
|
||||
pub(crate) fn write_message_noflush(
|
||||
@@ -125,26 +165,54 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Write the error message using [`Self::write_message`], then re-throw it.
|
||||
/// Writes message with the given error kind to the stream.
|
||||
/// Used only for probe queries
|
||||
async fn write_format_message(
|
||||
&mut self,
|
||||
msg: &str,
|
||||
error_kind: ErrorKind,
|
||||
ctx: Option<&crate::context::RequestContext>,
|
||||
) -> String {
|
||||
let formatted_msg = match ctx {
|
||||
Some(ctx) if ctx.get_testodrome_id().is_some() => {
|
||||
serde_json::to_string(&ProbeErrorData {
|
||||
tag: ErrorTag::from(error_kind),
|
||||
msg: msg.to_string(),
|
||||
cold_start_info: Some(ctx.cold_start_info()),
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
_ => msg.to_string(),
|
||||
};
|
||||
|
||||
// already error case, ignore client IO error
|
||||
self.write_message(&BeMessage::ErrorResponse(&formatted_msg, None))
|
||||
.await
|
||||
.inspect_err(|e| debug!("write_message failed: {e}"))
|
||||
.ok();
|
||||
|
||||
formatted_msg
|
||||
}
|
||||
|
||||
/// Write the error message using [`Self::write_format_message`], then re-throw it.
|
||||
/// Allowing string literals is safe under the assumption they might not contain any runtime info.
|
||||
/// This method exists due to `&str` not implementing `Into<anyhow::Error>`.
|
||||
/// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
|
||||
pub async fn throw_error_str<T>(
|
||||
&mut self,
|
||||
msg: &'static str,
|
||||
error_kind: ErrorKind,
|
||||
ctx: Option<&crate::context::RequestContext>,
|
||||
) -> Result<T, ReportedError> {
|
||||
// TODO: only log this for actually interesting errors
|
||||
tracing::info!(
|
||||
kind = error_kind.to_metric_label(),
|
||||
msg,
|
||||
"forwarding error to user"
|
||||
);
|
||||
self.write_format_message(msg, error_kind, ctx).await;
|
||||
|
||||
// already error case, ignore client IO error
|
||||
self.write_message(&BeMessage::ErrorResponse(msg, None))
|
||||
.await
|
||||
.inspect_err(|e| debug!("write_message failed: {e}"))
|
||||
.ok();
|
||||
if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
|
||||
tracing::info!(
|
||||
kind = error_kind.to_metric_label(),
|
||||
msg,
|
||||
"forwarding error to user"
|
||||
);
|
||||
}
|
||||
|
||||
Err(ReportedError {
|
||||
source: anyhow::anyhow!(msg),
|
||||
@@ -152,26 +220,28 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Write the error message using [`Self::write_message`], then re-throw it.
|
||||
/// Write the error message using [`Self::write_format_message`], then re-throw it.
|
||||
/// Trait [`UserFacingError`] acts as an allowlist for error types.
|
||||
pub(crate) async fn throw_error<T, E>(&mut self, error: E) -> Result<T, ReportedError>
|
||||
/// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
|
||||
pub(crate) async fn throw_error<T, E>(
|
||||
&mut self,
|
||||
error: E,
|
||||
ctx: Option<&crate::context::RequestContext>,
|
||||
) -> Result<T, ReportedError>
|
||||
where
|
||||
E: UserFacingError + Into<anyhow::Error>,
|
||||
{
|
||||
let error_kind = error.get_error_kind();
|
||||
let msg = error.to_string_client();
|
||||
tracing::info!(
|
||||
kind=error_kind.to_metric_label(),
|
||||
error=%error,
|
||||
msg,
|
||||
"forwarding error to user"
|
||||
);
|
||||
|
||||
// already error case, ignore client IO error
|
||||
self.write_message(&BeMessage::ErrorResponse(&msg, None))
|
||||
.await
|
||||
.inspect_err(|e| debug!("write_message failed: {e}"))
|
||||
.ok();
|
||||
self.write_format_message(&msg, error_kind, ctx).await;
|
||||
if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
|
||||
tracing::info!(
|
||||
kind=error_kind.to_metric_label(),
|
||||
error=%error,
|
||||
msg,
|
||||
"forwarding error to user",
|
||||
);
|
||||
}
|
||||
|
||||
Err(ReportedError {
|
||||
source: anyhow::anyhow!(error),
|
||||
|
||||
@@ -50,7 +50,6 @@ impl std::fmt::Display for ApiUrl {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -497,7 +497,6 @@ async fn upload_backup_events(
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::fs;
|
||||
use std::io::BufReader;
|
||||
|
||||
@@ -72,7 +72,7 @@ Inside that dir, a `bin/postgres` binary should be present.
|
||||
`COMPATIBILITY_POSTGRES_DISTRIB_DIR`: The directory where the prevoius version of postgres distribution can be found.
|
||||
`DEFAULT_PG_VERSION`: The version of Postgres to use,
|
||||
This is used to construct full path to the postgres binaries.
|
||||
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
|
||||
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=17`
|
||||
`TEST_OUTPUT`: Set the directory where test state and test output files
|
||||
should go.
|
||||
`RUST_LOG`: logging configuration to pass into Neon CLI
|
||||
|
||||
@@ -7,7 +7,7 @@ easier to see if you have compile errors without scrolling up.
|
||||
You may also need to run `./scripts/pysync`.
|
||||
|
||||
Then run the tests
|
||||
`DEFAULT_PG_VERSION=16 NEON_BIN=./target/release poetry run pytest test_runner/performance`
|
||||
`DEFAULT_PG_VERSION=17 NEON_BIN=./target/release poetry run pytest test_runner/performance`
|
||||
|
||||
Some handy pytest flags for local development:
|
||||
- `-x` tells pytest to stop on first error
|
||||
|
||||
@@ -11,6 +11,6 @@ It supports mounting snapshots using overlayfs, which improves iteration time.
|
||||
Here's a full command line.
|
||||
|
||||
```
|
||||
RUST_BACKTRACE=1 NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release \
|
||||
RUST_BACKTRACE=1 NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 DEFAULT_PG_VERSION=17 BUILD_TYPE=release \
|
||||
./scripts/pytest test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
|
||||
````
|
||||
|
||||
@@ -16,7 +16,7 @@ from performance.pageserver.util import ensure_pageserver_ready_for_benchmarking
|
||||
|
||||
"""
|
||||
Usage:
|
||||
DEFAULT_PG_VERSION=16 BUILD_TYPE=debug NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 INTERACTIVE=true \
|
||||
DEFAULT_PG_VERSION=17 BUILD_TYPE=debug NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 INTERACTIVE=true \
|
||||
./scripts/pytest --timeout 0 test_runner/performance/pageserver/interactive/test_many_small_tenants.py
|
||||
"""
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -68,9 +67,7 @@ def test_compute_startup_simple(
|
||||
endpoint.safe_psql("select 1;")
|
||||
|
||||
# Get metrics
|
||||
metrics = requests.get(
|
||||
f"http://localhost:{endpoint.external_http_port}/metrics.json"
|
||||
).json()
|
||||
metrics = endpoint.http_client().metrics_json()
|
||||
durations = {
|
||||
"wait_for_spec_ms": f"{i}_wait_for_spec",
|
||||
"sync_safekeepers_ms": f"{i}_sync_safekeepers",
|
||||
@@ -155,9 +152,7 @@ def test_compute_ondemand_slru_startup(
|
||||
assert sum == 1000000
|
||||
|
||||
# Get metrics
|
||||
metrics = requests.get(
|
||||
f"http://localhost:{endpoint.external_http_port}/metrics.json"
|
||||
).json()
|
||||
metrics = endpoint.http_client().metrics_json()
|
||||
durations = {
|
||||
"wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
|
||||
"sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
|
||||
|
||||
@@ -72,6 +72,7 @@ PREEMPT_GC_COMPACTION_TENANT_CONF = {
|
||||
"wal_receiver_protocol",
|
||||
[PageserverWalReceiverProtocol.VANILLA, PageserverWalReceiverProtocol.INTERPRETED],
|
||||
)
|
||||
@pytest.mark.timeout(900)
|
||||
def test_pageserver_compaction_smoke(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
wal_receiver_protocol: PageserverWalReceiverProtocol,
|
||||
@@ -190,6 +191,7 @@ def test_pageserver_compaction_preempt(
|
||||
|
||||
|
||||
@skip_in_debug_build("only run with release build")
|
||||
@pytest.mark.timeout(600)
|
||||
def test_pageserver_gc_compaction_preempt(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
@@ -227,7 +229,7 @@ def test_pageserver_gc_compaction_preempt(
|
||||
|
||||
|
||||
@skip_in_debug_build("only run with release build")
|
||||
@pytest.mark.timeout(900) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.timeout(600) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.parametrize(
|
||||
"with_branches",
|
||||
["with_branches", "no_branches"],
|
||||
|
||||
@@ -36,10 +36,8 @@ if TYPE_CHECKING:
|
||||
# - `test_create_snapshot` a script wrapped in a test that creates a data snapshot.
|
||||
# - `test_backward_compatibility` checks that the current version of Neon can start/read/interract with a data snapshot created by the previous version.
|
||||
# The path to the snapshot is configured by COMPATIBILITY_SNAPSHOT_DIR environment variable.
|
||||
# If the breakage is intentional, the test can be xfaild with setting ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE=true.
|
||||
# - `test_forward_compatibility` checks that a snapshot created by the current version can be started/read/interracted by the previous version of Neon.
|
||||
# Paths to Neon and Postgres are configured by COMPATIBILITY_NEON_BIN and COMPATIBILITY_POSTGRES_DISTRIB_DIR environment variables.
|
||||
# If the breakage is intentional, the test can be xfaild with setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
|
||||
#
|
||||
# The file contains a couple of helper functions:
|
||||
# - check_neon_works performs the test itself, feel free to add more checks there.
|
||||
@@ -48,7 +46,7 @@ if TYPE_CHECKING:
|
||||
#
|
||||
# How to run `test_backward_compatibility` locally:
|
||||
#
|
||||
# export DEFAULT_PG_VERSION=16
|
||||
# export DEFAULT_PG_VERSION=17
|
||||
# export BUILD_TYPE=release
|
||||
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
|
||||
# export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
|
||||
@@ -70,7 +68,7 @@ if TYPE_CHECKING:
|
||||
#
|
||||
# How to run `test_forward_compatibility` locally:
|
||||
#
|
||||
# export DEFAULT_PG_VERSION=16
|
||||
# export DEFAULT_PG_VERSION=17
|
||||
# export BUILD_TYPE=release
|
||||
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
|
||||
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
|
||||
@@ -96,7 +94,7 @@ if TYPE_CHECKING:
|
||||
#
|
||||
# How to run `test_version_mismatch` locally:
|
||||
#
|
||||
# export DEFAULT_PG_VERSION=16
|
||||
# export DEFAULT_PG_VERSION=17
|
||||
# export BUILD_TYPE=release
|
||||
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
|
||||
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
|
||||
@@ -208,36 +206,19 @@ def test_backward_compatibility(
|
||||
"""
|
||||
Test that the new binaries can read old data
|
||||
"""
|
||||
breaking_changes_allowed = (
|
||||
os.environ.get("ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
env.start()
|
||||
|
||||
check_neon_works(
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
|
||||
try:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
env.start()
|
||||
|
||||
check_neon_works(
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
|
||||
env.pageserver.assert_log_contains(ingest_lag_log_line)
|
||||
|
||||
except Exception:
|
||||
if breaking_changes_allowed:
|
||||
pytest.xfail(
|
||||
"Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE env var"
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
assert not breaking_changes_allowed, (
|
||||
"Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
|
||||
)
|
||||
env.pageserver.assert_log_contains(ingest_lag_log_line)
|
||||
|
||||
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@@ -254,72 +235,56 @@ def test_forward_compatibility(
|
||||
"""
|
||||
Test that the old binaries can read new data
|
||||
"""
|
||||
breaking_changes_allowed = (
|
||||
os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
|
||||
)
|
||||
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
neon_env_builder.test_may_use_compatibility_snapshot_binaries = True
|
||||
|
||||
try:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
|
||||
# Use previous version's production binaries (pageserver, safekeeper, pg_distrib_dir, etc.).
|
||||
# But always use the current version's neon_local binary.
|
||||
# This is because we want to test the compatibility of the data format, not the compatibility of the neon_local CLI.
|
||||
assert neon_env_builder.compatibility_neon_binpath is not None, (
|
||||
"the environment variable COMPATIBILITY_NEON_BIN is required"
|
||||
)
|
||||
assert neon_env_builder.compatibility_pg_distrib_dir is not None, (
|
||||
"the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required"
|
||||
)
|
||||
neon_env_builder.neon_binpath = neon_env_builder.compatibility_neon_binpath
|
||||
neon_env_builder.pg_distrib_dir = neon_env_builder.compatibility_pg_distrib_dir
|
||||
# Use previous version's production binaries (pageserver, safekeeper, pg_distrib_dir, etc.).
|
||||
# But always use the current version's neon_local binary.
|
||||
# This is because we want to test the compatibility of the data format, not the compatibility of the neon_local CLI.
|
||||
assert neon_env_builder.compatibility_neon_binpath is not None, (
|
||||
"the environment variable COMPATIBILITY_NEON_BIN is required"
|
||||
)
|
||||
assert neon_env_builder.compatibility_pg_distrib_dir is not None, (
|
||||
"the environment variable COMPATIBILITY_POSTGRES_DISTRIB_DIR is required"
|
||||
)
|
||||
neon_env_builder.neon_binpath = neon_env_builder.compatibility_neon_binpath
|
||||
neon_env_builder.pg_distrib_dir = neon_env_builder.compatibility_pg_distrib_dir
|
||||
|
||||
env = neon_env_builder.from_repo_dir(
|
||||
compatibility_snapshot_dir / "repo",
|
||||
)
|
||||
# there may be an arbitrary number of unrelated tests run between create_snapshot and here
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
env = neon_env_builder.from_repo_dir(
|
||||
compatibility_snapshot_dir / "repo",
|
||||
)
|
||||
# there may be an arbitrary number of unrelated tests run between create_snapshot and here
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
|
||||
# not using env.pageserver.version because it was initialized before
|
||||
prev_pageserver_version_str = env.get_binary_version("pageserver")
|
||||
prev_pageserver_version_match = re.search(
|
||||
"Neon page server git(?:-env)?:(.*) failpoints: (.*), features: (.*)",
|
||||
prev_pageserver_version_str,
|
||||
)
|
||||
if prev_pageserver_version_match is not None:
|
||||
prev_pageserver_version = prev_pageserver_version_match.group(1)
|
||||
else:
|
||||
raise AssertionError(
|
||||
"cannot find git hash in the version string: " + prev_pageserver_version_str
|
||||
)
|
||||
|
||||
# does not include logs from previous runs
|
||||
assert not env.pageserver.log_contains(f"git(-env)?:{prev_pageserver_version}")
|
||||
|
||||
env.start()
|
||||
|
||||
# ensure the specified pageserver is running
|
||||
assert env.pageserver.log_contains(f"git(-env)?:{prev_pageserver_version}")
|
||||
|
||||
check_neon_works(
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
# not using env.pageserver.version because it was initialized before
|
||||
prev_pageserver_version_str = env.get_binary_version("pageserver")
|
||||
prev_pageserver_version_match = re.search(
|
||||
"Neon page server git(?:-env)?:(.*) failpoints: (.*), features: (.*)",
|
||||
prev_pageserver_version_str,
|
||||
)
|
||||
if prev_pageserver_version_match is not None:
|
||||
prev_pageserver_version = prev_pageserver_version_match.group(1)
|
||||
else:
|
||||
raise AssertionError(
|
||||
"cannot find git hash in the version string: " + prev_pageserver_version_str
|
||||
)
|
||||
|
||||
except Exception:
|
||||
if breaking_changes_allowed:
|
||||
pytest.xfail(
|
||||
"Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE env var"
|
||||
)
|
||||
else:
|
||||
raise
|
||||
# does not include logs from previous runs
|
||||
assert not env.pageserver.log_contains(f"git(-env)?:{prev_pageserver_version}")
|
||||
|
||||
assert not breaking_changes_allowed, (
|
||||
"Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
|
||||
env.start()
|
||||
|
||||
# ensure the specified pageserver is running
|
||||
assert env.pageserver.log_contains(f"git(-env)?:{prev_pageserver_version}")
|
||||
|
||||
check_neon_works(
|
||||
env,
|
||||
test_output_dir=test_output_dir,
|
||||
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
|
||||
repo_dir=env.repo_dir,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -22,7 +22,12 @@ def test_lfc_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
config_lines=["neon.max_file_cache_size='128MB'", "neon.file_cache_size_limit='64MB'"],
|
||||
config_lines=[
|
||||
"autovacuum=off",
|
||||
"bgwriter_lru_maxpages=0",
|
||||
"neon.max_file_cache_size='128MB'",
|
||||
"neon.file_cache_size_limit='64MB'",
|
||||
],
|
||||
)
|
||||
|
||||
cur = endpoint.connect().cursor()
|
||||
@@ -72,7 +77,7 @@ WITH (fillfactor='100');
|
||||
# verify working set size after some index access of a few select pages only
|
||||
blocks = query_scalar(cur, "select approximate_working_set_size(true)")
|
||||
log.info(f"working set size after some index access of a few select pages only {blocks}")
|
||||
assert blocks < 12
|
||||
assert blocks < 20
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
@@ -83,6 +88,7 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
"autovacuum = off",
|
||||
"bgwriter_lru_maxpages=0",
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=256MB",
|
||||
"neon.file_cache_size_limit=245MB",
|
||||
@@ -92,9 +98,9 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon")
|
||||
cur.execute(
|
||||
"create table t(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
|
||||
"create table t(pk integer primary key, count integer default 0, payload text default repeat('?', 1000)) with (fillfactor=10)"
|
||||
)
|
||||
cur.execute("insert into t (pk) values (generate_series(1,1000000))")
|
||||
cur.execute("insert into t (pk) values (generate_series(1,100000))")
|
||||
time.sleep(2)
|
||||
before_10k = time.monotonic()
|
||||
cur.execute("select sum(count) from t where pk between 10000 and 20000")
|
||||
@@ -115,5 +121,5 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
size = cur.fetchall()[0][0] // 8192
|
||||
log.info(f"Table size {size} blocks")
|
||||
|
||||
assert estimation_1k >= 20 and estimation_1k <= 40
|
||||
assert estimation_10k >= 200 and estimation_10k <= 440
|
||||
assert estimation_1k >= 900 and estimation_1k <= 2000
|
||||
assert estimation_10k >= 9000 and estimation_10k <= 20000
|
||||
|
||||
@@ -390,7 +390,7 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder):
|
||||
# Tenant creation requests which arrive out of order will generate complaints about
|
||||
# generation nubmers out of order.
|
||||
env.pageserver.allowed_errors.append(".*Generation .+ is less than existing .+")
|
||||
env.pageserver.allowed_errors.append(".*due to stale generation.+")
|
||||
env.storage_controller.allowed_errors.append(".*due to stale generation.*")
|
||||
|
||||
# Timeline::flush_and_shutdown cannot tell if it is hitting a failure because of
|
||||
# an incomplete attach, or some other problem. In the field this should be rare,
|
||||
|
||||
Reference in New Issue
Block a user