mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 08:00:37 +00:00
Compare commits
2 Commits
problame/b
...
bojan-tmp-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8fd6266aa | ||
|
|
151605d751 |
2
.github/actionlint.yml
vendored
2
.github/actionlint.yml
vendored
@@ -5,6 +5,4 @@ self-hosted-runner:
|
||||
- small
|
||||
- us-east-2
|
||||
config-variables:
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
|
||||
@@ -203,10 +203,6 @@ runs:
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
BASE_S3_URL: ${{ steps.generate-report.outputs.base-s3-url }}
|
||||
run: |
|
||||
if [ ! -d "${WORKDIR}/report/data/test-cases" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
export DATABASE_URL=${REGRESS_TEST_RESULT_CONNSTR_NEW}
|
||||
|
||||
./scripts/pysync
|
||||
|
||||
16
.github/workflows/build_and_test.yml
vendored
16
.github/workflows/build_and_test.yml
vendored
@@ -338,16 +338,6 @@ jobs:
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test test_real_s3
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test test_real_azure
|
||||
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
# Install target binaries
|
||||
@@ -433,7 +423,7 @@ jobs:
|
||||
rerun_flaky: true
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
|
||||
- name: Merge and upload coverage data
|
||||
@@ -468,7 +458,7 @@ jobs:
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}"
|
||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
@@ -847,7 +837,7 @@ jobs:
|
||||
run:
|
||||
shell: sh -eu {0}
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.18.2
|
||||
VM_BUILDER_VERSION: v0.18.1
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
29
Cargo.lock
generated
29
Cargo.lock
generated
@@ -2932,16 +2932,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
@@ -3208,12 +3198,6 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "pagectl"
|
||||
version = "0.1.0"
|
||||
@@ -3299,12 +3283,10 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"utils",
|
||||
"walkdir",
|
||||
@@ -3579,7 +3561,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -3592,7 +3574,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-native-tls"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
@@ -3603,7 +3585,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -3621,7 +3603,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -5425,7 +5407,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -5782,7 +5764,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
|
||||
12
Cargo.toml
12
Cargo.toml
@@ -161,11 +161,11 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
@@ -202,7 +202,7 @@ tonic-build = "0.9"
|
||||
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ use utils::pid_file::{self, PidFileRead};
|
||||
// it's waiting. If the process hasn't started/stopped after 5 seconds,
|
||||
// it prints a notice that it's taking long, but keeps waiting.
|
||||
//
|
||||
const RETRY_UNTIL_SECS: u64 = 10;
|
||||
const RETRY_UNTIL_SECS: u64 = 10000;
|
||||
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
|
||||
const RETRY_INTERVAL_MILLIS: u64 = 100;
|
||||
const DOT_EVERY_RETRIES: u64 = 10;
|
||||
|
||||
@@ -18,7 +18,7 @@ use utils::{
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
use anyhow::bail;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
///
|
||||
@@ -612,18 +612,15 @@ pub enum PagestreamFeMessage {
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -722,10 +719,6 @@ impl PagestreamFeMessage {
|
||||
bytes.put_u64(req.lsn.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(4);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
@@ -776,7 +769,6 @@ impl PagestreamFeMessage {
|
||||
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::NoOp),
|
||||
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
@@ -811,46 +803,10 @@ impl PagestreamBeMessage {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(105);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
match msg_tag {
|
||||
100 => todo!(),
|
||||
101 => todo!(),
|
||||
102 => {
|
||||
let buf = buf.get_ref();
|
||||
/* TODO use constant */
|
||||
if buf.len() == 8192 {
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
page: buf.clone(),
|
||||
}))
|
||||
} else {
|
||||
anyhow::bail!("invalid page size: {}", buf.len());
|
||||
}
|
||||
}
|
||||
103 => {
|
||||
let buf = buf.get_ref();
|
||||
let cstr = std::ffi::CStr::from_bytes_until_nul(&buf)?;
|
||||
let rust_str = cstr.to_str()?;
|
||||
Ok(PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: rust_str.to_owned(),
|
||||
}))
|
||||
}
|
||||
104 => todo!(),
|
||||
105 => {
|
||||
Ok(PagestreamBeMessage::NoOp)
|
||||
},
|
||||
_ => bail!("unknown tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -22,9 +22,9 @@ use postgres_ffi::Oid;
|
||||
/// [See more related comments here](https:///github.com/postgres/postgres/blob/99c5852e20a0987eca1c38ba0c09329d4076b6a0/src/include/storage/relfilenode.h#L57).
|
||||
///
|
||||
// FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
|
||||
// Then we could replace the custom Ord and PartialOrd implementations below with
|
||||
// deriving them. This will require changes in walredoproc.c.
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize)]
|
||||
// Then we could replace the custo Ord and PartialOrd implementations below with
|
||||
// deriving them.
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct RelTag {
|
||||
pub forknum: u8,
|
||||
pub spcnode: Oid,
|
||||
@@ -40,9 +40,21 @@ impl PartialOrd for RelTag {
|
||||
|
||||
impl Ord for RelTag {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
// Custom ordering where we put forknum to the end of the list
|
||||
let other_tup = (other.spcnode, other.dbnode, other.relnode, other.forknum);
|
||||
(self.spcnode, self.dbnode, self.relnode, self.forknum).cmp(&other_tup)
|
||||
let mut cmp = self.spcnode.cmp(&other.spcnode);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
cmp = self.dbnode.cmp(&other.dbnode);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
cmp = self.relnode.cmp(&other.relnode);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
cmp = self.forknum.cmp(&other.forknum);
|
||||
|
||||
cmp
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ impl RemotePath {
|
||||
self.0.file_name()
|
||||
}
|
||||
|
||||
pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
|
||||
pub fn join(&self, segment: &Utf8Path) -> Self {
|
||||
Self(self.0.join(segment))
|
||||
}
|
||||
|
||||
|
||||
@@ -267,12 +267,6 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
|
||||
let buf = download_and_compare(dl).await?;
|
||||
assert_eq!(buf, data);
|
||||
|
||||
debug!("Cleanup: deleting file at path {path:?}");
|
||||
ctx.client
|
||||
.delete(&path)
|
||||
.await
|
||||
.with_context(|| format!("{path:?} removal"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -82,8 +82,6 @@ enum-map.workspace = true
|
||||
enumset.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion.workspace = true
|
||||
|
||||
@@ -1,245 +0,0 @@
|
||||
use clap::Parser;
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Body, Client, Uri};
|
||||
use pageserver::{repository, tenant};
|
||||
use rand::prelude::*;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: Key,
|
||||
end: Key,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end.0.to_i128() - self.start.0.to_i128()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
ps_endpoint: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.ps_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
Arc::clone(&stats),
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
client: Client<HttpConnector, Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> {
|
||||
async move {
|
||||
let mut resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.ps_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
|
||||
let lsn = Arc::new(keyspace["at_lsn"].as_str().unwrap().to_owned());
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
KeyRange { start, end }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let (tx, mut rx) = channel::<i32>(1000);
|
||||
let tx = Arc::new(AsyncMutex::new(tx));
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let lsn = lsn.clone();
|
||||
let client = client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let stats = Arc::clone(&stats);
|
||||
let task = tokio::spawn(async move {
|
||||
for i in 0..args.num_requests {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key = rng.gen_range((r.start.0.to_i128()..r.end.0.to_i128()));
|
||||
key
|
||||
};
|
||||
let url = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/getpage?key={:036x}&lsn={}",
|
||||
args.ps_endpoint, tenant_id, timeline_id, key, lsn
|
||||
);
|
||||
let uri = url.parse::<Uri>().unwrap();
|
||||
let resp = client.get(uri).await.unwrap();
|
||||
stats.inc();
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
drop(tx);
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
(args.num_requests * args.num_tasks) as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,411 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use futures::{SinkExt, TryStreamExt};
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Client, Uri};
|
||||
use pageserver::page_cache::PAGE_SZ;
|
||||
use pageserver::pgdatadir_mapping::{is_rel_block_key, key_to_rel_block};
|
||||
use pageserver::{repository, tenant};
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
use rand::prelude::*;
|
||||
use scopeguard::defer;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::{Stream, StreamExt};
|
||||
use utils::completion;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: i128,
|
||||
end: i128,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end - self.start
|
||||
}
|
||||
}
|
||||
|
||||
struct RelTagBlockNo {
|
||||
rel_tag: RelTag,
|
||||
block_no: u32,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
ps_endpoint: String,
|
||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||
pq_client_connstring: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
#[clap(subcommand)]
|
||||
mode: Mode,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser, Clone)]
|
||||
enum Mode {
|
||||
GetPage,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
// std::env::set_var("RUST_LOG", "info,tokio_postgres=trace");
|
||||
// tracing_subscriber::fmt::init();
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.ps_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let stats = Arc::clone(&stats);
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
stats,
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
http_client: Client<HttpConnector, hyper::Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> + Send + Sync {
|
||||
async move {
|
||||
let mut resp = http_client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.ps_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let lsn: Lsn = keyspace["at_lsn"].as_str().unwrap().parse().unwrap();
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter_map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
// filter out non-relblock keys
|
||||
match (is_rel_block_key(start.0), is_rel_block_key(end.0)) {
|
||||
(true, true) => Some(KeyRange {
|
||||
start: start.0.to_i128(),
|
||||
end: end.0.to_i128(),
|
||||
}),
|
||||
(true, false) | (false, true) => {
|
||||
unimplemented!("split up range")
|
||||
}
|
||||
(false, false) => None,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let client = http_client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let task = tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
let mut client = getpage_client::Client::new(
|
||||
args.pq_client_connstring.clone(),
|
||||
tenant_id.clone(),
|
||||
timeline_id.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
for i in 0..args.num_requests {
|
||||
match args.mode {
|
||||
Mode::GetPage => {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key: i128 = rng.gen_range((r.start..r.end));
|
||||
let key = repository::Key::from_i128(key);
|
||||
// XXX filter these out when we iterate the keyspace
|
||||
assert!(
|
||||
is_rel_block_key(key),
|
||||
"we filter non-relblock keys out above"
|
||||
);
|
||||
let (rel_tag, block_no) =
|
||||
key_to_rel_block(key).expect("we just checked");
|
||||
RelTagBlockNo { rel_tag, block_no }
|
||||
};
|
||||
client
|
||||
.getpage(key, lsn)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"getpage for tenant {} timeline {}",
|
||||
tenant_id, timeline_id
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
Mode::NoOp => {
|
||||
client.noop().await.unwrap();
|
||||
}
|
||||
}
|
||||
stats.inc();
|
||||
}
|
||||
client.shutdown().await;
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod getpage_client {
|
||||
use std::pin::Pin;
|
||||
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
|
||||
PagestreamGetPageResponse,
|
||||
};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::RelTagBlockNo;
|
||||
|
||||
pub(crate) struct Client {
|
||||
copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
|
||||
conn_task: JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(
|
||||
connstring: String,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
) -> impl std::future::Future<Output = anyhow::Result<Self>> + Send {
|
||||
async move {
|
||||
let (client, connection) =
|
||||
tokio_postgres::connect(&connstring, postgres::NoTls).await?;
|
||||
|
||||
let conn_task_cancel = CancellationToken::new();
|
||||
let conn_task = tokio::spawn({
|
||||
let conn_task_cancel = conn_task_cancel.clone();
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = conn_task_cancel.cancelled() => {
|
||||
return;
|
||||
}
|
||||
res = connection => {
|
||||
res.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = client
|
||||
.copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
copy_both: Box::pin(copy_both),
|
||||
conn_task,
|
||||
cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn shutdown(mut self) {
|
||||
let _ = self.cancel_on_client_drop.take();
|
||||
self.conn_task.await.unwrap();
|
||||
}
|
||||
|
||||
pub async fn getpage(
|
||||
&mut self,
|
||||
key: RelTagBlockNo,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
let req = PagestreamGetPageRequest {
|
||||
latest: false,
|
||||
rel: key.rel_tag,
|
||||
blkno: key.block_no,
|
||||
lsn,
|
||||
};
|
||||
let req = PagestreamFeMessage::GetPage(req);
|
||||
match self.do_request(req).await? {
|
||||
PagestreamBeMessage::GetPage(p) => Ok(p),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn noop(&mut self) -> anyhow::Result<()> {
|
||||
match self.do_request(PagestreamFeMessage::NoOp).await? {
|
||||
PagestreamBeMessage::NoOp => Ok(()),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_request(
|
||||
&mut self,
|
||||
req: PagestreamFeMessage,
|
||||
) -> Result<PagestreamBeMessage, anyhow::Error> {
|
||||
let req: bytes::Bytes = req.serialize();
|
||||
// let mut req = tokio_util::io::ReaderStream::new(&req);
|
||||
let mut req = tokio_stream::once(Ok(req));
|
||||
|
||||
self.copy_both.send_all(&mut req).await?;
|
||||
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
|
||||
let next = next.unwrap().unwrap();
|
||||
|
||||
match PagestreamBeMessage::deserialize(next)? {
|
||||
PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
|
||||
x => Ok(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use bytes::Buf;
|
||||
use clap::Parser;
|
||||
use pageserver_api::models::{PagestreamBeMessage, PagestreamErrorResponse, PagestreamFeMessage};
|
||||
use postgres_backend::{AuthType, PostgresBackend, QueryError};
|
||||
use pq_proto::{BeMessage, FeMessage};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
bind: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&args.bind).await.unwrap();
|
||||
loop {
|
||||
let (socket, _) = listener.accept().await.unwrap();
|
||||
tokio::spawn(async move {
|
||||
handle_connection(socket).await.unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_connection(socket: tokio::net::TcpStream) -> anyhow::Result<()> {
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("could not set TCP_NODELAY")?;
|
||||
|
||||
let peer_addr = socket.peer_addr().context("get peer address")?;
|
||||
let socket = tokio_io_timeout::TimeoutReader::new(socket);
|
||||
tokio::pin!(socket);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, AuthType::Trust, None)?;
|
||||
let mut conn_handler = NoOpHandler;
|
||||
let cancel = CancellationToken::new();
|
||||
pgbackend
|
||||
.run(&mut conn_handler, || {
|
||||
let cancel = cancel.clone();
|
||||
async move { cancel.cancelled().await }
|
||||
})
|
||||
.await?;
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
struct NoOpHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<IO> postgres_backend::Handler<IO> for NoOpHandler
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
fn startup(
|
||||
&mut self,
|
||||
_pgb: &mut PostgresBackend<IO>,
|
||||
_sm: &pq_proto::FeStartupPacket,
|
||||
) -> Result<(), QueryError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
query_string: &str,
|
||||
) -> Result<(), QueryError> {
|
||||
if !query_string.starts_with("pagestream ") {
|
||||
return Err(QueryError::Other(anyhow::anyhow!("not a pagestream query")));
|
||||
}
|
||||
|
||||
// switch client to COPYBOTH
|
||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||
pgb.flush().await?;
|
||||
|
||||
loop {
|
||||
let msg = pgb.read_message().await?;
|
||||
|
||||
let copy_data_bytes = match msg {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => return Ok(()),
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => return Ok(()), // client disconnected
|
||||
};
|
||||
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
let response = match neon_fe_msg {
|
||||
PagestreamFeMessage::NoOp => Ok(PagestreamBeMessage::NoOp),
|
||||
x => Err(QueryError::Other(anyhow::anyhow!(
|
||||
"this server only supports no-op: {x:?}"
|
||||
))),
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
|
||||
pgb.flush().await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -392,19 +392,13 @@ paths:
|
||||
type: string
|
||||
format: date-time
|
||||
description: A timestamp to get the LSN
|
||||
- name: version
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
description: The version of the endpoint to use
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/LsnByTimestampResponse"
|
||||
type: string
|
||||
"400":
|
||||
description: Error when no tenant id found in path, no timeline id or invalid timestamp
|
||||
content:
|
||||
@@ -1390,19 +1384,6 @@ components:
|
||||
type: string
|
||||
format: hex
|
||||
|
||||
LsnByTimestampResponse:
|
||||
type: object
|
||||
required:
|
||||
- lsn
|
||||
- kind
|
||||
properties:
|
||||
lsn:
|
||||
type: string
|
||||
format: hex
|
||||
kind:
|
||||
type: string
|
||||
enum: [past, present, future, nodata]
|
||||
|
||||
Error:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::sync::Arc;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use futures::TryFutureExt;
|
||||
use humantime::format_rfc3339;
|
||||
use hyper::header;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
@@ -17,7 +17,6 @@ use pageserver_api::models::{
|
||||
TenantLoadRequest, TenantLocationConfigRequest,
|
||||
};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use tenant_size_model::{SizeResult, StorageModel};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
@@ -485,8 +484,6 @@ async fn get_lsn_by_timestamp_handler(
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let version: Option<u8> = parse_query_param(&request, "version")?;
|
||||
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let timestamp_raw = must_get_query_param(&request, "timestamp")?;
|
||||
let timestamp = humantime::parse_rfc3339(×tamp_raw)
|
||||
@@ -498,32 +495,13 @@ async fn get_lsn_by_timestamp_handler(
|
||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||
let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;
|
||||
|
||||
if version.unwrap_or(0) > 1 {
|
||||
#[serde_as]
|
||||
#[derive(serde::Serialize)]
|
||||
struct Result {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
lsn: Lsn,
|
||||
kind: &'static str,
|
||||
}
|
||||
let (lsn, kind) = match result {
|
||||
LsnForTimestamp::Present(lsn) => (lsn, "present"),
|
||||
LsnForTimestamp::Future(lsn) => (lsn, "future"),
|
||||
LsnForTimestamp::Past(lsn) => (lsn, "past"),
|
||||
LsnForTimestamp::NoData(lsn) => (lsn, "nodata"),
|
||||
};
|
||||
json_response(StatusCode::OK, Result { lsn, kind })
|
||||
} else {
|
||||
// FIXME: this is a temporary crutch not to break backwards compatibility
|
||||
// See https://github.com/neondatabase/neon/pull/5608
|
||||
let result = match result {
|
||||
LsnForTimestamp::Present(lsn) => format!("{lsn}"),
|
||||
LsnForTimestamp::Future(_lsn) => "future".into(),
|
||||
LsnForTimestamp::Past(_lsn) => "past".into(),
|
||||
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
|
||||
};
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
let result = match result {
|
||||
LsnForTimestamp::Present(lsn) => format!("{lsn}"),
|
||||
LsnForTimestamp::Future(_lsn) => "future".into(),
|
||||
LsnForTimestamp::Past(_lsn) => "past".into(),
|
||||
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
|
||||
};
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
|
||||
async fn get_timestamp_of_lsn_handler(
|
||||
@@ -681,45 +659,6 @@ async fn tenant_ignore_handler(
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_duplicate_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let src_tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let new_tenant_id = request_data.new_tenant_id;
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let _timer = STORAGE_TIME_GLOBAL
|
||||
.get_metric_with_label_values(&[StorageTimeOperation::DuplicateTenant.into()])
|
||||
.expect("bug")
|
||||
.start_timer();
|
||||
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let generation = get_request_generation(state, request_data.generation)?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
mgr::duplicate_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
src_tenant_id,
|
||||
new_tenant_id,
|
||||
generation,
|
||||
state.tenant_resources(),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("tenant_duplicate", %src_tenant_id, tenant_id = %new_tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::CREATED, TenantCreateResponse(new_tenant_id))
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -828,10 +767,6 @@ async fn tenant_size_handler(
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
let mut sizes = None;
|
||||
let accepts_html = headers
|
||||
.get(header::ACCEPT)
|
||||
.map(|v| v == "text/html")
|
||||
.unwrap_or_default();
|
||||
if !inputs_only.unwrap_or(false) {
|
||||
let storage_model = inputs
|
||||
.calculate_model()
|
||||
@@ -839,11 +774,11 @@ async fn tenant_size_handler(
|
||||
let size = storage_model.calculate();
|
||||
|
||||
// If request header expects html, return html
|
||||
if accepts_html {
|
||||
if headers["Accept"] == "text/html" {
|
||||
return synthetic_size_html_response(inputs, storage_model, size);
|
||||
}
|
||||
sizes = Some(size);
|
||||
} else if accepts_html {
|
||||
} else if headers["Accept"] == "text/html" {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"inputs_only parameter is incompatible with html output request"
|
||||
)));
|
||||
@@ -994,7 +929,7 @@ fn synthetic_size_html_response(
|
||||
pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>, ApiError> {
|
||||
let response = Response::builder()
|
||||
.status(status)
|
||||
.header(header::CONTENT_TYPE, "text/html")
|
||||
.header(hyper::header::CONTENT_TYPE, "text/html")
|
||||
.body(Body::from(data.as_bytes().to_vec()))
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
Ok(response)
|
||||
@@ -1375,7 +1310,7 @@ async fn getpage_at_lsn_handler(
|
||||
Result::<_, ApiError>::Ok(
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "application/octet-stream")
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
.body(hyper::Body::from(page))
|
||||
.unwrap(),
|
||||
)
|
||||
@@ -1767,9 +1702,6 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
api_handler(r, tenant_ignore_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/duplicate", |r| {
|
||||
api_handler(r, tenant_duplicate_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
api_handler(r, timeline_detail_handler)
|
||||
})
|
||||
|
||||
@@ -149,10 +149,6 @@ fn ends_with_suffix(path: &Utf8Path, suffix: &str) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: DO NOT ADD new query methods like this, which will have a next step of parsing timelineid
|
||||
// from the directory name. Instead create type "UninitMark(TimelineId)" and only parse it once
|
||||
// from the name.
|
||||
|
||||
pub fn is_uninit_mark(path: &Utf8Path) -> bool {
|
||||
ends_with_suffix(path, TIMELINE_UNINIT_MARK_SUFFIX)
|
||||
}
|
||||
|
||||
@@ -51,9 +51,6 @@ pub enum StorageTimeOperation {
|
||||
|
||||
#[strum(serialize = "create tenant")]
|
||||
CreateTenant,
|
||||
|
||||
#[strum(serialize = "duplicate tenant")]
|
||||
DuplicateTenant,
|
||||
}
|
||||
|
||||
pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
|
||||
@@ -760,7 +757,6 @@ pub enum SmgrQueryType {
|
||||
GetRelSize,
|
||||
GetPageAtLsn,
|
||||
GetDbSize,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -1392,22 +1388,27 @@ impl TimelineMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn record_new_file_metrics(&self, sz: u64) {
|
||||
pub fn record_new_file_metrics(&self, sz: u64) {
|
||||
self.resident_physical_size_add(sz);
|
||||
self.num_persistent_files_created.inc_by(1);
|
||||
self.persistent_bytes_written.inc_by(sz);
|
||||
}
|
||||
|
||||
pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
|
||||
pub fn resident_physical_size_sub(&self, sz: u64) {
|
||||
self.resident_physical_size_gauge.sub(sz);
|
||||
crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
|
||||
}
|
||||
|
||||
pub(crate) fn resident_physical_size_add(&self, sz: u64) {
|
||||
pub fn resident_physical_size_add(&self, sz: u64) {
|
||||
self.resident_physical_size_gauge.add(sz);
|
||||
crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
|
||||
}
|
||||
|
||||
pub fn resident_physical_size_set(&self, sz: u64) {
|
||||
self.resident_physical_size_gauge.set(sz);
|
||||
crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.set(sz);
|
||||
}
|
||||
|
||||
pub fn resident_physical_size_get(&self) -> u64 {
|
||||
self.resident_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
@@ -488,11 +488,6 @@ impl PageServerHandler {
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::NoOp => {
|
||||
let _timer = metrics.start_timer(metrics::SmgrQueryType::NoOp);
|
||||
let span = tracing::info_span!("no_op");
|
||||
(Ok(PagestreamBeMessage::NoOp), span)
|
||||
}
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
|
||||
@@ -675,9 +675,8 @@ impl Timeline {
|
||||
|
||||
result.add_key(CONTROLFILE_KEY);
|
||||
result.add_key(CHECKPOINT_KEY);
|
||||
if self.get(AUX_FILES_KEY, lsn, ctx).await.is_ok() {
|
||||
result.add_key(AUX_FILES_KEY);
|
||||
}
|
||||
result.add_key(AUX_FILES_KEY);
|
||||
|
||||
Ok(result.to_keyspace())
|
||||
}
|
||||
|
||||
@@ -1694,7 +1693,6 @@ const AUX_FILES_KEY: Key = Key {
|
||||
// Reverse mappings for a few Keys.
|
||||
// These are needed by WAL redo manager.
|
||||
|
||||
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
|
||||
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x00 => (
|
||||
@@ -1710,8 +1708,7 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
})
|
||||
}
|
||||
|
||||
/// See [[key_to_rel_block]].
|
||||
pub fn is_rel_block_key(key: Key) -> bool {
|
||||
fn is_rel_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0
|
||||
}
|
||||
|
||||
|
||||
@@ -1057,8 +1057,8 @@ impl Tenant {
|
||||
TimelineId::try_from(timeline_uninit_mark_file.file_stem())
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Could not parse timeline id out of the timeline uninit mark name {timeline_uninit_mark_file}",
|
||||
)
|
||||
"Could not parse timeline id out of the timeline uninit mark name {timeline_uninit_mark_file}",
|
||||
)
|
||||
})?;
|
||||
let timeline_dir = self.conf.timeline_path(&self.tenant_id, &timeline_id);
|
||||
if let Err(e) =
|
||||
|
||||
@@ -4,10 +4,8 @@
|
||||
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncSeekExt;
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -28,11 +26,9 @@ use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt};
|
||||
use crate::tenant::delete::DeleteTenantFlow;
|
||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::{DeltaLayer, ImageLayer, LayerFileName};
|
||||
use crate::tenant::{
|
||||
create_tenant_files, remote_timeline_client, AttachMarkerMode, AttachedTenantConf,
|
||||
CreateTenantFilesMode, IndexPart, Tenant, TenantState,
|
||||
create_tenant_files, AttachMarkerMode, AttachedTenantConf, CreateTenantFilesMode, Tenant,
|
||||
TenantState,
|
||||
};
|
||||
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
|
||||
|
||||
@@ -699,159 +695,6 @@ pub(crate) async fn create_tenant(
|
||||
}).await
|
||||
}
|
||||
|
||||
pub(crate) async fn duplicate_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
src_tenant_id: TenantId,
|
||||
new_tenant_id: TenantId,
|
||||
generation: Generation,
|
||||
resources: TenantSharedResources,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
// TODO: would be nice to use tenant_map_insert here, but, we're not ready to create a Tenant object yet
|
||||
let tempdir = path_with_suffix_extension(
|
||||
conf.tenants_path().join(&new_tenant_id.to_string()),
|
||||
&format!("duplication.{TEMP_FILE_SUFFIX}"),
|
||||
);
|
||||
tokio::fs::remove_dir_all(&tempdir)
|
||||
.await
|
||||
.or_else(|e| match e.kind() {
|
||||
std::io::ErrorKind::NotFound => Ok(()),
|
||||
_ => Err(e),
|
||||
})
|
||||
.context("pre-run clean up tempdir")?;
|
||||
|
||||
tokio::fs::create_dir(&tempdir)
|
||||
.await
|
||||
.context("create tempdir")?;
|
||||
|
||||
// Copy the tenant's data in S3
|
||||
let remote_storage = resources
|
||||
.remote_storage
|
||||
.as_ref()
|
||||
.context("only works with remote storage")?;
|
||||
|
||||
let remote_src_timelines =
|
||||
remote_timeline_client::list_remote_timelines(remote_storage, src_tenant_id)
|
||||
.await
|
||||
.context("list src timelines")?;
|
||||
|
||||
info!(?remote_src_timelines, "got src timelines");
|
||||
|
||||
for timeline_id in remote_src_timelines {
|
||||
async {
|
||||
let tempdir = tempdir.join(&timeline_id.to_string());
|
||||
|
||||
tokio::fs::create_dir(&tempdir)
|
||||
.await
|
||||
.context("create tempdir for timeline")?;
|
||||
|
||||
let remote_src_tl =
|
||||
remote_timeline_client::remote_timeline_path(&src_tenant_id, &timeline_id);
|
||||
let remote_dst_tl =
|
||||
remote_timeline_client::remote_timeline_path(&new_tenant_id, &timeline_id);
|
||||
|
||||
let object_names = remote_storage
|
||||
.list_prefixes(Some(&remote_src_tl))
|
||||
.await
|
||||
.context("list timeline remote prefix")?;
|
||||
|
||||
for name in object_names {
|
||||
async {
|
||||
let name = name.object_name().context(
|
||||
"list_prefixes return values should always have object_name()=Some",
|
||||
)?;
|
||||
let remote_src_obj = remote_src_tl.join(name);
|
||||
let remote_dst_obj = remote_dst_tl.join(name);
|
||||
|
||||
let tmp_obj_filepath = tempdir.join(name);
|
||||
let mut tmp_obj_file = tokio::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(&tmp_obj_filepath)
|
||||
.await
|
||||
.context("create temp file")?;
|
||||
let mut tmp_dl = remote_storage
|
||||
.download(&remote_src_obj)
|
||||
.await
|
||||
.context("start download")?;
|
||||
let tmp_obj_size =
|
||||
tokio::io::copy(&mut tmp_dl.download_stream, &mut tmp_obj_file)
|
||||
.await
|
||||
.context("do the download")?;
|
||||
|
||||
if name == IndexPart::FILE_NAME {
|
||||
// needs no patching
|
||||
} else {
|
||||
let name = LayerFileName::from_str(name).map_err(|e: String| {
|
||||
anyhow::anyhow!("unknown key in timeline s3 prefix: {name:?}: {e}")
|
||||
})?;
|
||||
match name {
|
||||
LayerFileName::Image(_) => {
|
||||
ImageLayer::rewrite_tenant_timeline(
|
||||
&tmp_obj_filepath,
|
||||
new_tenant_id,
|
||||
timeline_id, /* leave as is */
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.context("rewrite tenant timeline")?;
|
||||
}
|
||||
LayerFileName::Delta(_) => {
|
||||
DeltaLayer::rewrite_tenant_timeline(
|
||||
&tmp_obj_filepath,
|
||||
new_tenant_id,
|
||||
timeline_id, /* leave as is */
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.context("rewrite tenant timeline")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(?remote_dst_obj, "uploading");
|
||||
|
||||
tmp_obj_file
|
||||
.seek(std::io::SeekFrom::Start(0))
|
||||
.await
|
||||
.context("seek tmp file to beginning for upload")?;
|
||||
remote_storage
|
||||
.upload(
|
||||
tmp_obj_file,
|
||||
tmp_obj_size as usize,
|
||||
&remote_dst_obj,
|
||||
tmp_dl.metadata,
|
||||
)
|
||||
.await
|
||||
.context("upload modified")?;
|
||||
|
||||
tokio::fs::remove_file(tmp_obj_filepath)
|
||||
.await
|
||||
.context("remove temp file")?;
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
.instrument(info_span!("copy object", object_name=?name))
|
||||
.await
|
||||
.context("copy object")?;
|
||||
}
|
||||
anyhow::Ok(())
|
||||
}
|
||||
.instrument(info_span!("copy_timeline", timeline_id=%timeline_id))
|
||||
.await?;
|
||||
}
|
||||
|
||||
tokio::fs::remove_dir_all(&tempdir)
|
||||
.await
|
||||
.context("post-run clean up tempdir")?;
|
||||
|
||||
attach_tenant(conf, new_tenant_id, generation, tenant_conf, resources, ctx).await
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum SetNewTenantConfigError {
|
||||
#[error(transparent)]
|
||||
|
||||
@@ -627,7 +627,7 @@ impl RemoteTimelineClient {
|
||||
///
|
||||
/// Launch an upload operation in the background.
|
||||
///
|
||||
pub(crate) fn schedule_layer_file_upload(
|
||||
pub fn schedule_layer_file_upload(
|
||||
self: &Arc<Self>,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_metadata: &LayerFileMetadata,
|
||||
@@ -635,17 +635,6 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
self.schedule_layer_file_upload0(upload_queue, layer_file_name, layer_metadata);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn schedule_layer_file_upload0(
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_metadata: &LayerFileMetadata,
|
||||
) {
|
||||
upload_queue
|
||||
.latest_files
|
||||
.insert(layer_file_name.clone(), layer_metadata.clone());
|
||||
@@ -654,15 +643,21 @@ impl RemoteTimelineClient {
|
||||
let op = UploadOp::UploadLayer(layer_file_name.clone(), layer_metadata.clone());
|
||||
self.calls_unfinished_metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
|
||||
info!("scheduled layer file upload {layer_file_name}");
|
||||
|
||||
// Launch the task immediately, if possible
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Launch a delete operation in the background.
|
||||
///
|
||||
/// The operation does not modify local filesystem state.
|
||||
/// The operation does not modify local state but assumes the local files have already been
|
||||
/// deleted, and is used to mirror those changes to remote.
|
||||
///
|
||||
/// Note: This schedules an index file upload before the deletions. The
|
||||
/// deletion won't actually be performed, until all previously scheduled
|
||||
/// deletion won't actually be performed, until any previously scheduled
|
||||
/// upload operations, and the index file upload, have completed
|
||||
/// successfully.
|
||||
pub fn schedule_layer_file_deletion(
|
||||
@@ -672,133 +667,61 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
let with_generations =
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, &names);
|
||||
|
||||
self.schedule_deletion_of_unlinked0(upload_queue, with_generations);
|
||||
|
||||
// Launch the tasks immediately, if possible
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unlinks the layer files from `index_part.json` but does not yet schedule deletion for the
|
||||
/// layer files, leaving them dangling.
|
||||
///
|
||||
/// The files will be leaked in remote storage unless [`Self::schedule_deletion_of_unlinked`]
|
||||
/// is invoked on them.
|
||||
#[allow(unused)] // will be used by PR#4938
|
||||
pub(crate) fn schedule_unlinking_of_layers_from_index_part(
|
||||
self: &Arc<Self>,
|
||||
names: Vec<LayerFileName>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
// just forget the return value; after uploading the next index_part.json, we can consider
|
||||
// the layer files as "dangling". this is fine however.
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, &names);
|
||||
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update the remote index file, removing the to-be-deleted files from the index,
|
||||
/// allowing scheduling of actual deletions later.
|
||||
fn schedule_unlinking_of_layers_from_index_part0(
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
names: &[LayerFileName],
|
||||
) -> Vec<(LayerFileName, Generation)> {
|
||||
// Deleting layers doesn't affect the values stored in TimelineMetadata,
|
||||
// so we don't need update it. Just serialize it.
|
||||
let metadata = upload_queue.latest_metadata.clone();
|
||||
|
||||
// Decorate our list of names with each name's generation, dropping
|
||||
// makes that are unexpectedly missing from our metadata.
|
||||
let with_generations: Vec<_> = names
|
||||
.iter()
|
||||
.filter_map(|name| {
|
||||
// Remove from latest_files, learning the file's remote generation in the process
|
||||
let meta = upload_queue.latest_files.remove(name);
|
||||
// Update the remote index file, removing the to-be-deleted files from the index,
|
||||
// before deleting the actual files.
|
||||
//
|
||||
// Once we start removing files from upload_queue.latest_files, there's
|
||||
// no going back! Otherwise, some of the files would already be removed
|
||||
// from latest_files, but not yet scheduled for deletion. Use a closure
|
||||
// to syntactically forbid ? or bail! calls here.
|
||||
let no_bail_here = || {
|
||||
// Decorate our list of names with each name's generation, dropping
|
||||
// makes that are unexpectedly missing from our metadata.
|
||||
let with_generations: Vec<_> = names
|
||||
.into_iter()
|
||||
.filter_map(|name| {
|
||||
// Remove from latest_files, learning the file's remote generation in the process
|
||||
let meta = upload_queue.latest_files.remove(&name);
|
||||
|
||||
if let Some(meta) = meta {
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
|
||||
Some((name.to_owned(), meta.generation))
|
||||
} else {
|
||||
// This can only happen if we forgot to to schedule the file upload
|
||||
// before scheduling the delete. Log it because it is a rare/strange
|
||||
// situation, and in case something is misbehaving, we'd like to know which
|
||||
// layers experienced this.
|
||||
info!("Deleting layer {name} not found in latest_files list, never uploaded?");
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
if let Some(meta) = meta {
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
|
||||
Some((name, meta.generation))
|
||||
} else {
|
||||
// This can only happen if we forgot to to schedule the file upload
|
||||
// before scheduling the delete. Log it because it is a rare/strange
|
||||
// situation, and in case something is misbehaving, we'd like to know which
|
||||
// layers experienced this.
|
||||
info!(
|
||||
"Deleting layer {name} not found in latest_files list, never uploaded?"
|
||||
);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// after unlinking files from the upload_queue.latest_files we must always schedule an
|
||||
// index_part update, because that needs to be uploaded before we can actually delete the
|
||||
// files.
|
||||
if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
|
||||
self.schedule_index_upload(upload_queue, metadata);
|
||||
}
|
||||
if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
|
||||
self.schedule_index_upload(upload_queue, metadata);
|
||||
}
|
||||
|
||||
with_generations
|
||||
}
|
||||
for (name, gen) in &with_generations {
|
||||
info!("scheduling deletion of layer {}{}", name, gen.get_suffix());
|
||||
}
|
||||
|
||||
/// Schedules deletion for layer files which have previously been unlinked from the
|
||||
/// `index_part.json` with [`Self::schedule_unlinking_of_layers_from_index_part`].
|
||||
#[allow(unused)] // will be used by Layer::drop in PR#4938
|
||||
pub(crate) fn schedule_deletion_of_unlinked(
|
||||
self: &Arc<Self>,
|
||||
layers: Vec<(LayerFileName, Generation)>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
self.schedule_deletion_of_unlinked0(upload_queue, layers);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn schedule_deletion_of_unlinked0(
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
with_generations: Vec<(LayerFileName, Generation)>,
|
||||
) {
|
||||
for (name, gen) in &with_generations {
|
||||
info!("scheduling deletion of layer {}{}", name, gen.get_suffix());
|
||||
}
|
||||
|
||||
// schedule the actual deletions
|
||||
let op = UploadOp::Delete(Delete {
|
||||
layers: with_generations,
|
||||
});
|
||||
self.calls_unfinished_metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
}
|
||||
|
||||
/// Schedules a compaction update to the remote `index_part.json`.
|
||||
///
|
||||
/// `compacted_from` represent the L0 names which have been `compacted_to` L1 layers.
|
||||
pub(crate) fn schedule_compaction_update(
|
||||
self: &Arc<Self>,
|
||||
compacted_from: &[LayerFileName],
|
||||
compacted_to: &[(LayerFileName, LayerFileMetadata)],
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
for (name, m) in compacted_to {
|
||||
self.schedule_layer_file_upload0(upload_queue, name, m);
|
||||
}
|
||||
|
||||
let with_generations =
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, compacted_from);
|
||||
self.schedule_deletion_of_unlinked0(upload_queue, with_generations);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
// schedule the actual deletions
|
||||
let op = UploadOp::Delete(Delete {
|
||||
layers: with_generations,
|
||||
});
|
||||
self.calls_unfinished_metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
|
||||
// Launch the tasks immediately, if possible
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
};
|
||||
no_bail_here();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -60,8 +60,6 @@ pub(super) async fn upload_timeline_layer<'a>(
|
||||
bail!("failpoint before-upload-layer")
|
||||
});
|
||||
|
||||
pausable_failpoint!("before-upload-layer-pausable");
|
||||
|
||||
let storage_path = remote_path(conf, source_path, generation)?;
|
||||
let source_file_res = fs::File::open(&source_path).await;
|
||||
let source_file = match source_file_res {
|
||||
|
||||
@@ -844,49 +844,6 @@ impl Drop for DeltaLayerWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != DELTA_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -436,49 +436,6 @@ impl ImageLayer {
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != IMAGE_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -159,8 +159,11 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
|
||||
// TODO: we shouldn't need to await to find tenant and this could be moved outside of
|
||||
// loop, #3501. There are also additional "allowed_errors" in tests.
|
||||
if first && random_init_delay(period, &cancel).await.is_err() {
|
||||
break;
|
||||
if first {
|
||||
first = false;
|
||||
if random_init_delay(period, &cancel).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let started_at = Instant::now();
|
||||
@@ -180,16 +183,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
}
|
||||
};
|
||||
|
||||
if !first {
|
||||
// The first iteration is typically much slower, because all tenants compete for the
|
||||
// compaction sempahore to run, and because of concurrent startup work like initializing
|
||||
// logical sizes. To avoid routinely spamming warnings, we suppress this log on first iteration.
|
||||
warn_when_period_overrun(
|
||||
started_at.elapsed(),
|
||||
period,
|
||||
BackgroundLoopKind::Compaction,
|
||||
);
|
||||
}
|
||||
warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Compaction);
|
||||
|
||||
// Sleep
|
||||
if tokio::time::timeout(sleep_duration, cancel.cancelled())
|
||||
@@ -198,8 +192,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
.await;
|
||||
@@ -231,8 +223,11 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
|
||||
let period = tenant.get_gc_period();
|
||||
|
||||
if first && random_init_delay(period, &cancel).await.is_err() {
|
||||
break;
|
||||
if first {
|
||||
first = false;
|
||||
if random_init_delay(period, &cancel).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let started_at = Instant::now();
|
||||
@@ -256,12 +251,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
}
|
||||
};
|
||||
|
||||
if !first {
|
||||
// The first iteration is typically much slower, because all tenants compete for the
|
||||
// compaction sempahore to run, and because of concurrent startup work like initializing
|
||||
// logical sizes. To avoid routinely spamming warnings, we suppress this log on first iteration.
|
||||
warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Gc);
|
||||
}
|
||||
warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Gc);
|
||||
|
||||
// Sleep
|
||||
if tokio::time::timeout(sleep_duration, cancel.cancelled())
|
||||
@@ -270,8 +260,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
.await;
|
||||
|
||||
@@ -1871,7 +1871,7 @@ impl Timeline {
|
||||
"loaded layer map with {} layers at {}, total physical size: {}",
|
||||
num_layers, disk_consistent_lsn, total_physical_size
|
||||
);
|
||||
self.metrics.resident_physical_size_add(total_physical_size);
|
||||
self.metrics.resident_physical_size_set(total_physical_size);
|
||||
|
||||
timer.stop_and_record();
|
||||
Ok(())
|
||||
@@ -2793,13 +2793,10 @@ impl Timeline {
|
||||
)
|
||||
};
|
||||
|
||||
let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
|
||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
|
||||
// The new on-disk layers are now in the layer map. We can remove the
|
||||
// in-memory layer from the map now. The flushed layer is stored in
|
||||
// the mapping in `create_delta_layer`.
|
||||
let metadata = {
|
||||
{
|
||||
let mut guard = self.layers.write().await;
|
||||
|
||||
if let Some(ref l) = delta_layer_to_add {
|
||||
@@ -2815,17 +2812,8 @@ impl Timeline {
|
||||
}
|
||||
|
||||
guard.finish_flush_l0_layer(delta_layer_to_add, &frozen_layer);
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
|
||||
// Schedule remote uploads that will reflect our new disk_consistent_lsn
|
||||
Some(self.schedule_uploads(disk_consistent_lsn, layer_paths_to_upload)?)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
// release lock on 'layers'
|
||||
};
|
||||
}
|
||||
|
||||
// FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,
|
||||
// a compaction can delete the file and then it won't be available for uploads any more.
|
||||
@@ -2841,22 +2829,28 @@ impl Timeline {
|
||||
//
|
||||
// TODO: This perhaps should be done in 'flush_frozen_layers', after flushing
|
||||
// *all* the layers, to avoid fsyncing the file multiple times.
|
||||
let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
|
||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
|
||||
// If we updated our disk_consistent_lsn, persist the updated metadata to local disk.
|
||||
if let Some(metadata) = metadata {
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
||||
// After crash, we will restart WAL streaming and processing from that point.
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
self.update_metadata_file(disk_consistent_lsn, layer_paths_to_upload)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
.context("update_metadata_file")?;
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update metadata file
|
||||
fn schedule_uploads(
|
||||
async fn update_metadata_file(
|
||||
&self,
|
||||
disk_consistent_lsn: Lsn,
|
||||
layer_paths_to_upload: HashMap<LayerFileName, LayerFileMetadata>,
|
||||
) -> anyhow::Result<TimelineMetadata> {
|
||||
) -> anyhow::Result<()> {
|
||||
// We can only save a valid 'prev_record_lsn' value on disk if we
|
||||
// flushed *all* in-memory changes to disk. We only track
|
||||
// 'prev_record_lsn' in memory for the latest processed record, so we
|
||||
@@ -2893,6 +2887,10 @@ impl Timeline {
|
||||
x.unwrap()
|
||||
));
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
for (path, layer_metadata) in layer_paths_to_upload {
|
||||
remote_client.schedule_layer_file_upload(&path, &layer_metadata)?;
|
||||
@@ -2900,20 +2898,6 @@ impl Timeline {
|
||||
remote_client.schedule_index_upload_for_metadata_update(&metadata)?;
|
||||
}
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
async fn update_metadata_file(
|
||||
&self,
|
||||
disk_consistent_lsn: Lsn,
|
||||
layer_paths_to_upload: HashMap<LayerFileName, LayerFileMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
let metadata = self.schedule_uploads(disk_consistent_lsn, layer_paths_to_upload)?;
|
||||
|
||||
save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
|
||||
.await
|
||||
.context("save_metadata")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -3079,7 +3063,6 @@ impl Timeline {
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(%lsn, %force))]
|
||||
async fn create_image_layers(
|
||||
&self,
|
||||
partitioning: &KeyPartitioning,
|
||||
@@ -3870,21 +3853,22 @@ impl Timeline {
|
||||
// now, we just skip the file to avoid unintentional modification to files on the disk and in the layer map.
|
||||
let mut duplicated_layers = HashSet::new();
|
||||
|
||||
let mut uploaded_layers = Vec::with_capacity(new_layers.len());
|
||||
let mut insert_layers = Vec::new();
|
||||
let mut remove_layers = Vec::new();
|
||||
|
||||
for l in &new_layers {
|
||||
for l in new_layers {
|
||||
let new_delta_path = l.path();
|
||||
|
||||
let metadata = new_delta_path.metadata().with_context(|| {
|
||||
format!("read file metadata for new created layer {new_delta_path}")
|
||||
})?;
|
||||
|
||||
uploaded_layers.push((
|
||||
l.filename(),
|
||||
LayerFileMetadata::new(metadata.len(), self.generation),
|
||||
));
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
remote_client.schedule_layer_file_upload(
|
||||
&l.filename(),
|
||||
&LayerFileMetadata::new(metadata.len(), self.generation),
|
||||
)?;
|
||||
}
|
||||
|
||||
// update metrics, including the timeline's physical size
|
||||
self.metrics.record_new_file_metrics(metadata.len());
|
||||
@@ -3897,7 +3881,7 @@ impl Timeline {
|
||||
LayerResidenceStatus::Resident,
|
||||
LayerResidenceEventReason::LayerCreate,
|
||||
);
|
||||
let l = l.to_owned() as Arc<dyn PersistentLayer>;
|
||||
let l = l as Arc<dyn PersistentLayer>;
|
||||
if guard.contains(&l) {
|
||||
tracing::error!(layer=%l, "duplicated L1 layer");
|
||||
duplicated_layers.insert(l.layer_desc().key());
|
||||
@@ -3929,12 +3913,13 @@ impl Timeline {
|
||||
&self.metrics,
|
||||
)?;
|
||||
|
||||
if let Some(remote_client) = self.remote_client.as_ref() {
|
||||
remote_client.schedule_compaction_update(&layer_names_to_delete, &uploaded_layers)?;
|
||||
}
|
||||
|
||||
drop_wlock(guard);
|
||||
|
||||
// Also schedule the deletions in remote storage
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
remote_client.schedule_layer_file_deletion(layer_names_to_delete)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey;
|
||||
use storage_broker::proto::SafekeeperTimelineInfo;
|
||||
use storage_broker::proto::SubscribeSafekeeperInfoRequest;
|
||||
use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
|
||||
use storage_broker::{BrokerClientChannel, Code, Streaming};
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use storage_broker::Streaming;
|
||||
use tokio::select;
|
||||
use tracing::*;
|
||||
|
||||
@@ -136,17 +137,8 @@ pub(super) async fn connection_manager_loop_step(
|
||||
broker_update = broker_subscription.message() => {
|
||||
match broker_update {
|
||||
Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
|
||||
Err(status) => {
|
||||
match status.code() {
|
||||
Code::Unknown if status.message().contains("stream closed because of a broken pipe") => {
|
||||
// tonic's error handling doesn't provide a clear code for disconnections: we get
|
||||
// "h2 protocol error: error reading a body from connection: stream closed because of a broken pipe"
|
||||
info!("broker disconnected: {status}");
|
||||
},
|
||||
_ => {
|
||||
warn!("broker subscription failed: {status}");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("broker subscription failed: {e}");
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
Ok(None) => {
|
||||
|
||||
12
poetry.lock
generated
12
poetry.lock
generated
@@ -2447,20 +2447,20 @@ test = ["websockets"]
|
||||
|
||||
[[package]]
|
||||
name = "werkzeug"
|
||||
version = "3.0.1"
|
||||
version = "2.2.3"
|
||||
description = "The comprehensive WSGI web application library."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"},
|
||||
{file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"},
|
||||
{file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"},
|
||||
{file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
MarkupSafe = ">=2.1.1"
|
||||
|
||||
[package.extras]
|
||||
watchdog = ["watchdog (>=2.3)"]
|
||||
watchdog = ["watchdog"]
|
||||
|
||||
[[package]]
|
||||
name = "wrapt"
|
||||
@@ -2719,4 +2719,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "74649cf47c52f21b01b096a42044750b1c9677576b405be0489c2909127a9bf1"
|
||||
content-hash = "c5981d8d7c2deadd47c823bc35f86f830c8e320b653d2d3718bade1f4d2dabca"
|
||||
|
||||
@@ -3,9 +3,7 @@ mod hacks;
|
||||
mod link;
|
||||
|
||||
pub use link::LinkAuthError;
|
||||
use tokio_postgres::config::AuthKeys;
|
||||
|
||||
use crate::proxy::{handle_try_wake, retry_after};
|
||||
use crate::{
|
||||
auth::{self, ClientCredentials},
|
||||
config::AuthenticationConfig,
|
||||
@@ -18,9 +16,8 @@ use crate::{
|
||||
};
|
||||
use futures::TryFutureExt;
|
||||
use std::borrow::Cow;
|
||||
use std::ops::ControlFlow;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::info;
|
||||
|
||||
/// A product of successful authentication.
|
||||
pub struct AuthSuccess<T> {
|
||||
@@ -120,44 +117,6 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ComputeCredentials {
|
||||
Password(Vec<u8>),
|
||||
AuthKeys(AuthKeys),
|
||||
}
|
||||
|
||||
/// True to its name, this function encapsulates our current auth trade-offs.
|
||||
/// Here, we choose the appropriate auth flow based on circumstances.
|
||||
async fn auth_quirks_creds(
|
||||
api: &impl console::Api,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
creds: &mut ClientCredentials<'_>,
|
||||
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
allow_cleartext: bool,
|
||||
config: &'static AuthenticationConfig,
|
||||
) -> auth::Result<AuthSuccess<ComputeCredentials>> {
|
||||
// If there's no project so far, that entails that client doesn't
|
||||
// support SNI or other means of passing the endpoint (project) name.
|
||||
// We now expect to see a very specific payload in the place of password.
|
||||
if creds.project.is_none() {
|
||||
// Password will be checked by the compute node later.
|
||||
return hacks::password_hack(creds, client).await;
|
||||
}
|
||||
|
||||
// Password hack should set the project name.
|
||||
// TODO: make `creds.project` more type-safe.
|
||||
assert!(creds.project.is_some());
|
||||
|
||||
// Perform cleartext auth if we're allowed to do that.
|
||||
// Currently, we use it for websocket connections (latency).
|
||||
if allow_cleartext {
|
||||
// Password will be checked by the compute node later.
|
||||
return hacks::cleartext_hack(client).await;
|
||||
}
|
||||
|
||||
// Finally, proceed with the main auth flow (SCRAM-based).
|
||||
classic::authenticate(api, extra, creds, client, config).await
|
||||
}
|
||||
|
||||
/// True to its name, this function encapsulates our current auth trade-offs.
|
||||
/// Here, we choose the appropriate auth flow based on circumstances.
|
||||
async fn auth_quirks(
|
||||
@@ -168,36 +127,27 @@ async fn auth_quirks(
|
||||
allow_cleartext: bool,
|
||||
config: &'static AuthenticationConfig,
|
||||
) -> auth::Result<AuthSuccess<CachedNodeInfo>> {
|
||||
let auth_stuff = auth_quirks_creds(api, extra, creds, client, allow_cleartext, config).await?;
|
||||
// If there's no project so far, that entails that client doesn't
|
||||
// support SNI or other means of passing the endpoint (project) name.
|
||||
// We now expect to see a very specific payload in the place of password.
|
||||
if creds.project.is_none() {
|
||||
// Password will be checked by the compute node later.
|
||||
return hacks::password_hack(api, extra, creds, client).await;
|
||||
}
|
||||
|
||||
let mut num_retries = 0;
|
||||
let mut node = loop {
|
||||
let wake_res = api.wake_compute(extra, creds).await;
|
||||
match handle_try_wake(wake_res, num_retries) {
|
||||
Err(e) => {
|
||||
error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node");
|
||||
return Err(e.into());
|
||||
}
|
||||
Ok(ControlFlow::Continue(e)) => {
|
||||
warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node");
|
||||
}
|
||||
Ok(ControlFlow::Break(n)) => break n,
|
||||
}
|
||||
// Password hack should set the project name.
|
||||
// TODO: make `creds.project` more type-safe.
|
||||
assert!(creds.project.is_some());
|
||||
|
||||
let wait_duration = retry_after(num_retries);
|
||||
num_retries += 1;
|
||||
tokio::time::sleep(wait_duration).await;
|
||||
};
|
||||
// Perform cleartext auth if we're allowed to do that.
|
||||
// Currently, we use it for websocket connections (latency).
|
||||
if allow_cleartext {
|
||||
// Password will be checked by the compute node later.
|
||||
return hacks::cleartext_hack(api, extra, creds, client).await;
|
||||
}
|
||||
|
||||
match auth_stuff.value {
|
||||
ComputeCredentials::Password(password) => node.config.password(password),
|
||||
ComputeCredentials::AuthKeys(auth_keys) => node.config.auth_keys(auth_keys),
|
||||
};
|
||||
|
||||
Ok(AuthSuccess {
|
||||
reported_auth_ok: auth_stuff.reported_auth_ok,
|
||||
value: node,
|
||||
})
|
||||
// Finally, proceed with the main auth flow (SCRAM-based).
|
||||
classic::authenticate(api, extra, creds, client, config).await
|
||||
}
|
||||
|
||||
impl BackendType<'_, ClientCredentials<'_>> {
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
use super::{AuthSuccess, ComputeCredentials};
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use super::AuthSuccess;
|
||||
use crate::{
|
||||
auth::{self, AuthFlow, ClientCredentials},
|
||||
compute,
|
||||
config::AuthenticationConfig,
|
||||
console::{self, AuthInfo, ConsoleReqExtra},
|
||||
console::{self, AuthInfo, CachedNodeInfo, ConsoleReqExtra},
|
||||
proxy::{handle_try_wake, retry_after},
|
||||
sasl, scram,
|
||||
stream::PqStream,
|
||||
};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::{info, warn};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
pub(super) async fn authenticate(
|
||||
api: &impl console::Api,
|
||||
@@ -16,7 +19,7 @@ pub(super) async fn authenticate(
|
||||
creds: &ClientCredentials<'_>,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
config: &'static AuthenticationConfig,
|
||||
) -> auth::Result<AuthSuccess<ComputeCredentials>> {
|
||||
) -> auth::Result<AuthSuccess<CachedNodeInfo>> {
|
||||
info!("fetching user's authentication info");
|
||||
let info = api.get_auth_info(extra, creds).await?.unwrap_or_else(|| {
|
||||
// If we don't have an authentication secret, we mock one to
|
||||
@@ -63,17 +66,38 @@ pub(super) async fn authenticate(
|
||||
}
|
||||
};
|
||||
|
||||
compute::ScramKeys {
|
||||
Some(compute::ScramKeys {
|
||||
client_key: client_key.as_bytes(),
|
||||
server_key: secret.server_key.as_bytes(),
|
||||
}
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let mut num_retries = 0;
|
||||
let mut node = loop {
|
||||
let wake_res = api.wake_compute(extra, creds).await;
|
||||
match handle_try_wake(wake_res, num_retries) {
|
||||
Err(e) => {
|
||||
error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node");
|
||||
return Err(e.into());
|
||||
}
|
||||
Ok(ControlFlow::Continue(e)) => {
|
||||
warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node");
|
||||
}
|
||||
Ok(ControlFlow::Break(n)) => break n,
|
||||
}
|
||||
|
||||
let wait_duration = retry_after(num_retries);
|
||||
num_retries += 1;
|
||||
tokio::time::sleep(wait_duration).await;
|
||||
};
|
||||
if let Some(keys) = scram_keys {
|
||||
use tokio_postgres::config::AuthKeys;
|
||||
node.config.auth_keys(AuthKeys::ScramSha256(keys));
|
||||
}
|
||||
|
||||
Ok(AuthSuccess {
|
||||
reported_auth_ok: false,
|
||||
value: ComputeCredentials::AuthKeys(tokio_postgres::config::AuthKeys::ScramSha256(
|
||||
scram_keys,
|
||||
)),
|
||||
value: node,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
use super::{AuthSuccess, ComputeCredentials};
|
||||
use super::AuthSuccess;
|
||||
use crate::{
|
||||
auth::{self, AuthFlow, ClientCredentials},
|
||||
console::{
|
||||
self,
|
||||
provider::{CachedNodeInfo, ConsoleReqExtra},
|
||||
},
|
||||
stream,
|
||||
};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
@@ -11,8 +15,11 @@ use tracing::{info, warn};
|
||||
/// These properties are benefical for serverless JS workers, so we
|
||||
/// use this mechanism for websocket connections.
|
||||
pub async fn cleartext_hack(
|
||||
api: &impl console::Api,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
creds: &mut ClientCredentials<'_>,
|
||||
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> auth::Result<AuthSuccess<ComputeCredentials>> {
|
||||
) -> auth::Result<AuthSuccess<CachedNodeInfo>> {
|
||||
warn!("cleartext auth flow override is enabled, proceeding");
|
||||
let password = AuthFlow::new(client)
|
||||
.begin(auth::CleartextPassword)
|
||||
@@ -20,19 +27,24 @@ pub async fn cleartext_hack(
|
||||
.authenticate()
|
||||
.await?;
|
||||
|
||||
let mut node = api.wake_compute(extra, creds).await?;
|
||||
node.config.password(password);
|
||||
|
||||
// Report tentative success; compute node will check the password anyway.
|
||||
Ok(AuthSuccess {
|
||||
reported_auth_ok: false,
|
||||
value: ComputeCredentials::Password(password),
|
||||
value: node,
|
||||
})
|
||||
}
|
||||
|
||||
/// Workaround for clients which don't provide an endpoint (project) name.
|
||||
/// Very similar to [`cleartext_hack`], but there's a specific password format.
|
||||
pub async fn password_hack(
|
||||
api: &impl console::Api,
|
||||
extra: &ConsoleReqExtra<'_>,
|
||||
creds: &mut ClientCredentials<'_>,
|
||||
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> auth::Result<AuthSuccess<ComputeCredentials>> {
|
||||
) -> auth::Result<AuthSuccess<CachedNodeInfo>> {
|
||||
warn!("project not specified, resorting to the password hack auth flow");
|
||||
let payload = AuthFlow::new(client)
|
||||
.begin(auth::PasswordHack)
|
||||
@@ -43,9 +55,12 @@ pub async fn password_hack(
|
||||
info!(project = &payload.endpoint, "received missing parameter");
|
||||
creds.project = Some(payload.endpoint);
|
||||
|
||||
let mut node = api.wake_compute(extra, creds).await?;
|
||||
node.config.password(payload.password);
|
||||
|
||||
// Report tentative success; compute node will check the password anyway.
|
||||
Ok(AuthSuccess {
|
||||
reported_auth_ok: false,
|
||||
value: ComputeCredentials::Password(payload.password),
|
||||
value: node,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,11 +4,10 @@ use proxy::config::AuthenticationConfig;
|
||||
use proxy::config::HttpConfig;
|
||||
use proxy::console;
|
||||
use proxy::http;
|
||||
use proxy::usage_metrics;
|
||||
use proxy::metrics;
|
||||
|
||||
use anyhow::bail;
|
||||
use proxy::config::{self, ProxyConfig};
|
||||
use proxy::serverless;
|
||||
use std::pin::pin;
|
||||
use std::{borrow::Cow, net::SocketAddr};
|
||||
use tokio::net::TcpListener;
|
||||
@@ -130,16 +129,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
cancellation_token.clone(),
|
||||
));
|
||||
|
||||
// TODO: rename the argument to something like serverless.
|
||||
// It now covers more than just websockets, it also covers SQL over HTTP.
|
||||
if let Some(serverless_address) = args.wss {
|
||||
let serverless_address: SocketAddr = serverless_address.parse()?;
|
||||
info!("Starting wss on {serverless_address}");
|
||||
let serverless_listener = TcpListener::bind(serverless_address).await?;
|
||||
if let Some(wss_address) = args.wss {
|
||||
let wss_address: SocketAddr = wss_address.parse()?;
|
||||
info!("Starting wss on {wss_address}");
|
||||
let wss_listener = TcpListener::bind(wss_address).await?;
|
||||
|
||||
client_tasks.spawn(serverless::task_main(
|
||||
client_tasks.spawn(http::websocket::task_main(
|
||||
config,
|
||||
serverless_listener,
|
||||
wss_listener,
|
||||
cancellation_token.clone(),
|
||||
));
|
||||
}
|
||||
@@ -147,11 +144,11 @@ async fn main() -> anyhow::Result<()> {
|
||||
// maintenance tasks. these never return unless there's an error
|
||||
let mut maintenance_tasks = JoinSet::new();
|
||||
maintenance_tasks.spawn(proxy::handle_signals(cancellation_token));
|
||||
maintenance_tasks.spawn(http::health_server::task_main(http_listener));
|
||||
maintenance_tasks.spawn(http::server::task_main(http_listener));
|
||||
maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
|
||||
|
||||
if let Some(metrics_config) = &config.metric_collection {
|
||||
maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
|
||||
maintenance_tasks.spawn(metrics::task_main(metrics_config));
|
||||
}
|
||||
|
||||
let maintenance = loop {
|
||||
|
||||
@@ -90,11 +90,7 @@ pub mod errors {
|
||||
status: http::StatusCode::LOCKED,
|
||||
ref text,
|
||||
} => {
|
||||
// written data quota exceeded
|
||||
// data transfer quota exceeded
|
||||
// compute time quota exceeded
|
||||
// logical size quota exceeded
|
||||
!text.contains("quota exceeded")
|
||||
!text.contains("written data quota exceeded")
|
||||
&& !text.contains("the limit for current plan reached")
|
||||
}
|
||||
// retry server errors
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
//! Other modules should use stuff from this module instead of
|
||||
//! directly relying on deps like `reqwest` (think loose coupling).
|
||||
|
||||
pub mod health_server;
|
||||
pub mod conn_pool;
|
||||
pub mod server;
|
||||
pub mod sql_over_http;
|
||||
pub mod websocket;
|
||||
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
|
||||
|
||||
use crate::{
|
||||
auth, console,
|
||||
metrics::{Ids, MetricCounter, USAGE_METRICS},
|
||||
proxy::{LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER, NUM_DB_CONNECTIONS_OPENED_COUNTER},
|
||||
usage_metrics::{Ids, MetricCounter, USAGE_METRICS},
|
||||
};
|
||||
use crate::{compute, config};
|
||||
|
||||
@@ -191,39 +191,22 @@ impl GlobalConnPool {
|
||||
// ok return cached connection if found and establish a new one otherwise
|
||||
let new_client = if let Some(client) = client {
|
||||
if client.inner.is_closed() {
|
||||
let conn_id = uuid::Uuid::new_v4();
|
||||
info!(%conn_id, "pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||
connect_to_compute(
|
||||
self.proxy_config,
|
||||
conn_info,
|
||||
conn_id,
|
||||
session_id,
|
||||
latency_timer,
|
||||
)
|
||||
.await
|
||||
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||
connect_to_compute(self.proxy_config, conn_info, session_id, latency_timer).await
|
||||
} else {
|
||||
info!("pool: reusing connection '{conn_info}'");
|
||||
client.session.send(session_id)?;
|
||||
latency_timer.pool_hit();
|
||||
latency_timer.success();
|
||||
return Ok(Client {
|
||||
conn_id: client.conn_id,
|
||||
inner: Some(client),
|
||||
span: Span::current(),
|
||||
pool,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
let conn_id = uuid::Uuid::new_v4();
|
||||
info!(%conn_id, "pool: opening a new connection '{conn_info}'");
|
||||
connect_to_compute(
|
||||
self.proxy_config,
|
||||
conn_info,
|
||||
conn_id,
|
||||
session_id,
|
||||
latency_timer,
|
||||
)
|
||||
.await
|
||||
info!("pool: opening a new connection '{conn_info}'");
|
||||
connect_to_compute(self.proxy_config, conn_info, session_id, latency_timer).await
|
||||
};
|
||||
|
||||
match &new_client {
|
||||
@@ -260,7 +243,6 @@ impl GlobalConnPool {
|
||||
}
|
||||
|
||||
new_client.map(|inner| Client {
|
||||
conn_id: inner.conn_id,
|
||||
inner: Some(inner),
|
||||
span: Span::current(),
|
||||
pool,
|
||||
@@ -268,18 +250,16 @@ impl GlobalConnPool {
|
||||
}
|
||||
|
||||
fn put(&self, conn_info: &ConnInfo, client: ClientInner) -> anyhow::Result<()> {
|
||||
let conn_id = client.conn_id;
|
||||
|
||||
// We want to hold this open while we return. This ensures that the pool can't close
|
||||
// while we are in the middle of returning the connection.
|
||||
let closed = self.closed.read();
|
||||
if *closed {
|
||||
info!(%conn_id, "pool: throwing away connection '{conn_info}' because pool is closed");
|
||||
info!("pool: throwing away connection '{conn_info}' because pool is closed");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if client.inner.is_closed() {
|
||||
info!(%conn_id, "pool: throwing away connection '{conn_info}' because connection is closed");
|
||||
info!("pool: throwing away connection '{conn_info}' because connection is closed");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -311,9 +291,9 @@ impl GlobalConnPool {
|
||||
|
||||
// do logging outside of the mutex
|
||||
if returned {
|
||||
info!(%conn_id, "pool: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
||||
info!("pool: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
||||
} else {
|
||||
info!(%conn_id, "pool: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
|
||||
info!("pool: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -360,7 +340,6 @@ impl GlobalConnPool {
|
||||
struct TokioMechanism<'a> {
|
||||
conn_info: &'a ConnInfo,
|
||||
session_id: uuid::Uuid,
|
||||
conn_id: uuid::Uuid,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -374,14 +353,7 @@ impl ConnectMechanism for TokioMechanism<'_> {
|
||||
node_info: &console::CachedNodeInfo,
|
||||
timeout: time::Duration,
|
||||
) -> Result<Self::Connection, Self::ConnectError> {
|
||||
connect_to_compute_once(
|
||||
node_info,
|
||||
self.conn_info,
|
||||
timeout,
|
||||
self.conn_id,
|
||||
self.session_id,
|
||||
)
|
||||
.await
|
||||
connect_to_compute_once(node_info, self.conn_info, timeout, self.session_id).await
|
||||
}
|
||||
|
||||
fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
|
||||
@@ -394,7 +366,6 @@ impl ConnectMechanism for TokioMechanism<'_> {
|
||||
async fn connect_to_compute(
|
||||
config: &config::ProxyConfig,
|
||||
conn_info: &ConnInfo,
|
||||
conn_id: uuid::Uuid,
|
||||
session_id: uuid::Uuid,
|
||||
latency_timer: LatencyTimer,
|
||||
) -> anyhow::Result<ClientInner> {
|
||||
@@ -430,7 +401,6 @@ async fn connect_to_compute(
|
||||
|
||||
crate::proxy::connect_to_compute(
|
||||
&TokioMechanism {
|
||||
conn_id,
|
||||
conn_info,
|
||||
session_id,
|
||||
},
|
||||
@@ -446,7 +416,6 @@ async fn connect_to_compute_once(
|
||||
node_info: &console::CachedNodeInfo,
|
||||
conn_info: &ConnInfo,
|
||||
timeout: time::Duration,
|
||||
conn_id: uuid::Uuid,
|
||||
mut session: uuid::Uuid,
|
||||
) -> Result<ClientInner, tokio_postgres::Error> {
|
||||
let mut config = (*node_info.config).clone();
|
||||
@@ -461,6 +430,7 @@ async fn connect_to_compute_once(
|
||||
|
||||
let (tx, mut rx) = tokio::sync::watch::channel(session);
|
||||
|
||||
let conn_id = uuid::Uuid::new_v4();
|
||||
let span = info_span!(parent: None, "connection", %conn_id);
|
||||
span.in_scope(|| {
|
||||
info!(%conn_info, %session, "new connection");
|
||||
@@ -514,7 +484,6 @@ async fn connect_to_compute_once(
|
||||
inner: client,
|
||||
session: tx,
|
||||
ids,
|
||||
conn_id,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -522,7 +491,6 @@ struct ClientInner {
|
||||
inner: tokio_postgres::Client,
|
||||
session: tokio::sync::watch::Sender<uuid::Uuid>,
|
||||
ids: Ids,
|
||||
conn_id: uuid::Uuid,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
@@ -532,14 +500,12 @@ impl Client {
|
||||
}
|
||||
|
||||
pub struct Client {
|
||||
conn_id: uuid::Uuid,
|
||||
span: Span,
|
||||
inner: Option<ClientInner>,
|
||||
pool: Option<(ConnInfo, Arc<GlobalConnPool>)>,
|
||||
}
|
||||
|
||||
pub struct Discard<'a> {
|
||||
conn_id: uuid::Uuid,
|
||||
pool: &'a mut Option<(ConnInfo, Arc<GlobalConnPool>)>,
|
||||
}
|
||||
|
||||
@@ -548,7 +514,6 @@ impl Client {
|
||||
let Self {
|
||||
inner,
|
||||
pool,
|
||||
conn_id,
|
||||
span: _,
|
||||
} = self;
|
||||
(
|
||||
@@ -556,10 +521,7 @@ impl Client {
|
||||
.as_mut()
|
||||
.expect("client inner should not be removed")
|
||||
.inner,
|
||||
Discard {
|
||||
pool,
|
||||
conn_id: *conn_id,
|
||||
},
|
||||
Discard { pool },
|
||||
)
|
||||
}
|
||||
|
||||
@@ -575,13 +537,13 @@ impl Discard<'_> {
|
||||
pub fn check_idle(&mut self, status: ReadyForQueryStatus) {
|
||||
if status != ReadyForQueryStatus::Idle {
|
||||
if let Some((conn_info, _)) = self.pool.take() {
|
||||
info!(conn_id = %self.conn_id, "pool: throwing away connection '{conn_info}' because connection is not idle")
|
||||
info!("pool: throwing away connection '{conn_info}' because connection is not idle")
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn discard(&mut self) {
|
||||
if let Some((conn_info, _)) = self.pool.take() {
|
||||
info!(conn_id = %self.conn_id, "pool: throwing away connection '{conn_info}' because connection is potentially in a broken state")
|
||||
info!("pool: throwing away connection '{conn_info}' because connection is potentially in a broken state")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,36 +1,235 @@
|
||||
//! Routers for our serverless APIs
|
||||
//!
|
||||
//! Handles both SQL over HTTP and SQL over Websockets.
|
||||
|
||||
mod conn_pool;
|
||||
mod sql_over_http;
|
||||
mod websocket;
|
||||
|
||||
use crate::{
|
||||
cancellation::CancelMap,
|
||||
config::ProxyConfig,
|
||||
error::io_error,
|
||||
protocol2::{ProxyProtocolAccept, WithClientIp},
|
||||
proxy::{
|
||||
handle_client, ClientMode, NUM_CLIENT_CONNECTION_CLOSED_COUNTER,
|
||||
NUM_CLIENT_CONNECTION_OPENED_COUNTER,
|
||||
},
|
||||
};
|
||||
use anyhow::bail;
|
||||
use hyper::StatusCode;
|
||||
pub use reqwest_middleware::{ClientWithMiddleware, Error};
|
||||
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
||||
|
||||
use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
|
||||
use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
|
||||
use crate::{cancellation::CancelMap, config::ProxyConfig};
|
||||
use futures::StreamExt;
|
||||
use bytes::{Buf, Bytes};
|
||||
use futures::{Sink, Stream, StreamExt};
|
||||
use hyper::{
|
||||
server::{
|
||||
accept,
|
||||
conn::{AddrIncoming, AddrStream},
|
||||
},
|
||||
Body, Method, Request, Response,
|
||||
upgrade::Upgraded,
|
||||
Body, Method, Request, Response, StatusCode,
|
||||
};
|
||||
use hyper_tungstenite::{tungstenite::Message, HyperWebsocket, WebSocketStream};
|
||||
use pin_project_lite::pin_project;
|
||||
|
||||
use std::task::Poll;
|
||||
use std::{future::ready, sync::Arc};
|
||||
use std::{
|
||||
future::ready,
|
||||
pin::Pin,
|
||||
sync::Arc,
|
||||
task::{ready, Context, Poll},
|
||||
};
|
||||
use tls_listener::TlsListener;
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::{
|
||||
io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
|
||||
net::TcpListener,
|
||||
};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
use utils::http::{error::ApiError, json::json_response};
|
||||
|
||||
// TODO: use `std::sync::Exclusive` once it's stabilized.
|
||||
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
||||
use sync_wrapper::SyncWrapper;
|
||||
|
||||
use super::{conn_pool::GlobalConnPool, sql_over_http};
|
||||
|
||||
pin_project! {
|
||||
/// This is a wrapper around a [`WebSocketStream`] that
|
||||
/// implements [`AsyncRead`] and [`AsyncWrite`].
|
||||
pub struct WebSocketRw {
|
||||
#[pin]
|
||||
stream: SyncWrapper<WebSocketStream<Upgraded>>,
|
||||
bytes: Bytes,
|
||||
}
|
||||
}
|
||||
|
||||
impl WebSocketRw {
|
||||
pub fn new(stream: WebSocketStream<Upgraded>) -> Self {
|
||||
Self {
|
||||
stream: stream.into(),
|
||||
bytes: Bytes::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncWrite for WebSocketRw {
|
||||
fn poll_write(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &[u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
let mut stream = self.project().stream.get_pin_mut();
|
||||
|
||||
ready!(stream.as_mut().poll_ready(cx).map_err(io_error))?;
|
||||
match stream.as_mut().start_send(Message::Binary(buf.into())) {
|
||||
Ok(()) => Poll::Ready(Ok(buf.len())),
|
||||
Err(e) => Poll::Ready(Err(io_error(e))),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
|
||||
let stream = self.project().stream.get_pin_mut();
|
||||
stream.poll_flush(cx).map_err(io_error)
|
||||
}
|
||||
|
||||
fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
|
||||
let stream = self.project().stream.get_pin_mut();
|
||||
stream.poll_close(cx).map_err(io_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncRead for WebSocketRw {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut ReadBuf<'_>,
|
||||
) -> Poll<io::Result<()>> {
|
||||
if buf.remaining() > 0 {
|
||||
let bytes = ready!(self.as_mut().poll_fill_buf(cx))?;
|
||||
let len = std::cmp::min(bytes.len(), buf.remaining());
|
||||
buf.put_slice(&bytes[..len]);
|
||||
self.consume(len);
|
||||
}
|
||||
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncBufRead for WebSocketRw {
|
||||
fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
|
||||
// Please refer to poll_fill_buf's documentation.
|
||||
const EOF: Poll<io::Result<&[u8]>> = Poll::Ready(Ok(&[]));
|
||||
|
||||
let mut this = self.project();
|
||||
loop {
|
||||
if !this.bytes.chunk().is_empty() {
|
||||
let chunk = (*this.bytes).chunk();
|
||||
return Poll::Ready(Ok(chunk));
|
||||
}
|
||||
|
||||
let res = ready!(this.stream.as_mut().get_pin_mut().poll_next(cx));
|
||||
match res.transpose().map_err(io_error)? {
|
||||
Some(message) => match message {
|
||||
Message::Ping(_) => {}
|
||||
Message::Pong(_) => {}
|
||||
Message::Text(text) => {
|
||||
// We expect to see only binary messages.
|
||||
let error = "unexpected text message in the websocket";
|
||||
warn!(length = text.len(), error);
|
||||
return Poll::Ready(Err(io_error(error)));
|
||||
}
|
||||
Message::Frame(_) => {
|
||||
// This case is impossible according to Frame's doc.
|
||||
panic!("unexpected raw frame in the websocket");
|
||||
}
|
||||
Message::Binary(chunk) => {
|
||||
assert!(this.bytes.is_empty());
|
||||
*this.bytes = Bytes::from(chunk);
|
||||
}
|
||||
Message::Close(_) => return EOF,
|
||||
},
|
||||
None => return EOF,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume(self: Pin<&mut Self>, amount: usize) {
|
||||
self.project().bytes.advance(amount);
|
||||
}
|
||||
}
|
||||
|
||||
async fn serve_websocket(
|
||||
websocket: HyperWebsocket,
|
||||
config: &'static ProxyConfig,
|
||||
cancel_map: &CancelMap,
|
||||
session_id: uuid::Uuid,
|
||||
hostname: Option<String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let websocket = websocket.await?;
|
||||
handle_client(
|
||||
config,
|
||||
cancel_map,
|
||||
session_id,
|
||||
WebSocketRw::new(websocket),
|
||||
ClientMode::Websockets { hostname },
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ws_handler(
|
||||
mut request: Request<Body>,
|
||||
config: &'static ProxyConfig,
|
||||
conn_pool: Arc<GlobalConnPool>,
|
||||
cancel_map: Arc<CancelMap>,
|
||||
session_id: uuid::Uuid,
|
||||
sni_hostname: Option<String>,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let host = request
|
||||
.headers()
|
||||
.get("host")
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.and_then(|h| h.split(':').next())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
// Check if the request is a websocket upgrade request.
|
||||
if hyper_tungstenite::is_upgrade_request(&request) {
|
||||
info!(session_id = ?session_id, "performing websocket upgrade");
|
||||
|
||||
let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
|
||||
.map_err(|e| ApiError::BadRequest(e.into()))?;
|
||||
|
||||
tokio::spawn(
|
||||
async move {
|
||||
if let Err(e) =
|
||||
serve_websocket(websocket, config, &cancel_map, session_id, host).await
|
||||
{
|
||||
error!(session_id = ?session_id, "error in websocket connection: {e:#}");
|
||||
}
|
||||
}
|
||||
.in_current_span(),
|
||||
);
|
||||
|
||||
// Return the response so the spawned future can continue.
|
||||
Ok(response)
|
||||
// TODO: that deserves a refactor as now this function also handles http json client besides websockets.
|
||||
// Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
|
||||
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
||||
sql_over_http::handle(
|
||||
request,
|
||||
sni_hostname,
|
||||
conn_pool,
|
||||
session_id,
|
||||
&config.http_config,
|
||||
)
|
||||
.await
|
||||
} else if request.uri().path() == "/sql" && request.method() == Method::OPTIONS {
|
||||
Response::builder()
|
||||
.header("Allow", "OPTIONS, POST")
|
||||
.header("Access-Control-Allow-Origin", "*")
|
||||
.header(
|
||||
"Access-Control-Allow-Headers",
|
||||
"Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In",
|
||||
)
|
||||
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
|
||||
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
|
||||
.body(Body::empty())
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))
|
||||
} else {
|
||||
json_response(StatusCode::BAD_REQUEST, "query is not supported")
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn task_main(
|
||||
config: &'static ProxyConfig,
|
||||
ws_listener: TcpListener,
|
||||
@@ -40,7 +239,7 @@ pub async fn task_main(
|
||||
info!("websocket server has shut down");
|
||||
}
|
||||
|
||||
let conn_pool = conn_pool::GlobalConnPool::new(config);
|
||||
let conn_pool: Arc<GlobalConnPool> = GlobalConnPool::new(config);
|
||||
|
||||
// shutdown the connection pool
|
||||
tokio::spawn({
|
||||
@@ -101,15 +300,13 @@ pub async fn task_main(
|
||||
let cancel_map = Arc::new(CancelMap::default());
|
||||
let session_id = uuid::Uuid::new_v4();
|
||||
|
||||
request_handler(
|
||||
req, config, conn_pool, cancel_map, session_id, sni_name,
|
||||
)
|
||||
.instrument(info_span!(
|
||||
"serverless",
|
||||
session = %session_id,
|
||||
%peer_addr,
|
||||
))
|
||||
.await
|
||||
ws_handler(req, config, conn_pool, cancel_map, session_id, sni_name)
|
||||
.instrument(info_span!(
|
||||
"ws-client",
|
||||
session = %session_id,
|
||||
%peer_addr,
|
||||
))
|
||||
.await
|
||||
}
|
||||
},
|
||||
)))
|
||||
@@ -162,65 +359,3 @@ where
|
||||
self.inner.call(req)
|
||||
}
|
||||
}
|
||||
|
||||
async fn request_handler(
|
||||
mut request: Request<Body>,
|
||||
config: &'static ProxyConfig,
|
||||
conn_pool: Arc<conn_pool::GlobalConnPool>,
|
||||
cancel_map: Arc<CancelMap>,
|
||||
session_id: uuid::Uuid,
|
||||
sni_hostname: Option<String>,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let host = request
|
||||
.headers()
|
||||
.get("host")
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.and_then(|h| h.split(':').next())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
// Check if the request is a websocket upgrade request.
|
||||
if hyper_tungstenite::is_upgrade_request(&request) {
|
||||
info!(session_id = ?session_id, "performing websocket upgrade");
|
||||
|
||||
let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
|
||||
.map_err(|e| ApiError::BadRequest(e.into()))?;
|
||||
|
||||
tokio::spawn(
|
||||
async move {
|
||||
if let Err(e) =
|
||||
websocket::serve_websocket(websocket, config, &cancel_map, session_id, host)
|
||||
.await
|
||||
{
|
||||
error!(session_id = ?session_id, "error in websocket connection: {e:#}");
|
||||
}
|
||||
}
|
||||
.in_current_span(),
|
||||
);
|
||||
|
||||
// Return the response so the spawned future can continue.
|
||||
Ok(response)
|
||||
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
||||
sql_over_http::handle(
|
||||
request,
|
||||
sni_hostname,
|
||||
conn_pool,
|
||||
session_id,
|
||||
&config.http_config,
|
||||
)
|
||||
.await
|
||||
} else if request.uri().path() == "/sql" && request.method() == Method::OPTIONS {
|
||||
Response::builder()
|
||||
.header("Allow", "OPTIONS, POST")
|
||||
.header("Access-Control-Allow-Origin", "*")
|
||||
.header(
|
||||
"Access-Control-Allow-Headers",
|
||||
"Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In",
|
||||
)
|
||||
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
|
||||
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
|
||||
.body(Body::empty())
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))
|
||||
} else {
|
||||
json_response(StatusCode::BAD_REQUEST, "query is not supported")
|
||||
}
|
||||
}
|
||||
@@ -14,15 +14,14 @@ pub mod console;
|
||||
pub mod error;
|
||||
pub mod http;
|
||||
pub mod logging;
|
||||
pub mod metrics;
|
||||
pub mod parse;
|
||||
pub mod protocol2;
|
||||
pub mod proxy;
|
||||
pub mod sasl;
|
||||
pub mod scram;
|
||||
pub mod serverless;
|
||||
pub mod stream;
|
||||
pub mod url;
|
||||
pub mod usage_metrics;
|
||||
pub mod waiters;
|
||||
|
||||
/// Handle unix signals appropriately.
|
||||
|
||||
@@ -8,9 +8,9 @@ use crate::{
|
||||
config::{AuthenticationConfig, ProxyConfig, TlsConfig},
|
||||
console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
|
||||
http::StatusCode,
|
||||
metrics::{Ids, USAGE_METRICS},
|
||||
protocol2::WithClientIp,
|
||||
stream::{PqStream, Stream},
|
||||
usage_metrics::{Ids, USAGE_METRICS},
|
||||
};
|
||||
use anyhow::{bail, Context};
|
||||
use async_trait::async_trait;
|
||||
|
||||
@@ -1,146 +0,0 @@
|
||||
use crate::{
|
||||
cancellation::CancelMap,
|
||||
config::ProxyConfig,
|
||||
error::io_error,
|
||||
proxy::{handle_client, ClientMode},
|
||||
};
|
||||
use bytes::{Buf, Bytes};
|
||||
use futures::{Sink, Stream};
|
||||
use hyper::upgrade::Upgraded;
|
||||
use hyper_tungstenite::{tungstenite::Message, HyperWebsocket, WebSocketStream};
|
||||
use pin_project_lite::pin_project;
|
||||
|
||||
use std::{
|
||||
pin::Pin,
|
||||
task::{ready, Context, Poll},
|
||||
};
|
||||
use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
|
||||
use tracing::warn;
|
||||
|
||||
// TODO: use `std::sync::Exclusive` once it's stabilized.
|
||||
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
||||
use sync_wrapper::SyncWrapper;
|
||||
|
||||
pin_project! {
|
||||
/// This is a wrapper around a [`WebSocketStream`] that
|
||||
/// implements [`AsyncRead`] and [`AsyncWrite`].
|
||||
pub struct WebSocketRw {
|
||||
#[pin]
|
||||
stream: SyncWrapper<WebSocketStream<Upgraded>>,
|
||||
bytes: Bytes,
|
||||
}
|
||||
}
|
||||
|
||||
impl WebSocketRw {
|
||||
pub fn new(stream: WebSocketStream<Upgraded>) -> Self {
|
||||
Self {
|
||||
stream: stream.into(),
|
||||
bytes: Bytes::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncWrite for WebSocketRw {
|
||||
fn poll_write(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &[u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
let mut stream = self.project().stream.get_pin_mut();
|
||||
|
||||
ready!(stream.as_mut().poll_ready(cx).map_err(io_error))?;
|
||||
match stream.as_mut().start_send(Message::Binary(buf.into())) {
|
||||
Ok(()) => Poll::Ready(Ok(buf.len())),
|
||||
Err(e) => Poll::Ready(Err(io_error(e))),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
|
||||
let stream = self.project().stream.get_pin_mut();
|
||||
stream.poll_flush(cx).map_err(io_error)
|
||||
}
|
||||
|
||||
fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
|
||||
let stream = self.project().stream.get_pin_mut();
|
||||
stream.poll_close(cx).map_err(io_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncRead for WebSocketRw {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut ReadBuf<'_>,
|
||||
) -> Poll<io::Result<()>> {
|
||||
if buf.remaining() > 0 {
|
||||
let bytes = ready!(self.as_mut().poll_fill_buf(cx))?;
|
||||
let len = std::cmp::min(bytes.len(), buf.remaining());
|
||||
buf.put_slice(&bytes[..len]);
|
||||
self.consume(len);
|
||||
}
|
||||
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncBufRead for WebSocketRw {
|
||||
fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
|
||||
// Please refer to poll_fill_buf's documentation.
|
||||
const EOF: Poll<io::Result<&[u8]>> = Poll::Ready(Ok(&[]));
|
||||
|
||||
let mut this = self.project();
|
||||
loop {
|
||||
if !this.bytes.chunk().is_empty() {
|
||||
let chunk = (*this.bytes).chunk();
|
||||
return Poll::Ready(Ok(chunk));
|
||||
}
|
||||
|
||||
let res = ready!(this.stream.as_mut().get_pin_mut().poll_next(cx));
|
||||
match res.transpose().map_err(io_error)? {
|
||||
Some(message) => match message {
|
||||
Message::Ping(_) => {}
|
||||
Message::Pong(_) => {}
|
||||
Message::Text(text) => {
|
||||
// We expect to see only binary messages.
|
||||
let error = "unexpected text message in the websocket";
|
||||
warn!(length = text.len(), error);
|
||||
return Poll::Ready(Err(io_error(error)));
|
||||
}
|
||||
Message::Frame(_) => {
|
||||
// This case is impossible according to Frame's doc.
|
||||
panic!("unexpected raw frame in the websocket");
|
||||
}
|
||||
Message::Binary(chunk) => {
|
||||
assert!(this.bytes.is_empty());
|
||||
*this.bytes = Bytes::from(chunk);
|
||||
}
|
||||
Message::Close(_) => return EOF,
|
||||
},
|
||||
None => return EOF,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume(self: Pin<&mut Self>, amount: usize) {
|
||||
self.project().bytes.advance(amount);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn serve_websocket(
|
||||
websocket: HyperWebsocket,
|
||||
config: &'static ProxyConfig,
|
||||
cancel_map: &CancelMap,
|
||||
session_id: uuid::Uuid,
|
||||
hostname: Option<String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let websocket = websocket.await?;
|
||||
handle_client(
|
||||
config,
|
||||
cancel_map,
|
||||
session_id,
|
||||
WebSocketRw::new(websocket),
|
||||
ClientMode::Websockets { hostname },
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -24,7 +24,7 @@ backoff = "^2.2.1"
|
||||
pytest-lazy-fixture = "^0.6.3"
|
||||
prometheus-client = "^0.14.1"
|
||||
pytest-timeout = "^2.1.0"
|
||||
Werkzeug = "^3.0.1"
|
||||
Werkzeug = "^2.2.3"
|
||||
pytest-order = "^1.1.0"
|
||||
allure-pytest = "^2.13.2"
|
||||
pytest-asyncio = "^0.21.0"
|
||||
|
||||
@@ -15,15 +15,28 @@ The script fetches the durations of benchmarks from the database and stores it i
|
||||
|
||||
BENCHMARKS_DURATION_QUERY = """
|
||||
SELECT
|
||||
DISTINCT parent_suite, suite, name,
|
||||
PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration) as percentile_ms
|
||||
FROM results
|
||||
DISTINCT parent_suite, suite, test,
|
||||
PERCENTILE_DISC(%s) WITHIN GROUP (ORDER BY duration_ms) as percentile_ms
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
|
||||
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
|
||||
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp,
|
||||
(jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'duration')::int as duration_ms
|
||||
FROM
|
||||
regress_test_results
|
||||
WHERE
|
||||
reference = 'refs/heads/main'
|
||||
) data
|
||||
WHERE
|
||||
started_at > CURRENT_DATE - INTERVAL '%s' day
|
||||
timestamp > CURRENT_DATE - INTERVAL '%s' day
|
||||
AND parent_suite = 'test_runner.performance'
|
||||
AND status = 'passed'
|
||||
GROUP BY
|
||||
parent_suite, suite, name
|
||||
parent_suite, suite, test
|
||||
;
|
||||
"""
|
||||
|
||||
@@ -31,69 +44,68 @@ BENCHMARKS_DURATION_QUERY = """
|
||||
# the total duration varies from 8 to 40 minutes.
|
||||
# We use some pre-collected durations as a fallback to have a better distribution.
|
||||
FALLBACK_DURATION = {
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 62.144,
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 90.941,
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 26.053,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 25.67,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 14.497,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 18.852,
|
||||
"test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 26.572,
|
||||
"test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 6.259,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 21.206,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.474,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 11.262,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 94.225,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 68.159,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 76.719,
|
||||
"test_runner/performance/test_compaction.py::test_compaction": 110.222,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 10.743,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.541,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.109,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.121,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.3,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.086,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 12.024,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.14,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.375,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.075,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.147,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.321,
|
||||
"test_runner/performance/test_copy.py::test_copy[neon]": 16.579,
|
||||
"test_runner/performance/test_copy.py::test_copy[vanilla]": 10.094,
|
||||
"test_runner/performance/test_gc_feedback.py::test_gc_feedback": 590.157,
|
||||
"test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.102,
|
||||
"test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 8.677,
|
||||
"test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 31.079,
|
||||
"test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 38.119,
|
||||
"test_runner/performance/test_layer_map.py::test_layer_map": 24.784,
|
||||
"test_runner/performance/test_logical_replication.py::test_logical_replication": 117.707,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 21.194,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 59.068,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 73.235,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 82.586,
|
||||
"test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 106.536,
|
||||
"test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 98.753,
|
||||
"test_runner/performance/test_random_writes.py::test_random_writes[neon]": 6.975,
|
||||
"test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 3.69,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 3.529,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 64.522,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 40.964,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.55,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 12.189,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 13.899,
|
||||
"test_runner/performance/test_startup.py::test_startup": 890.114,
|
||||
"test_runner/performance/test_startup.py::test_startup_simple": 2.51,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 527.245,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 583.46,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 113.653,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 233.728,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 419.093,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 982.461,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 116.522,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 115.583,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 155.282,
|
||||
"test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 26.704,
|
||||
"test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.088,
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_heavy_write[20]": 57.0,
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_many_relations": 28.0,
|
||||
"test_runner/performance/test_branch_creation.py::test_branch_creation_many[1024]": 71.0,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_pgbench_perf": 27.0,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_read_perf": 11.0,
|
||||
"test_runner/performance/test_branching.py::test_compare_child_and_root_write_perf": 30.0,
|
||||
"test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]": 40.0,
|
||||
"test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]": 5.0,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[1]": 3.0,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[5]": 10.0,
|
||||
"test_runner/performance/test_bulk_tenant_create.py::test_bulk_tenant_create[10]": 19.0,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[10]": 66.0,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[50]": 30.0,
|
||||
"test_runner/performance/test_bulk_update.py::test_bulk_update[100]": 60.0,
|
||||
"test_runner/performance/test_compaction.py::test_compaction": 77.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[neon-5-10-100]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_ro_with_pgbench_select_only[vanilla-5-10-100]": 16.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[neon-5-10-100]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_rw_with_pgbench_default[vanilla-5-10-100]": 18.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[neon-5-10-100]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wal_with_pgbench_default[vanilla-5-10-100]": 16.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-1]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[neon-10-10]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-1]": 10.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_heavy_write[vanilla-10-10]": 10.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[neon-5-10-100]": 11.0,
|
||||
"test_runner/performance/test_compare_pg_stats.py::test_compare_pg_stats_wo_with_pgbench_simple_update[vanilla-5-10-100]": 16.0,
|
||||
"test_runner/performance/test_copy.py::test_copy[neon]": 12.0,
|
||||
"test_runner/performance/test_copy.py::test_copy[vanilla]": 10.0,
|
||||
"test_runner/performance/test_gc_feedback.py::test_gc_feedback": 284.0,
|
||||
"test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 11.0,
|
||||
"test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 7.0,
|
||||
"test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 85.0,
|
||||
"test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[vanilla-1]": 29.0,
|
||||
"test_runner/performance/test_layer_map.py::test_layer_map": 44.0,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[neon]": 16.0,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_different_tables[vanilla]": 67.0,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[neon]": 67.0,
|
||||
"test_runner/performance/test_parallel_copy_to.py::test_parallel_copy_same_table[vanilla]": 80.0,
|
||||
"test_runner/performance/test_perf_pgbench.py::test_pgbench[neon-45-10]": 102.0,
|
||||
"test_runner/performance/test_perf_pgbench.py::test_pgbench[vanilla-45-10]": 99.0,
|
||||
"test_runner/performance/test_random_writes.py::test_random_writes[neon]": 9.0,
|
||||
"test_runner/performance/test_random_writes.py::test_random_writes[vanilla]": 2.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-100000-100-0]": 4.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-0]": 80.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[neon-10000000-1-4]": 68.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-100000-100-0]": 0.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-0]": 11.0,
|
||||
"test_runner/performance/test_seqscans.py::test_seqscans[vanilla-10000000-1-4]": 10.0,
|
||||
"test_runner/performance/test_startup.py::test_startup_simple": 2.0,
|
||||
"test_runner/performance/test_startup.py::test_startup": 539.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_off-10-5-5]": 375.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[neon_on-10-5-5]": 370.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_heavy_write_workload[vanilla-10-5-5]": 94.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_off-1000]": 164.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[neon_on-1000]": 274.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_intensive_init_workload[vanilla-1000]": 949.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_off-45-100]": 142.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[neon_on-45-100]": 151.0,
|
||||
"test_runner/performance/test_wal_backpressure.py::test_pgbench_simple_update_workload[vanilla-45-100]": 182.0,
|
||||
"test_runner/performance/test_write_amplification.py::test_write_amplification[neon]": 13.0,
|
||||
"test_runner/performance/test_write_amplification.py::test_write_amplification[vanilla]": 16.0,
|
||||
}
|
||||
|
||||
|
||||
@@ -118,7 +130,7 @@ def main(args: argparse.Namespace):
|
||||
res = FALLBACK_DURATION
|
||||
|
||||
for row in rows:
|
||||
pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['name']}"
|
||||
pytest_name = f"{row['parent_suite'].replace('.', '/')}/{row['suite']}.py::{row['test']}"
|
||||
duration = row["percentile_ms"] / 1000
|
||||
logging.info(f"\t{pytest_name}: {duration}")
|
||||
res[pytest_name] = duration
|
||||
|
||||
@@ -9,15 +9,28 @@ from typing import DefaultDict, Dict
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# We call the test "flaky" if it failed at least once on the main branch in the last N=10 days.
|
||||
FLAKY_TESTS_QUERY = """
|
||||
SELECT
|
||||
DISTINCT parent_suite, suite, name
|
||||
FROM results
|
||||
DISTINCT parent_suite, suite, REGEXP_REPLACE(test, '(release|debug)-pg(\\d+)-?', '') as deparametrized_test
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
reference,
|
||||
jsonb_array_elements(data -> 'children') ->> 'name' as parent_suite,
|
||||
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') ->> 'name' as suite,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'name' as test,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'status' as status,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') ->> 'retriesStatusChange' as retries_status_change,
|
||||
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' ->> 'start')::bigint / 1000)::date as timestamp
|
||||
FROM
|
||||
regress_test_results
|
||||
) data
|
||||
WHERE
|
||||
started_at > CURRENT_DATE - INTERVAL '%s' day
|
||||
timestamp > CURRENT_DATE - INTERVAL '%s' day
|
||||
AND (
|
||||
(status IN ('failed', 'broken') AND reference = 'refs/heads/main')
|
||||
OR flaky
|
||||
OR retries_status_change::boolean
|
||||
)
|
||||
;
|
||||
"""
|
||||
@@ -50,14 +63,12 @@ def main(args: argparse.Namespace):
|
||||
if row["parent_suite"] != "test_runner.regress":
|
||||
continue
|
||||
|
||||
if row["name"].endswith("]"):
|
||||
parametrized_test = row["name"].replace(
|
||||
"[",
|
||||
f"[{build_type}-pg{pg_version}-",
|
||||
)
|
||||
else:
|
||||
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}]"
|
||||
|
||||
deparametrized_test = row["deparametrized_test"]
|
||||
dash_if_needed = "" if deparametrized_test.endswith("[]") else "-"
|
||||
parametrized_test = deparametrized_test.replace(
|
||||
"[",
|
||||
f"[{build_type}-pg{pg_version}{dash_if_needed}",
|
||||
)
|
||||
res[row["parent_suite"]][row["suite"]][parametrized_test] = True
|
||||
|
||||
logging.info(
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
use tonic::codegen::StdError;
|
||||
use tonic::transport::{ClientTlsConfig, Endpoint};
|
||||
use tonic::{transport::Channel, Status};
|
||||
use tonic::{transport::Channel, Code, Status};
|
||||
use utils::id::{TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
use proto::{
|
||||
@@ -23,7 +23,6 @@ pub mod proto {
|
||||
pub mod metrics;
|
||||
|
||||
// Re-exports to avoid direct tonic dependency in user crates.
|
||||
pub use tonic::Code;
|
||||
pub use tonic::Request;
|
||||
pub use tonic::Streaming;
|
||||
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
# Usage from top of repo:
|
||||
# poetry run python3 test_runner/duplicate_tenant.py b97965931096047b2d54958756baee7b 10
|
||||
from queue import Queue
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import requests
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.types import TenantId
|
||||
|
||||
initial_tenant = sys.argv[1]
|
||||
ncopies = int(sys.argv[2])
|
||||
numthreads = int(sys.argv[3])
|
||||
|
||||
|
||||
# class DuckTypedNeonEnv:
|
||||
# pass
|
||||
|
||||
|
||||
# cli = NeonCli(DuckTypedNeonEnv())
|
||||
|
||||
q = Queue()
|
||||
for i in range(0, ncopies):
|
||||
q.put(i)
|
||||
|
||||
for i in range(0, numthreads):
|
||||
q.put(None)
|
||||
|
||||
|
||||
def create():
|
||||
while True:
|
||||
if q.get() == None:
|
||||
break
|
||||
new_tenant = TenantId.generate()
|
||||
res = requests.post(
|
||||
f"http://localhost:9898/v1/tenant/{initial_tenant}/duplicate",
|
||||
json={"new_tenant_id": str(new_tenant)},
|
||||
)
|
||||
res.raise_for_status()
|
||||
|
||||
|
||||
for i in range(0, numthreads):
|
||||
threading.Thread(target=create).start()
|
||||
@@ -1719,11 +1719,6 @@ class NeonPageserver(PgProtocol):
|
||||
break
|
||||
|
||||
if error_or_warn.search(line):
|
||||
# Is this a torn log line? This happens when force-killing a process and restarting
|
||||
# Example: "2023-10-25T09:38:31.752314Z WARN deletion executo2023-10-25T09:38:31.875947Z INFO version: git-env:0f9452f76e8ccdfc88291bccb3f53e3016f40192"
|
||||
if re.match("\\d{4}-\\d{2}-\\d{2}T.+\\d{4}-\\d{2}-\\d{2}T.+INFO version.+", line):
|
||||
continue
|
||||
|
||||
# It's an ERROR or WARN. Is it in the allow-list?
|
||||
for a in self.allowed_errors:
|
||||
if re.match(a, line):
|
||||
|
||||
@@ -215,25 +215,6 @@ class PageserverHttpClient(requests.Session):
|
||||
assert isinstance(new_tenant_id, str)
|
||||
return TenantId(new_tenant_id)
|
||||
|
||||
def tenant_duplicate(
|
||||
self, src_tenant_id: TenantId, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None
|
||||
) -> TenantId:
|
||||
if conf is not None:
|
||||
assert "new_tenant_id" not in conf.keys()
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{src_tenant_id}/duplicate",
|
||||
json={
|
||||
"new_tenant_id": str(new_tenant_id),
|
||||
**(conf or {}),
|
||||
},
|
||||
)
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f"could not create tenant: already exists for id {new_tenant_id}")
|
||||
new_tenant_id = res.json()
|
||||
assert isinstance(new_tenant_id, str)
|
||||
return TenantId(new_tenant_id)
|
||||
|
||||
def tenant_attach(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
@@ -460,13 +441,13 @@ class PageserverHttpClient(requests.Session):
|
||||
assert res_json is None
|
||||
|
||||
def timeline_get_lsn_by_timestamp(
|
||||
self, tenant_id: TenantId, timeline_id: TimelineId, timestamp, version: int
|
||||
self, tenant_id: TenantId, timeline_id: TimelineId, timestamp
|
||||
):
|
||||
log.info(
|
||||
f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}"
|
||||
)
|
||||
res = self.get(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}&version={version}",
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}",
|
||||
)
|
||||
self.verbose_error(res)
|
||||
res_json = res.json()
|
||||
|
||||
@@ -157,7 +157,7 @@ def wait_for_last_record_lsn(
|
||||
lsn: Lsn,
|
||||
) -> Lsn:
|
||||
"""waits for pageserver to catch up to a certain lsn, returns the last observed lsn."""
|
||||
for i in range(100):
|
||||
for i in range(1000000):
|
||||
current_lsn = last_record_lsn(pageserver_http, tenant, timeline)
|
||||
if current_lsn >= lsn:
|
||||
return current_lsn
|
||||
|
||||
@@ -4,10 +4,8 @@ First make a release build. The `-s` flag silences a lot of output, and makes it
|
||||
easier to see if you have compile errors without scrolling up.
|
||||
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=testing" make -s -j8`
|
||||
|
||||
You may also need to run `./scripts/pysync`.
|
||||
|
||||
Then run the tests
|
||||
`DEFAULT_PG_VERSION=15 NEON_BIN=./target/release poetry run pytest test_runner/performance`
|
||||
`NEON_BIN=./target/release poetry run pytest test_runner/performance"`
|
||||
|
||||
Some handy pytest flags for local development:
|
||||
- `-x` tells pytest to stop on first error
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import pytest
|
||||
import os
|
||||
import shutil
|
||||
from contextlib import closing
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from fixtures.compare_fixtures import NeonCompare, PgCompare
|
||||
from fixtures.pg_version import PgVersion
|
||||
@@ -18,6 +21,9 @@ from fixtures.pg_version import PgVersion
|
||||
def test_bulk_insert(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
|
||||
# Number of times to run the write query. One run creates 350MB of wal.
|
||||
n_writes = 10
|
||||
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("create table huge (i int, j int);")
|
||||
@@ -25,7 +31,10 @@ def test_bulk_insert(neon_with_baseline: PgCompare):
|
||||
# Run INSERT, recording the time and I/O it takes
|
||||
with env.record_pageserver_writes("pageserver_writes"):
|
||||
with env.record_duration("insert"):
|
||||
cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
|
||||
for i in range(n_writes):
|
||||
if n_writes > 1:
|
||||
log.info(f"running query {i}/{n_writes}")
|
||||
cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
|
||||
env.flush()
|
||||
|
||||
env.report_peak_memory_use()
|
||||
@@ -39,7 +48,9 @@ def test_bulk_insert(neon_with_baseline: PgCompare):
|
||||
|
||||
|
||||
def measure_recovery_time(env: NeonCompare):
|
||||
client = env.env.pageserver.http_client()
|
||||
# Hmm why is pageserver less ready to respond to http when the datadir is large?
|
||||
from urllib3.util.retry import Retry
|
||||
client = env.env.pageserver.http_client(retries=Retry(1000))
|
||||
pg_version = PgVersion(client.timeline_detail(env.tenant, env.timeline)["pg_version"])
|
||||
|
||||
# Stop pageserver and remove tenant data
|
||||
@@ -57,3 +68,13 @@ def measure_recovery_time(env: NeonCompare):
|
||||
|
||||
# Flush, which will also wait for lsn to catch up
|
||||
env.flush()
|
||||
|
||||
|
||||
# This test is meant for local iteration only. The use case is when you want to re-run
|
||||
# the measure_recovery_time part of test_bulk_insert, but without running the setup.
|
||||
# It allows you to iterate on results 2x faster while trying to improve wal ingestion
|
||||
# performance.
|
||||
@pytest.mark.skip("this is a convenience test for local dev only")
|
||||
def test_recovery(neon_env_builder):
|
||||
env = neon_env_builder.init_start()
|
||||
measure_recovery_time(env)
|
||||
|
||||
@@ -8,71 +8,6 @@ from fixtures.types import Lsn
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
#
|
||||
# Test pageserver get_lsn_by_timestamp API
|
||||
#
|
||||
def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
|
||||
endpoint_main = env.endpoints.create_start("test_lsn_mapping")
|
||||
log.info("postgres is running on 'test_lsn_mapping' branch")
|
||||
|
||||
cur = endpoint_main.connect().cursor()
|
||||
# Create table, and insert rows, each in a separate transaction
|
||||
# Disable synchronous_commit to make this initialization go faster.
|
||||
#
|
||||
# Each row contains current insert LSN and the current timestamp, when
|
||||
# the row was inserted.
|
||||
cur.execute("SET synchronous_commit=off")
|
||||
cur.execute("CREATE TABLE foo (x integer)")
|
||||
tbl = []
|
||||
for i in range(1000):
|
||||
cur.execute("INSERT INTO foo VALUES(%s)", (i,))
|
||||
# Get the timestamp at UTC
|
||||
after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None)
|
||||
tbl.append([i, after_timestamp])
|
||||
|
||||
# Execute one more transaction with synchronous_commit enabled, to flush
|
||||
# all the previous transactions
|
||||
cur.execute("SET synchronous_commit=on")
|
||||
cur.execute("INSERT INTO foo VALUES (-1)")
|
||||
|
||||
# Wait until WAL is received by pageserver
|
||||
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
|
||||
|
||||
with env.pageserver.http_client() as client:
|
||||
# Check edge cases: timestamp in the future
|
||||
probe_timestamp = tbl[-1][1] + timedelta(hours=1)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
|
||||
)
|
||||
assert result == "future"
|
||||
|
||||
# timestamp too the far history
|
||||
probe_timestamp = tbl[0][1] - timedelta(hours=10)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
|
||||
)
|
||||
assert result == "past"
|
||||
|
||||
# Probe a bunch of timestamps in the valid range
|
||||
for i in range(1, len(tbl), 100):
|
||||
probe_timestamp = tbl[i][1]
|
||||
lsn = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
|
||||
)
|
||||
# Call get_lsn_by_timestamp to get the LSN
|
||||
# Launch a new read-only node at that LSN, and check that only the rows
|
||||
# that were supposed to be committed at that point in time are visible.
|
||||
endpoint_here = env.endpoints.create_start(
|
||||
branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
|
||||
)
|
||||
assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
|
||||
|
||||
endpoint_here.stop_and_destroy()
|
||||
|
||||
|
||||
#
|
||||
# Test pageserver get_lsn_by_timestamp API
|
||||
#
|
||||
@@ -110,24 +45,23 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
# Check edge cases: timestamp in the future
|
||||
probe_timestamp = tbl[-1][1] + timedelta(hours=1)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
|
||||
)
|
||||
assert result["kind"] == "future"
|
||||
assert result == "future"
|
||||
|
||||
# timestamp too the far history
|
||||
probe_timestamp = tbl[0][1] - timedelta(hours=10)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
|
||||
)
|
||||
assert result["kind"] == "past"
|
||||
assert result == "past"
|
||||
|
||||
# Probe a bunch of timestamps in the valid range
|
||||
for i in range(1, len(tbl), 100):
|
||||
probe_timestamp = tbl[i][1]
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
lsn = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
|
||||
)
|
||||
lsn = result["lsn"]
|
||||
# Call get_lsn_by_timestamp to get the LSN
|
||||
# Launch a new read-only node at that LSN, and check that only the rows
|
||||
# that were supposed to be committed at that point in time are visible.
|
||||
|
||||
@@ -486,20 +486,16 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
|
||||
|
||||
def evict_all_layers(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
|
||||
timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
|
||||
initial_local_layers = sorted(
|
||||
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
)
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
layer_map = client.layer_map_info(tenant_id, timeline_id)
|
||||
|
||||
for layer in layer_map.historic_layers:
|
||||
if layer.remote:
|
||||
log.info(
|
||||
f"Skipping trying to evict remote layer {tenant_id}/{timeline_id} {layer.layer_file_name}"
|
||||
)
|
||||
for layer in initial_local_layers:
|
||||
if "ephemeral" in layer.name or "temp" in layer.name:
|
||||
continue
|
||||
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.layer_file_name}")
|
||||
client.evict_layer(
|
||||
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.layer_file_name
|
||||
)
|
||||
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
|
||||
client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
|
||||
|
||||
|
||||
def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
import psycopg2
|
||||
@@ -365,14 +364,10 @@ def test_sql_over_http_pool(static_proxy: NeonProxy):
|
||||
|
||||
pid1 = get_pid(200, "http")["rows"][0]["pid"]
|
||||
|
||||
time.sleep(0.02)
|
||||
|
||||
# query should be on the same connection
|
||||
rows = get_pid(200, "http")["rows"]
|
||||
assert rows == [{"pid": pid1}]
|
||||
|
||||
time.sleep(0.02)
|
||||
|
||||
# incorrect password should not work
|
||||
res = get_pid(400, "foobar")
|
||||
assert "password authentication failed for user" in res["message"]
|
||||
@@ -383,14 +378,10 @@ def test_sql_over_http_pool(static_proxy: NeonProxy):
|
||||
pid2 = get_pid(200, "http2")["rows"][0]["pid"]
|
||||
assert pid1 != pid2
|
||||
|
||||
time.sleep(0.02)
|
||||
|
||||
# query should be on an existing connection
|
||||
pid = get_pid(200, "http2")["rows"][0]["pid"]
|
||||
assert pid in [pid1, pid2]
|
||||
|
||||
time.sleep(0.02)
|
||||
|
||||
# old password should not work
|
||||
res = get_pid(400, "http")
|
||||
assert "password authentication failed for user" in res["message"]
|
||||
@@ -428,7 +419,6 @@ def test_sql_over_http_pool_idle(static_proxy: NeonProxy):
|
||||
)
|
||||
|
||||
pid1 = query(200, GET_CONNECTION_PID_QUERY)["rows"][0]["pid"]
|
||||
time.sleep(0.02)
|
||||
query(200, "BEGIN")
|
||||
pid2 = query(200, GET_CONNECTION_PID_QUERY)["rows"][0]["pid"]
|
||||
assert pid1 != pid2
|
||||
|
||||
@@ -757,14 +757,12 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv
|
||||
create_thread.join()
|
||||
|
||||
|
||||
def test_compaction_waits_for_upload(
|
||||
# Regression test for a race condition where L0 layers are compacted before the upload,
|
||||
# resulting in the uploading complaining about the file not being found
|
||||
# https://github.com/neondatabase/neon/issues/4526
|
||||
def test_compaction_delete_before_upload(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
"""
|
||||
Compaction waits for outstanding uploads to complete, so that it avoids deleting layers
|
||||
files that have not yet been uploaded. This test forces a race between upload and
|
||||
compaction.
|
||||
"""
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
|
||||
env = neon_env_builder.init_start(
|
||||
@@ -794,81 +792,50 @@ def test_compaction_waits_for_upload(
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
# Now make the flushing hang and update one small piece of data
|
||||
client.configure_failpoints(("before-upload-layer-pausable", "pause"))
|
||||
client.configure_failpoints(("flush-frozen-pausable", "pause"))
|
||||
|
||||
endpoint.safe_psql("UPDATE foo SET x = 0 WHERE x = 1")
|
||||
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
checkpoint_result: queue.Queue[Optional[PageserverApiException]] = queue.Queue()
|
||||
compact_result: queue.Queue[Optional[PageserverApiException]] = queue.Queue()
|
||||
compact_barrier = threading.Barrier(2)
|
||||
q: queue.Queue[Optional[PageserverApiException]] = queue.Queue()
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
def checkpoint_in_background():
|
||||
barrier.wait()
|
||||
try:
|
||||
log.info("Checkpoint starting")
|
||||
client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
log.info("Checkpoint complete")
|
||||
checkpoint_result.put(None)
|
||||
q.put(None)
|
||||
except PageserverApiException as e:
|
||||
log.info("Checkpoint errored: {e}")
|
||||
checkpoint_result.put(e)
|
||||
q.put(e)
|
||||
|
||||
def compact_in_background():
|
||||
compact_barrier.wait()
|
||||
try:
|
||||
log.info("Compaction starting")
|
||||
client.timeline_compact(tenant_id, timeline_id)
|
||||
log.info("Compaction complete")
|
||||
compact_result.put(None)
|
||||
except PageserverApiException as e:
|
||||
log.info("Compaction errored: {e}")
|
||||
compact_result.put(e)
|
||||
|
||||
checkpoint_thread = threading.Thread(target=checkpoint_in_background)
|
||||
checkpoint_thread.start()
|
||||
|
||||
compact_thread = threading.Thread(target=compact_in_background)
|
||||
compact_thread.start()
|
||||
create_thread = threading.Thread(target=checkpoint_in_background)
|
||||
create_thread.start()
|
||||
|
||||
try:
|
||||
# Start the checkpoint, see that it blocks
|
||||
log.info("Waiting to see checkpoint hang...")
|
||||
time.sleep(5)
|
||||
assert checkpoint_result.empty()
|
||||
barrier.wait()
|
||||
|
||||
# Start the compaction, see that it finds work to do but blocks
|
||||
compact_barrier.wait()
|
||||
log.info("Waiting to see compaction hang...")
|
||||
time.sleep(5)
|
||||
assert compact_result.empty()
|
||||
time.sleep(4)
|
||||
client.timeline_compact(tenant_id, timeline_id)
|
||||
|
||||
# This is logged once compaction is started, but before we wait for operations to complete
|
||||
assert env.pageserver.log_contains("compact_level0_phase1 stats available.")
|
||||
client.configure_failpoints(("flush-frozen-pausable", "off"))
|
||||
|
||||
# Once we unblock uploads the compaction should complete successfully
|
||||
log.info("Disabling failpoint")
|
||||
client.configure_failpoints(("before-upload-layer-pausable", "off"))
|
||||
log.info("Awaiting compaction result")
|
||||
assert compact_result.get(timeout=10) is None
|
||||
log.info("Awaiting checkpoint result")
|
||||
assert checkpoint_result.get(timeout=10) is None
|
||||
conflict = q.get()
|
||||
|
||||
except Exception:
|
||||
# Log the actual failure's backtrace here, before we proceed to join threads
|
||||
log.exception("Failure, cleaning up...")
|
||||
raise
|
||||
assert conflict is None
|
||||
finally:
|
||||
compact_barrier.abort()
|
||||
create_thread.join()
|
||||
|
||||
checkpoint_thread.join()
|
||||
compact_thread.join()
|
||||
# Add a delay for the uploads to run into either the file not found or the
|
||||
time.sleep(4)
|
||||
|
||||
# Ensure that this actually terminates
|
||||
wait_upload_queue_empty(client, tenant_id, timeline_id)
|
||||
|
||||
# We should not have hit the error handling path in uploads where the remote file is gone
|
||||
assert not env.pageserver.log_contains(
|
||||
# For now we are hitting this message.
|
||||
# Maybe in the future the underlying race condition will be fixed,
|
||||
# but until then, ensure that this message is hit instead.
|
||||
assert env.pageserver.log_contains(
|
||||
"File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more."
|
||||
)
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
import time
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
last_flush_lsn_upload,
|
||||
)
|
||||
from fixtures.remote_storage import (
|
||||
RemoteStorageKind,
|
||||
)
|
||||
from fixtures.types import TenantId
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_tenant_duplicate(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as ep_main:
|
||||
ep_main.safe_psql("CREATE TABLE foo (i int);")
|
||||
ep_main.safe_psql("INSERT INTO foo VALUES (1), (2), (3);")
|
||||
last_flush_lsn = last_flush_lsn_upload(
|
||||
env, ep_main, env.initial_tenant, env.initial_timeline
|
||||
)
|
||||
|
||||
new_tenant_id = TenantId.generate()
|
||||
# timeline id remains unchanged with tenant_duplicate
|
||||
# TODO: implement a remapping scheme so timeline ids remain globally unique
|
||||
new_timeline_id = env.initial_timeline
|
||||
|
||||
log.info(f"Duplicate tenant/timeline will be: {new_tenant_id}/{new_timeline_id}")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
ps_http.tenant_duplicate(env.initial_tenant, new_tenant_id)
|
||||
|
||||
ps_http.tenant_delete(env.initial_tenant)
|
||||
|
||||
env.neon_cli.map_branch("duplicate", new_tenant_id, new_timeline_id)
|
||||
|
||||
# start read-only replicate and validate
|
||||
with env.endpoints.create_start(
|
||||
"duplicate", tenant_id=new_tenant_id, lsn=last_flush_lsn
|
||||
) as ep_dup:
|
||||
with ep_dup.connect() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT * FROM foo ORDER BY i;")
|
||||
cur.fetchall() == [(1,), (2,), (3,)]
|
||||
|
||||
# ensure restarting PS works
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
@@ -74,7 +74,6 @@ fn analyze_trace<R: std::io::Read>(mut reader: R) {
|
||||
prev = Some(req);
|
||||
}
|
||||
PagestreamFeMessage::DbSize(_) => {}
|
||||
PagestreamFeMessage::NoOp => {},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user