mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-17 10:22:56 +00:00
Compare commits
18 Commits
release-pr
...
problame/b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c5ec255ce8 | ||
|
|
ebf956115c | ||
|
|
34cffd2c43 | ||
|
|
2d37857351 | ||
|
|
ddfce0cfa5 | ||
|
|
d52a622115 | ||
|
|
a066eecda0 | ||
|
|
94e94af6c7 | ||
|
|
df7346eaff | ||
|
|
76efb1b79b | ||
|
|
2f656c6691 | ||
|
|
bb5b5cbdac | ||
|
|
55cdf6c7ff | ||
|
|
82d9c68667 | ||
|
|
bc91c40f56 | ||
|
|
c5f58ef3f7 | ||
|
|
bb8531d920 | ||
|
|
7553bbe3f5 |
29
Cargo.lock
generated
29
Cargo.lock
generated
@@ -2932,6 +2932,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
@@ -3198,6 +3208,12 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "pagectl"
|
||||
version = "0.1.0"
|
||||
@@ -3283,10 +3299,12 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"utils",
|
||||
"walkdir",
|
||||
@@ -3561,7 +3579,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -3574,7 +3592,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-native-tls"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
@@ -3585,7 +3603,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -3603,7 +3621,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -5407,7 +5425,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=7434d9388965a17a6d113e5dfc0e65666a03b4c2#7434d9388965a17a6d113e5dfc0e65666a03b4c2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=problame/copy-both-duplex-public#5c462bd3500e657c014ef087e4eef2c1a8f0ebda"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -5764,6 +5782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
|
||||
12
Cargo.toml
12
Cargo.toml
@@ -161,11 +161,11 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
@@ -202,7 +202,7 @@ tonic-build = "0.9"
|
||||
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="7434d9388965a17a6d113e5dfc0e65666a03b4c2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="problame/copy-both-duplex-public" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ use utils::{
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
use anyhow::bail;
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
///
|
||||
@@ -612,15 +612,18 @@ pub enum PagestreamFeMessage {
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -719,6 +722,10 @@ impl PagestreamFeMessage {
|
||||
bytes.put_u64(req.lsn.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(4);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
@@ -769,6 +776,7 @@ impl PagestreamFeMessage {
|
||||
lsn: Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::NoOp),
|
||||
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
@@ -803,10 +811,46 @@ impl PagestreamBeMessage {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
Self::NoOp => {
|
||||
bytes.put_u8(105);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
match msg_tag {
|
||||
100 => todo!(),
|
||||
101 => todo!(),
|
||||
102 => {
|
||||
let buf = buf.get_ref();
|
||||
/* TODO use constant */
|
||||
if buf.len() == 8192 {
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
page: buf.clone(),
|
||||
}))
|
||||
} else {
|
||||
anyhow::bail!("invalid page size: {}", buf.len());
|
||||
}
|
||||
}
|
||||
103 => {
|
||||
let buf = buf.get_ref();
|
||||
let cstr = std::ffi::CStr::from_bytes_until_nul(&buf)?;
|
||||
let rust_str = cstr.to_str()?;
|
||||
Ok(PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: rust_str.to_owned(),
|
||||
}))
|
||||
}
|
||||
104 => todo!(),
|
||||
105 => {
|
||||
Ok(PagestreamBeMessage::NoOp)
|
||||
},
|
||||
_ => bail!("unknown tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -112,7 +112,7 @@ impl RemotePath {
|
||||
self.0.file_name()
|
||||
}
|
||||
|
||||
pub fn join(&self, segment: &Utf8Path) -> Self {
|
||||
pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
|
||||
Self(self.0.join(segment))
|
||||
}
|
||||
|
||||
|
||||
@@ -82,6 +82,8 @@ enum-map.workspace = true
|
||||
enumset.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion.workspace = true
|
||||
|
||||
245
pageserver/src/bin/getpage_bench_http.rs
Normal file
245
pageserver/src/bin/getpage_bench_http.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
use clap::Parser;
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Body, Client, Uri};
|
||||
use pageserver::{repository, tenant};
|
||||
use rand::prelude::*;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: Key,
|
||||
end: Key,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end.0.to_i128() - self.start.0.to_i128()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
ps_endpoint: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.ps_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
Arc::clone(&stats),
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
client: Client<HttpConnector, Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> {
|
||||
async move {
|
||||
let mut resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.ps_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
|
||||
let lsn = Arc::new(keyspace["at_lsn"].as_str().unwrap().to_owned());
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
KeyRange { start, end }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let (tx, mut rx) = channel::<i32>(1000);
|
||||
let tx = Arc::new(AsyncMutex::new(tx));
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let lsn = lsn.clone();
|
||||
let client = client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let stats = Arc::clone(&stats);
|
||||
let task = tokio::spawn(async move {
|
||||
for i in 0..args.num_requests {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key = rng.gen_range((r.start.0.to_i128()..r.end.0.to_i128()));
|
||||
key
|
||||
};
|
||||
let url = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/getpage?key={:036x}&lsn={}",
|
||||
args.ps_endpoint, tenant_id, timeline_id, key, lsn
|
||||
);
|
||||
let uri = url.parse::<Uri>().unwrap();
|
||||
let resp = client.get(uri).await.unwrap();
|
||||
stats.inc();
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
drop(tx);
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
(args.num_requests * args.num_tasks) as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
411
pageserver/src/bin/getpage_bench_libpq.rs
Normal file
411
pageserver/src/bin/getpage_bench_libpq.rs
Normal file
@@ -0,0 +1,411 @@
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use futures::{SinkExt, TryStreamExt};
|
||||
use hyper::client::conn::Parts;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::{Client, Uri};
|
||||
use pageserver::page_cache::PAGE_SZ;
|
||||
use pageserver::pgdatadir_mapping::{is_rel_block_key, key_to_rel_block};
|
||||
use pageserver::{repository, tenant};
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
use rand::prelude::*;
|
||||
use scopeguard::defer;
|
||||
use std::env::args;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use tokio::sync::mpsc::{channel, Sender};
|
||||
use tokio::sync::Mutex as AsyncMutex;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::{Stream, StreamExt};
|
||||
use utils::completion;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
struct Key(repository::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
repository::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeyRange {
|
||||
start: i128,
|
||||
end: i128,
|
||||
}
|
||||
|
||||
impl KeyRange {
|
||||
fn len(&self) -> i128 {
|
||||
self.end - self.start
|
||||
}
|
||||
}
|
||||
|
||||
struct RelTagBlockNo {
|
||||
rel_tag: RelTag,
|
||||
block_no: u32,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
ps_endpoint: String,
|
||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||
pq_client_connstring: String,
|
||||
// tenant_id: String,
|
||||
// timeline_id: String,
|
||||
num_tasks: usize,
|
||||
num_requests: usize,
|
||||
tenants: Option<Vec<String>>,
|
||||
#[clap(long)]
|
||||
pick_n_tenants: Option<usize>,
|
||||
#[clap(subcommand)]
|
||||
mode: Mode,
|
||||
}
|
||||
|
||||
#[derive(clap::Parser, Clone)]
|
||||
enum Mode {
|
||||
GetPage,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Stats {
|
||||
completed_requests: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
fn inc(&self) {
|
||||
self.completed_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: &'static Args = Box::leak(Box::new(Args::parse()));
|
||||
|
||||
// std::env::set_var("RUST_LOG", "info,tokio_postgres=trace");
|
||||
// tracing_subscriber::fmt::init();
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
let tenants = if let Some(tenants) = &args.tenants {
|
||||
tenants.clone()
|
||||
} else {
|
||||
// let tenant_id = "b97965931096047b2d54958756baee7b";
|
||||
// let timeline_id = "2868f84a8d166779e4c651b116c45059";
|
||||
|
||||
let resp = client
|
||||
.get(Uri::try_from(&format!("{}/v1/tenant", args.ps_endpoint)).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let tenants: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let mut out = Vec::new();
|
||||
for t in tenants.as_array().unwrap() {
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
out.push(t.get("id").unwrap().as_str().unwrap().to_owned());
|
||||
}
|
||||
if let Some(limit) = args.pick_n_tenants {
|
||||
assert_eq!(out.len(), limit);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut tenant_timelines = Vec::new();
|
||||
for tenant_id in tenants {
|
||||
let resp = client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline",
|
||||
args.ps_endpoint, tenant_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let timelines: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
for t in timelines.as_array().unwrap() {
|
||||
let timeline_id = t.get("timeline_id").unwrap().as_str().unwrap().to_owned();
|
||||
tenant_timelines.push((tenant_id.clone(), timeline_id));
|
||||
}
|
||||
}
|
||||
println!("tenant_timelines:\n{:?}", tenant_timelines);
|
||||
|
||||
let mut stats = Arc::new(Stats::default());
|
||||
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"RPS: {:.0}",
|
||||
completed_requests as f64 / elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for (tenant_id, timeline_id) in tenant_timelines {
|
||||
let stats = Arc::clone(&stats);
|
||||
let t = tokio::spawn(timeline(
|
||||
args,
|
||||
client.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
stats,
|
||||
));
|
||||
tasks.push(t);
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
t.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn timeline(
|
||||
args: &'static Args,
|
||||
http_client: Client<HttpConnector, hyper::Body>,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
stats: Arc<Stats>,
|
||||
) -> impl Future<Output = ()> + Send + Sync {
|
||||
async move {
|
||||
let mut resp = http_client
|
||||
.get(
|
||||
Uri::try_from(&format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/keyspace",
|
||||
args.ps_endpoint, tenant_id, timeline_id
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
if !resp.status().is_success() {
|
||||
panic!("Failed to get keyspace: {resp:?}");
|
||||
}
|
||||
let body = hyper::body::to_bytes(resp).await.unwrap();
|
||||
let keyspace: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let lsn: Lsn = keyspace["at_lsn"].as_str().unwrap().parse().unwrap();
|
||||
|
||||
let ranges = keyspace["keys"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter_map(|r| {
|
||||
let r = r.as_array().unwrap();
|
||||
assert_eq!(r.len(), 2);
|
||||
let start = Key::from_str(r[0].as_str().unwrap()).unwrap();
|
||||
let end = Key::from_str(r[1].as_str().unwrap()).unwrap();
|
||||
// filter out non-relblock keys
|
||||
match (is_rel_block_key(start.0), is_rel_block_key(end.0)) {
|
||||
(true, true) => Some(KeyRange {
|
||||
start: start.0.to_i128(),
|
||||
end: end.0.to_i128(),
|
||||
}),
|
||||
(true, false) | (false, true) => {
|
||||
unimplemented!("split up range")
|
||||
}
|
||||
(false, false) => None,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// weighted ranges
|
||||
let weights = ranges.iter().map(|r| r.len()).collect::<Vec<_>>();
|
||||
|
||||
let ranges = Arc::new(ranges);
|
||||
let weights = Arc::new(weights);
|
||||
|
||||
let mut tasks = Vec::<JoinHandle<()>>::new();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
for i in 0..args.num_tasks {
|
||||
let ranges = ranges.clone();
|
||||
let weights = weights.clone();
|
||||
let client = http_client.clone();
|
||||
let tenant_id = tenant_id.clone();
|
||||
let timeline_id = timeline_id.clone();
|
||||
let task = tokio::spawn({
|
||||
let stats = Arc::clone(&stats);
|
||||
async move {
|
||||
let mut client = getpage_client::Client::new(
|
||||
args.pq_client_connstring.clone(),
|
||||
tenant_id.clone(),
|
||||
timeline_id.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
for i in 0..args.num_requests {
|
||||
match args.mode {
|
||||
Mode::GetPage => {
|
||||
let key = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = ranges.choose_weighted(&mut rng, |r| r.len()).unwrap();
|
||||
let key: i128 = rng.gen_range((r.start..r.end));
|
||||
let key = repository::Key::from_i128(key);
|
||||
// XXX filter these out when we iterate the keyspace
|
||||
assert!(
|
||||
is_rel_block_key(key),
|
||||
"we filter non-relblock keys out above"
|
||||
);
|
||||
let (rel_tag, block_no) =
|
||||
key_to_rel_block(key).expect("we just checked");
|
||||
RelTagBlockNo { rel_tag, block_no }
|
||||
};
|
||||
client
|
||||
.getpage(key, lsn)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"getpage for tenant {} timeline {}",
|
||||
tenant_id, timeline_id
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
Mode::NoOp => {
|
||||
client.noop().await.unwrap();
|
||||
}
|
||||
}
|
||||
stats.inc();
|
||||
}
|
||||
client.shutdown().await;
|
||||
}
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
for task in tasks {
|
||||
task.await.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod getpage_client {
|
||||
use std::pin::Pin;
|
||||
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
|
||||
PagestreamGetPageResponse,
|
||||
};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::RelTagBlockNo;
|
||||
|
||||
pub(crate) struct Client {
|
||||
copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
|
||||
conn_task: JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(
|
||||
connstring: String,
|
||||
tenant_id: String,
|
||||
timeline_id: String,
|
||||
) -> impl std::future::Future<Output = anyhow::Result<Self>> + Send {
|
||||
async move {
|
||||
let (client, connection) =
|
||||
tokio_postgres::connect(&connstring, postgres::NoTls).await?;
|
||||
|
||||
let conn_task_cancel = CancellationToken::new();
|
||||
let conn_task = tokio::spawn({
|
||||
let conn_task_cancel = conn_task_cancel.clone();
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = conn_task_cancel.cancelled() => {
|
||||
return;
|
||||
}
|
||||
res = connection => {
|
||||
res.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = client
|
||||
.copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
|
||||
.await?;
|
||||
|
||||
Ok(Self {
|
||||
copy_both: Box::pin(copy_both),
|
||||
conn_task,
|
||||
cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn shutdown(mut self) {
|
||||
let _ = self.cancel_on_client_drop.take();
|
||||
self.conn_task.await.unwrap();
|
||||
}
|
||||
|
||||
pub async fn getpage(
|
||||
&mut self,
|
||||
key: RelTagBlockNo,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
let req = PagestreamGetPageRequest {
|
||||
latest: false,
|
||||
rel: key.rel_tag,
|
||||
blkno: key.block_no,
|
||||
lsn,
|
||||
};
|
||||
let req = PagestreamFeMessage::GetPage(req);
|
||||
match self.do_request(req).await? {
|
||||
PagestreamBeMessage::GetPage(p) => Ok(p),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn noop(&mut self) -> anyhow::Result<()> {
|
||||
match self.do_request(PagestreamFeMessage::NoOp).await? {
|
||||
PagestreamBeMessage::NoOp => Ok(()),
|
||||
x => anyhow::bail!("Unexpected response: {:?}", x),
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_request(
|
||||
&mut self,
|
||||
req: PagestreamFeMessage,
|
||||
) -> Result<PagestreamBeMessage, anyhow::Error> {
|
||||
let req: bytes::Bytes = req.serialize();
|
||||
// let mut req = tokio_util::io::ReaderStream::new(&req);
|
||||
let mut req = tokio_stream::once(Ok(req));
|
||||
|
||||
self.copy_both.send_all(&mut req).await?;
|
||||
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
|
||||
let next = next.unwrap().unwrap();
|
||||
|
||||
match PagestreamBeMessage::deserialize(next)? {
|
||||
PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
|
||||
x => Ok(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
109
pageserver/src/bin/noop_server.rs
Normal file
109
pageserver/src/bin/noop_server.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use anyhow::Context;
|
||||
use bytes::Buf;
|
||||
use clap::Parser;
|
||||
use pageserver_api::models::{PagestreamBeMessage, PagestreamErrorResponse, PagestreamFeMessage};
|
||||
use postgres_backend::{AuthType, PostgresBackend, QueryError};
|
||||
use pq_proto::{BeMessage, FeMessage};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
bind: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&args.bind).await.unwrap();
|
||||
loop {
|
||||
let (socket, _) = listener.accept().await.unwrap();
|
||||
tokio::spawn(async move {
|
||||
handle_connection(socket).await.unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_connection(socket: tokio::net::TcpStream) -> anyhow::Result<()> {
|
||||
socket
|
||||
.set_nodelay(true)
|
||||
.context("could not set TCP_NODELAY")?;
|
||||
|
||||
let peer_addr = socket.peer_addr().context("get peer address")?;
|
||||
let socket = tokio_io_timeout::TimeoutReader::new(socket);
|
||||
tokio::pin!(socket);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, AuthType::Trust, None)?;
|
||||
let mut conn_handler = NoOpHandler;
|
||||
let cancel = CancellationToken::new();
|
||||
pgbackend
|
||||
.run(&mut conn_handler, || {
|
||||
let cancel = cancel.clone();
|
||||
async move { cancel.cancelled().await }
|
||||
})
|
||||
.await?;
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
struct NoOpHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<IO> postgres_backend::Handler<IO> for NoOpHandler
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
fn startup(
|
||||
&mut self,
|
||||
_pgb: &mut PostgresBackend<IO>,
|
||||
_sm: &pq_proto::FeStartupPacket,
|
||||
) -> Result<(), QueryError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
query_string: &str,
|
||||
) -> Result<(), QueryError> {
|
||||
if !query_string.starts_with("pagestream ") {
|
||||
return Err(QueryError::Other(anyhow::anyhow!("not a pagestream query")));
|
||||
}
|
||||
|
||||
// switch client to COPYBOTH
|
||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||
pgb.flush().await?;
|
||||
|
||||
loop {
|
||||
let msg = pgb.read_message().await?;
|
||||
|
||||
let copy_data_bytes = match msg {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => return Ok(()),
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => return Ok(()), // client disconnected
|
||||
};
|
||||
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
let response = match neon_fe_msg {
|
||||
PagestreamFeMessage::NoOp => Ok(PagestreamBeMessage::NoOp),
|
||||
x => Err(QueryError::Other(anyhow::anyhow!(
|
||||
"this server only supports no-op: {x:?}"
|
||||
))),
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
|
||||
pgb.flush().await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -681,6 +681,45 @@ async fn tenant_ignore_handler(
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_duplicate_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let src_tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let new_tenant_id = request_data.new_tenant_id;
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let _timer = STORAGE_TIME_GLOBAL
|
||||
.get_metric_with_label_values(&[StorageTimeOperation::DuplicateTenant.into()])
|
||||
.expect("bug")
|
||||
.start_timer();
|
||||
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let generation = get_request_generation(state, request_data.generation)?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
mgr::duplicate_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
src_tenant_id,
|
||||
new_tenant_id,
|
||||
generation,
|
||||
state.tenant_resources(),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("tenant_duplicate", %src_tenant_id, tenant_id = %new_tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::CREATED, TenantCreateResponse(new_tenant_id))
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -1728,6 +1767,9 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
api_handler(r, tenant_ignore_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/duplicate", |r| {
|
||||
api_handler(r, tenant_duplicate_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
api_handler(r, timeline_detail_handler)
|
||||
})
|
||||
|
||||
@@ -51,6 +51,9 @@ pub enum StorageTimeOperation {
|
||||
|
||||
#[strum(serialize = "create tenant")]
|
||||
CreateTenant,
|
||||
|
||||
#[strum(serialize = "duplicate tenant")]
|
||||
DuplicateTenant,
|
||||
}
|
||||
|
||||
pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
|
||||
@@ -757,6 +760,7 @@ pub enum SmgrQueryType {
|
||||
GetRelSize,
|
||||
GetPageAtLsn,
|
||||
GetDbSize,
|
||||
NoOp,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -488,6 +488,11 @@ impl PageServerHandler {
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::NoOp => {
|
||||
let _timer = metrics.start_timer(metrics::SmgrQueryType::NoOp);
|
||||
let span = tracing::info_span!("no_op");
|
||||
(Ok(PagestreamBeMessage::NoOp), span)
|
||||
}
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
|
||||
@@ -1694,6 +1694,7 @@ const AUX_FILES_KEY: Key = Key {
|
||||
// Reverse mappings for a few Keys.
|
||||
// These are needed by WAL redo manager.
|
||||
|
||||
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
|
||||
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x00 => (
|
||||
@@ -1709,7 +1710,8 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
})
|
||||
}
|
||||
|
||||
fn is_rel_block_key(key: Key) -> bool {
|
||||
/// See [[key_to_rel_block]].
|
||||
pub fn is_rel_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@
|
||||
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncSeekExt;
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -26,9 +28,11 @@ use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt};
|
||||
use crate::tenant::delete::DeleteTenantFlow;
|
||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::{DeltaLayer, ImageLayer, LayerFileName};
|
||||
use crate::tenant::{
|
||||
create_tenant_files, AttachMarkerMode, AttachedTenantConf, CreateTenantFilesMode, Tenant,
|
||||
TenantState,
|
||||
create_tenant_files, remote_timeline_client, AttachMarkerMode, AttachedTenantConf,
|
||||
CreateTenantFilesMode, IndexPart, Tenant, TenantState,
|
||||
};
|
||||
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
|
||||
|
||||
@@ -695,6 +699,159 @@ pub(crate) async fn create_tenant(
|
||||
}).await
|
||||
}
|
||||
|
||||
pub(crate) async fn duplicate_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
src_tenant_id: TenantId,
|
||||
new_tenant_id: TenantId,
|
||||
generation: Generation,
|
||||
resources: TenantSharedResources,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
// TODO: would be nice to use tenant_map_insert here, but, we're not ready to create a Tenant object yet
|
||||
let tempdir = path_with_suffix_extension(
|
||||
conf.tenants_path().join(&new_tenant_id.to_string()),
|
||||
&format!("duplication.{TEMP_FILE_SUFFIX}"),
|
||||
);
|
||||
tokio::fs::remove_dir_all(&tempdir)
|
||||
.await
|
||||
.or_else(|e| match e.kind() {
|
||||
std::io::ErrorKind::NotFound => Ok(()),
|
||||
_ => Err(e),
|
||||
})
|
||||
.context("pre-run clean up tempdir")?;
|
||||
|
||||
tokio::fs::create_dir(&tempdir)
|
||||
.await
|
||||
.context("create tempdir")?;
|
||||
|
||||
// Copy the tenant's data in S3
|
||||
let remote_storage = resources
|
||||
.remote_storage
|
||||
.as_ref()
|
||||
.context("only works with remote storage")?;
|
||||
|
||||
let remote_src_timelines =
|
||||
remote_timeline_client::list_remote_timelines(remote_storage, src_tenant_id)
|
||||
.await
|
||||
.context("list src timelines")?;
|
||||
|
||||
info!(?remote_src_timelines, "got src timelines");
|
||||
|
||||
for timeline_id in remote_src_timelines {
|
||||
async {
|
||||
let tempdir = tempdir.join(&timeline_id.to_string());
|
||||
|
||||
tokio::fs::create_dir(&tempdir)
|
||||
.await
|
||||
.context("create tempdir for timeline")?;
|
||||
|
||||
let remote_src_tl =
|
||||
remote_timeline_client::remote_timeline_path(&src_tenant_id, &timeline_id);
|
||||
let remote_dst_tl =
|
||||
remote_timeline_client::remote_timeline_path(&new_tenant_id, &timeline_id);
|
||||
|
||||
let object_names = remote_storage
|
||||
.list_prefixes(Some(&remote_src_tl))
|
||||
.await
|
||||
.context("list timeline remote prefix")?;
|
||||
|
||||
for name in object_names {
|
||||
async {
|
||||
let name = name.object_name().context(
|
||||
"list_prefixes return values should always have object_name()=Some",
|
||||
)?;
|
||||
let remote_src_obj = remote_src_tl.join(name);
|
||||
let remote_dst_obj = remote_dst_tl.join(name);
|
||||
|
||||
let tmp_obj_filepath = tempdir.join(name);
|
||||
let mut tmp_obj_file = tokio::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(&tmp_obj_filepath)
|
||||
.await
|
||||
.context("create temp file")?;
|
||||
let mut tmp_dl = remote_storage
|
||||
.download(&remote_src_obj)
|
||||
.await
|
||||
.context("start download")?;
|
||||
let tmp_obj_size =
|
||||
tokio::io::copy(&mut tmp_dl.download_stream, &mut tmp_obj_file)
|
||||
.await
|
||||
.context("do the download")?;
|
||||
|
||||
if name == IndexPart::FILE_NAME {
|
||||
// needs no patching
|
||||
} else {
|
||||
let name = LayerFileName::from_str(name).map_err(|e: String| {
|
||||
anyhow::anyhow!("unknown key in timeline s3 prefix: {name:?}: {e}")
|
||||
})?;
|
||||
match name {
|
||||
LayerFileName::Image(_) => {
|
||||
ImageLayer::rewrite_tenant_timeline(
|
||||
&tmp_obj_filepath,
|
||||
new_tenant_id,
|
||||
timeline_id, /* leave as is */
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.context("rewrite tenant timeline")?;
|
||||
}
|
||||
LayerFileName::Delta(_) => {
|
||||
DeltaLayer::rewrite_tenant_timeline(
|
||||
&tmp_obj_filepath,
|
||||
new_tenant_id,
|
||||
timeline_id, /* leave as is */
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.context("rewrite tenant timeline")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(?remote_dst_obj, "uploading");
|
||||
|
||||
tmp_obj_file
|
||||
.seek(std::io::SeekFrom::Start(0))
|
||||
.await
|
||||
.context("seek tmp file to beginning for upload")?;
|
||||
remote_storage
|
||||
.upload(
|
||||
tmp_obj_file,
|
||||
tmp_obj_size as usize,
|
||||
&remote_dst_obj,
|
||||
tmp_dl.metadata,
|
||||
)
|
||||
.await
|
||||
.context("upload modified")?;
|
||||
|
||||
tokio::fs::remove_file(tmp_obj_filepath)
|
||||
.await
|
||||
.context("remove temp file")?;
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
.instrument(info_span!("copy object", object_name=?name))
|
||||
.await
|
||||
.context("copy object")?;
|
||||
}
|
||||
anyhow::Ok(())
|
||||
}
|
||||
.instrument(info_span!("copy_timeline", timeline_id=%timeline_id))
|
||||
.await?;
|
||||
}
|
||||
|
||||
tokio::fs::remove_dir_all(&tempdir)
|
||||
.await
|
||||
.context("post-run clean up tempdir")?;
|
||||
|
||||
attach_tenant(conf, new_tenant_id, generation, tenant_conf, resources, ctx).await
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum SetNewTenantConfigError {
|
||||
#[error(transparent)]
|
||||
|
||||
@@ -844,6 +844,49 @@ impl Drop for DeltaLayerWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != DELTA_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl DeltaLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -436,6 +436,49 @@ impl ImageLayer {
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayer {
|
||||
/// Assume the file at `path` is corrupt if this function returns with an error.
|
||||
pub(crate) async fn rewrite_tenant_timeline(
|
||||
path: &Utf8Path,
|
||||
new_tenant: TenantId,
|
||||
new_timeline: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let file = VirtualFile::open_with_options(
|
||||
path,
|
||||
&*std::fs::OpenOptions::new().read(true).write(true),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
let file = FileBlockReader::new(file);
|
||||
let summary_blk = file.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let mut file = file.file;
|
||||
if actual_summary.magic != IMAGE_FILE_MAGIC {
|
||||
bail!("File '{}' is not a delta layer", path);
|
||||
}
|
||||
let new_summary = Summary {
|
||||
tenant_id: new_tenant,
|
||||
timeline_id: new_timeline,
|
||||
..actual_summary
|
||||
};
|
||||
|
||||
let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
|
||||
Summary::ser_into(&new_summary, &mut buf)?;
|
||||
if buf.spilled() {
|
||||
// The code in ImageLayerWriterInner just warn!()s for this.
|
||||
// It should probably error out as well.
|
||||
anyhow::bail!(
|
||||
"Used more than one page size for summary buffer: {}",
|
||||
buf.len()
|
||||
);
|
||||
}
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
file.write_all(&buf).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageLayerInner {
|
||||
pub(super) async fn load(
|
||||
path: &Utf8Path,
|
||||
|
||||
@@ -18,7 +18,8 @@ use std::fs::{self, File, OpenOptions};
|
||||
use std::io::{Error, ErrorKind, Seek, SeekFrom};
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::{RwLock, RwLockWriteGuard};
|
||||
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
use tokio::time::Instant;
|
||||
|
||||
///
|
||||
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
|
||||
@@ -110,7 +111,7 @@ impl OpenFiles {
|
||||
///
|
||||
/// On return, we hold a lock on the slot, and its 'tag' has been updated
|
||||
/// recently_used has been set. It's all ready for reuse.
|
||||
fn find_victim_slot(&self) -> (SlotHandle, RwLockWriteGuard<SlotInner>) {
|
||||
async fn find_victim_slot(&self) -> (SlotHandle, RwLockWriteGuard<SlotInner>) {
|
||||
//
|
||||
// Run the clock algorithm to find a slot to replace.
|
||||
//
|
||||
@@ -142,7 +143,7 @@ impl OpenFiles {
|
||||
}
|
||||
retries += 1;
|
||||
} else {
|
||||
slot_guard = slot.inner.write().unwrap();
|
||||
slot_guard = slot.inner.write().await;
|
||||
index = next;
|
||||
break;
|
||||
}
|
||||
@@ -153,7 +154,7 @@ impl OpenFiles {
|
||||
// old file.
|
||||
//
|
||||
if let Some(old_file) = slot_guard.file.take() {
|
||||
// the normal path of dropping VirtualFile uses "close", use "close-by-replace" here to
|
||||
// the normal path of dropping VirtualFile uses `Close`, use `CloseByReplace` here to
|
||||
// distinguish the two.
|
||||
STORAGE_IO_TIME_METRIC
|
||||
.get(StorageIoOperation::CloseByReplace)
|
||||
@@ -208,6 +209,29 @@ impl CrashsafeOverwriteError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Observe duration for the given storage I/O operation
|
||||
///
|
||||
/// Unlike `observe_closure_duration`, this supports async,
|
||||
/// where "support" means that we measure wall clock time.
|
||||
macro_rules! observe_duration {
|
||||
($op:expr, $($body:tt)*) => {{
|
||||
let instant = Instant::now();
|
||||
let result = $($body)*;
|
||||
let elapsed = instant.elapsed().as_secs_f64();
|
||||
STORAGE_IO_TIME_METRIC
|
||||
.get($op)
|
||||
.observe(elapsed);
|
||||
result
|
||||
}}
|
||||
}
|
||||
|
||||
macro_rules! with_file {
|
||||
($this:expr, $op:expr, | $ident:ident | $($body:tt)*) => {{
|
||||
let $ident = $this.lock_file().await?;
|
||||
observe_duration!($op, $($body)*)
|
||||
}};
|
||||
}
|
||||
|
||||
impl VirtualFile {
|
||||
/// Open a file in read-only mode. Like File::open.
|
||||
pub async fn open(path: &Utf8Path) -> Result<VirtualFile, std::io::Error> {
|
||||
@@ -244,11 +268,9 @@ impl VirtualFile {
|
||||
tenant_id = "*".to_string();
|
||||
timeline_id = "*".to_string();
|
||||
}
|
||||
let (handle, mut slot_guard) = get_open_files().find_victim_slot();
|
||||
let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;
|
||||
|
||||
let file = STORAGE_IO_TIME_METRIC
|
||||
.get(StorageIoOperation::Open)
|
||||
.observe_closure_duration(|| open_options.open(path))?;
|
||||
let file = observe_duration!(StorageIoOperation::Open, open_options.open(path))?;
|
||||
|
||||
// Strip all options other than read and write.
|
||||
//
|
||||
@@ -331,22 +353,24 @@ impl VirtualFile {
|
||||
|
||||
/// Call File::sync_all() on the underlying File.
|
||||
pub async fn sync_all(&self) -> Result<(), Error> {
|
||||
self.with_file(StorageIoOperation::Fsync, |file| file.sync_all())
|
||||
.await?
|
||||
with_file!(self, StorageIoOperation::Fsync, |file| file
|
||||
.as_ref()
|
||||
.sync_all())
|
||||
}
|
||||
|
||||
pub async fn metadata(&self) -> Result<fs::Metadata, Error> {
|
||||
self.with_file(StorageIoOperation::Metadata, |file| file.metadata())
|
||||
.await?
|
||||
with_file!(self, StorageIoOperation::Metadata, |file| file
|
||||
.as_ref()
|
||||
.metadata())
|
||||
}
|
||||
|
||||
/// Helper function that looks up the underlying File for this VirtualFile,
|
||||
/// opening it and evicting some other File if necessary. It calls 'func'
|
||||
/// with the physical File.
|
||||
async fn with_file<F, R>(&self, op: StorageIoOperation, mut func: F) -> Result<R, Error>
|
||||
where
|
||||
F: FnMut(&File) -> R,
|
||||
{
|
||||
/// Helper function internal to `VirtualFile` that looks up the underlying File,
|
||||
/// opens it and evicts some other File if necessary. The passed parameter is
|
||||
/// assumed to be a function available for the physical `File`.
|
||||
///
|
||||
/// We are doing it via a macro as Rust doesn't support async closures that
|
||||
/// take on parameters with lifetimes.
|
||||
async fn lock_file(&self) -> Result<FileGuard<'_>, Error> {
|
||||
let open_files = get_open_files();
|
||||
|
||||
let mut handle_guard = {
|
||||
@@ -356,27 +380,23 @@ impl VirtualFile {
|
||||
// We only need to hold the handle lock while we read the current handle. If
|
||||
// another thread closes the file and recycles the slot for a different file,
|
||||
// we will notice that the handle we read is no longer valid and retry.
|
||||
let mut handle = *self.handle.read().unwrap();
|
||||
let mut handle = *self.handle.read().await;
|
||||
loop {
|
||||
// Check if the slot contains our File
|
||||
{
|
||||
let slot = &open_files.slots[handle.index];
|
||||
let slot_guard = slot.inner.read().unwrap();
|
||||
if slot_guard.tag == handle.tag {
|
||||
if let Some(file) = &slot_guard.file {
|
||||
// Found a cached file descriptor.
|
||||
slot.recently_used.store(true, Ordering::Relaxed);
|
||||
return Ok(STORAGE_IO_TIME_METRIC
|
||||
.get(op)
|
||||
.observe_closure_duration(|| func(file)));
|
||||
}
|
||||
let slot_guard = slot.inner.read().await;
|
||||
if slot_guard.tag == handle.tag && slot_guard.file.is_some() {
|
||||
// Found a cached file descriptor.
|
||||
slot.recently_used.store(true, Ordering::Relaxed);
|
||||
return Ok(FileGuard { slot_guard });
|
||||
}
|
||||
}
|
||||
|
||||
// The slot didn't contain our File. We will have to open it ourselves,
|
||||
// but before that, grab a write lock on handle in the VirtualFile, so
|
||||
// that no other thread will try to concurrently open the same file.
|
||||
let handle_guard = self.handle.write().unwrap();
|
||||
let handle_guard = self.handle.write().await;
|
||||
|
||||
// If another thread changed the handle while we were not holding the lock,
|
||||
// then the handle might now be valid again. Loop back to retry.
|
||||
@@ -390,17 +410,10 @@ impl VirtualFile {
|
||||
|
||||
// We need to open the file ourselves. The handle in the VirtualFile is
|
||||
// now locked in write-mode. Find a free slot to put it in.
|
||||
let (handle, mut slot_guard) = open_files.find_victim_slot();
|
||||
let (handle, mut slot_guard) = open_files.find_victim_slot().await;
|
||||
|
||||
// Open the physical file
|
||||
let file = STORAGE_IO_TIME_METRIC
|
||||
.get(StorageIoOperation::Open)
|
||||
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
||||
|
||||
// Perform the requested operation on it
|
||||
let result = STORAGE_IO_TIME_METRIC
|
||||
.get(op)
|
||||
.observe_closure_duration(|| func(&file));
|
||||
let file = observe_duration!(StorageIoOperation::Open, self.open_options.open(&self.path))?;
|
||||
|
||||
// Store the File in the slot and update the handle in the VirtualFile
|
||||
// to point to it.
|
||||
@@ -408,7 +421,9 @@ impl VirtualFile {
|
||||
|
||||
*handle_guard = handle;
|
||||
|
||||
Ok(result)
|
||||
return Ok(FileGuard {
|
||||
slot_guard: slot_guard.downgrade(),
|
||||
});
|
||||
}
|
||||
|
||||
pub fn remove(self) {
|
||||
@@ -423,11 +438,9 @@ impl VirtualFile {
|
||||
self.pos = offset;
|
||||
}
|
||||
SeekFrom::End(offset) => {
|
||||
self.pos = self
|
||||
.with_file(StorageIoOperation::Seek, |mut file| {
|
||||
file.seek(SeekFrom::End(offset))
|
||||
})
|
||||
.await??
|
||||
self.pos = with_file!(self, StorageIoOperation::Seek, |file| file
|
||||
.as_ref()
|
||||
.seek(SeekFrom::End(offset)))?
|
||||
}
|
||||
SeekFrom::Current(offset) => {
|
||||
let pos = self.pos as i128 + offset as i128;
|
||||
@@ -515,9 +528,9 @@ impl VirtualFile {
|
||||
}
|
||||
|
||||
pub async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
|
||||
let result = self
|
||||
.with_file(StorageIoOperation::Read, |file| file.read_at(buf, offset))
|
||||
.await?;
|
||||
let result = with_file!(self, StorageIoOperation::Read, |file| file
|
||||
.as_ref()
|
||||
.read_at(buf, offset));
|
||||
if let Ok(size) = result {
|
||||
STORAGE_IO_SIZE
|
||||
.with_label_values(&["read", &self.tenant_id, &self.timeline_id])
|
||||
@@ -527,9 +540,9 @@ impl VirtualFile {
|
||||
}
|
||||
|
||||
async fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
|
||||
let result = self
|
||||
.with_file(StorageIoOperation::Write, |file| file.write_at(buf, offset))
|
||||
.await?;
|
||||
let result = with_file!(self, StorageIoOperation::Write, |file| file
|
||||
.as_ref()
|
||||
.write_at(buf, offset));
|
||||
if let Ok(size) = result {
|
||||
STORAGE_IO_SIZE
|
||||
.with_label_values(&["write", &self.tenant_id, &self.timeline_id])
|
||||
@@ -539,6 +552,18 @@ impl VirtualFile {
|
||||
}
|
||||
}
|
||||
|
||||
struct FileGuard<'a> {
|
||||
slot_guard: RwLockReadGuard<'a, SlotInner>,
|
||||
}
|
||||
|
||||
impl<'a> AsRef<File> for FileGuard<'a> {
|
||||
fn as_ref(&self) -> &File {
|
||||
// This unwrap is safe because we only create `FileGuard`s
|
||||
// if we know that the file is Some.
|
||||
self.slot_guard.file.as_ref().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl VirtualFile {
|
||||
pub(crate) async fn read_blk(
|
||||
@@ -571,20 +596,39 @@ impl VirtualFile {
|
||||
impl Drop for VirtualFile {
|
||||
/// If a VirtualFile is dropped, close the underlying file if it was open.
|
||||
fn drop(&mut self) {
|
||||
let handle = self.handle.get_mut().unwrap();
|
||||
let handle = self.handle.get_mut();
|
||||
|
||||
// We could check with a read-lock first, to avoid waiting on an
|
||||
// unrelated I/O.
|
||||
let slot = &get_open_files().slots[handle.index];
|
||||
let mut slot_guard = slot.inner.write().unwrap();
|
||||
if slot_guard.tag == handle.tag {
|
||||
slot.recently_used.store(false, Ordering::Relaxed);
|
||||
// there is also operation "close-by-replace" for closes done on eviction for
|
||||
// comparison.
|
||||
STORAGE_IO_TIME_METRIC
|
||||
.get(StorageIoOperation::Close)
|
||||
.observe_closure_duration(|| drop(slot_guard.file.take()));
|
||||
fn clean_slot(slot: &Slot, mut slot_guard: RwLockWriteGuard<'_, SlotInner>, tag: u64) {
|
||||
if slot_guard.tag == tag {
|
||||
slot.recently_used.store(false, Ordering::Relaxed);
|
||||
// there is also the `CloseByReplace` operation for closes done on eviction for
|
||||
// comparison.
|
||||
STORAGE_IO_TIME_METRIC
|
||||
.get(StorageIoOperation::Close)
|
||||
.observe_closure_duration(|| drop(slot_guard.file.take()));
|
||||
}
|
||||
}
|
||||
|
||||
// We don't have async drop so we cannot directly await the lock here.
|
||||
// Instead, first do a best-effort attempt at closing the underlying
|
||||
// file descriptor by using `try_write`, and if that fails, spawn
|
||||
// a tokio task to do it asynchronously: we just want it to be
|
||||
// cleaned up eventually.
|
||||
// Most of the time, the `try_lock` should succeed though,
|
||||
// as we have `&mut self` access. In other words, if the slot
|
||||
// is still occupied by our file, there should be no access from
|
||||
// other I/O operations; the only other possible place to lock
|
||||
// the slot is the lock algorithm looking for free slots.
|
||||
let slot = &get_open_files().slots[handle.index];
|
||||
if let Ok(slot_guard) = slot.inner.try_write() {
|
||||
clean_slot(slot, slot_guard, handle.tag);
|
||||
} else {
|
||||
let tag = handle.tag;
|
||||
tokio::spawn(async move {
|
||||
let slot_guard = slot.inner.write().await;
|
||||
clean_slot(slot, slot_guard, tag);
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
43
test_runner/duplicate_tenant.py
Normal file
43
test_runner/duplicate_tenant.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# Usage from top of repo:
|
||||
# poetry run python3 test_runner/duplicate_tenant.py b97965931096047b2d54958756baee7b 10
|
||||
from queue import Queue
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import requests
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.types import TenantId
|
||||
|
||||
initial_tenant = sys.argv[1]
|
||||
ncopies = int(sys.argv[2])
|
||||
numthreads = int(sys.argv[3])
|
||||
|
||||
|
||||
# class DuckTypedNeonEnv:
|
||||
# pass
|
||||
|
||||
|
||||
# cli = NeonCli(DuckTypedNeonEnv())
|
||||
|
||||
q = Queue()
|
||||
for i in range(0, ncopies):
|
||||
q.put(i)
|
||||
|
||||
for i in range(0, numthreads):
|
||||
q.put(None)
|
||||
|
||||
|
||||
def create():
|
||||
while True:
|
||||
if q.get() == None:
|
||||
break
|
||||
new_tenant = TenantId.generate()
|
||||
res = requests.post(
|
||||
f"http://localhost:9898/v1/tenant/{initial_tenant}/duplicate",
|
||||
json={"new_tenant_id": str(new_tenant)},
|
||||
)
|
||||
res.raise_for_status()
|
||||
|
||||
|
||||
for i in range(0, numthreads):
|
||||
threading.Thread(target=create).start()
|
||||
@@ -215,6 +215,25 @@ class PageserverHttpClient(requests.Session):
|
||||
assert isinstance(new_tenant_id, str)
|
||||
return TenantId(new_tenant_id)
|
||||
|
||||
def tenant_duplicate(
|
||||
self, src_tenant_id: TenantId, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None
|
||||
) -> TenantId:
|
||||
if conf is not None:
|
||||
assert "new_tenant_id" not in conf.keys()
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{src_tenant_id}/duplicate",
|
||||
json={
|
||||
"new_tenant_id": str(new_tenant_id),
|
||||
**(conf or {}),
|
||||
},
|
||||
)
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f"could not create tenant: already exists for id {new_tenant_id}")
|
||||
new_tenant_id = res.json()
|
||||
assert isinstance(new_tenant_id, str)
|
||||
return TenantId(new_tenant_id)
|
||||
|
||||
def tenant_attach(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
|
||||
54
test_runner/regress/test_tenant_duplicate.py
Normal file
54
test_runner/regress/test_tenant_duplicate.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import time
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
last_flush_lsn_upload,
|
||||
)
|
||||
from fixtures.remote_storage import (
|
||||
RemoteStorageKind,
|
||||
)
|
||||
from fixtures.types import TenantId
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_tenant_duplicate(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as ep_main:
|
||||
ep_main.safe_psql("CREATE TABLE foo (i int);")
|
||||
ep_main.safe_psql("INSERT INTO foo VALUES (1), (2), (3);")
|
||||
last_flush_lsn = last_flush_lsn_upload(
|
||||
env, ep_main, env.initial_tenant, env.initial_timeline
|
||||
)
|
||||
|
||||
new_tenant_id = TenantId.generate()
|
||||
# timeline id remains unchanged with tenant_duplicate
|
||||
# TODO: implement a remapping scheme so timeline ids remain globally unique
|
||||
new_timeline_id = env.initial_timeline
|
||||
|
||||
log.info(f"Duplicate tenant/timeline will be: {new_tenant_id}/{new_timeline_id}")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
ps_http.tenant_duplicate(env.initial_tenant, new_tenant_id)
|
||||
|
||||
ps_http.tenant_delete(env.initial_tenant)
|
||||
|
||||
env.neon_cli.map_branch("duplicate", new_tenant_id, new_timeline_id)
|
||||
|
||||
# start read-only replicate and validate
|
||||
with env.endpoints.create_start(
|
||||
"duplicate", tenant_id=new_tenant_id, lsn=last_flush_lsn
|
||||
) as ep_dup:
|
||||
with ep_dup.connect() as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT * FROM foo ORDER BY i;")
|
||||
cur.fetchall() == [(1,), (2,), (3,)]
|
||||
|
||||
# ensure restarting PS works
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
@@ -74,6 +74,7 @@ fn analyze_trace<R: std::io::Read>(mut reader: R) {
|
||||
prev = Some(req);
|
||||
}
|
||||
PagestreamFeMessage::DbSize(_) => {}
|
||||
PagestreamFeMessage::NoOp => {},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user