Compare commits


3 Commits

Author          SHA1        Message                     Date
Conrad Ludgate  a65a5c372b  storage broker              2024-01-18 14:43:29 +00:00
Conrad Ludgate  2cf85471f5  proxy                       2024-01-18 14:11:35 +00:00
Conrad Ludgate  665f4ff4b5  move to hyper 1.0 mostly    2024-01-18 12:44:50 +00:00
67 changed files with 1106 additions and 1745 deletions


@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "20s", terminate-after = 3 }
slow-timeout = "1m"

Cargo.lock (generated): file diff suppressed because it is too large.


@@ -51,7 +51,7 @@ async-trait = "0.1"
aws-config = { version = "1.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.0"
aws-smithy-async = { version = "1.0", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.0"
aws-smithy-types = { version = "1.1.2", features = ["http-body-1-x"] }
aws-credential-types = "1.0"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
@@ -89,8 +89,12 @@ hostname = "0.3.1"
http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
hyper-tungstenite = "0.11"
hyper = "1.0.0"
hyper-util = "0.1.0"
http = "1"
http-body = "1"
http-body-util = "0.1"
hyper-tungstenite = "0.13.0"
inotify = "0.10.2"
ipnet = "2.9.0"
itertools = "0.10"
@@ -113,7 +117,7 @@ parquet_derive = "49.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
prost = "0.12"
rand = "0.8"
redis = { version = "0.24.0", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
@@ -121,7 +125,7 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_19"] }
reqwest-middleware = "0.2.0"
reqwest-retry = "0.2.2"
routerify = "3"
routerify = { git = "https://github.com/conradludgate/routerify", branch = "hyper1" }
rpds = "0.13"
rustc-hash = "1.1.0"
rustls = "0.21"
@@ -149,7 +153,7 @@ tar = "0.4"
task-local-extensions = "0.1.4"
test-context = "0.1"
thiserror = "1.0"
tls-listener = { version = "0.7", features = ["rustls", "hyper-h1"] }
tls-listener = { version = "0.9", features = ["rustls", "tokio-net"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.10.0"
@@ -159,7 +163,7 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tonic = {version = "0.10", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.19.0"
@@ -211,7 +215,7 @@ criterion = "0.5.1"
rcgen = "0.11"
rstest = "0.18"
camino-tempfile = "1.0.2"
tonic-build = "0.9"
tonic-build = "0.10.2"
[patch.crates-io]


@@ -883,10 +883,8 @@ FROM debian:bullseye-slim
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
mkdir /var/db/postgres/pgbouncer && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
chmod 0750 /var/db/postgres/pgbouncer && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache


@@ -12,7 +12,11 @@ cfg-if.workspace = true
clap.workspace = true
flate2.workspace = true
futures.workspace = true
hyper = { workspace = true, features = ["full"] }
hyper = { workspace = true, features = ["server"] }
hyper-util = { workspace = true, features = ["tokio", "server", "server-auto"] }
http = { workspace = true, features = [] }
http-body = { workspace = true, features = [] }
http-body-util = { workspace = true, features = [] }
nix.workspace = true
notify.workspace = true
num_cpus.workspace = true


@@ -32,6 +32,8 @@
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres \
//! -r http://pg-ext-s3-gateway \
//! --pgbouncer-connstr 'host=localhost port=6432 dbname=pgbouncer user=cloud_admin sslmode=disable'
//! --pgbouncer-ini-path /etc/pgbouncer.ini \
//! ```
//!
use std::collections::HashMap;
@@ -110,6 +112,9 @@ fn main() -> Result<()> {
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let pgbouncer_connstr = matches.get_one::<String>("pgbouncer-connstr");
let pgbouncer_ini_path = matches.get_one::<String>("pgbouncer-ini-path");
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
@@ -220,6 +225,8 @@ fn main() -> Result<()> {
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
ext_download_progress: RwLock::new(HashMap::new()),
build_tag,
pgbouncer_connstr: pgbouncer_connstr.map(|s| s.to_string()),
pgbouncer_ini_path: pgbouncer_ini_path.map(|s| s.to_string()),
};
let compute = Arc::new(compute_node);
@@ -516,6 +523,23 @@ fn cli() -> clap::Command {
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("pgbouncer-connstr")
.long("pgbouncer-connstr")
.default_value(
"host=localhost port=6432 dbname=pgbouncer user=cloud_admin sslmode=disable",
)
.value_name("PGBOUNCER_CONNSTR"),
)
.arg(
Arg::new("pgbouncer-ini-path")
.long("pgbouncer-ini-path")
// Note: this doesn't match the current path for pgbouncer.ini.
// Until we fix it, we need to pass the path explicitly
// or this will effectively be a no-op.
.default_value("/etc/pgbouncer.ini")
.value_name("PGBOUNCER_INI_PATH"),
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers


@@ -71,6 +71,10 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub build_tag: String,
// connection string to pgbouncer to change settings
pub pgbouncer_connstr: Option<String>,
// path to pgbouncer.ini to change settings
pub pgbouncer_ini_path: Option<String>,
}
// store some metrics about download size that might impact startup time
@@ -765,8 +769,8 @@ impl ComputeNode {
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");
if let Some(connstr) = &self.pgbouncer_connstr {
info!("tuning pgbouncer with connstr: {:?}", connstr);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -775,9 +779,15 @@ impl ComputeNode {
// Spawn a thread to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let pgbouncer_settings = spec.pgbouncer_settings.clone();
let connstr_clone = connstr.clone();
let pgbouncer_ini_path = self.pgbouncer_ini_path.clone();
let _handle = thread::spawn(move || {
let res = rt.block_on(tune_pgbouncer(pgbouncer_settings));
let res = rt.block_on(tune_pgbouncer(
pgbouncer_settings,
&connstr_clone,
pgbouncer_ini_path,
));
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}
@@ -842,8 +852,8 @@ impl ComputeNode {
);
// tune pgbouncer
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
info!("tuning pgbouncer");
if let Some(connstr) = &self.pgbouncer_connstr {
info!("tuning pgbouncer with connstr: {:?}", connstr);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -852,9 +862,15 @@ impl ComputeNode {
// Spawn a thread to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let pgbouncer_settings = pspec.spec.pgbouncer_settings.clone();
let connstr_clone = connstr.clone();
let pgbouncer_ini_path = self.pgbouncer_ini_path.clone();
let _handle = thread::spawn(move || {
let res = rt.block_on(tune_pgbouncer(pgbouncer_settings));
let res = rt.block_on(tune_pgbouncer(
pgbouncer_settings,
&connstr_clone,
pgbouncer_ini_path,
));
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}


@@ -6,14 +6,22 @@ use std::sync::Arc;
use std::thread;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use bytes::Bytes;
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use http_body_util::BodyExt;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::service::service_fn;
use hyper::{Method, Request, Response, StatusCode};
use hyper_util::rt::TokioExecutor;
use hyper_util::rt::TokioIo;
use hyper_util::server::conn;
use num_cpus;
use serde_json;
use tokio::net::TcpListener;
use tokio::task;
use tracing::{error, info, warn};
use tracing_utils::http::OtelName;
@@ -36,7 +44,7 @@ fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
}
// Service function to handle all available routes.
async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
async fn routes(req: Request<Incoming>, compute: &Arc<ComputeNode>) -> Response<Full<Bytes>> {
//
// NOTE: The URI path is currently included in traces. That's OK because
// it doesn't contain any variable parts or sensitive information. But
@@ -48,7 +56,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
info!("serving /status GET request");
let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
Response::new(Full::from(serde_json::to_string(&status_response).unwrap()))
}
// Startup metrics in JSON format. Keep /metrics reserved for a possible
@@ -56,7 +64,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
(&Method::GET, "/metrics.json") => {
info!("serving /metrics.json GET request");
let metrics = compute.state.lock().unwrap().metrics.clone();
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
Response::new(Full::from(serde_json::to_string(&metrics).unwrap()))
}
// Collect Postgres current usage insights
@@ -66,11 +74,11 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
if status != ComputeStatus::Running {
let msg = format!("compute is not running, current status: {:?}", status);
error!(msg);
return Response::new(Body::from(msg));
return Response::new(Full::from(msg));
}
let insights = compute.collect_insights().await;
Response::new(Body::from(insights))
Response::new(Full::from(insights))
}
(&Method::POST, "/check_writability") => {
@@ -82,15 +90,15 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
status
);
error!(msg);
return Response::new(Body::from(msg));
return Response::new(Full::from(msg));
}
let res = crate::checker::check_writability(compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Ok(_) => Response::new(Full::from("true")),
Err(e) => {
error!("check_writability failed: {}", e);
Response::new(Body::from(e.to_string()))
Response::new(Full::from(e.to_string()))
}
}
}
@@ -98,7 +106,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
(&Method::GET, "/info") => {
let num_cpus = num_cpus::get_physical();
info!("serving /info GET request. num_cpus: {}", num_cpus);
Response::new(Body::from(
Response::new(Full::from(
serde_json::json!({
"num_cpus": num_cpus,
})
@@ -115,7 +123,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
(&Method::POST, "/configure") => {
info!("serving /configure POST request");
match handle_configure_request(req, compute).await {
Ok(msg) => Response::new(Body::from(msg)),
Ok(msg) => Response::new(Full::from(msg)),
Err((msg, code)) => {
error!("error handling /configure request: {msg}");
render_json_error(&msg, code)
@@ -132,7 +140,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
// if no remote storage is configured
if compute.ext_remote_storage.is_none() {
info!("no extensions remote storage configured");
let mut resp = Response::new(Body::from("no remote storage configured"));
let mut resp = Response::new(Full::from("no remote storage configured"));
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return resp;
}
@@ -143,7 +151,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
if params == "is_library=true" {
is_library = true;
} else {
let mut resp = Response::new(Body::from("Wrong request parameters"));
let mut resp = Response::new(Full::from("Wrong request parameters"));
*resp.status_mut() = StatusCode::BAD_REQUEST;
return resp;
}
@@ -165,7 +173,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
Some(r) => r,
None => {
info!("no remote extensions spec was provided");
let mut resp = Response::new(Body::from("no remote storage configured"));
let mut resp = Response::new(Full::from("no remote storage configured"));
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
return resp;
}
@@ -182,10 +190,10 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
match ext {
Ok((ext_name, ext_path)) => {
match compute.download_extension(ext_name, ext_path).await {
Ok(_) => Response::new(Body::from("OK")),
Ok(_) => Response::new(Full::from("OK")),
Err(e) => {
error!("extension download failed: {}", e);
let mut resp = Response::new(Body::from(e.to_string()));
let mut resp = Response::new(Full::from(e.to_string()));
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
resp
}
@@ -193,7 +201,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
Err(e) => {
warn!("extension download failed to find extension: {}", e);
let mut resp = Response::new(Body::from("failed to find file"));
let mut resp = Response::new(Full::from("failed to find file"));
*resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
resp
}
@@ -202,7 +210,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
// Return the `404 Not Found` for any other routes.
_ => {
let mut not_found = Response::new(Body::from("404 Not Found"));
let mut not_found = Response::new(Full::from("404 Not Found"));
*not_found.status_mut() = StatusCode::NOT_FOUND;
not_found
}
@@ -210,7 +218,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
async fn handle_configure_request(
req: Request<Body>,
req: Request<Incoming>,
compute: &Arc<ComputeNode>,
) -> Result<String, (String, StatusCode)> {
if !compute.live_config_allowed {
@@ -220,7 +228,7 @@ async fn handle_configure_request(
));
}
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let body_bytes = req.into_body().collect().await.unwrap().to_bytes();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
@@ -287,13 +295,13 @@ async fn handle_configure_request(
}
}
fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
fn render_json_error(e: &str, status: StatusCode) -> Response<Full<Bytes>> {
let error = GenericAPIError {
error: e.to_string(),
};
Response::builder()
.status(status)
.body(Body::from(serde_json::to_string(&error).unwrap()))
.body(Full::from(serde_json::to_string(&error).unwrap()))
.unwrap()
}
@@ -304,35 +312,43 @@ async fn serve(port: u16, state: Arc<ComputeNode>) {
// see e.g. https://github.com/rust-lang/rust/pull/34440
let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
let make_service = make_service_fn(move |_conn| {
let service = service_fn(move |req: Request<Incoming>| {
let state = state.clone();
async move {
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
let state = state.clone();
async move {
Ok::<_, Infallible>(
// NOTE: We include the URI path in the string. It
// doesn't contain any variable parts or sensitive
// information in this API.
tracing_utils::http::tracing_handler(
req,
|req| routes(req, &state),
OtelName::UriPath,
)
.await,
)
}
}))
Ok::<_, Infallible>(
// NOTE: We include the URI path in the string. It
// doesn't contain any variable parts or sensitive
// information in this API.
tracing_utils::http::tracing_handler(
req,
|req| routes(req, &state),
OtelName::UriPath,
)
.await,
)
}
});
info!("starting HTTP server on {}", addr);
let server = Server::bind(&addr).serve(make_service);
// Run this server forever
if let Err(e) = server.await {
error!("server error: {}", e);
let listener = TcpListener::bind(addr).await.unwrap();
loop {
let (stream, _) = match listener.accept().await {
Ok(r) => r,
Err(e) => {
error!("server error: {}", e);
return;
}
};
let io = TokioIo::new(stream);
let service = service.clone();
tokio::task::spawn(async move {
let builder = conn::auto::Builder::new(TokioExecutor::new());
let res = builder.serve_connection(io, service).await;
if let Err(err) = res {
println!("Error serving connection: {:?}", err);
}
});
}
}
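
For reference, a minimal self-contained sketch of the hyper 1.0 + hyper-util serving pattern the hunk above moves to; the trivial handler and names here are illustrative, not the repo's code:

use std::convert::Infallible;
use std::net::SocketAddr;

use bytes::Bytes;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::service::service_fn;
use hyper::{Request, Response};
use hyper_util::rt::{TokioExecutor, TokioIo};
use hyper_util::server::conn;
use tokio::net::TcpListener;

async fn serve_sketch(addr: SocketAddr) -> std::io::Result<()> {
    // hyper 1.0 no longer ships Server::bind; the accept loop is written by hand.
    let listener = TcpListener::bind(addr).await?;
    loop {
        let (stream, _) = listener.accept().await?;
        // Tokio types are adapted to hyper's own IO traits via TokioIo.
        let io = TokioIo::new(stream);
        tokio::spawn(async move {
            let service = service_fn(|_req: Request<Incoming>| async {
                Ok::<_, Infallible>(Response::new(Full::new(Bytes::from("ok"))))
            });
            // conn::auto negotiates HTTP/1 or HTTP/2 per connection.
            let builder = conn::auto::Builder::new(TokioExecutor::new());
            if let Err(err) = builder.serve_connection(io, service).await {
                eprintln!("error serving connection: {err:?}");
            }
        });
    }
}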


@@ -366,7 +366,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
}
/// Update pgbouncer.ini with provided options
fn update_pgbouncer_ini(
pub fn update_pgbouncer_ini(
pgbouncer_config: HashMap<String, String>,
pgbouncer_ini_path: &str,
) -> Result<()> {
@@ -375,10 +375,6 @@ fn update_pgbouncer_ini(
for (option_name, value) in pgbouncer_config.iter() {
section.insert(option_name, value);
debug!(
"Updating pgbouncer.ini with new values {}={}",
option_name, value
);
}
conf.write_to_file(pgbouncer_ini_path)?;
@@ -388,79 +384,48 @@ fn update_pgbouncer_ini(
/// Tune pgbouncer.
/// 1. Apply new config using pgbouncer admin console
/// 2. Add new values to pgbouncer.ini to preserve them after restart
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
// for VMs use pgbouncer specific way to connect to
// pgbouncer admin console without password
// when pgbouncer is running under the same user.
"host=/tmp port=6432 dbname=pgbouncer user=pgbouncer".to_string()
} else {
// for k8s use normal connection string with password
// to connect to pgbouncer admin console
let mut pgbouncer_connstr =
"host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
}
pgbouncer_connstr
};
info!(
"Connecting to pgbouncer with connection string: {}",
pgbouncer_connstr
);
// connect to pgbouncer, retrying several times
// because pgbouncer may not be ready yet
let mut retries = 3;
let client = loop {
match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {
Ok((client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
break client;
pub async fn tune_pgbouncer(
pgbouncer_settings: Option<HashMap<String, String>>,
pgbouncer_connstr: &str,
pgbouncer_ini_path: Option<String>,
) -> Result<()> {
if let Some(pgbouncer_config) = pgbouncer_settings {
// Apply new config
let connect_result = tokio_postgres::connect(pgbouncer_connstr, NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
Err(e) => {
if retries == 0 {
return Err(e.into());
}
error!("Failed to connect to pgbouncer: pgbouncer_connstr {}", e);
retries -= 1;
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
};
});
// Apply new config
for (option_name, value) in pgbouncer_config.iter() {
let query = format!("SET {}={}", option_name, value);
// keep this log line for debugging purposes
info!("Applying pgbouncer setting change: {}", query);
if let Err(err) = client.simple_query(&query).await {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
for (option_name, value) in pgbouncer_config.iter() {
info!(
"Applying pgbouncer setting change: {} = {}",
option_name, value
);
};
}
let query = format!("SET {} = {}", option_name, value);
// save values to pgbouncer.ini
// so that they are preserved after pgbouncer restart
let pgbouncer_ini_path = if std::env::var_os("AUTOSCALING").is_some() {
// in VMs we use /etc/pgbouncer.ini
"/etc/pgbouncer.ini".to_string()
} else {
// in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini
// this is a shared volume between pgbouncer and postgres containers
// FIXME: fix permissions for this file
"/var/db/postgres/pgbouncer/pgbouncer.ini".to_string()
};
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
let result = client.simple_query(&query).await;
info!("Applying pgbouncer setting change: {}", query);
info!("pgbouncer setting change result: {:?}", result);
if let Err(err) = result {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
);
};
}
// save values to pgbouncer.ini
// so that they are preserved after pgbouncer restart
if let Some(pgbouncer_ini_path) = pgbouncer_ini_path {
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
}
}
Ok(())
}
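
A trimmed sketch of the tuning path above, assuming only tokio-postgres and the pgbouncer admin console's SET command; error handling mirrors the log-and-continue behaviour of the code:

use std::collections::HashMap;

use anyhow::Result;
use tokio_postgres::NoTls;
use tracing::{error, info};

async fn apply_pgbouncer_settings(
    connstr: &str,
    settings: &HashMap<String, String>,
) -> Result<()> {
    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
    // The connection future drives the socket and must be polled on its own task.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            error!("connection error: {}", e);
        }
    });
    for (name, value) in settings {
        // pgbouncer's admin console accepts plain `SET name = value` statements.
        let query = format!("SET {} = {}", name, value);
        info!("applying pgbouncer setting change: {}", query);
        if let Err(err) = client.simple_query(&query).await {
            // Don't fail the whole startup on a single bad setting; just log it.
            error!("failed to apply {}: {}", query, err);
        }
    }
    Ok(())
}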


@@ -1,6 +1,5 @@
use crate::{background_process, local_env::LocalEnv};
use camino::Utf8PathBuf;
use hyper::Method;
use pageserver_api::{
models::{ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo},
shard::TenantShardId,
@@ -8,6 +7,7 @@ use pageserver_api::{
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use reqwest::Method;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{path::PathBuf, process::Child, str::FromStr};
use tracing::instrument;
@@ -278,7 +278,7 @@ impl AttachmentService {
/// Simple HTTP request wrapper for calling into attachment service
async fn dispatch<RQ, RS>(
&self,
method: hyper::Method,
method: reqwest::Method,
path: String,
body: Option<RQ>,
) -> anyhow::Result<RS>


@@ -15,7 +15,11 @@ aws-sdk-s3.workspace = true
aws-credential-types.workspace = true
bytes.workspace = true
camino.workspace = true
hyper = { workspace = true, features = ["stream"] }
hyper = { workspace = true, features = [] }
hyper-util = { workspace = true, features = [] }
http = { workspace = true, features = [] }
http-body = { workspace = true, features = [] }
http-body-util = { workspace = true, features = [] }
futures.workspace = true
serde.workspace = true
serde_json.workspace = true


@@ -36,7 +36,9 @@ use aws_smithy_types::body::SdkBody;
use aws_smithy_types::byte_stream::ByteStream;
use bytes::Bytes;
use futures::stream::Stream;
use hyper::Body;
use futures_util::TryStreamExt;
use http_body::Frame;
use http_body_util::StreamBody;
use scopeguard::ScopeGuard;
use super::StorageMetadata;
@@ -469,8 +471,8 @@ impl RemoteStorage for S3Bucket {
let started_at = start_measuring_requests(kind);
let body = Body::wrap_stream(from);
let bytes_stream = ByteStream::new(SdkBody::from_body_0_4(body));
let body = StreamBody::new(from.map_ok(Frame::data));
let bytes_stream = ByteStream::new(SdkBody::from_body_1_x(body));
let res = self
.client

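A hedged, self-contained sketch of the same conversion: a stream of Bytes becomes an aws-smithy ByteStream through http-body 1.x (this relies on the "http-body-1-x" feature enabled in Cargo.toml above; exact trait bounds may differ):

use aws_smithy_types::body::SdkBody;
use aws_smithy_types::byte_stream::ByteStream;
use bytes::Bytes;
use futures_util::{stream, TryStreamExt};
use http_body::Frame;
use http_body_util::StreamBody;

fn upload_body_sketch() -> ByteStream {
    // Any Stream<Item = Result<Bytes, E>> works; two static chunks stand in for real data.
    let chunks: Vec<Result<Bytes, std::io::Error>> = vec![
        Ok(Bytes::from_static(b"hello ")),
        Ok(Bytes::from_static(b"world")),
    ];
    // hyper 1.0 dropped Body::wrap_stream, so each chunk is wrapped in a Frame instead.
    let body = StreamBody::new(stream::iter(chunks).map_ok(Frame::data));
    ByteStream::new(SdkBody::from_body_1_x(body))
}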

@@ -1,7 +1,8 @@
//! Tracing wrapper for Hyper HTTP server
use hyper::body::Body;
use hyper::HeaderMap;
use hyper::{Body, Request, Response};
use hyper::{Request, Response};
use std::future::Future;
use tracing::Instrument;
use tracing_opentelemetry::OpenTelemetrySpanExt;
@@ -35,14 +36,14 @@ pub enum OtelName<'a> {
/// instrumentation libraries at:
/// <https://opentelemetry.io/registry/?language=rust&component=instrumentation>
/// If a Hyper crate appears, consider switching to that.
pub async fn tracing_handler<F, R>(
req: Request<Body>,
pub async fn tracing_handler<B1: Body, B2: Body, F, R>(
req: Request<B1>,
handler: F,
otel_name: OtelName<'_>,
) -> Response<Body>
) -> Response<B2>
where
F: Fn(Request<Body>) -> R,
R: Future<Output = Response<Body>>,
F: Fn(Request<B1>) -> R,
R: Future<Output = Response<B2>>,
{
// Create a tracing span, with context propagated from the incoming
// request if any.


@@ -22,6 +22,7 @@ chrono.workspace = true
heapless.workspace = true
hex = { workspace = true, features = ["serde"] }
hyper = { workspace = true, features = ["full"] }
http-body-util = { workspace = true, features = [] }
fail.workspace = true
futures = { workspace = true}
jsonwebtoken.workspace = true


@@ -4,7 +4,10 @@ use crate::http::{
error::ApiError,
json::{json_request, json_response},
};
use hyper::{Body, Request, Response, StatusCode};
use bytes::Bytes;
use http_body_util::Full;
use hyper::{Request, Response, StatusCode};
use routerify::Body;
use serde::{Deserialize, Serialize};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -151,7 +154,7 @@ pub struct FailpointConfig {
pub async fn failpoints_handler(
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
) -> Result<Response<Full<Bytes>>, ApiError> {
if !fail::has_failpoints() {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Cannot manage failpoints because storage was compiled without failpoints support"


@@ -4,11 +4,11 @@ use anyhow::Context;
use hyper::header::{HeaderName, AUTHORIZATION};
use hyper::http::HeaderValue;
use hyper::Method;
use hyper::{header::CONTENT_TYPE, Body, Request, Response};
use hyper::{header::CONTENT_TYPE, Request, Response};
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
use once_cell::sync::Lazy;
use routerify::ext::RequestExt;
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
use routerify::{Body, Middleware, RequestInfo, Router, RouterBuilder};
use tracing::{self, debug, info, info_span, warn, Instrument};
use std::future::Future;
@@ -238,7 +238,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
let (tx, rx) = mpsc::channel(1);
let body = Body::wrap_stream(ReceiverStream::new(rx));
let body = Body::from_stream(ReceiverStream::new(rx));
let mut writer = ChannelWriter::new(128 * 1024, tx);
@@ -284,7 +284,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
Ok(response)
}
pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
pub fn add_request_id_middleware<B: hyper::body::Body + Send + Sync + 'static>(
) -> Middleware<B, ApiError> {
Middleware::pre(move |req| async move {
let request_id = match req.headers().get(&X_REQUEST_ID_HEADER) {
@@ -317,7 +317,7 @@ async fn add_request_id_header_to_response(
Ok(res)
}
pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
pub fn make_router() -> RouterBuilder<routerify::Body, ApiError> {
Router::builder()
.middleware(add_request_id_middleware())
.middleware(Middleware::post_with_info(
@@ -328,11 +328,11 @@ pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
}
pub fn attach_openapi_ui(
router_builder: RouterBuilder<hyper::Body, ApiError>,
router_builder: RouterBuilder<routerify::Body, ApiError>,
spec: &'static [u8],
spec_mount_path: &'static str,
ui_mount_path: &'static str,
) -> RouterBuilder<hyper::Body, ApiError> {
) -> RouterBuilder<routerify::Body, ApiError> {
router_builder
.get(spec_mount_path,
move |r| request_span(r, move |_| async move {
@@ -388,7 +388,7 @@ fn parse_token(header_value: &str) -> Result<&str, ApiError> {
Ok(token)
}
pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
pub fn auth_middleware<B: hyper::body::Body + Send + Sync + 'static>(
provide_auth: fn(&Request<Body>) -> Option<&SwappableJwtAuth>,
) -> Middleware<B, ApiError> {
Middleware::pre(move |req| async move {
@@ -423,7 +423,7 @@ pub fn add_response_header_middleware<B>(
value: &str,
) -> anyhow::Result<Middleware<B, ApiError>>
where
B: hyper::body::HttpBody + Send + Sync + 'static,
B: hyper::body::Body + Send + Sync + 'static,
{
let name =
HeaderName::from_str(header).with_context(|| format!("invalid header name: {header}"))?;
@@ -464,7 +464,6 @@ pub fn check_permission_with(
#[cfg(test)]
mod tests {
use super::*;
use futures::future::poll_fn;
use hyper::service::Service;
use routerify::RequestServiceBuilder;
use std::net::{IpAddr, SocketAddr};
@@ -473,16 +472,13 @@ mod tests {
async fn test_request_id_returned() {
let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
let mut service = builder.build(remote_addr);
if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
panic!("request service is not ready: {:?}", e);
}
let service = builder.build(remote_addr);
let mut req: Request<Body> = Request::default();
req.headers_mut()
.append(&X_REQUEST_ID_HEADER, HeaderValue::from_str("42").unwrap());
let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
let resp: Response<Body> = service.call(req).await.unwrap();
let header_val = resp.headers().get(&X_REQUEST_ID_HEADER).unwrap();
@@ -493,13 +489,10 @@ mod tests {
async fn test_request_id_empty() {
let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
let mut service = builder.build(remote_addr);
if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
panic!("request service is not ready: {:?}", e);
}
let service = builder.build(remote_addr);
let req: Request<Body> = Request::default();
let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
let resp: Response<Body> = service.call(req).await.unwrap();
let header_val = resp.headers().get(&X_REQUEST_ID_HEADER);


@@ -1,4 +1,5 @@
use hyper::{header, Body, Response, StatusCode};
use hyper::{header, Response, StatusCode};
use routerify::Body;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::error::Error as StdError;


@@ -1,6 +1,8 @@
use anyhow::Context;
use bytes::Buf;
use hyper::{header, Body, Request, Response, StatusCode};
use anyhow::{anyhow, Context};
use bytes::{Buf, Bytes};
use http_body_util::{BodyExt, Full};
use hyper::{header, Request, Response, StatusCode};
use routerify::Body;
use serde::{Deserialize, Serialize};
use super::error::ApiError;
@@ -18,10 +20,14 @@ pub async fn json_request<T: for<'de> Deserialize<'de>>(
pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>,
) -> Result<Option<T>, ApiError> {
let body = hyper::body::aggregate(request.body_mut())
let body = request
.body_mut()
.collect()
.await
.map_err(|e| anyhow!(e))
.context("Failed to read request body")
.map_err(ApiError::BadRequest)?;
.map_err(ApiError::BadRequest)?
.aggregate();
if body.remaining() == 0 {
return Ok(None);
}
@@ -35,17 +41,24 @@ pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
.map_err(ApiError::BadRequest)
}
pub fn json_response<T: Serialize>(
pub fn json_response_body<T: Serialize>(
status: StatusCode,
data: T,
) -> Result<Response<Body>, ApiError> {
json_response(status, data).map(|r| r.map(Body::new))
}
pub fn json_response<T: Serialize>(
status: StatusCode,
data: T,
) -> Result<Response<Full<Bytes>>, ApiError> {
let json = serde_json::to_string(&data)
.context("Failed to serialize JSON response")
.map_err(ApiError::InternalServerError)?;
let response = Response::builder()
.status(status)
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(json))
.body(Full::from(json))
.map_err(|e| ApiError::InternalServerError(e.into()))?;
Ok(response)
}


@@ -5,4 +5,4 @@ pub mod request;
/// Current fast way to apply simple http routing in various Neon binaries.
/// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed.
pub use routerify::{ext::RequestExt, RouterBuilder, RouterService};
pub use routerify::{ext::RequestExt, Body, RequestServiceBuilder, RouterBuilder};


@@ -3,8 +3,9 @@ use std::{borrow::Cow, str::FromStr};
use super::error::ApiError;
use anyhow::anyhow;
use hyper::{body::HttpBody, Body, Request};
use routerify::ext::RequestExt;
use http_body_util::BodyExt;
use hyper::Request;
use routerify::{ext::RequestExt, Body};
pub fn get_request_param<'a>(
request: &'a Request<Body>,
@@ -75,7 +76,7 @@ pub fn parse_query_param<E: fmt::Display, T: FromStr<Err = E>>(
}
pub async fn ensure_no_body(request: &mut Request<Body>) -> Result<(), ApiError> {
match request.body_mut().data().await {
match request.body_mut().frame().await {
Some(_) => Err(ApiError::BadRequest(anyhow!("Unexpected request body"))),
None => Ok(()),
}


@@ -108,32 +108,9 @@ pub struct RelTagBlockNo {
}
impl PagestreamClient {
pub async fn shutdown(self) {
let Self {
copy_both,
cancel_on_client_drop: cancel_conn_task,
conn_task,
} = self;
// The `copy_both` contains internal channel sender, the receiver of which is polled by `conn_task`.
// When `conn_task` observes the sender has been dropped, it sends a `FeMessage::CopyFail` into the connection.
// (see https://github.com/neondatabase/rust-postgres/blob/2005bf79573b8add5cf205b52a2b208e356cc8b0/tokio-postgres/src/copy_both.rs#L56).
//
// If we drop(copy_both) first, but then immediately drop the `cancel_on_client_drop`,
// the CopyFail message only makes it to the socket sometimes (i.e., it's a race).
//
// Further, the pageserver makes a lot of noise when it receives CopyFail.
// Computes don't send it in practice, they just hard-close the connection.
//
// So, let's behave like the computes and suppress the CopyFail as follows:
// kill the socket first, then drop copy_both.
//
// See also: https://www.postgresql.org/docs/current/protocol-flow.html#PROTOCOL-COPY
//
// NB: page_service doesn't have a use case to exit the `pagestream` mode currently.
// => https://github.com/neondatabase/neon/issues/6390
let _ = cancel_conn_task.unwrap();
conn_task.await.unwrap();
drop(copy_both);
pub async fn shutdown(mut self) {
let _ = self.cancel_on_client_drop.take();
self.conn_task.await.unwrap();
}
pub async fn getpage(


@@ -404,27 +404,23 @@ async fn client(
.await
.unwrap();
let do_requests = async {
start_work_barrier.wait().await;
while let Some(req) = work.recv().await {
let start = Instant::now();
client
.getpage(req)
.await
.with_context(|| format!("getpage for {timeline}"))
.unwrap();
let elapsed = start.elapsed();
live_stats.inc();
STATS.with(|stats| {
stats.borrow().lock().unwrap().observe(elapsed).unwrap();
});
}
};
tokio::select! {
res = do_requests => { res },
_ = cancel.cancelled() => {
client.shutdown().await;
return;
}
start_work_barrier.wait().await;
while let Some(req) =
tokio::select! { work = work.recv() => { work } , _ = cancel.cancelled() => { return; } }
{
let start = Instant::now();
let res = tokio::select! {
res = client.getpage(req) => { res },
_ = cancel.cancelled() => { return; }
};
res.with_context(|| format!("getpage for {timeline}"))
.unwrap();
let elapsed = start.elapsed();
live_stats.inc();
STATS.with(|stats| {
stats.borrow().lock().unwrap().observe(elapsed).unwrap();
});
}
}


@@ -35,7 +35,6 @@ fn main() {
logging::Output::Stderr,
)
.unwrap();
logging::replace_panic_hook_with_tracing_panic_hook().forget();
let args = Args::parse();
match args {


@@ -386,56 +386,39 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
// If we get far enough in the list that we start to evict layers that are below
// the tenant's min-resident-size threshold, print a warning, and memorize the disk
// usage at that point, in 'usage_planned_min_resident_size_respecting'.
let mut warned = None;
let mut usage_planned = usage_pre;
let mut evicted_amount = 0;
let selection = select_victims(&candidates, usage_pre);
let mut candidates = candidates;
let selection = if matches!(eviction_order, EvictionOrder::RelativeAccessed { .. }) {
// we currently have the layers ordered by AbsoluteAccessed so that we can get the summary
// for comparison here. this is a temporary measure to develop alternatives.
use std::fmt::Write;
let mut summary_buf = String::with_capacity(256);
{
let absolute_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{absolute_summary}").expect("string grows");
info!("absolute accessed selection summary: {summary_buf}");
for (i, (partition, candidate)) in candidates.iter().enumerate() {
if !usage_planned.has_pressure() {
debug!(
no_candidates_evicted = i,
"took enough candidates for pressure to be relieved"
);
break;
}
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
});
let selection = select_victims(&candidates, usage_pre);
{
summary_buf.clear();
let relative_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{relative_summary}").expect("string grows");
info!("relative accessed selection summary: {summary_buf}");
if partition == &MinResidentSizePartition::Below && warned.is_none() {
warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
warned = Some(usage_planned);
}
selection
} else {
selection
usage_planned.add_available_bytes(candidate.layer.get_file_size());
evicted_amount += 1;
}
let usage_planned = match warned {
Some(respecting_tenant_min_resident_size) => PlannedUsage {
respecting_tenant_min_resident_size,
fallback_to_global_lru: Some(usage_planned),
},
None => PlannedUsage {
respecting_tenant_min_resident_size: usage_planned,
fallback_to_global_lru: None,
},
};
let (evicted_amount, usage_planned) = selection.into_amount_and_planned();
debug!(?usage_planned, "usage planned");
// phase2: evict layers
@@ -927,80 +910,22 @@ async fn collect_eviction_candidates(
debug_assert!(MinResidentSizePartition::Above < MinResidentSizePartition::Below,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
// always behave as if AbsoluteAccessed was selected. If RelativeAccessed is in use, we
// will sort later by candidate.relative_last_activity to compare evictions.
candidates
.sort_unstable_by_key(|(partition, candidate)| (*partition, candidate.last_activity_ts));
match eviction_order {
EvictionOrder::AbsoluteAccessed => {
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.last_activity_ts)
});
}
EvictionOrder::RelativeAccessed { .. } => {
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
});
}
}
Ok(EvictionCandidates::Finished(candidates))
}
/// Given a pre-sorted vec of all layers in the system, select the first N which are enough to
/// relieve pressure.
///
/// Returns the amount of candidates selected, with the planned usage.
fn select_victims<U: Usage>(
candidates: &[(MinResidentSizePartition, EvictionCandidate)],
usage_pre: U,
) -> VictimSelection<U> {
let mut usage_when_switched = None;
let mut usage_planned = usage_pre;
let mut evicted_amount = 0;
for (i, (partition, candidate)) in candidates.iter().enumerate() {
if !usage_planned.has_pressure() {
break;
}
if partition == &MinResidentSizePartition::Below && usage_when_switched.is_none() {
usage_when_switched = Some((usage_planned, i));
}
usage_planned.add_available_bytes(candidate.layer.get_file_size());
evicted_amount += 1;
}
VictimSelection {
amount: evicted_amount,
usage_pre,
usage_when_switched,
usage_planned,
}
}
struct VictimSelection<U> {
amount: usize,
usage_pre: U,
usage_when_switched: Option<(U, usize)>,
usage_planned: U,
}
impl<U: Usage> VictimSelection<U> {
fn into_amount_and_planned(self) -> (usize, PlannedUsage<U>) {
debug!(
evicted_amount=%self.amount,
"took enough candidates for pressure to be relieved"
);
if let Some((usage_planned, candidate_no)) = self.usage_when_switched.as_ref() {
warn!(usage_pre=?self.usage_pre, ?usage_planned, candidate_no, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
}
let planned = match self.usage_when_switched {
Some((respecting_tenant_min_resident_size, _)) => PlannedUsage {
respecting_tenant_min_resident_size,
fallback_to_global_lru: Some(self.usage_planned),
},
None => PlannedUsage {
respecting_tenant_min_resident_size: self.usage_planned,
fallback_to_global_lru: None,
},
};
(self.amount, planned)
}
}
struct TimelineKey(Arc<Timeline>);
impl PartialEq for TimelineKey {
@@ -1085,137 +1010,6 @@ pub(crate) mod finite_f32 {
}
}
mod summary {
use super::finite_f32::FiniteF32;
use super::{EvictionCandidate, LayerCount};
use pageserver_api::shard::TenantShardId;
use std::collections::{BTreeMap, HashMap};
use std::time::SystemTime;
#[derive(Debug, Default)]
pub(super) struct EvictionSummary {
evicted_per_tenant: HashMap<TenantShardId, LayerCount>,
total: LayerCount,
last_absolute: Option<SystemTime>,
last_relative: Option<FiniteF32>,
}
impl<'a> FromIterator<&'a EvictionCandidate> for EvictionSummary {
fn from_iter<T: IntoIterator<Item = &'a EvictionCandidate>>(iter: T) -> Self {
let mut summary = EvictionSummary::default();
for item in iter {
let counts = summary
.evicted_per_tenant
.entry(*item.layer.get_tenant_shard_id())
.or_default();
let sz = item.layer.get_file_size();
counts.file_sizes += sz;
counts.count += 1;
summary.total.file_sizes += sz;
summary.total.count += 1;
summary.last_absolute = Some(item.last_activity_ts);
summary.last_relative = Some(item.relative_last_activity);
}
summary
}
}
struct SiBytesAmount(u64);
impl std::fmt::Display for SiBytesAmount {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.0 < 1024 {
return write!(f, "{}B", self.0);
}
let mut tmp = self.0;
let mut ch = 0;
let suffixes = b"KMGTPE";
while tmp > 1024 * 1024 && ch < suffixes.len() - 1 {
tmp /= 1024;
ch += 1;
}
let ch = suffixes[ch] as char;
write!(f, "{:.1}{ch}iB", tmp as f64 / 1024.0)
}
}
impl std::fmt::Display for EvictionSummary {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// wasteful, but it's for testing
let mut sorted: BTreeMap<usize, Vec<(TenantShardId, u64)>> = BTreeMap::new();
for (tenant_shard_id, count) in &self.evicted_per_tenant {
sorted
.entry(count.count)
.or_default()
.push((*tenant_shard_id, count.file_sizes));
}
let total_file_sizes = SiBytesAmount(self.total.file_sizes);
writeln!(
f,
"selected {} layers of {total_file_sizes} up to ({:?}, {:.2?}):",
self.total.count, self.last_absolute, self.last_relative,
)?;
for (count, per_tenant) in sorted.iter().rev().take(10) {
write!(f, "- {count} layers: ")?;
if per_tenant.len() < 3 {
for (i, (tenant_shard_id, bytes)) in per_tenant.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
let bytes = SiBytesAmount(*bytes);
write!(f, "{tenant_shard_id} ({bytes})")?;
}
} else {
let num_tenants = per_tenant.len();
let total_bytes = per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>();
let total_bytes = SiBytesAmount(total_bytes);
let layers = num_tenants * count;
write!(
f,
"{num_tenants} tenants {total_bytes} in total {layers} layers",
)?;
}
writeln!(f)?;
}
if sorted.len() > 10 {
let (rem_count, rem_bytes) = sorted
.iter()
.rev()
.map(|(count, per_tenant)| {
(
count,
per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>(),
)
})
.fold((0, 0), |acc, next| (acc.0 + next.0, acc.1 + next.1));
let rem_bytes = SiBytesAmount(rem_bytes);
writeln!(f, "- rest of tenants ({}) not shown ({rem_count} layers or {:.1}%, {rem_bytes} or {:.1}% bytes)", sorted.len() - 10, 100.0 * rem_count as f64 / self.total.count as f64, 100.0 * rem_bytes.0 as f64 / self.total.file_sizes as f64)?;
}
Ok(())
}
}
}
mod filesystem_level_usage {
use anyhow::Context;
use camino::Utf8Path;


@@ -12,7 +12,7 @@ use futures::TryFutureExt;
use humantime::format_rfc3339;
use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use hyper::{Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::ShardParameters;
@@ -32,6 +32,7 @@ use utils::failpoint_support::failpoints_handler;
use utils::http::endpoint::request_span;
use utils::http::json::json_request_or_empty_body;
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
use utils::http::Body;
use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueueClient;
@@ -64,7 +65,7 @@ use utils::{
http::{
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with},
error::{ApiError, HttpErrorBody},
json::{json_request, json_response},
json::{json_request, json_response_body as json_response},
request::parse_request_param,
RequestExt, RouterBuilder,
},
@@ -1571,7 +1572,7 @@ async fn getpage_at_lsn_handler(
Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "application/octet-stream")
.body(hyper::Body::from(page))
.body(Body::from(page))
.unwrap(),
)
}
@@ -1868,7 +1869,7 @@ pub fn make_router(
state: Arc<State>,
launch_ts: &'static LaunchTimestamp,
auth: Option<Arc<SwappableJwtAuth>>,
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
) -> anyhow::Result<RouterBuilder<Body, ApiError>> {
let spec = include_bytes!("openapi_spec.yml");
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
if auth.is_some() {


@@ -384,17 +384,11 @@ impl PageServerHandler {
}
}
/// Future that completes when we need to shut down the connection.
/// Analogous to calling cancelled() on a Timeline's cancellation token: waits for cancellation.
///
/// Reasons for need to shut down are:
/// - any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// - task_mgr requests shutdown of the connection
///
/// The need to check for `task_mgr` cancellation arises mainly from `handle_pagerequests`
/// where, at first, `shard_timelines` is empty, see <https://github.com/neondatabase/neon/pull/6388>
///
/// NB: keep in sync with [`Self::is_connection_cancelled`]
async fn await_connection_cancelled(&self) {
/// We use many Timeline objects, and hold GateGuards on all of them. We must therefore respect
/// all of their cancellation tokens.
async fn timeline_cancelled(&self) {
// A short wait before we expend the cycles to walk our timeline map. This avoids incurring
// that cost every time we check for cancellation.
tokio::time::sleep(Duration::from_millis(10)).await;
@@ -410,19 +404,14 @@ impl PageServerHandler {
.map(|ht| ht.timeline.cancel.cancelled())
.collect::<FuturesUnordered<_>>();
tokio::select! {
_ = task_mgr::shutdown_watcher() => { }
_ = futs.next() => {}
}
futs.next().await;
}
/// Checking variant of [`Self::await_connection_cancelled`].
fn is_connection_cancelled(&self) -> bool {
task_mgr::is_shutdown_requested()
|| self
.shard_timelines
.values()
.any(|ht| ht.timeline.cancel.is_cancelled() || ht.timeline.is_stopping())
/// Analogous to calling is_cancelled() on a Timeline's cancellation token
fn timeline_is_cancelled(&self) -> bool {
self.shard_timelines
.values()
.any(|ht| ht.timeline.cancel.is_cancelled() || ht.timeline.is_stopping())
}
/// This function always respects cancellation of any timeline in `[Self::shard_timelines]`. Pass in
@@ -443,7 +432,7 @@ impl PageServerHandler {
flush_r = pgb.flush() => {
Ok(flush_r?)
},
_ = self.await_connection_cancelled() => {
_ = self.timeline_cancelled() => {
Err(QueryError::Shutdown)
}
_ = cancel.cancelled() => {
@@ -560,7 +549,7 @@ impl PageServerHandler {
let msg = tokio::select! {
biased;
_ = self.await_connection_cancelled() => {
_ = self.timeline_cancelled() => {
// We were requested to shut down.
info!("shutdown request received in page handler");
return Err(QueryError::Shutdown)
@@ -643,7 +632,7 @@ impl PageServerHandler {
span.in_scope(|| info!("handler requested reconnect: {reason}"));
return Err(QueryError::Reconnect);
}
Err(e) if self.is_connection_cancelled() => {
Err(e) if self.timeline_is_cancelled() => {
// This branch accommodates code within request handlers that returns an anyhow::Error instead of a clean
// shutdown error, this may be buried inside a PageReconstructError::Other for example.
//


@@ -1832,7 +1832,7 @@ const CONTROLFILE_KEY: Key = Key {
field6: 0,
};
pub const CHECKPOINT_KEY: Key = Key {
const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,


@@ -884,7 +884,7 @@ impl DeltaLayerInner {
let keys = self.load_keys(ctx).await?;
async fn dump_blob(val: &ValueRef<'_>, ctx: &RequestContext) -> anyhow::Result<String> {
async fn dump_blob(val: ValueRef<'_>, ctx: &RequestContext) -> anyhow::Result<String> {
let buf = val.reader.read_blob(val.blob_ref.pos(), ctx).await?;
let val = Value::des(&buf)?;
let desc = match val {
@@ -905,30 +905,14 @@ impl DeltaLayerInner {
}
for entry in keys {
let DeltaEntry { key, lsn, val, .. } = entry;
let desc = match dump_blob(&val, ctx).await {
let DeltaEntry { key, lsn, val, .. } = entry;
let desc = match dump_blob(val, ctx).await {
Ok(desc) => desc,
Err(err) => {
format!("ERROR: {err}")
}
};
println!(" key {key} at {lsn}: {desc}");
use crate::pgdatadir_mapping::CHECKPOINT_KEY;
use postgres_ffi::CheckPoint;
if key == CHECKPOINT_KEY
{
let buf = val.reader.read_blob(val.blob_ref.pos(), ctx).await?;
let val = Value::des(&buf)?;
match val {
Value::Image(img) => {
let checkpoint = CheckPoint::decode(&img)?;
println!(" CHECKPOINT: {:?}", checkpoint);
}
Value::WalRecord(_rec) => {
format!(" unexpected walrecord for checkpoint key");
}
}
}
}
Ok(())


@@ -26,11 +26,8 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
use std::str::FromStr;
use anyhow::{bail, Context, Result};
use bytes::{Buf, Bytes, BytesMut};
use hex::FromHex;
use tracing::*;
use utils::failpoint_support;
@@ -46,10 +43,9 @@ use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::{bindings::FullTransactionId, CheckPoint};
use postgres_ffi::v14::CheckPoint;
use postgres_ffi::TransactionId;
use postgres_ffi::BLCKSZ;
use utils::id::TenantId;
use utils::lsn::Lsn;
pub struct WalIngest {
@@ -106,61 +102,10 @@ impl WalIngest {
buf.advance(decoded.main_data_offset);
assert!(!self.checkpoint_modified);
if decoded.xl_xid != pg_constants::INVALID_TRANSACTION_ID
&& self.checkpoint.update_next_xid(decoded.xl_xid)
{
if self.checkpoint.update_next_xid(decoded.xl_xid) {
self.checkpoint_modified = true;
}
// BEGIN ONE-OFF HACK
//
// We had a bug where we incorrectly passed 0 to update_next_xid(). That was
// harmless as long as nextXid was < 2^31, because 0 looked like a very old
// XID. But once nextXid reaches 2^31, 0 starts to look like a very new XID, and
// we incorrectly bumped up nextXid to the next epoch, to value '1:1024'
//
// We have one known timeline in production where that happened. This is a one-off
// fix to fix that damage. The last WAL record on that timeline as of this writing
// is this:
//
// rmgr: Standby len (rec/tot): 50/ 50, tx: 0, lsn: 35A/E32D86D8, prev 35A/E32D86B0, desc: RUNNING_XACTS nextXid 2325447052 latestCompletedXid 2325447051 oldestRunningXid 2325447052
//
// So on that particular timeline, before that LSN, fix the incorrectly set
// nextXid to the nextXid value from that record, plus 1000 to give some safety
// margin.
// For testing this hack, this failpoint temporarily re-introduces the bug that
// was fixed
fn reintroduce_bug_failpoint_activated() -> bool {
fail::fail_point!("reintroduce-nextxid-update-bug", |_| { true });
false
}
if decoded.xl_xid == pg_constants::INVALID_TRANSACTION_ID
&& reintroduce_bug_failpoint_activated()
&& self.checkpoint.update_next_xid(decoded.xl_xid)
{
info!(
"failpoint: Incorrectly updated nextXid at LSN {} to {}",
lsn, self.checkpoint.nextXid.value
);
self.checkpoint_modified = true;
}
if self.checkpoint.nextXid.value == 4294968320 && // 1::1024, the incorrect value
modification.tline.tenant_shard_id.tenant_id == TenantId::from_hex("df254570a4f603805528b46b0d45a76c").unwrap() &&
lsn < Lsn::from_str("35A/E32D9000").unwrap() &&
!reintroduce_bug_failpoint_activated()
{
// This is the last nextXid value from the last RUNNING_XACTS record, at the
// end of the WAL as of this writing.
self.checkpoint.nextXid = FullTransactionId {
value: 2325447052 + 1000,
};
self.checkpoint_modified = true;
warn!("nextXid fixed by one-off hack at LSN {}", lsn);
}
// END ONE-OFF HACK
match decoded.xl_rmid {
pg_constants::RM_HEAP_ID | pg_constants::RM_HEAP2_ID => {
// Heap AM records need some special handling, because they modify VM pages
@@ -385,13 +330,8 @@ impl WalIngest {
< 0
{
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
self.checkpoint_modified = true;
}
// Write a new checkpoint key-value pair on every checkpoint record, even
// if nothing really changed. Not strictly required, but it seems nice to
// have some trace of the checkpoint records in the layer files at the same
// LSNs.
self.checkpoint_modified = true;
}
}
pg_constants::RM_LOGICALMSG_ID => {


@@ -308,13 +308,13 @@ lfc_change_limit_hook(int newval, void *extra)
Assert(victim->access_count == 0);
#ifdef FALLOC_FL_PUNCH_HOLE
if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * BLOCKS_PER_CHUNK * BLCKSZ, BLOCKS_PER_CHUNK * BLCKSZ) < 0)
neon_log(LOG, "Failed to punch hole in file: %m");
elog(LOG, "Failed to punch hole in file: %m");
#endif
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
lfc_ctl->used -= 1;
}
lfc_ctl->limit = new_size;
neon_log(DEBUG1, "set local file cache limit to %d", new_size);
elog(DEBUG1, "set local file cache limit to %d", new_size);
LWLockRelease(lfc_lock);
}
@@ -327,7 +327,7 @@ lfc_init(void)
* shared_preload_libraries.
*/
if (!process_shared_preload_libraries_in_progress)
neon_log(ERROR, "Neon module should be loaded via shared_preload_libraries");
elog(ERROR, "Neon module should be loaded via shared_preload_libraries");
DefineCustomIntVariable("neon.max_file_cache_size",
@@ -643,7 +643,7 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void
Assert(victim->access_count == 0);
entry->offset = victim->offset; /* grab victim's chunk */
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
neon_log(DEBUG2, "Swap file cache page");
elog(DEBUG2, "Swap file cache page");
}
else
{
@@ -846,10 +846,10 @@ local_cache_pages(PG_FUNCTION_ARGS)
* wrong) function definition though.
*/
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
neon_log(ERROR, "return type must be a row type");
elog(ERROR, "return type must be a row type");
if (expected_tupledesc->natts != NUM_LOCALCACHE_PAGES_ELEM)
neon_log(ERROR, "incorrect number of output arguments");
elog(ERROR, "incorrect number of output arguments");
/* Construct a tuple descriptor for the result rows. */
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);


@@ -990,7 +990,7 @@ nm_pack_request(NeonRequest *msg)
case T_NeonErrorResponse:
case T_NeonDbSizeResponse:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", msg->tag);
elog(ERROR, "unexpected neon message tag 0x%02x", msg->tag);
break;
}
return s;
@@ -1085,7 +1085,7 @@ nm_unpack_response(StringInfo s)
case T_NeonGetPageRequest:
case T_NeonDbSizeRequest:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", tag);
elog(ERROR, "unexpected neon message tag 0x%02x", tag);
break;
}
@@ -1277,7 +1277,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
XLogFlush(recptr);
lsn = recptr;
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, LSN_FORMAT_ARGS(lsn))));
@@ -1305,7 +1305,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
if (PageIsNew((Page) buffer))
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is all-zeros",
(errmsg("Page %u of relation %u/%u/%u.%u is all-zeros",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1313,7 +1313,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else if (PageIsEmptyHeapPage((Page) buffer))
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
(errmsg("Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1321,7 +1321,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else
{
ereport(PANIC,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
(errmsg("Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1330,7 +1330,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
(errmsg("Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, LSN_FORMAT_ARGS(lsn))));
@@ -1430,7 +1430,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
lsn = nm_adjust_lsn(lsn);
neon_log(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
(uint32) ((lsn) >> 32), (uint32) (lsn));
}
else
@@ -1445,7 +1445,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
*latest = true;
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
Assert(lsn != InvalidXLogRecPtr);
neon_log(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
elog(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
(uint32) ((lsn) >> 32), (uint32) (lsn));
lsn = nm_adjust_lsn(lsn);
@@ -1465,7 +1465,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
#endif
if (lsn > flushlsn)
{
neon_log(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
elog(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
(uint32) (lsn >> 32), (uint32) lsn,
(uint32) (flushlsn >> 32), (uint32) flushlsn);
XLogFlush(lsn);
@@ -1509,7 +1509,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
return mdexists(reln, forkNum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
@@ -1561,7 +1561,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
@@ -1570,7 +1570,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
pfree(resp);
return exists;
@@ -1587,7 +1587,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrcreate() on rel with unknown persistence");
elog(ERROR, "cannot call smgrcreate() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1598,10 +1598,10 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_log(SmgrTrace, "Create relation %u/%u/%u.%u",
elog(SmgrTrace, "Create relation %u/%u/%u.%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum);
@@ -1696,7 +1696,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1707,7 +1707,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/*
@@ -1745,7 +1745,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
elog(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum, blkno,
(uint32) (lsn >> 32), (uint32) lsn);
@@ -1785,7 +1785,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1796,7 +1796,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (max_cluster_size > 0 &&
@@ -1808,7 +1808,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
errmsg("could not extend file because project size limit (%d MB) has been exceeded",
errmsg("could not extend file because cluster size limit (%d MB) has been exceeded",
max_cluster_size),
errhint("This limit is defined by neon.max_cluster_size GUC")));
}
@@ -1821,7 +1821,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg(NEON_TAG "cannot extend file \"%s\" beyond %u blocks",
errmsg("cannot extend file \"%s\" beyond %u blocks",
relpath(reln->smgr_rlocator, forkNum),
InvalidBlockNumber)));
@@ -1882,7 +1882,7 @@ neon_open(SMgrRelation reln)
mdopen(reln);
/* no work */
neon_log(SmgrTrace, "open noop");
elog(SmgrTrace, "[NEON_SMGR] open noop");
}
/*
@@ -1919,7 +1919,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
return mdprefetch(reln, forknum, blocknum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
@@ -1964,11 +1964,11 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/* not implemented */
neon_log(SmgrTrace, "writeback noop");
elog(SmgrTrace, "[NEON_SMGR] writeback noop");
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2098,7 +2098,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
blkno,
RelFileInfoFmt(rinfo),
forkNum,
@@ -2107,7 +2107,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
((NeonErrorResponse *) resp)->message)));
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
/* buffer was used, clean up for later reuse */
@@ -2131,7 +2131,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrread() on rel with unknown persistence");
elog(ERROR, "cannot call smgrread() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -2142,7 +2142,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/* Try to read from local file cache */
@@ -2170,7 +2170,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
{
if (!PageIsNew((Page) pageserver_masked))
{
neon_log(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
elog(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2180,7 +2180,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
}
else if (PageIsNew((Page) buffer))
{
neon_log(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
elog(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2195,7 +2195,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
elog(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2214,7 +2214,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
elog(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2294,13 +2294,13 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_wallog_page(reln, forknum, blocknum, buffer, false);
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, blocknum,
(uint32) (lsn >> 32), (uint32) lsn);
@@ -2327,7 +2327,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrnblocks() on rel with unknown persistence");
elog(ERROR, "cannot call smgrnblocks() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2338,12 +2338,12 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
return mdnblocks(reln, forknum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
{
neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
elog(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, n_blocks);
return n_blocks;
@@ -2371,7 +2371,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
@@ -2380,11 +2380,11 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
elog(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
@@ -2427,7 +2427,7 @@ neon_dbsize(Oid dbNode)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X",
errmsg("could not read db size of db %u from page server at lsn %X/%08X",
dbNode,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
errdetail("page server returned error: %s",
@@ -2435,10 +2435,10 @@ neon_dbsize(Oid dbNode)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
elog(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
dbNode,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
db_size);
@@ -2458,7 +2458,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrtruncate() on rel with unknown persistence");
elog(ERROR, "cannot call smgrtruncate() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2470,7 +2470,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
@@ -2526,7 +2526,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrimmedsync() on rel with unknown persistence");
elog(ERROR, "cannot call smgrimmedsync() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2538,10 +2538,10 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop");
elog(SmgrTrace, "[NEON_SMGR] immedsync noop");
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2566,17 +2566,17 @@ neon_start_unlogged_build(SMgrRelation reln)
* progress at a time. That's enough for the current usage.
*/
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
neon_log(ERROR, "unlogged relation build is already in progress");
elog(ERROR, "unlogged relation build is already in progress");
Assert(unlogged_build_rel == NULL);
ereport(SmgrTrace,
(errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u",
(errmsg("starting unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence");
elog(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2589,11 +2589,11 @@ neon_start_unlogged_build(SMgrRelation reln)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (smgrnblocks(reln, MAIN_FORKNUM) != 0)
neon_log(ERROR, "cannot perform unlogged index build, index is not empty ");
elog(ERROR, "cannot perform unlogged index build, index is not empty ");
unlogged_build_rel = reln;
unlogged_build_phase = UNLOGGED_BUILD_PHASE_1;
@@ -2620,7 +2620,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
Assert(unlogged_build_rel == reln);
ereport(SmgrTrace,
(errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u",
(errmsg("finishing phase 1 of unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
@@ -2649,7 +2649,7 @@ neon_end_unlogged_build(SMgrRelation reln)
Assert(unlogged_build_rel == reln);
ereport(SmgrTrace,
(errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u",
(errmsg("ending unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)))));
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
@@ -2664,7 +2664,7 @@ neon_end_unlogged_build(SMgrRelation reln)
rinfob = InfoBFromSMgrRel(reln);
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
{
neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
elog(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)),
forknum);
@@ -2707,7 +2707,7 @@ AtEOXact_neon(XactEvent event, void *arg)
unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
(errmsg(NEON_TAG "unlogged index build was not properly finished"))));
(errmsg("unlogged index build was not properly finished"))));
}
break;
}
@@ -2806,14 +2806,14 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
set_cached_relsize(rinfo, forknum, relsize);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
neon_log(SmgrTrace, "Set length to %d", relsize);
elog(SmgrTrace, "Set length to %d", relsize);
}
}
#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
/*
* TODO: May be it is better to make correspondent function from freespace.c public?
* TODO: May be it is better to make correspondent fgunctio from freespace.c public?
*/
static BlockNumber
get_fsm_physical_block(BlockNumber heapblk)
@@ -2894,7 +2894,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
#if PG_VERSION_NUM < 150000
if (!XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno))
neon_log(PANIC, "failed to locate backup block with ID %d", block_id);
elog(PANIC, "failed to locate backup block with ID %d", block_id);
#else
XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno);
#endif

View File

@@ -959,8 +959,8 @@ DetermineEpochStartLsn(WalProposer *wp)
}
/*
* If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are bootstrapping
* and nothing was committed yet. Start streaming then from the basebackup LSN.
* If propEpochStartLsn is 0 everywhere, we are bootstrapping -- nothing
* was committed yet. Start streaming then from the basebackup LSN.
*/
if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers)
{
@@ -973,13 +973,12 @@ DetermineEpochStartLsn(WalProposer *wp)
}
/*
* Safekeepers are setting truncateLsn after timelineStartLsn is known, so it
* should never be zero at this point, if we know timelineStartLsn.
*
* timelineStartLsn can be zero only on the first syncSafekeepers run.
* If propEpochStartLsn is not 0, at least one msg with WAL was sent to
* some connected safekeeper; it must have carried truncateLsn pointing to
* the first record.
*/
Assert((wp->truncateLsn != InvalidXLogRecPtr) ||
(wp->config->syncSafekeepers && wp->truncateLsn == wp->timelineStartLsn));
(wp->config->syncSafekeepers && wp->truncateLsn == wp->propEpochStartLsn));
/*
* We will be generating WAL since propEpochStartLsn, so we should set

View File

@@ -28,7 +28,11 @@ hmac.workspace = true
hostname.workspace = true
humantime.workspace = true
hyper-tungstenite.workspace = true
hyper.workspace = true
hyper = { workspace = true, features = ["server"] }
hyper-util = { workspace = true, features = ["tokio", "server", "server-auto"] }
http = { workspace = true, features = [] }
http-body = { workspace = true, features = [] }
http-body-util = { workspace = true, features = [] }
ipnet.workspace = true
itertools.workspace = true
md5.workspace = true
@@ -89,4 +93,3 @@ camino-tempfile.workspace = true
rcgen.workspace = true
rstest.workspace = true
tokio-postgres-rustls.workspace = true
walkdir.workspace = true

View File

@@ -179,18 +179,17 @@ impl super::Api for Api {
return Ok(Some(role_secret));
}
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
if let Some(project_id) = auth_info.project_id {
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches.project_info.insert_allowed_ips(
&project_id,
ep,
Arc::new(auth_info.allowed_ips),
);
let project_id = auth_info.project_id.unwrap_or(ep.clone());
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches.project_info.insert_allowed_ips(
&project_id,
ep,
Arc::new(auth_info.allowed_ips),
);
// When we just got a secret, we don't need to invalidate it.
Ok(auth_info.secret.map(Cached::new_uncached))
}
@@ -213,16 +212,15 @@ impl super::Api for Api {
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let user = &user_info.user;
if let Some(project_id) = auth_info.project_id {
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
let project_id = auth_info.project_id.unwrap_or(ep.clone());
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
Ok(Cached::new_uncached(allowed_ips))
}

View File

@@ -32,7 +32,6 @@ pub struct RequestMonitoring {
user: Option<SmolStr>,
application: Option<SmolStr>,
error_kind: Option<ErrorKind>,
success: bool,
// extra
// This sender is here to keep the request monitoring channel open while requests are taking place.
@@ -60,7 +59,6 @@ impl RequestMonitoring {
user: None,
application: None,
error_kind: None,
success: false,
sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()),
latency_timer: LatencyTimer::new(protocol),
@@ -98,10 +96,6 @@ impl RequestMonitoring {
self.user = Some(user);
}
pub fn set_success(&mut self) {
self.success = true;
}
pub fn log(&mut self) {
if let Some(tx) = self.sender.take() {
let _: Result<(), _> = tx.send(self.clone());

View File

@@ -1,8 +1,7 @@
use std::{sync::Arc, time::SystemTime};
use std::sync::Arc;
use anyhow::Context;
use bytes::BytesMut;
use chrono::{Datelike, Timelike};
use futures::{Stream, StreamExt};
use parquet::{
basic::Compression,
@@ -87,12 +86,6 @@ struct RequestData {
project: Option<String>,
branch: Option<String>,
error: Option<&'static str>,
/// Success is counted if we form a HTTP response with sql rows inside
/// Or if we make it to proxy_pass
success: bool,
/// Tracks time from session start (HTTP request/libpq TCP handshake)
/// Through to success/failure
duration_us: u64,
}
impl From<RequestMonitoring> for RequestData {
@@ -109,11 +102,6 @@ impl From<RequestMonitoring> for RequestData {
protocol: value.protocol,
region: value.region,
error: value.error_kind.as_ref().map(|e| e.to_str()),
success: value.success,
duration_us: SystemTime::from(value.first_packet)
.elapsed()
.unwrap_or_default()
.as_micros() as u64, // 584 millenia... good enough
}
}
}
@@ -278,13 +266,7 @@ async fn upload_parquet(
let compression = len as f64 / len_uncompressed as f64;
let size = data.len();
let now = chrono::Utc::now();
let id = uuid::Uuid::new_v7(uuid::Timestamp::from_unix(
uuid::NoContext,
// we won't be running this in 1970. this cast is ok
now.timestamp() as u64,
now.timestamp_subsec_nanos(),
));
let id = uuid::Uuid::now_v7();
info!(
%id,
@@ -292,14 +274,7 @@ async fn upload_parquet(
size, compression, "uploading request parquet file"
);
let year = now.year();
let month = now.month();
let day = now.day();
let hour = now.hour();
// segment files by time for S3 performance
let path = RemotePath::from_string(&format!(
"{year:04}/{month:02}/{day:02}/{hour:02}/requests_{id}.parquet"
))?;
let path = RemotePath::from_string(&format!("requests_{id}.parquet"))?;
backoff::retry(
|| async {
let stream = futures::stream::once(futures::future::ready(Ok(data.clone())));
@@ -357,7 +332,6 @@ mod tests {
DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
use tokio::{sync::mpsc, time};
use walkdir::WalkDir;
use super::{worker_inner, ParquetConfig, ParquetUploadArgs, RequestData};
@@ -446,8 +420,6 @@ mod tests {
protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)],
region: "us-east-1",
error: None,
success: rng.gen(),
duration_us: rng.gen_range(0..30_000_000),
}
}
@@ -470,11 +442,9 @@ mod tests {
worker_inner(storage, rx, config).await.unwrap();
let mut files = WalkDir::new(tmpdir.as_std_path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().to_path_buf())
let mut files = std::fs::read_dir(tmpdir.as_std_path())
.unwrap()
.map(|entry| entry.unwrap().path())
.collect_vec();
files.sort();
@@ -515,15 +485,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1087635, 3, 6000),
(1087288, 3, 6000),
(1087444, 3, 6000),
(1087572, 3, 6000),
(1087468, 3, 6000),
(1087500, 3, 6000),
(1087533, 3, 6000),
(1087566, 3, 6000),
(362671, 1, 2000)
(1029153, 3, 6000),
(1029075, 3, 6000),
(1029216, 3, 6000),
(1029129, 3, 6000),
(1029250, 3, 6000),
(1029017, 3, 6000),
(1029175, 3, 6000),
(1029247, 3, 6000),
(343124, 1, 2000)
],
);
@@ -553,11 +523,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1028637, 5, 10000),
(1031969, 5, 10000),
(1019900, 5, 10000),
(1020365, 5, 10000),
(1025010, 5, 10000)
(1166201, 6, 12000),
(1163577, 6, 12000),
(1164641, 6, 12000),
(1168772, 6, 12000),
(196761, 1, 2000)
],
);
@@ -589,11 +559,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1210770, 6, 12000),
(1211036, 6, 12000),
(1210990, 6, 12000),
(1210861, 6, 12000),
(202073, 1, 2000)
(1144934, 6, 12000),
(1144941, 6, 12000),
(1144735, 6, 12000),
(1144936, 6, 12000),
(191035, 1, 2000)
],
);
@@ -618,15 +588,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1087635, 3, 6000),
(1087288, 3, 6000),
(1087444, 3, 6000),
(1087572, 3, 6000),
(1087468, 3, 6000),
(1087500, 3, 6000),
(1087533, 3, 6000),
(1087566, 3, 6000),
(362671, 1, 2000)
(1029153, 3, 6000),
(1029075, 3, 6000),
(1029216, 3, 6000),
(1029129, 3, 6000),
(1029250, 3, 6000),
(1029017, 3, 6000),
(1029175, 3, 6000),
(1029247, 3, 6000),
(343124, 1, 2000)
],
);
@@ -663,7 +633,7 @@ mod tests {
// files are smaller than the size threshold, but they took too long to fill so were flushed early
assert_eq!(
file_stats,
[(545264, 2, 3001), (545025, 2, 3000), (544857, 2, 2999)],
[(515807, 2, 3001), (515585, 2, 3000), (515425, 2, 2999)],
);
tmpdir.close().unwrap();
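
The uploader in this file now derives the object name from uuid::Uuid::now_v7() and drops the year/month/day/hour prefix. A hedged sketch of what the v7 id already encodes, assuming the uuid crate with its v7 feature (path format copied from the hunk above):

fn main() {
    // now_v7() packs the current Unix timestamp (millisecond precision) into the
    // id's most significant bits, so files written in later milliseconds sort
    // after earlier ones even without a date prefix in the key.
    let id = uuid::Uuid::now_v7();
    let path = format!("requests_{id}.parquet");
    println!("{path}");

    // The creation time can still be recovered from the id itself.
    if let Some(ts) = id.get_timestamp() {
        let (secs, nanos) = ts.to_unix();
        println!("created at {secs}.{nanos:09} (unix)");
    }
}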

View File

@@ -4,14 +4,12 @@
pub mod health_server;
use std::{sync::Arc, time::Duration};
use std::time::Duration;
use futures::FutureExt;
pub use reqwest::{Request, Response, StatusCode};
pub use reqwest_middleware::{ClientWithMiddleware, Error};
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use tokio::time::Instant;
use tracing::trace;
use crate::{metrics::CONSOLE_REQUEST_LATENCY, rate_limiter, url::ApiUrl};
use reqwest_middleware::RequestBuilder;
@@ -21,8 +19,6 @@ use reqwest_middleware::RequestBuilder;
/// We deliberately don't want to replace this with a public static.
pub fn new_client(rate_limiter_config: rate_limiter::RateLimiterConfig) -> ClientWithMiddleware {
let client = reqwest::ClientBuilder::new()
.dns_resolver(Arc::new(GaiResolver::default()))
.connection_verbose(true)
.build()
.expect("Failed to create http client");
@@ -34,8 +30,6 @@ pub fn new_client(rate_limiter_config: rate_limiter::RateLimiterConfig) -> Clien
pub fn new_client_with_timeout(default_timout: Duration) -> ClientWithMiddleware {
let timeout_client = reqwest::ClientBuilder::new()
.dns_resolver(Arc::new(GaiResolver::default()))
.connection_verbose(true)
.timeout(default_timout)
.build()
.expect("Failed to create http client with timeout");
@@ -100,37 +94,6 @@ impl Endpoint {
}
}
/// https://docs.rs/reqwest/0.11.18/src/reqwest/dns/gai.rs.html
use hyper::{
client::connect::dns::{GaiResolver as HyperGaiResolver, Name},
service::Service,
};
use reqwest::dns::{Addrs, Resolve, Resolving};
#[derive(Debug)]
pub struct GaiResolver(HyperGaiResolver);
impl Default for GaiResolver {
fn default() -> Self {
Self(HyperGaiResolver::new())
}
}
impl Resolve for GaiResolver {
fn resolve(&self, name: Name) -> Resolving {
let this = &mut self.0.clone();
let start = Instant::now();
Box::pin(
Service::<Name>::call(this, name.clone()).map(move |result| {
let resolve_duration = start.elapsed();
trace!(duration = ?resolve_duration, addr = %name, "resolve host complete");
result
.map(|addrs| -> Addrs { Box::new(addrs) })
.map_err(|err| -> Box<dyn std::error::Error + Send + Sync> { Box::new(err) })
}),
)
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -1,14 +1,21 @@
use anyhow::{anyhow, bail};
use hyper::{Body, Request, Response, StatusCode};
use anyhow::anyhow;
use http::{Request, Response};
use hyper::StatusCode;
use hyper_util::{
rt::{TokioExecutor, TokioIo},
server::conn,
};
use std::{convert::Infallible, net::TcpListener};
use tracing::info;
use utils::http::{endpoint, error::ApiError, json::json_response, RouterBuilder, RouterService};
use utils::http::{
endpoint, error::ApiError, json::json_response, Body, RequestServiceBuilder, RouterBuilder,
};
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, "")
json_response(StatusCode::OK, "").map(|req| req.map(Body::new))
}
fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
fn make_router() -> RouterBuilder<Body, ApiError> {
endpoint::make_router().get("/v1/status", status_handler)
}
@@ -17,11 +24,20 @@ pub async fn task_main(http_listener: TcpListener) -> anyhow::Result<Infallible>
info!("http has shut down");
}
let service = || RouterService::new(make_router().build()?);
let router = make_router().build().map_err(|e| anyhow!(e))?;
let builder = RequestServiceBuilder::new(router).map_err(|e| anyhow!(e))?;
let listener = tokio::net::TcpListener::from_std(http_listener)?;
hyper::Server::from_tcp(http_listener)?
.serve(service().map_err(|e| anyhow!(e))?)
.await?;
bail!("hyper server without shutdown handling cannot shutdown successfully");
loop {
let (stream, remote_addr) = listener.accept().await.unwrap();
let io = TokioIo::new(stream);
let service = builder.build(remote_addr);
tokio::task::spawn(async move {
let builder = conn::auto::Builder::new(TokioExecutor::new());
let res = builder.serve_connection(io, service).await;
if let Err(err) = res {
println!("Error serving connection: {:?}", err);
}
});
}
}
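
The hunk above is the standard hyper 1.0 serving shape: the crate no longer bundles an accept loop, so the server owns a tokio TcpListener and hands each accepted connection to hyper-util's auto builder. A minimal, self-contained sketch of the same pattern, assuming only the hyper, hyper-util, http-body-util and tokio crates (handler and bind address are illustrative):

use std::convert::Infallible;

use http_body_util::Full;
use hyper::{
    body::{Bytes, Incoming},
    service::service_fn,
    Request, Response,
};
use hyper_util::{
    rt::{TokioExecutor, TokioIo},
    server::conn,
};
use tokio::net::TcpListener;

// Illustrative handler: answers every request with 200 OK.
async fn handle(_req: Request<Incoming>) -> Result<Response<Full<Bytes>>, Infallible> {
    Ok(Response::new(Full::new(Bytes::from("ok"))))
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:3000").await?;
    loop {
        // Accept a plain TCP connection and adapt it to hyper's IO traits.
        let (stream, _peer) = listener.accept().await?;
        let io = TokioIo::new(stream);
        tokio::spawn(async move {
            // conn::auto negotiates HTTP/1 or HTTP/2 per connection.
            let builder = conn::auto::Builder::new(TokioExecutor::new());
            if let Err(err) = builder.serve_connection(io, service_fn(handle)).await {
                eprintln!("error serving connection: {err:?}");
            }
        });
    }
}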

View File

@@ -10,13 +10,15 @@ use std::{
};
use bytes::{Buf, BytesMut};
use hyper::server::conn::{AddrIncoming, AddrStream};
use pin_project_lite::pin_project;
use tls_listener::AsyncAccept;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
use tokio::{
io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf},
net::{TcpListener, TcpStream},
};
pub struct ProxyProtocolAccept {
pub incoming: AddrIncoming,
pub incoming: TcpListener,
}
pin_project! {
@@ -327,20 +329,18 @@ impl<T: AsyncRead> AsyncRead for WithClientIp<T> {
}
impl AsyncAccept for ProxyProtocolAccept {
type Connection = WithClientIp<AddrStream>;
type Connection = WithClientIp<TcpStream>;
type Address = SocketAddr;
type Error = io::Error;
fn poll_accept(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<Result<Self::Connection, Self::Error>>> {
let conn = ready!(Pin::new(&mut self.incoming).poll_accept(cx)?);
let Some(conn) = conn else {
return Poll::Ready(None);
};
Poll::Ready(Some(Ok(WithClientIp::new(conn))))
) -> Poll<Result<(Self::Connection, Self::Address), Self::Error>> {
Pin::new(&mut self.incoming)
.poll_accept(cx)
.map_ok(|(c, a)| (WithClientIp::new(c), a))
}
}

View File

@@ -356,7 +356,6 @@ pub async fn proxy_pass(
compute: impl AsyncRead + AsyncWrite + Unpin,
aux: MetricsAuxInfo,
) -> anyhow::Result<()> {
ctx.set_success();
ctx.log();
let usage = USAGE_METRICS.register(Ids {

View File

@@ -7,8 +7,8 @@ use crate::{
proxy::retry::{retry_after, ShouldRetry},
};
use async_trait::async_trait;
use hyper::StatusCode;
use pq_proto::StartupMessageParams;
use reqwest::StatusCode;
use std::ops::ControlFlow;
use tokio::time;
use tracing::{error, info, warn};

View File

@@ -46,11 +46,14 @@ enum Notification {
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct AllowedIpsUpdate {
#[serde(rename = "project")]
project_id: SmolStr,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct PasswordUpdate {
#[serde(rename = "project")]
project_id: SmolStr,
#[serde(rename = "role")]
role_name: SmolStr,
}
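
With the renames above, both structs accept the short field names published on the Redis channel. A hedged, self-contained illustration of the wire format (String stands in for SmolStr to keep the snippet dependency-free):

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq, Eq)]
struct PasswordUpdate {
    #[serde(rename = "project")]
    project_id: String,
    #[serde(rename = "role")]
    role_name: String,
}

fn main() -> serde_json::Result<()> {
    // Wire format after the rename: "project" and "role", not the long names.
    let update: PasswordUpdate =
        serde_json::from_str(r#"{"project": "new_project", "role": "new_role"}"#)?;
    assert_eq!(update.project_id, "new_project");
    assert_eq!(update.role_name, "new_role");
    Ok(())
}
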
fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
@@ -148,7 +151,7 @@ mod tests {
#[test]
fn parse_allowed_ips() -> anyhow::Result<()> {
let project_id = "new_project".to_string();
let data = format!("{{\"project_id\": \"{project_id}\"}}");
let data = format!("{{\"project\": \"{project_id}\"}}");
let text = json!({
"type": "message",
"topic": "/allowed_ips_updated",
@@ -174,7 +177,7 @@ mod tests {
fn parse_password_updated() -> anyhow::Result<()> {
let project_id = "new_project".to_string();
let role_name = "new_role".to_string();
let data = format!("{{\"project_id\": \"{project_id}\", \"role_name\": \"{role_name}\"}}");
let data = format!("{{\"project\": \"{project_id}\", \"role\": \"{role_name}\"}}");
let text = json!({
"type": "message",
"topic": "/password_updated",

View File

@@ -6,41 +6,54 @@ mod conn_pool;
mod sql_over_http;
mod websocket;
use bytes::Bytes;
pub use conn_pool::GlobalConnPoolOptions;
use anyhow::bail;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::StatusCode;
use hyper_util::rt::{TokioExecutor, TokioIo};
use hyper_util::server::conn;
use metrics::IntCounterPairGuard;
use rand::rngs::StdRng;
use rand::SeedableRng;
pub use reqwest_middleware::{ClientWithMiddleware, Error};
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::select;
use tokio_rustls::TlsAcceptor;
use tokio_util::task::TaskTracker;
use crate::config::TlsConfig;
use crate::context::RequestMonitoring;
use crate::metrics::NUM_CLIENT_CONNECTION_GAUGE;
use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
use crate::protocol2::ProxyProtocolAccept;
use crate::rate_limiter::EndpointRateLimiter;
use crate::{cancellation::CancelMap, config::ProxyConfig};
use futures::StreamExt;
use hyper::{
server::{
accept,
conn::{AddrIncoming, AddrStream},
},
Body, Method, Request, Response,
};
use hyper::{Method, Request, Response};
use std::net::IpAddr;
use std::task::Poll;
use std::{future::ready, sync::Arc};
use tls_listener::TlsListener;
use std::pin::pin;
use std::sync::Arc;
use tls_listener::{AsyncTls, TlsListener};
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use tracing::{error, info, info_span, warn, Instrument};
use utils::http::{error::ApiError, json::json_response};
#[derive(Clone)]
struct Tls(TlsAcceptor);
impl<C: AsyncRead + AsyncWrite + Unpin> AsyncTls<C> for Tls {
type Stream = tokio_rustls::server::TlsStream<C>;
type Error = std::io::Error;
type AcceptFuture = tokio_rustls::Accept<C>;
fn accept(&self, conn: C) -> Self::AcceptFuture {
tokio_rustls::TlsAcceptor::accept(&self.0, conn)
}
}
pub async fn task_main(
config: &'static ProxyConfig,
ws_listener: TcpListener,
@@ -79,42 +92,52 @@ pub async fn task_main(
};
let tls_acceptor: tokio_rustls::TlsAcceptor = tls_config.to_server_config().into();
let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
let _ = addr_incoming.set_nodelay(true);
// let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
// let _ = addr_incoming.set_nodelay(true);
let addr_incoming = ProxyProtocolAccept {
incoming: addr_incoming,
incoming: ws_listener,
};
let ws_connections = tokio_util::task::task_tracker::TaskTracker::new();
let ws_connections2 = ws_connections.clone();
ws_connections.close(); // allows `ws_connections.wait to complete`
let tls_listener = TlsListener::new(tls_acceptor, addr_incoming).filter(|conn| {
if let Err(err) = conn {
error!("failed to accept TLS connection for websockets: {err:?}");
ready(false)
} else {
ready(true)
}
});
let mut tls_listener = TlsListener::new(Tls(tls_acceptor), addr_incoming);
let make_svc = hyper::service::make_service_fn(
|stream: &tokio_rustls::server::TlsStream<WithClientIp<AddrStream>>| {
tokio::spawn(async move {
loop {
let (stream, remote_addr) = select! {
res = tls_listener.accept() => {
match res {
Err(err) =>
{error!("failed to accept TLS connection for websockets: {err:?}"); continue},
Ok(s) => s,
}
}
_ = cancellation_token.cancelled() => break,
};
let (io, tls) = stream.get_ref();
let client_addr = io.client_addr();
let remote_addr = io.inner.remote_addr();
let sni_name = tls.server_name().map(|s| s.to_string());
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
let ws_connections = ws_connections2.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
async move {
let peer_addr = match client_addr {
Some(addr) => addr,
None if config.require_client_ip => bail!("missing required client ip"),
None => remote_addr,
};
Ok(MetricService::new(hyper::service::service_fn(
move |req: Request<Body>| {
let peer_addr = match client_addr {
Some(addr) => addr,
None if config.require_client_ip => {
tracing::error!("Error serving connection: missing required client ip");
continue;
}
None => remote_addr,
};
let io = TokioIo::new(stream);
let cancellation_token = cancellation_token.clone();
tokio::task::spawn(async move {
let service = MetricService::new(hyper::service::service_fn(
move |req: Request<Incoming>| {
let sni_name = sni_name.clone();
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
@@ -144,15 +167,22 @@ pub async fn task_main(
.await
}
},
)))
}
},
);
hyper::Server::builder(accept::from_stream(tls_listener))
.serve(make_svc)
.with_graceful_shutdown(cancellation_token.cancelled())
.await?;
));
let builder = conn::auto::Builder::new(TokioExecutor::new());
let mut conn = pin!(builder.serve_connection(io, service));
let res = select! {
_ = cancellation_token.cancelled() => {
conn.as_mut().graceful_shutdown();
conn.await
}
res = conn.as_mut() => res,
};
if let Err(err) = res {
tracing::error!("Error serving connection: {:?}", err);
}
});
}
});
// await websocket connections
ws_connections.wait().await;
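
The loop above also shows the hyper 1.0 replacement for Server::with_graceful_shutdown: shutdown is handled per connection by racing the connection future against a cancellation token and calling graceful_shutdown on it. A hedged, single-connection sketch of that pattern (crate set as in the hunk above; handler and messages are illustrative):

use std::pin::pin;

use hyper::service::service_fn;
use hyper_util::{
    rt::{TokioExecutor, TokioIo},
    server::conn,
};
use tokio::{net::TcpStream, select};
use tokio_util::sync::CancellationToken;

// Drive one accepted connection, draining in-flight requests on cancel.
async fn drive(stream: TcpStream, cancel: CancellationToken) {
    let io = TokioIo::new(stream);
    let service = service_fn(|_req: hyper::Request<hyper::body::Incoming>| async {
        Ok::<_, std::convert::Infallible>(hyper::Response::new(
            http_body_util::Full::new(hyper::body::Bytes::from("bye")),
        ))
    });

    let builder = conn::auto::Builder::new(TokioExecutor::new());
    let mut conn = pin!(builder.serve_connection(io, service));
    let res = select! {
        // On shutdown, stop taking new requests but let the current ones finish.
        _ = cancel.cancelled() => {
            conn.as_mut().graceful_shutdown();
            conn.await
        }
        res = conn.as_mut() => res,
    };
    if let Err(err) = res {
        eprintln!("error serving connection: {err:?}");
    }
}
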
@@ -184,18 +214,14 @@ where
type Error = S::Error;
type Future = S::Future;
fn poll_ready(&mut self, cx: &mut std::task::Context<'_>) -> Poll<Result<(), Self::Error>> {
self.inner.poll_ready(cx)
}
fn call(&mut self, req: Request<ReqBody>) -> Self::Future {
fn call(&self, req: Request<ReqBody>) -> Self::Future {
self.inner.call(req)
}
}
#[allow(clippy::too_many_arguments)]
async fn request_handler(
mut request: Request<Body>,
mut request: Request<Incoming>,
config: &'static ProxyConfig,
tls: &'static TlsConfig,
conn_pool: Arc<conn_pool::GlobalConnPool>,
@@ -205,7 +231,7 @@ async fn request_handler(
sni_hostname: Option<String>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<Body>, ApiError> {
) -> Result<Response<Full<Bytes>>, ApiError> {
let host = request
.headers()
.get("host")
@@ -264,7 +290,7 @@ async fn request_handler(
)
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
.body(Body::empty())
.body(Full::new(Bytes::new()))
.map_err(|e| ApiError::InternalServerError(e.into()))
} else {
json_response(StatusCode::BAD_REQUEST, "query is not supported")

View File

@@ -26,7 +26,7 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
use crate::{
auth::{self, backend::ComputeUserInfo, check_peer_addr_is_in_list},
console::{self, messages::MetricsAuxInfo},
console,
context::RequestMonitoring,
metrics::NUM_DB_CONNECTIONS_GAUGE,
proxy::connect_compute::ConnectMechanism,
@@ -362,7 +362,6 @@ impl GlobalConnPool {
// ok return cached connection if found and establish a new one otherwise
let new_client = if let Some(client) = client {
ctx.set_project(client.aux.clone());
if client.inner.is_closed() {
let conn_id = uuid::Uuid::new_v4();
info!(%conn_id, "pool: cached connection '{conn_info}' is closed, opening a new one");
@@ -594,6 +593,10 @@ async fn connect_to_compute_once(
span.in_scope(|| {
info!(%conn_info, %session, "new connection");
});
let ids = Ids {
endpoint_id: node_info.aux.endpoint_id.clone(),
branch_id: node_info.aux.branch_id.clone(),
};
let db_user = conn_info.db_and_user();
tokio::spawn(
@@ -661,7 +664,7 @@ async fn connect_to_compute_once(
Ok(ClientInner {
inner: client,
session: tx,
aux: node_info.aux.clone(),
ids,
conn_id,
})
}
@@ -669,17 +672,13 @@ async fn connect_to_compute_once(
struct ClientInner {
inner: tokio_postgres::Client,
session: tokio::sync::watch::Sender<uuid::Uuid>,
aux: MetricsAuxInfo,
ids: Ids,
conn_id: uuid::Uuid,
}
impl Client {
pub fn metrics(&self) -> Arc<MetricCounter> {
let aux = &self.inner.as_ref().unwrap().aux;
USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id.clone(),
branch_id: aux.branch_id.clone(),
})
USAGE_METRICS.register(self.inner.as_ref().unwrap().ids.clone())
}
}

View File

@@ -1,15 +1,20 @@
use std::sync::Arc;
use anyhow::bail;
use bytes::Buf;
use bytes::Bytes;
use futures::pin_mut;
use futures::StreamExt;
use hyper::body::HttpBody;
use http_body::Body;
use http_body_util::BodyExt;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::header;
use hyper::http::HeaderName;
use hyper::http::HeaderValue;
use hyper::Response;
use hyper::StatusCode;
use hyper::{Body, HeaderMap, Request};
use hyper::{HeaderMap, Request};
use serde_json::json;
use serde_json::Map;
use serde_json::Value;
@@ -235,10 +240,10 @@ pub async fn handle(
tls: &'static TlsConfig,
config: &'static HttpConfig,
ctx: &mut RequestMonitoring,
request: Request<Body>,
request: Request<Incoming>,
sni_hostname: Option<String>,
conn_pool: Arc<GlobalConnPool>,
) -> Result<Response<Body>, ApiError> {
) -> Result<Response<Full<Bytes>>, ApiError> {
let result = tokio::time::timeout(
config.request_timeout,
handle_inner(tls, config, ctx, request, sni_hostname, conn_pool),
@@ -347,10 +352,10 @@ async fn handle_inner(
tls: &'static TlsConfig,
config: &'static HttpConfig,
ctx: &mut RequestMonitoring,
request: Request<Body>,
request: Request<Incoming>,
sni_hostname: Option<String>,
conn_pool: Arc<GlobalConnPool>,
) -> anyhow::Result<Response<Body>> {
) -> anyhow::Result<Response<Full<Bytes>>> {
let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
.with_label_values(&["http"])
.guard();
@@ -406,8 +411,8 @@ async fn handle_inner(
//
// Read the query and query params from the request body
//
let body = hyper::body::to_bytes(request.into_body()).await?;
let payload: Payload = serde_json::from_slice(&body)?;
let body = request.into_body().collect().await?.aggregate().reader();
let payload: Payload = serde_json::from_reader(body)?;
let mut client = conn_pool.get(ctx, conn_info, !allow_pool).await?;
@@ -497,7 +502,6 @@ async fn handle_inner(
}
};
ctx.set_success();
ctx.log();
let metrics = client.metrics();
@@ -505,7 +509,7 @@ async fn handle_inner(
let body = serde_json::to_string(&result).expect("json serialization should not fail");
let len = body.len();
let response = response
.body(Body::from(body))
.body(Full::from(body))
// only fails if invalid status code or invalid header/values are given.
// these are not user configurable so it cannot fail dynamically
.expect("building response payload should not fail");
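
This mirrors the general body handling change in hyper 1.0: hyper::body::to_bytes is gone, the streaming Incoming body is buffered through http-body-util's BodyExt, and responses carry a preallocated Full<Bytes>. A hedged, self-contained sketch of the same flow (Payload and the echo shape are illustrative, not the real sql-over-http types):

use bytes::Buf;
use http_body_util::{BodyExt, Full};
use hyper::{
    body::{Bytes, Incoming},
    Request, Response,
};

#[derive(serde::Deserialize)]
struct Payload {
    query: String,
}

async fn handle(req: Request<Incoming>) -> anyhow::Result<Response<Full<Bytes>>> {
    // collect() buffers the chunked request body; aggregate() exposes the
    // collected chunks as a single Buf that serde_json can read from.
    let body = req.into_body().collect().await?.aggregate().reader();
    let payload: Payload = serde_json::from_reader(body)?;

    let out = serde_json::to_string(&serde_json::json!({ "echo": payload.query }))?;
    Ok(Response::new(Full::from(out)))
}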

View File

@@ -235,18 +235,19 @@ async fn collect_metrics_iteration(
#[cfg(test)]
mod tests {
use std::{
net::TcpListener,
sync::{Arc, Mutex},
};
use std::sync::{Arc, Mutex};
use anyhow::Error;
use bytes::{Buf, Bytes};
use chrono::Utc;
use consumption_metrics::{Event, EventChunk};
use hyper::{
service::{make_service_fn, service_fn},
Body, Response,
use http_body_util::{BodyExt, Empty};
use hyper::{body::Incoming, service::service_fn, Response};
use hyper_util::{
rt::{TokioExecutor, TokioIo},
server::conn,
};
use tokio::net::TcpListener;
use url::Url;
use super::{collect_metrics_iteration, Ids, Metrics};
@@ -254,30 +255,43 @@ mod tests {
#[tokio::test]
async fn metrics() {
let listener = TcpListener::bind("0.0.0.0:0").unwrap();
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let reports = Arc::new(Mutex::new(vec![]));
let reports2 = reports.clone();
let server = hyper::server::Server::from_tcp(listener)
.unwrap()
.serve(make_service_fn(move |_| {
let reports = reports.clone();
async move {
Ok::<_, Error>(service_fn(move |req| {
let reports = reports.clone();
async move {
let bytes = hyper::body::to_bytes(req.into_body()).await?;
let events: EventChunk<'static, Event<Ids, String>> =
serde_json::from_slice(&bytes)?;
reports.lock().unwrap().push(events);
Ok::<_, Error>(Response::new(Body::from(vec![])))
}
}))
}
}));
let addr = server.local_addr();
tokio::spawn(server);
let service = service_fn(move |req: hyper::Request<Incoming>| {
let reports = reports.clone();
async move {
let bytes = req
.into_body()
.collect()
.await
.unwrap()
.aggregate()
.reader();
let events: EventChunk<'static, Event<Ids, String>> =
serde_json::from_reader(bytes)?;
reports.lock().unwrap().push(events);
Ok::<_, Error>(Response::new(Empty::<Bytes>::new()))
}
});
tokio::spawn(async move {
loop {
let (stream, _) = listener.accept().await.unwrap();
let io = TokioIo::new(stream);
let service = service.clone();
tokio::task::spawn(async move {
let builder = conn::auto::Builder::new(TokioExecutor::new());
let res = builder.serve_connection(io, service).await;
if let Err(err) = res {
println!("Error serving connection: {:?}", err);
}
});
}
});
let metrics = Metrics::default();
let client = http::new_client(RateLimiterConfig::default());

View File

@@ -288,32 +288,34 @@ async fn timeline_files_handler(request: Request<Body>) -> Result<Response<Body>
}
/// Deactivates the timeline and removes its data directory.
async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let ttid = TenantTimelineId::new(
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
check_permission(&request, Some(ttid.tenant_id))?;
ensure_no_body(&mut request).await?;
// FIXME: `delete_force` can fail from both internal errors and bad requests. Add better
// error handling here when we're able to.
let resp = GlobalTimelines::delete(&ttid, only_local)
let resp = GlobalTimelines::delete_force(&ttid)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, resp)
}
/// Deactivates all timelines for the tenant and removes its data directory.
/// See `timeline_delete_handler`.
async fn tenant_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
/// See `timeline_delete_force_handler`.
async fn tenant_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id = parse_request_param(&request, "tenant_id")?;
let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
check_permission(&request, Some(tenant_id))?;
ensure_no_body(&mut request).await?;
// FIXME: `delete_force_all_for_tenant` can return an error for multiple different reasons;
// Using an `InternalServerError` should be fixed when the types support it
let delete_info = GlobalTimelines::delete_force_all_for_tenant(&tenant_id, only_local)
let delete_info = GlobalTimelines::delete_force_all_for_tenant(&tenant_id)
.await
.map_err(ApiError::InternalServerError)?;
json_response(
@@ -510,10 +512,10 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
request_span(r, timeline_status_handler)
})
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
request_span(r, timeline_delete_handler)
request_span(r, timeline_delete_force_handler)
})
.delete("/v1/tenant/:tenant_id", |r| {
request_span(r, tenant_delete_handler)
request_span(r, tenant_delete_force_handler)
})
.post("/v1/pull_timeline", |r| {
request_span(r, timeline_pull_handler)

View File

@@ -88,10 +88,6 @@ impl SafeKeeperConf {
self.tenant_dir(&ttid.tenant_id)
.join(ttid.timeline_id.to_string())
}
pub fn is_wal_backup_enabled(&self) -> bool {
self.remote_storage.is_some() && self.wal_backup_enabled
}
}
impl SafeKeeperConf {

View File

@@ -742,11 +742,6 @@ where
state.timeline_start_lsn
);
}
if state.peer_horizon_lsn == Lsn(0) {
// Update peer_horizon_lsn as soon as we know where timeline starts.
// It means that peer_horizon_lsn cannot be zero after we know timeline_start_lsn.
state.peer_horizon_lsn = msg.timeline_start_lsn;
}
if state.local_start_lsn == Lsn(0) {
state.local_start_lsn = msg.start_streaming_at;
info!("setting local_start_lsn to {:?}", state.local_start_lsn);

View File

@@ -407,7 +407,7 @@ impl SafekeeperPostgresHandler {
self.conf.timeline_dir(&tli.ttid),
&persisted_state,
start_pos,
self.conf.is_wal_backup_enabled(),
self.conf.wal_backup_enabled,
)?;
// Split to concurrently receive and send data; replies are generally

View File

@@ -33,13 +33,12 @@ use crate::safekeeper::{
};
use crate::send_wal::WalSenders;
use crate::state::{TimelineMemState, TimelinePersistentState};
use crate::wal_backup::{self};
use crate::{control_file, safekeeper::UNKNOWN_SERVER_VERSION};
use crate::metrics::FullTimelineInfo;
use crate::wal_storage::Storage as wal_storage_iface;
use crate::SafeKeeperConf;
use crate::{debug_dump, wal_storage};
use crate::{GlobalTimelines, SafeKeeperConf};
/// Things safekeeper should know about timeline state on peers.
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -472,29 +471,14 @@ impl Timeline {
}
}
/// Delete timeline from disk completely, by removing timeline directory.
/// Background timeline activities will stop eventually.
///
/// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but
/// deletion API endpoint is retriable.
pub async fn delete(
/// Delete timeline from disk completely, by removing timeline directory. Background
/// timeline activities will stop eventually.
pub async fn delete_from_disk(
&self,
shared_state: &mut MutexGuard<'_, SharedState>,
only_local: bool,
) -> Result<(bool, bool)> {
let was_active = shared_state.active;
self.cancel(shared_state);
// TODO: It's better to wait for s3 offloader termination before
// removing data from s3. Though since s3 doesn't have transactions it
// still wouldn't guarantee absense of data after removal.
let conf = GlobalTimelines::get_global_config();
if !only_local && conf.is_wal_backup_enabled() {
// Note: we concurrently delete remote storage data from multiple
// safekeepers. That's ok, s3 replies 200 if object doesn't exist and we
// do some retries anyway.
wal_backup::delete_timeline(&self.ttid).await?;
}
let dir_existed = delete_dir(&self.timeline_dir).await?;
Ok((dir_existed, was_active))
}

View File

@@ -327,20 +327,16 @@ impl GlobalTimelines {
}
/// Cancels timeline, then deletes the corresponding data directory.
/// If only_local, doesn't remove WAL segments in remote storage.
pub async fn delete(
ttid: &TenantTimelineId,
only_local: bool,
) -> Result<TimelineDeleteForceResult> {
pub async fn delete_force(ttid: &TenantTimelineId) -> Result<TimelineDeleteForceResult> {
let tli_res = TIMELINES_STATE.lock().unwrap().get(ttid);
match tli_res {
Ok(timeline) => {
// Take a lock and finish the deletion holding this mutex.
let mut shared_state = timeline.write_shared_state().await;
info!("deleting timeline {}, only_local={}", ttid, only_local);
info!("deleting timeline {}", ttid);
let (dir_existed, was_active) =
timeline.delete(&mut shared_state, only_local).await?;
timeline.delete_from_disk(&mut shared_state).await?;
// Remove timeline from the map.
// FIXME: re-enable it once we fix the issue with recreation of deleted timelines
@@ -373,11 +369,8 @@ impl GlobalTimelines {
/// the tenant had, `true` if a timeline was active. There may be a race if new timelines are
/// created simultaneously. In that case the function will return error and the caller should
/// retry tenant deletion again later.
///
/// If only_local, doesn't remove WAL segments in remote storage.
pub async fn delete_force_all_for_tenant(
tenant_id: &TenantId,
only_local: bool,
) -> Result<HashMap<TenantTimelineId, TimelineDeleteForceResult>> {
info!("deleting all timelines for tenant {}", tenant_id);
let to_delete = Self::get_all_for_tenant(*tenant_id);
@@ -386,7 +379,7 @@ impl GlobalTimelines {
let mut deleted = HashMap::new();
for tli in &to_delete {
match Self::delete(&tli.ttid, only_local).await {
match Self::delete_force(&tli.ttid).await {
Ok(result) => {
deleted.insert(tli.ttid, result);
}

View File

@@ -4,8 +4,6 @@ use camino::{Utf8Path, Utf8PathBuf};
use futures::stream::FuturesOrdered;
use futures::StreamExt;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use utils::backoff;
use utils::id::NodeId;
use std::cmp::min;
@@ -168,17 +166,6 @@ async fn update_task(
}
}
static REMOTE_STORAGE: OnceCell<Option<GenericRemoteStorage>> = OnceCell::new();
// Storage must be configured and initialized when this is called.
fn get_configured_remote_storage() -> &'static GenericRemoteStorage {
REMOTE_STORAGE
.get()
.expect("failed to get remote storage")
.as_ref()
.unwrap()
}
const CHECK_TASKS_INTERVAL_MSEC: u64 = 1000;
/// Sits on wal_backup_launcher_rx and starts/stops per timeline wal backup
@@ -212,7 +199,7 @@ pub async fn wal_backup_launcher_task_main(
ttid = wal_backup_launcher_rx.recv() => {
// channel is never expected to get closed
let ttid = ttid.unwrap();
if !conf.is_wal_backup_enabled() {
if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
continue; /* just drain the channel and do nothing */
}
async {
@@ -497,12 +484,18 @@ fn get_segments(start: Lsn, end: Lsn, seg_size: usize) -> Vec<Segment> {
res
}
static REMOTE_STORAGE: OnceCell<Option<GenericRemoteStorage>> = OnceCell::new();
async fn backup_object(
source_file: &Utf8Path,
target_file: &RemotePath,
size: usize,
) -> Result<()> {
let storage = get_configured_remote_storage();
let storage = REMOTE_STORAGE
.get()
.expect("failed to get remote storage")
.as_ref()
.unwrap();
let file = File::open(&source_file)
.await
@@ -539,39 +532,6 @@ pub async fn read_object(
Ok(Box::pin(reader))
}
/// Delete WAL files for the given timeline. Remote storage must be configured
/// when called.
pub async fn delete_timeline(ttid: &TenantTimelineId) -> Result<()> {
let storage = get_configured_remote_storage();
let ttid_path = Utf8Path::new(&ttid.tenant_id.to_string()).join(ttid.timeline_id.to_string());
let remote_path = RemotePath::new(&ttid_path)?;
// A backoff::retry is used here for two reasons:
// - To provide a backoff rather than busy-polling the API on errors
// - To absorb transient 429/503 conditions without hitting our error
// logging path for issues deleting objects.
//
// Note: listing segments might take a long time if there are many of them.
// We don't currently have http requests timeout cancellation, but if/once
// we have listing should get streaming interface to make progress.
let token = CancellationToken::new(); // not really used
backoff::retry(
|| async {
let files = storage.list_files(Some(&remote_path)).await?;
storage.delete_objects(&files).await?;
Ok(())
},
|_| false,
3,
10,
"executing WAL segments deletion batch",
backoff::Cancel::new(token, || anyhow::anyhow!("canceled")),
)
.await?;
Ok(())
}
/// Copy segments from one timeline to another. Used in copy_timeline.
pub async fn copy_s3_segments(
wal_seg_size: usize,

View File

@@ -18,7 +18,11 @@ futures-core.workspace = true
futures-util.workspace = true
git-version.workspace = true
humantime.workspace = true
hyper = { workspace = true, features = ["full"] }
hyper = { workspace = true, features = ["server"] }
hyper-util = { workspace = true, features = ["tokio", "server", "server-auto"] }
http = { workspace = true, features = [] }
http-body = { workspace = true, features = [] }
http-body-util = { workspace = true, features = [] }
once_cell.workspace = true
parking_lot.workspace = true
prost.workspace = true
@@ -29,6 +33,9 @@ tracing.workspace = true
metrics.workspace = true
utils.workspace = true
# needed for tonic, which still speaks http 0.2
http0_2 = { package = "http", version = "0.2" }
workspace_hack.workspace = true
[build-dependencies]

View File

@@ -13,10 +13,14 @@
use clap::{command, Parser};
use futures_core::Stream;
use futures_util::StreamExt;
use http::Request;
use hyper::body::Incoming;
use hyper::header::CONTENT_TYPE;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, StatusCode};
use hyper_util::rt::{TokioExecutor, TokioIo};
use hyper_util::server::conn;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::convert::Infallible;
@@ -24,6 +28,7 @@ use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::net::TcpListener;
use tokio::sync::broadcast;
use tokio::sync::broadcast::error::RecvError;
use tokio::time;
@@ -596,9 +601,7 @@ impl BrokerService for Broker {
}
// We serve only metrics and healthcheck through http1.
async fn http1_handler(
req: hyper::Request<hyper::body::Body>,
) -> Result<hyper::Response<Body>, Infallible> {
async fn http1_handler(req: hyper::Request<Body>) -> Result<hyper::Response<Body>, Infallible> {
let resp = match (req.method(), req.uri().path()) {
(&Method::GET, "/metrics") => {
let mut buffer = vec![];
@@ -662,16 +665,19 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let storage_broker_server = BrokerServiceServer::new(storage_broker_impl);
info!("listening on {}", &args.listen_addr);
let listener = TcpListener::bind(args.listen_addr).await?;
// grpc is served along with http1 for metrics on a single port, hence we
// don't use tonic's Server.
hyper::Server::bind(&args.listen_addr)
.http2_keep_alive_interval(Some(args.http2_keepalive_interval))
.serve(make_service_fn(move |conn: &AddrStream| {
loop {
let (stream, remote_addr) = listener.accept().await?;
let io = TokioIo::new(stream);
tokio::task::spawn(async move {
let storage_broker_server_cloned = storage_broker_server.clone();
let connect_info = conn.connect_info();
async move {
Ok::<_, Infallible>(service_fn(move |mut req| {
let service = async move {
Ok::<_, Infallible>(service_fn(move |mut req: Request<Incoming>| {
// That's what tonic's MakeSvc.call does to pass conninfo to
// the request handler (and where its request.remote_addr()
// expects to find it).
@@ -690,6 +696,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
if req.headers().get("content-type").map(|x| x.as_bytes())
== Some(b"application/grpc")
{
// TODO: this doesn't work :(
let (parts, body) = req.into_parts();
let req = http0_2::Request::from_parts(parts, body);
let res_resp = storage_broker_server_svc.call(req).await;
// Grpc and http1 handlers have slightly different
// Response types: it is UnsyncBoxBody for the
@@ -703,10 +712,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
}
}
}))
};
let builder = conn::auto::Builder::new(TokioExecutor::new())
.http2()
.keep_alive_interval(Some(args.http2_keepalive_interval));
if let Err(err) = builder.serve_connection(io, service).await {
tracing::error!("Error serving connection: {:?}", err);
}
}))
.await?;
Ok(())
});
}
}
#[cfg(test)]
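The accept loop above is the hyper 1.0 replacement for hyper 0.14's `Server::bind(...).serve(make_service_fn(...))`: the listener is driven by hand and every accepted connection is handed to hyper-util's version-sniffing builder. A minimal, self-contained sketch of that pattern, with a placeholder address, handler, and body type rather than the broker's real ones:

```rust
use std::convert::Infallible;

use bytes::Bytes;
use http_body_util::Full;
use hyper::{body::Incoming, service::service_fn, Request, Response};
use hyper_util::rt::{TokioExecutor, TokioIo};
use hyper_util::server::conn::auto;
use tokio::net::TcpListener;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:8080").await?;
    loop {
        let (stream, _peer) = listener.accept().await?;
        // TokioIo adapts tokio's AsyncRead/AsyncWrite to hyper 1.0's I/O traits.
        let io = TokioIo::new(stream);
        tokio::spawn(async move {
            let service = service_fn(|_req: Request<Incoming>| async {
                Ok::<_, Infallible>(Response::new(Full::new(Bytes::from("ok"))))
            });
            // The auto builder sniffs whether the peer speaks HTTP/1 or HTTP/2,
            // which is what lets gRPC (h2) and plain /metrics requests share one port.
            if let Err(err) = auto::Builder::new(TokioExecutor::new())
                .serve_connection(io, service)
                .await
            {
                eprintln!("error serving connection: {err:?}");
            }
        });
    }
}
```

Unlike the 0.14 `Server`, nothing here owns the accept loop, so per-connection options such as the HTTP/2 keep-alive interval are set on the builder right before `serve_connection`, as the diff does with `.http2().keep_alive_interval(...)`.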

View File

@@ -1,8 +1,6 @@
use hyper::body::HttpBody;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::Duration;
use tonic::codegen::StdError;
pub use tonic::transport::Uri;
use tonic::transport::{ClientTlsConfig, Endpoint};
use tonic::{transport::Channel, Status};
use utils::id::{TenantId, TenantTimelineId, TimelineId};
@@ -27,8 +25,6 @@ pub use tonic::Code;
pub use tonic::Request;
pub use tonic::Streaming;
pub use hyper::Uri;
pub const DEFAULT_LISTEN_ADDR: &str = "127.0.0.1:50051";
pub const DEFAULT_ENDPOINT: &str = const_format::formatcp!("http://{DEFAULT_LISTEN_ADDR}");
@@ -99,50 +95,7 @@ pub fn parse_proto_ttid(proto_ttid: &ProtoTenantTimelineId) -> Result<TenantTime
// well.
type AnyError = Box<dyn std::error::Error + Send + Sync + 'static>;
// Wraps two different HttpBody implementations behind a single HttpBody type. Inspired by
// https://github.com/hyperium/tonic/blob/master/examples/src/hyper_warp/server.rs
pub enum EitherBody<A, B> {
Left(A),
Right(B),
}
impl<A, B> HttpBody for EitherBody<A, B>
where
A: HttpBody + Send + Unpin,
B: HttpBody<Data = A::Data> + Send + Unpin,
A::Error: Into<AnyError>,
B::Error: Into<AnyError>,
{
type Data = A::Data;
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
fn is_end_stream(&self) -> bool {
match self {
EitherBody::Left(b) => b.is_end_stream(),
EitherBody::Right(b) => b.is_end_stream(),
}
}
fn poll_data(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<Result<Self::Data, Self::Error>>> {
match self.get_mut() {
EitherBody::Left(b) => Pin::new(b).poll_data(cx).map(map_option_err),
EitherBody::Right(b) => Pin::new(b).poll_data(cx).map(map_option_err),
}
}
fn poll_trailers(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<Option<hyper::HeaderMap>, Self::Error>> {
match self.get_mut() {
EitherBody::Left(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
EitherBody::Right(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
}
}
}
pub type EitherBody<L, R> = http_body_util::Either<L, R>;
fn map_option_err<T, U: Into<AnyError>>(err: Option<Result<T, U>>) -> Option<Result<T, AnyError>> {
err.map(|e| e.map_err(Into::into))
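The hand-rolled `EitherBody` enum removed above becomes a plain alias for `http_body_util::Either`, which already implements hyper 1.0's `Body` trait whenever both of its variants do. A small sketch of how such an alias can be used; the handler and body choices are illustrative, not taken from the broker:

```rust
use bytes::Bytes;
use http_body_util::{Either, Empty, Full};
use hyper::Response;

// Both arms yield Data = Bytes, so Either<Full<Bytes>, Empty<Bytes>> is itself
// a Body and gives two different body kinds a single response type.
type MetricsOrEmpty = Either<Full<Bytes>, Empty<Bytes>>;

fn respond(path: &str) -> Response<MetricsOrEmpty> {
    if path == "/metrics" {
        Response::new(Either::Left(Full::new(Bytes::from_static(b"# HELP ..."))))
    } else {
        Response::new(Either::Right(Empty::new()))
    }
}
```

`Either` forwards each body frame to whichever variant is active, which is what the removed `poll_data`/`poll_trailers` implementation did by hand against the pre-1.0 `HttpBody` trait.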

View File

@@ -12,11 +12,9 @@ from pathlib import Path
# Type-related stuff
from typing import Callable, ClassVar, Dict, Iterator, Optional
import allure
import pytest
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from _pytest.terminal import TerminalReporter
from fixtures.log_helper import log
@@ -413,10 +411,7 @@ class NeonBenchmarker:
@pytest.fixture(scope="function")
def zenbenchmark(
request: FixtureRequest,
record_property: Callable[[str, object], None],
) -> Iterator[NeonBenchmarker]:
def zenbenchmark(record_property: Callable[[str, object], None]) -> Iterator[NeonBenchmarker]:
"""
This is a python decorator for benchmark fixtures. It contains functions for
recording measurements, and prints them out at the end.
@@ -424,21 +419,6 @@ def zenbenchmark(
benchmarker = NeonBenchmarker(record_property)
yield benchmarker
results = {}
for _, recorded_property in request.node.user_properties:
name = recorded_property["name"]
value = str(recorded_property["value"])
if (unit := recorded_property["unit"].strip()) != "":
value += f" {unit}"
results[name] = value
content = json.dumps(results, indent=2)
allure.attach(
content,
"benchmarks.json",
allure.attachment_type.JSON,
)
def pytest_addoption(parser: Parser):
parser.addoption(

View File

@@ -2998,23 +2998,6 @@ class Endpoint(PgProtocol):
):
self.stop()
def log_contains(self, pattern: str) -> Optional[str]:
"""Check that the compute log contains a line that matches the given regex"""
logfile = self.endpoint_path() / "compute.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return None
contains_re = re.compile(pattern)
with logfile.open("r") as f:
for line in f:
if contains_re.search(line):
# found it!
return line
return None
# Checkpoints running endpoint and returns pg_wal size in MB.
def get_pg_wal_size(self):
log.info(f'checkpointing at LSN {self.safe_psql("select pg_current_wal_lsn()")[0][0]}')
@@ -3369,15 +3352,9 @@ class SafekeeperHttpClient(requests.Session):
)
res.raise_for_status()
# only_local doesn't remove segments in the remote storage.
def timeline_delete(
self, tenant_id: TenantId, timeline_id: TimelineId, only_local: bool = False
) -> Dict[Any, Any]:
def timeline_delete_force(self, tenant_id: TenantId, timeline_id: TimelineId) -> Dict[Any, Any]:
res = self.delete(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
params={
"only_local": str(only_local).lower(),
},
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}"
)
res.raise_for_status()
res_json = res.json()

View File

@@ -1,11 +1,11 @@
import time
from typing import Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef, ObjectTypeDef
from fixtures.log_helper import log
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
from fixtures.remote_storage import RemoteStorage, RemoteStorageKind, S3Storage
from fixtures.remote_storage import RemoteStorageKind, S3Storage
from fixtures.types import Lsn, TenantId, TenantShardId, TimelineId
from fixtures.utils import wait_until
@@ -233,18 +233,23 @@ def timeline_delete_wait_completed(
wait_timeline_detail_404(pageserver_http, tenant_id, timeline_id, iterations, interval)
# remote_storage must not be None; the Optional type just makes it easier for callers to satisfy mypy
if TYPE_CHECKING:
# TODO avoid by combining remote storage related stuff in single type
# and just passing in this type instead of whole builder
from fixtures.neon_fixtures import NeonEnvBuilder
def assert_prefix_empty(
remote_storage: Optional[RemoteStorage],
neon_env_builder: "NeonEnvBuilder",
prefix: Optional[str] = None,
allowed_postfix: Optional[str] = None,
):
assert remote_storage is not None
response = list_prefix(remote_storage, prefix)
response = list_prefix(neon_env_builder, prefix)
keys = response["KeyCount"]
objects: List[ObjectTypeDef] = response.get("Contents", [])
common_prefixes = response.get("CommonPrefixes", [])
remote_storage = neon_env_builder.pageserver_remote_storage
is_mock_s3 = isinstance(remote_storage, S3Storage) and not remote_storage.cleanup
if is_mock_s3:
@@ -278,20 +283,19 @@ def assert_prefix_empty(
), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
# remote_storage must not be None; the Optional type just makes it easier for callers to satisfy mypy
def assert_prefix_not_empty(remote_storage: Optional[RemoteStorage], prefix: Optional[str] = None):
assert remote_storage is not None
response = list_prefix(remote_storage, prefix)
def assert_prefix_not_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
response = list_prefix(neon_env_builder, prefix)
assert response["KeyCount"] != 0, f"remote dir with prefix {prefix} is empty: {response}"
def list_prefix(
remote: RemoteStorage, prefix: Optional[str] = None, delimiter: str = "/"
neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None, delimiter: str = "/"
) -> ListObjectsV2OutputTypeDef:
"""
Note that this function takes into account prefix_in_bucket.
"""
# For local_fs we need to properly handle empty directories, which we currently don't, so for simplicity stick to the S3 API.
remote = neon_env_builder.pageserver_remote_storage
assert isinstance(remote, S3Storage), "localfs is currently not supported"
assert remote.client is not None

View File

@@ -1,20 +1,10 @@
import json
import os
import time
from pathlib import Path
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_wal_insert_lsn
from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
from fixtures.neon_fixtures import NeonEnvBuilder
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_next_xid(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
@@ -62,370 +52,3 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
cur = conn.cursor()
cur.execute("SELECT count(*) FROM t")
assert cur.fetchone() == (iterations,)
# Test for a bug we had, where nextXid was incorrectly updated when the
# XID counter reached 2 billion. The nextXid tracking logic incorrectly
# treated 0 (InvalidTransactionId) as a regular XID, and after reaching
# 2 billion, it started to look like a very new XID, which caused nextXid
# to be immediately advanced to the next epoch.
#
def test_import_at_2bil(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_distrib_dir: Path,
pg_bin,
vanilla_pg,
):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
ps_http = env.pageserver.http_client()
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
# Reset the vanilla Postgres instance to somewhat before 2 billion transactions.
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
cmd = [pg_resetwal_path, "--next-transaction-id=2129920000", "-D", str(vanilla_pg.pgdatadir)]
pg_bin.run_capture(cmd, env=psql_env)
vanilla_pg.start()
vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
vanilla_pg.safe_psql(
"""create table tt as select 'long string to consume some space' || g
from generate_series(1,300000) g"""
)
assert vanilla_pg.safe_psql("select count(*) from tt") == [(300000,)]
vanilla_pg.safe_psql("CREATE TABLE t (t text);")
vanilla_pg.safe_psql("INSERT INTO t VALUES ('inserted in vanilla')")
branch_name = "import_from_vanilla"
tenant = TenantId.generate()
timeline = TimelineId.generate()
env.pageserver.tenant_create(tenant)
# Take basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
os.mkdir(basebackup_dir)
vanilla_pg.safe_psql("CHECKPOINT")
pg_bin.run(
[
"pg_basebackup",
"-F",
"tar",
"-d",
vanilla_pg.connstr(),
"-D",
basebackup_dir,
]
)
# Get start_lsn and end_lsn
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--node-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
)
# Importing correct backup works
import_tar(base_tar, wal_tar)
wait_for_last_record_lsn(ps_http, tenant, timeline, Lsn(end_lsn))
endpoint = env.endpoints.create_start(
branch_name,
endpoint_id="ep-import_from_vanilla",
tenant_id=tenant,
config_lines=[
"log_autovacuum_min_duration = 0",
"autovacuum_naptime='5 s'",
],
)
assert endpoint.safe_psql("select count(*) from t") == [(1,)]
conn = endpoint.connect()
cur = conn.cursor()
# Install extension containing function needed for test
cur.execute("CREATE EXTENSION neon_test_utils")
# Advance nextXid close to 2 billion XIDs
while True:
xid = int(query_scalar(cur, "SELECT txid_current()"))
log.info(f"xid now {xid}")
# Consume XIDs in chunks of 50k, then 5k, until we get close to 2^31
if xid < 2 * 1024 * 1024 * 1024 - 100000:
cur.execute("select test_consume_xids(50000);")
elif xid < 2 * 1024 * 1024 * 1024 - 10000:
cur.execute("select test_consume_xids(5000);")
else:
break
# Run a bunch of real INSERTs to cross over the 2 billion mark
# Use a begin-exception block to have a separate sub-XID for each insert.
cur.execute(
"""
do $$
begin
for i in 1..10000 loop
-- Use a begin-exception block to generate a new subtransaction on each iteration
begin
insert into t values (i);
exception when others then
raise 'not expected %', sqlerrm;
end;
end loop;
end;
$$;
"""
)
# A checkpoint writes a WAL record with xl_xid=0. Many other WAL
# records would have the same effect.
cur.execute("checkpoint")
# wait until pageserver receives that data
wait_for_wal_insert_lsn(env, endpoint, tenant, timeline)
# Restart endpoint
endpoint.stop()
endpoint.start()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) from t")
assert cur.fetchone() == (10000 + 1,)
# This is a followup to the test_import_at_2bil test.
#
# Use a failpoint to reintroduce the bug that test_import_at_2bil also
# tests. Then, after the damage has been done, clear the failpoint to
# fix the bug. Check that the one-off hack that we added for a particular
# timeline that hit this in production fixes the broken timeline.
def test_one_off_hack_for_nextxid_bug(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_distrib_dir: Path,
pg_bin,
vanilla_pg,
):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
ps_http = env.pageserver.http_client()
env.pageserver.allowed_errors.append(".*nextXid fixed by one-off hack.*")
# We begin with the old bug still present, to create a broken timeline
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "return(true)"))
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
# Reset the vanilla Postgres instance to somewhat before 2 billion transactions,
# and around the same LSN as with the production timeline.
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
cmd = [
pg_resetwal_path,
"--next-transaction-id=2129920000",
"-l",
"000000010000035A000000E0",
"-D",
str(vanilla_pg.pgdatadir),
]
pg_bin.run_capture(cmd, env=psql_env)
vanilla_pg.start()
vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
vanilla_pg.safe_psql(
"""create table tt as select 'long string to consume some space' || g
from generate_series(1,300000) g"""
)
assert vanilla_pg.safe_psql("select count(*) from tt") == [(300000,)]
vanilla_pg.safe_psql("CREATE TABLE t (t text);")
vanilla_pg.safe_psql("INSERT INTO t VALUES ('inserted in vanilla')")
branch_name = "import_from_vanilla"
# This is the tenant/timeline that the one-off hack targets
tenant = "df254570a4f603805528b46b0d45a76c"
timeline = TimelineId.generate()
env.pageserver.tenant_create(tenant)
# Take basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
os.mkdir(basebackup_dir)
vanilla_pg.safe_psql("CHECKPOINT")
pg_bin.run(
[
"pg_basebackup",
"-F",
"tar",
"-d",
vanilla_pg.connstr(),
"-D",
basebackup_dir,
]
)
# Get start_lsn and end_lsn
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--node-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
)
# Importing correct backup works
import_tar(base_tar, wal_tar)
wait_for_last_record_lsn(ps_http, tenant, timeline, Lsn(end_lsn))
endpoint = env.endpoints.create_start(
branch_name,
endpoint_id="ep-import_from_vanilla",
tenant_id=tenant,
config_lines=[
"log_autovacuum_min_duration = 0",
"autovacuum_naptime='5 s'",
],
)
assert endpoint.safe_psql("select count(*) from t") == [(1,)]
conn = endpoint.connect()
cur = conn.cursor()
# Install extension containing function needed for test
cur.execute("CREATE EXTENSION neon_test_utils")
# Advance nextXid to the target XID, which is somewhat above the 2
# billion mark.
while True:
xid = int(query_scalar(cur, "SELECT txid_current()"))
log.info(f"xid now {xid}")
# Consume XIDs in chunks of 50k, then 5k, until we approach the target XID
if xid < (2325447052 - 100000):
cur.execute("select test_consume_xids(50000);")
elif xid < 2325447052 - 10000:
cur.execute("select test_consume_xids(5000);")
else:
break
# Run a bunch of real INSERTs to cross over the 2 billion mark
# Use a begin-exception block to have a separate sub-XID for each insert.
cur.execute(
"""
do $$
begin
for i in 1..10000 loop
-- Use a begin-exception block to generate a new subtransaction on each iteration
begin
insert into t values (i);
exception when others then
raise 'not expected %', sqlerrm;
end;
end loop;
end;
$$;
"""
)
# A checkpoint writes a WAL record with xl_xid=0. Many other WAL
# records would have the same effect.
cur.execute("checkpoint")
# Ok, the nextXid in the pageserver at this LSN should now be incorrectly
# set to 1:1024. Remember this LSN.
broken_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
# Ensure that the broken checkpoint data has reached permanent storage
ps_http.timeline_checkpoint(tenant, timeline)
wait_for_upload(ps_http, tenant, timeline, broken_lsn)
# Now fix the bug, and generate some WAL with XIDs
ps_http.configure_failpoints(("reintroduce-nextxid-update-bug", "off"))
cur.execute("INSERT INTO t VALUES ('after fix')")
fixed_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_insert_lsn()"))
log.info(f"nextXid was broken by {broken_lsn}, and fixed again by {fixed_lsn}")
# Stop the original endpoint, we don't need it anymore.
endpoint.stop()
# Test that we cannot start a new endpoint at the broken LSN.
env.neon_cli.create_branch(
"at-broken-lsn", branch_name, ancestor_start_lsn=broken_lsn, tenant_id=tenant
)
endpoint_broken = env.endpoints.create(
"at-broken-lsn",
endpoint_id="ep-at-broken-lsn",
tenant_id=tenant,
)
with pytest.raises(RuntimeError, match="Postgres exited unexpectedly with code 1"):
endpoint_broken.start()
assert endpoint_broken.log_contains(
'Could not open file "pg_xact/0000": No such file or directory'
)
# But after the bug was fixed, the one-off hack fixed the timeline,
# and a later LSN works.
env.neon_cli.create_branch(
"at-fixed-lsn", branch_name, ancestor_start_lsn=fixed_lsn, tenant_id=tenant
)
endpoint_fixed = env.endpoints.create_start(
"at-fixed-lsn", endpoint_id="ep-at-fixed-lsn", tenant_id=tenant
)
conn = endpoint_fixed.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) from t")
# One "inserted in vanilla" row, 10000 in the DO-loop, and one "after fix" row
assert cur.fetchone() == (1 + 10000 + 1,)

View File

@@ -216,14 +216,8 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
log.info(f"group: {m.group(1)}")
return int(m.group(1), 16)
assert neon_env_builder.pageserver_remote_storage is not None
pre_upgrade_keys = list(
[
o["Key"]
for o in list_prefix(neon_env_builder.pageserver_remote_storage, delimiter="")[
"Contents"
]
]
[o["Key"] for o in list_prefix(neon_env_builder, delimiter="")["Contents"]]
)
for key in pre_upgrade_keys:
assert parse_generation_suffix(key) is None
@@ -238,12 +232,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
legacy_objects: list[str] = []
suffixed_objects = []
post_upgrade_keys = list(
[
o["Key"]
for o in list_prefix(neon_env_builder.pageserver_remote_storage, delimiter="")[
"Contents"
]
]
[o["Key"] for o in list_prefix(neon_env_builder, delimiter="")["Contents"]]
)
for key in post_upgrade_keys:
log.info(f"post-upgrade key: {key}")

View File

@@ -504,7 +504,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
tenant_delete_wait_completed(ps_attached.http_client(), tenant_id, 10)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",

View File

@@ -75,7 +75,7 @@ def test_tenant_delete_smoke(
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -96,7 +96,7 @@ def test_tenant_delete_smoke(
assert not tenant_path.exists()
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -207,7 +207,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -268,7 +268,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
# Check remote is empty
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -304,7 +304,7 @@ def test_tenant_delete_is_resumed_on_attach(
# sanity check, data should be there
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -343,7 +343,7 @@ def test_tenant_delete_is_resumed_on_attach(
)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -378,7 +378,7 @@ def test_tenant_delete_is_resumed_on_attach(
ps_http.deletion_queue_flush(execute=True)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -543,7 +543,7 @@ def test_tenant_delete_concurrent(
# Physical deletion should have happened
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -645,7 +645,7 @@ def test_tenant_delete_races_timeline_creation(
# Physical deletion should have happened
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",

View File

@@ -191,7 +191,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
last_flush_lsn_upload(env, endpoint, env.initial_tenant, timeline_id)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -275,7 +275,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
# Check remote is empty
if remote_storage_kind is RemoteStorageKind.MOCK_S3:
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -449,7 +449,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
assert all([tl["state"] == "Active" for tl in timelines])
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -466,7 +466,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -482,7 +482,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
wait_until(
2,
0.5,
lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage),
lambda: assert_prefix_empty(neon_env_builder),
)
@@ -673,7 +673,7 @@ def test_timeline_delete_works_for_remote_smoke(
for timeline_id in timeline_ids:
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -690,7 +690,7 @@ def test_timeline_delete_works_for_remote_smoke(
timeline_delete_wait_completed(ps_http, tenant_id=tenant_id, timeline_id=timeline_id)
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -703,7 +703,7 @@ def test_timeline_delete_works_for_remote_smoke(
# For some reason the check above doesn't immediately take effect for the check below.
# Assume it is mock server inconsistency and check twice.
wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage))
wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder))
def test_delete_orphaned_objects(
@@ -791,7 +791,7 @@ def test_timeline_delete_resumed_on_attach(
last_flush_lsn_upload(env, endpoint, env.initial_tenant, timeline_id)
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -839,7 +839,7 @@ def test_timeline_delete_resumed_on_attach(
assert reason.endswith(f"failpoint: {failpoint}"), reason
assert_prefix_not_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",
@@ -870,7 +870,7 @@ def test_timeline_delete_resumed_on_attach(
assert not tenant_path.exists()
assert_prefix_empty(
neon_env_builder.pageserver_remote_storage,
neon_env_builder,
prefix="/".join(
(
"tenants",

View File

@@ -1,4 +1,3 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
@@ -118,8 +117,6 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
# Test that the ALL_FROZEN VM bit is cleared correctly at a HEAP_LOCK
# record.
#
# FIXME: This test is broken
@pytest.mark.skip("See https://github.com/neondatabase/neon/pull/6412#issuecomment-1902072541")
def test_vm_bit_clear_on_heap_lock(neon_simple_env: NeonEnv):
env = neon_simple_env

View File

@@ -33,19 +33,13 @@ from fixtures.neon_fixtures import (
last_flush_lsn_upload,
)
from fixtures.pageserver.utils import (
assert_prefix_empty,
assert_prefix_not_empty,
timeline_delete_wait_completed,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import (
RemoteStorageKind,
default_remote_storage,
s3_storage,
)
from fixtures.remote_storage import RemoteStorageKind, default_remote_storage
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import get_dir_size, query_scalar, start_in_background
@@ -124,8 +118,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
with env.pageserver.http_client() as pageserver_http:
timeline_details = [
pageserver_http.timeline_detail(
tenant_id=tenant_id,
timeline_id=branch_names_to_timeline_ids[branch_name],
tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name]
)
for branch_name in branch_names
]
@@ -464,19 +457,10 @@ def is_wal_trimmed(sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId,
def test_wal_backup(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
remote_storage_kind = s3_storage()
neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
env = neon_env_builder.init_start()
# These are expected after timeline deletion on safekeepers.
env.pageserver.allowed_errors.extend(
[
".*Timeline .* was not found in global map.*",
".*Timeline .* was cancelled and cannot be used anymore.*",
]
)
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_safekeepers_wal_backup")
endpoint = env.endpoints.create_start("test_safekeepers_wal_backup")
@@ -504,8 +488,7 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
# put one of safekeepers down again
env.safekeepers[0].stop()
# restart postgres
endpoint.stop()
endpoint = env.endpoints.create_start("test_safekeepers_wal_backup")
endpoint.stop_and_destroy().create_start("test_safekeepers_wal_backup")
# and ensure offloading still works
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
@@ -515,17 +498,6 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
partial(is_segment_offloaded, env.safekeepers[1], tenant_id, timeline_id, seg_end),
f"segment ending at {seg_end} get offloaded",
)
env.safekeepers[0].start()
endpoint.stop()
# Test that after timeline deletion remote objects are gone.
prefix = "/".join([str(tenant_id), str(timeline_id)])
assert_prefix_not_empty(neon_env_builder.safekeepers_remote_storage, prefix)
for sk in env.safekeepers:
sk_http = sk.http_client()
sk_http.timeline_delete(tenant_id, timeline_id)
assert_prefix_empty(neon_env_builder.safekeepers_remote_storage, prefix)
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder):
@@ -614,7 +586,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder):
# advancing peer_horizon_lsn.
for sk in env.safekeepers:
cli = sk.http_client()
cli.timeline_delete(tenant_id, timeline_id, only_local=True)
cli.timeline_delete_force(tenant_id, timeline_id)
# restart safekeeper to clear its in-memory state
sk.stop()
# wait for all potential in-flight pushes to the broker to arrive before starting
@@ -1651,7 +1623,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
endpoint_3.stop_and_destroy()
# Remove initial tenant's br1 (active)
assert sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"]
assert sk_http.timeline_delete_force(tenant_id, timeline_id_1)["dir_existed"]
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir()
@@ -1659,7 +1631,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir()
# Ensure repeated deletion succeeds
assert not sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"]
assert not sk_http.timeline_delete_force(tenant_id, timeline_id_1)["dir_existed"]
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir()
@@ -1670,13 +1642,13 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
# Ensure we cannot delete the other tenant
for sk_h in [sk_http, sk_http_noauth]:
with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"):
assert sk_h.timeline_delete(tenant_id_other, timeline_id_other)
assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other)
with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"):
assert sk_h.tenant_delete_force(tenant_id_other)
assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir()
# Remove initial tenant's br2 (inactive)
assert sk_http.timeline_delete(tenant_id, timeline_id_2)["dir_existed"]
assert sk_http.timeline_delete_force(tenant_id, timeline_id_2)["dir_existed"]
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists()
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir()
@@ -1684,7 +1656,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir()
# Remove non-existing branch, should succeed
assert not sk_http.timeline_delete(tenant_id, TimelineId("00" * 16))["dir_existed"]
assert not sk_http.timeline_delete_force(tenant_id, TimelineId("00" * 16))["dir_existed"]
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists()
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists()
assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).exists()

View File

@@ -6,7 +6,7 @@ commands:
sysvInitAction: sysinit
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
- name: pgbouncer
user: postgres
user: nobody
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: postgres-exporter
@@ -36,9 +36,7 @@ files:
max_client_conn=10000
default_pool_size=64
max_prepared_statements=0
admin_users=postgres
unix_socket_dir=/tmp/
unix_socket_mode=0777
admin_users=cloud_admin
- filename: cgconfig.conf
content: |
# Configuration for cgroups in VM compute nodes
@@ -200,7 +198,7 @@ merge: |
RUN set -e \
&& chown postgres:postgres /etc/pgbouncer.ini \
&& chmod 0666 /etc/pgbouncer.ini \
&& chmod 0644 /etc/pgbouncer.ini \
&& chmod 0644 /etc/cgconfig.conf \
&& chmod 0644 /etc/sql_exporter.yml \
&& chmod 0644 /etc/neon_collector.yml