mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-03 13:30:38 +00:00
Compare commits
42 Commits
release-85
...
release-co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd3e23b326 | ||
|
|
ac993b9dcd | ||
|
|
1c5c25156a | ||
|
|
e11c4ca77f | ||
|
|
b5deda3e08 | ||
|
|
e6b7589ba4 | ||
|
|
d57a924254 | ||
|
|
38af7f8db7 | ||
|
|
fe4762a9aa | ||
|
|
817ec9979f | ||
|
|
dbf160dc60 | ||
|
|
0d1586bab7 | ||
|
|
aa5c6c9bdd | ||
|
|
c7daf2b1e3 | ||
|
|
b347438d0e | ||
|
|
81a35b67e2 | ||
|
|
ed21175591 | ||
|
|
9985dfd26a | ||
|
|
a009203816 | ||
|
|
8542507ee5 | ||
|
|
a3e9140788 | ||
|
|
0d3f7a2b82 | ||
|
|
bcfc633bfa | ||
|
|
33e5930c97 | ||
|
|
fff386261d | ||
|
|
723f9ad3ee | ||
|
|
2b7243bd37 | ||
|
|
81367a6bbc | ||
|
|
156c18e1ad | ||
|
|
ffc1a81b83 | ||
|
|
dd04e3eb11 | ||
|
|
6d9846a9e5 | ||
|
|
3cd601b370 | ||
|
|
44ef8c884f | ||
|
|
c68b3464da | ||
|
|
045b05cd1b | ||
|
|
6a4d8ec410 | ||
|
|
f23390cf0e | ||
|
|
ebc313c768 | ||
|
|
441517dd7c | ||
|
|
31bd2dcdb4 | ||
|
|
6292d93867 |
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -1303,7 +1303,6 @@ dependencies = [
|
||||
"futures",
|
||||
"http 1.1.0",
|
||||
"indexmap 2.0.1",
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
|
||||
@@ -28,7 +28,6 @@ flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
indexmap.workspace = true
|
||||
itertools.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
|
||||
@@ -60,16 +60,12 @@ use utils::failpoint_support;
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
|
||||
const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
|
||||
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
|
||||
|
||||
Ok(if arg.starts_with("http") {
|
||||
arg
|
||||
fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
if arg.starts_with("http") {
|
||||
Ok(arg.trim_end_matches('/').to_string())
|
||||
} else {
|
||||
FALLBACK_PG_EXT_GATEWAY_BASE_URL
|
||||
Ok("http://pg-ext-s3-gateway".to_string())
|
||||
}
|
||||
.to_owned())
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -78,10 +74,8 @@ struct Cli {
|
||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||
pub pgbin: String,
|
||||
|
||||
/// The base URL for the remote extension storage proxy gateway.
|
||||
/// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
|
||||
pub remote_ext_base_url: Option<String>,
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
||||
pub remote_ext_config: Option<String>,
|
||||
|
||||
/// The port to bind the external listening HTTP server to. Clients running
|
||||
/// outside the compute will talk to the compute through this port. Keep
|
||||
@@ -170,7 +164,7 @@ fn main() -> Result<()> {
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
external_http_port: cli.external_http_port,
|
||||
internal_http_port: cli.internal_http_port,
|
||||
remote_ext_base_url: cli.remote_ext_base_url.clone(),
|
||||
ext_remote_storage: cli.remote_ext_config.clone(),
|
||||
resize_swap_on_bind: cli.resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
|
||||
#[cfg(target_os = "linux")]
|
||||
@@ -271,18 +265,4 @@ mod test {
|
||||
fn verify_cli() {
|
||||
Cli::command().debug_assert()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_pg_ext_gateway_base_url() {
|
||||
let arg = "http://pg-ext-s3-gateway2";
|
||||
let result = super::parse_remote_ext_base_url(arg).unwrap();
|
||||
assert_eq!(result, arg);
|
||||
|
||||
let arg = "pg-ext-s3-gateway";
|
||||
let result = super::parse_remote_ext_base_url(arg).unwrap();
|
||||
assert_eq!(
|
||||
result,
|
||||
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,7 +348,6 @@ async fn run_dump_restore(
|
||||
"--no-security-labels".to_string(),
|
||||
"--no-subscriptions".to_string(),
|
||||
"--no-tablespaces".to_string(),
|
||||
"--no-event-triggers".to_string(),
|
||||
// format
|
||||
"--format".to_string(),
|
||||
"directory".to_string(),
|
||||
|
||||
@@ -11,7 +11,6 @@ use compute_api::spec::{
|
||||
use futures::StreamExt;
|
||||
use futures::future::join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use itertools::Itertools;
|
||||
use nix::sys::signal::{Signal, kill};
|
||||
use nix::unistd::Pid;
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -19,7 +18,7 @@ use postgres;
|
||||
use postgres::NoTls;
|
||||
use postgres::error::SqlState;
|
||||
use remote_storage::{DownloadError, RemotePath};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::os::unix::fs::{PermissionsExt, symlink};
|
||||
use std::path::Path;
|
||||
@@ -96,7 +95,7 @@ pub struct ComputeNodeParams {
|
||||
pub internal_http_port: u16,
|
||||
|
||||
/// the address of extension storage proxy gateway
|
||||
pub remote_ext_base_url: Option<String>,
|
||||
pub ext_remote_storage: Option<String>,
|
||||
}
|
||||
|
||||
/// Compute node info shared across several `compute_ctl` threads.
|
||||
@@ -330,39 +329,11 @@ struct StartVmMonitorResult {
|
||||
impl ComputeNode {
|
||||
pub fn new(params: ComputeNodeParams, config: ComputeConfig) -> Result<Self> {
|
||||
let connstr = params.connstr.as_str();
|
||||
let mut conn_conf = postgres::config::Config::from_str(connstr)
|
||||
let conn_conf = postgres::config::Config::from_str(connstr)
|
||||
.context("cannot build postgres config from connstr")?;
|
||||
let mut tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)
|
||||
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)
|
||||
.context("cannot build tokio postgres config from connstr")?;
|
||||
|
||||
// Users can set some configuration parameters per database with
|
||||
// ALTER DATABASE ... SET ...
|
||||
//
|
||||
// There are at least these parameters:
|
||||
//
|
||||
// - role=some_other_role
|
||||
// - default_transaction_read_only=on
|
||||
// - statement_timeout=1, i.e., 1ms, which will cause most of the queries to fail
|
||||
// - search_path=non_public_schema, this should be actually safe because
|
||||
// we don't call any functions in user databases, but better to always reset
|
||||
// it to public.
|
||||
//
|
||||
// that can affect `compute_ctl` and prevent it from properly configuring the database schema.
|
||||
// Unset them via connection string options before connecting to the database.
|
||||
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
|
||||
//
|
||||
// TODO(ololobus): we currently pass `-c default_transaction_read_only=off` from control plane
|
||||
// as well. After rolling out this code, we can remove this parameter from control plane.
|
||||
// In the meantime, double-passing is fine, the last value is applied.
|
||||
// See: <https://github.com/neondatabase/cloud/blob/133dd8c4dbbba40edfbad475bf6a45073ca63faf/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go#L70>
|
||||
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
|
||||
let options = match conn_conf.get_options() {
|
||||
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
|
||||
None => EXTRA_OPTIONS.to_string(),
|
||||
};
|
||||
conn_conf.options(&options);
|
||||
tokio_conn_conf.options(&options);
|
||||
|
||||
let mut new_state = ComputeState::new();
|
||||
if let Some(spec) = config.spec {
|
||||
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
@@ -1478,20 +1449,15 @@ impl ComputeNode {
|
||||
Err(e) => match e.code() {
|
||||
Some(&SqlState::INVALID_PASSWORD)
|
||||
| Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {
|
||||
// Connect with `zenith_admin` if `cloud_admin` could not authenticate
|
||||
// Connect with zenith_admin if cloud_admin could not authenticate
|
||||
info!(
|
||||
"cannot connect to Postgres: {}, retrying with 'zenith_admin' username",
|
||||
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
|
||||
e
|
||||
);
|
||||
let mut zenith_admin_conf = postgres::config::Config::from(conf.clone());
|
||||
zenith_admin_conf.application_name("compute_ctl:apply_config");
|
||||
zenith_admin_conf.user("zenith_admin");
|
||||
|
||||
// It doesn't matter what were the options before, here we just want
|
||||
// to connect and create a new superuser role.
|
||||
const ZENITH_OPTIONS: &str = "-c role=zenith_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
|
||||
zenith_admin_conf.options(ZENITH_OPTIONS);
|
||||
|
||||
let mut client =
|
||||
zenith_admin_conf.connect(NoTls)
|
||||
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
|
||||
@@ -1657,7 +1623,9 @@ impl ComputeNode {
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
let conf = self.get_tokio_conn_conf(Some("compute_ctl:reconfigure"));
|
||||
let mut conf =
|
||||
tokio_postgres::Config::from_str(self.params.connstr.as_str()).unwrap();
|
||||
conf.application_name("apply_config");
|
||||
let conf = Arc::new(conf);
|
||||
|
||||
let spec = Arc::new(spec.clone());
|
||||
@@ -1897,9 +1865,9 @@ LIMIT 100",
|
||||
real_ext_name: String,
|
||||
ext_path: RemotePath,
|
||||
) -> Result<u64, DownloadError> {
|
||||
let remote_ext_base_url =
|
||||
let ext_remote_storage =
|
||||
self.params
|
||||
.remote_ext_base_url
|
||||
.ext_remote_storage
|
||||
.as_ref()
|
||||
.ok_or(DownloadError::BadInput(anyhow::anyhow!(
|
||||
"Remote extensions storage is not configured",
|
||||
@@ -1961,7 +1929,7 @@ LIMIT 100",
|
||||
let download_size = extension_server::download_extension(
|
||||
&real_ext_name,
|
||||
&ext_path,
|
||||
remote_ext_base_url,
|
||||
ext_remote_storage,
|
||||
&self.params.pgbin,
|
||||
)
|
||||
.await
|
||||
@@ -1996,40 +1964,23 @@ LIMIT 100",
|
||||
tokio::spawn(conn);
|
||||
|
||||
// TODO: support other types of grants apart from schemas?
|
||||
|
||||
// check the role grants first - to gracefully handle read-replicas.
|
||||
let select = "SELECT privilege_type
|
||||
FROM pg_namespace
|
||||
JOIN LATERAL (SELECT * FROM aclexplode(nspacl) AS x) acl ON true
|
||||
JOIN pg_user users ON acl.grantee = users.usesysid
|
||||
WHERE users.usename = $1
|
||||
AND nspname = $2";
|
||||
let rows = db_client
|
||||
.query(select, &[role_name, schema_name])
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {select}"))?;
|
||||
|
||||
let already_granted: HashSet<String> = rows.into_iter().map(|row| row.get(0)).collect();
|
||||
|
||||
let grants = privileges
|
||||
.iter()
|
||||
.filter(|p| !already_granted.contains(p.as_str()))
|
||||
// should not be quoted as it's part of the command.
|
||||
// is already sanitized so it's ok
|
||||
.map(|p| p.as_str())
|
||||
.join(", ");
|
||||
|
||||
if !grants.is_empty() {
|
||||
let query = format!(
|
||||
"GRANT {} ON SCHEMA {} TO {}",
|
||||
privileges
|
||||
.iter()
|
||||
// should not be quoted as it's part of the command.
|
||||
// is already sanitized so it's ok
|
||||
.map(|p| p.as_str())
|
||||
.collect::<Vec<&'static str>>()
|
||||
.join(", "),
|
||||
// quote the schema and role name as identifiers to sanitize them.
|
||||
let schema_name = schema_name.pg_quote();
|
||||
let role_name = role_name.pg_quote();
|
||||
|
||||
let query = format!("GRANT {grants} ON SCHEMA {schema_name} TO {role_name}",);
|
||||
db_client
|
||||
.simple_query(&query)
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", query))?;
|
||||
}
|
||||
schema_name.pg_quote(),
|
||||
role_name.pg_quote(),
|
||||
);
|
||||
db_client
|
||||
.simple_query(&query)
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", query))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -2087,7 +2038,7 @@ LIMIT 100",
|
||||
&self,
|
||||
spec: &ComputeSpec,
|
||||
) -> Result<RemoteExtensionMetrics> {
|
||||
if self.params.remote_ext_base_url.is_none() {
|
||||
if self.params.ext_remote_storage.is_none() {
|
||||
return Ok(RemoteExtensionMetrics {
|
||||
num_ext_downloaded: 0,
|
||||
largest_ext_size: 0,
|
||||
|
||||
@@ -158,14 +158,14 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
|
||||
pub async fn download_extension(
|
||||
ext_name: &str,
|
||||
ext_path: &RemotePath,
|
||||
remote_ext_base_url: &str,
|
||||
ext_remote_storage: &str,
|
||||
pgbin: &str,
|
||||
) -> Result<u64> {
|
||||
info!("Download extension {:?} from {:?}", ext_name, ext_path);
|
||||
|
||||
// TODO add retry logic
|
||||
let download_buffer =
|
||||
match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {
|
||||
match download_extension_tar(ext_remote_storage, &ext_path.to_string()).await {
|
||||
Ok(buffer) => buffer,
|
||||
Err(error_message) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
@@ -272,8 +272,8 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
|
||||
// Do request to extension storage proxy, e.g.,
|
||||
// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
|
||||
// using HTTP GET and return the response body as bytes.
|
||||
async fn download_extension_tar(remote_ext_base_url: &str, ext_path: &str) -> Result<Bytes> {
|
||||
let uri = format!("{}/{}", remote_ext_base_url, ext_path);
|
||||
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
|
||||
let uri = format!("{}/{}", ext_remote_storage, ext_path);
|
||||
let filename = Path::new(ext_path)
|
||||
.file_name()
|
||||
.unwrap_or_else(|| std::ffi::OsStr::new("unknown"))
|
||||
|
||||
@@ -22,7 +22,7 @@ pub(in crate::http) async fn download_extension(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
// Don't even try to download extensions if no remote storage is configured
|
||||
if compute.params.remote_ext_base_url.is_none() {
|
||||
if compute.params.ext_remote_storage.is_none() {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"remote storage is not configured",
|
||||
|
||||
@@ -644,10 +644,9 @@ struct EndpointStartCmdArgs {
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
help = "Configure the remote extensions storage proxy gateway URL to request for extensions.",
|
||||
alias = "remote-ext-config"
|
||||
help = "Configure the remote extensions storage proxy gateway to request for extensions."
|
||||
)]
|
||||
remote_ext_base_url: Option<String>,
|
||||
remote_ext_config: Option<String>,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
@@ -1415,16 +1414,9 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
EndpointCmd::Start(args) => {
|
||||
let endpoint_id = &args.endpoint_id;
|
||||
let pageserver_id = args.endpoint_pageserver_id;
|
||||
let remote_ext_base_url = &args.remote_ext_base_url;
|
||||
let remote_ext_config = &args.remote_ext_config;
|
||||
|
||||
let default_generation = env
|
||||
.storage_controller
|
||||
.timelines_onto_safekeepers
|
||||
.then_some(1);
|
||||
let safekeepers_generation = args
|
||||
.safekeepers_generation
|
||||
.or(default_generation)
|
||||
.map(SafekeeperGeneration::new);
|
||||
let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new);
|
||||
// If --safekeepers argument is given, use only the listed
|
||||
// safekeeper nodes; otherwise all from the env.
|
||||
let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
|
||||
@@ -1518,7 +1510,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
safekeepers_generation,
|
||||
safekeepers,
|
||||
pageservers,
|
||||
remote_ext_base_url.as_ref(),
|
||||
remote_ext_config.as_ref(),
|
||||
stripe_size.0 as usize,
|
||||
args.create_test_user,
|
||||
args.start_timeout,
|
||||
|
||||
@@ -655,7 +655,7 @@ impl Endpoint {
|
||||
safekeepers_generation: Option<SafekeeperGeneration>,
|
||||
safekeepers: Vec<NodeId>,
|
||||
pageservers: Vec<(Host, u16)>,
|
||||
remote_ext_base_url: Option<&String>,
|
||||
remote_ext_config: Option<&String>,
|
||||
shard_stripe_size: usize,
|
||||
create_test_user: bool,
|
||||
start_timeout: Duration,
|
||||
@@ -825,8 +825,8 @@ impl Endpoint {
|
||||
.stderr(logfile.try_clone()?)
|
||||
.stdout(logfile);
|
||||
|
||||
if let Some(remote_ext_base_url) = remote_ext_base_url {
|
||||
cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
|
||||
if let Some(remote_ext_config) = remote_ext_config {
|
||||
cmd.args(["--remote-ext-config", remote_ext_config]);
|
||||
}
|
||||
|
||||
let child = cmd.spawn()?;
|
||||
|
||||
@@ -10,8 +10,7 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hyper0::Uri;
|
||||
use nix::unistd::Pid;
|
||||
use pageserver_api::controller_api::{
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest,
|
||||
SafekeeperSchedulingPolicyRequest, SkSchedulingPolicy, TenantCreateRequest,
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||
TenantCreateResponse, TenantLocateResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
@@ -21,7 +20,7 @@ use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Method, Response};
|
||||
use reqwest::Method;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::Command;
|
||||
@@ -571,11 +570,6 @@ impl StorageController {
|
||||
let peer_jwt_token = encode_from_key_file(&peer_claims, private_key)
|
||||
.expect("failed to generate jwt token");
|
||||
args.push(format!("--peer-jwt-token={peer_jwt_token}"));
|
||||
|
||||
let claims = Claims::new(None, Scope::SafekeeperData);
|
||||
let jwt_token =
|
||||
encode_from_key_file(&claims, private_key).expect("failed to generate jwt token");
|
||||
args.push(format!("--safekeeper-jwt-token={jwt_token}"));
|
||||
}
|
||||
|
||||
if let Some(public_key) = &self.public_key {
|
||||
@@ -620,10 +614,6 @@ impl StorageController {
|
||||
self.env.base_data_dir.display()
|
||||
));
|
||||
|
||||
if self.env.safekeepers.iter().any(|sk| sk.auth_enabled) && self.private_key.is_none() {
|
||||
anyhow::bail!("Safekeeper set up for auth but no private key specified");
|
||||
}
|
||||
|
||||
if self.config.timelines_onto_safekeepers {
|
||||
args.push("--timelines-onto-safekeepers".to_string());
|
||||
}
|
||||
@@ -650,10 +640,6 @@ impl StorageController {
|
||||
)
|
||||
.await?;
|
||||
|
||||
if self.config.timelines_onto_safekeepers {
|
||||
self.register_safekeepers().await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -757,23 +743,6 @@ impl StorageController {
|
||||
where
|
||||
RQ: Serialize + Sized,
|
||||
RS: DeserializeOwned + Sized,
|
||||
{
|
||||
let response = self.dispatch_inner(method, path, body).await?;
|
||||
Ok(response
|
||||
.json()
|
||||
.await
|
||||
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
|
||||
}
|
||||
|
||||
/// Simple HTTP request wrapper for calling into storage controller
|
||||
async fn dispatch_inner<RQ>(
|
||||
&self,
|
||||
method: reqwest::Method,
|
||||
path: String,
|
||||
body: Option<RQ>,
|
||||
) -> anyhow::Result<Response>
|
||||
where
|
||||
RQ: Serialize + Sized,
|
||||
{
|
||||
// In the special case of the `storage_controller start` subcommand, we wish
|
||||
// to use the API endpoint of the newly started storage controller in order
|
||||
@@ -816,31 +785,10 @@ impl StorageController {
|
||||
let response = builder.send().await?;
|
||||
let response = response.error_from_body().await?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Register the safekeepers in the storage controller
|
||||
#[instrument(skip(self))]
|
||||
async fn register_safekeepers(&self) -> anyhow::Result<()> {
|
||||
for sk in self.env.safekeepers.iter() {
|
||||
let sk_id = sk.id;
|
||||
let body = serde_json::json!({
|
||||
"id": sk_id,
|
||||
"created_at": "2023-10-25T09:11:25Z",
|
||||
"updated_at": "2024-08-28T11:32:43Z",
|
||||
"region_id": "aws-us-east-2",
|
||||
"host": "127.0.0.1",
|
||||
"port": sk.pg_port,
|
||||
"http_port": sk.http_port,
|
||||
"https_port": sk.https_port,
|
||||
"version": 5957,
|
||||
"availability_zone_id": format!("us-east-2b-{sk_id}"),
|
||||
});
|
||||
self.upsert_safekeeper(sk_id, body).await?;
|
||||
self.safekeeper_scheduling_policy(sk_id, SkSchedulingPolicy::Active)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
Ok(response
|
||||
.json()
|
||||
.await
|
||||
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
|
||||
}
|
||||
|
||||
/// Call into the attach_hook API, for use before handing out attachments to pageservers
|
||||
@@ -868,42 +816,6 @@ impl StorageController {
|
||||
Ok(response.generation)
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn upsert_safekeeper(
|
||||
&self,
|
||||
node_id: NodeId,
|
||||
request: serde_json::Value,
|
||||
) -> anyhow::Result<()> {
|
||||
let resp = self
|
||||
.dispatch_inner::<serde_json::Value>(
|
||||
Method::POST,
|
||||
format!("control/v1/safekeeper/{node_id}"),
|
||||
Some(request),
|
||||
)
|
||||
.await?;
|
||||
if !resp.status().is_success() {
|
||||
anyhow::bail!(
|
||||
"setting scheduling policy unsuccessful for safekeeper {node_id}: {}",
|
||||
resp.status()
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn safekeeper_scheduling_policy(
|
||||
&self,
|
||||
node_id: NodeId,
|
||||
scheduling_policy: SkSchedulingPolicy,
|
||||
) -> anyhow::Result<()> {
|
||||
self.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
|
||||
Method::POST,
|
||||
format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
|
||||
Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn inspect(
|
||||
&self,
|
||||
|
||||
@@ -182,7 +182,6 @@ pub struct ConfigToml {
|
||||
pub tracing: Option<Tracing>,
|
||||
pub enable_tls_page_service_api: bool,
|
||||
pub dev_mode: bool,
|
||||
pub timeline_import_config: TimelineImportConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -301,12 +300,6 @@ impl From<OtelExporterProtocol> for tracing_utils::Protocol {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct TimelineImportConfig {
|
||||
pub import_job_concurrency: NonZeroUsize,
|
||||
pub import_job_soft_size_limit: NonZeroUsize,
|
||||
}
|
||||
|
||||
pub mod statvfs {
|
||||
pub mod mock {
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -666,10 +659,6 @@ impl Default for ConfigToml {
|
||||
tracing: None,
|
||||
enable_tls_page_service_api: false,
|
||||
dev_mode: false,
|
||||
timeline_import_config: TimelineImportConfig {
|
||||
import_job_concurrency: NonZeroUsize::new(128).unwrap(),
|
||||
import_job_soft_size_limit: NonZeroUsize::new(1024 * 1024 * 1024).unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ impl std::fmt::Display for RateLimitStats {
|
||||
}
|
||||
|
||||
impl RateLimit {
|
||||
pub const fn new(interval: Duration) -> Self {
|
||||
pub fn new(interval: Duration) -> Self {
|
||||
Self {
|
||||
last: None,
|
||||
interval,
|
||||
|
||||
@@ -230,8 +230,6 @@ pub struct PageServerConf {
|
||||
/// such as authentication requirements for HTTP and PostgreSQL APIs.
|
||||
/// This is insecure and should only be used in development environments.
|
||||
pub dev_mode: bool,
|
||||
|
||||
pub timeline_import_config: pageserver_api::config::TimelineImportConfig,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -406,7 +404,6 @@ impl PageServerConf {
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
timeline_import_config,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -460,7 +457,6 @@ impl PageServerConf {
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
timeline_import_config,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
|
||||
@@ -94,23 +94,10 @@ impl Header {
|
||||
pub enum WriteBlobError {
|
||||
#[error(transparent)]
|
||||
Flush(FlushTaskError),
|
||||
#[error("blob too large ({len} bytes)")]
|
||||
BlobTooLarge { len: usize },
|
||||
#[error(transparent)]
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
|
||||
impl WriteBlobError {
|
||||
pub fn is_cancel(&self) -> bool {
|
||||
match self {
|
||||
WriteBlobError::Flush(e) => e.is_cancel(),
|
||||
WriteBlobError::Other(_) => false,
|
||||
}
|
||||
}
|
||||
pub fn into_anyhow(self) -> anyhow::Error {
|
||||
match self {
|
||||
WriteBlobError::Flush(e) => e.into_anyhow(),
|
||||
WriteBlobError::Other(e) => e,
|
||||
}
|
||||
}
|
||||
WriteBlobRaw(anyhow::Error),
|
||||
}
|
||||
|
||||
impl BlockCursor<'_> {
|
||||
@@ -340,9 +327,7 @@ where
|
||||
return (
|
||||
(
|
||||
io_buf.slice_len(),
|
||||
Err(WriteBlobError::Other(anyhow::anyhow!(
|
||||
"blob too large ({len} bytes)"
|
||||
))),
|
||||
Err(WriteBlobError::BlobTooLarge { len }),
|
||||
),
|
||||
srcbuf,
|
||||
);
|
||||
@@ -406,7 +391,7 @@ where
|
||||
// Verify the header, to ensure we don't write invalid/corrupt data.
|
||||
let header = match Header::decode(&raw_with_header)
|
||||
.context("decoding blob header")
|
||||
.map_err(WriteBlobError::Other)
|
||||
.map_err(WriteBlobError::WriteBlobRaw)
|
||||
{
|
||||
Ok(header) => header,
|
||||
Err(err) => return (raw_with_header, Err(err)),
|
||||
@@ -416,7 +401,7 @@ where
|
||||
let raw_len = raw_with_header.len();
|
||||
return (
|
||||
raw_with_header,
|
||||
Err(WriteBlobError::Other(anyhow::anyhow!(
|
||||
Err(WriteBlobError::WriteBlobRaw(anyhow::anyhow!(
|
||||
"header length mismatch: {header_total_len} != {raw_len}"
|
||||
))),
|
||||
);
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
pub mod batch_split_writer;
|
||||
pub mod delta_layer;
|
||||
pub mod errors;
|
||||
pub mod filter_iterator;
|
||||
pub mod image_layer;
|
||||
pub mod inmemory_layer;
|
||||
|
||||
@@ -10,7 +10,6 @@ use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
use utils::shard::TenantShardId;
|
||||
|
||||
use super::errors::PutError;
|
||||
use super::layer::S3_UPLOAD_LIMIT;
|
||||
use super::{
|
||||
DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer,
|
||||
@@ -236,7 +235,7 @@ impl<'a> SplitImageLayerWriter<'a> {
|
||||
key: Key,
|
||||
img: Bytes,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
) -> anyhow::Result<()> {
|
||||
// The current estimation is an upper bound of the space that the key/image could take
|
||||
// because we did not consider compression in this estimation. The resulting image layer
|
||||
// could be smaller than the target size.
|
||||
@@ -254,8 +253,7 @@ impl<'a> SplitImageLayerWriter<'a> {
|
||||
self.cancel.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(PutError::Other)?;
|
||||
.await?;
|
||||
let prev_image_writer = std::mem::replace(&mut self.inner, next_image_writer);
|
||||
self.batches.add_unfinished_image_writer(
|
||||
prev_image_writer,
|
||||
@@ -348,7 +346,7 @@ impl<'a> SplitDeltaLayerWriter<'a> {
|
||||
lsn: Lsn,
|
||||
val: Value,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
) -> anyhow::Result<()> {
|
||||
// The current estimation is key size plus LSN size plus value size estimation. This is not an accurate
|
||||
// number, and therefore the final layer size could be a little bit larger or smaller than the target.
|
||||
//
|
||||
@@ -368,8 +366,7 @@ impl<'a> SplitDeltaLayerWriter<'a> {
|
||||
self.cancel.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(PutError::Other)?,
|
||||
.await?,
|
||||
));
|
||||
}
|
||||
let (_, inner) = self.inner.as_mut().unwrap();
|
||||
@@ -389,8 +386,7 @@ impl<'a> SplitDeltaLayerWriter<'a> {
|
||||
self.cancel.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(PutError::Other)?;
|
||||
.await?;
|
||||
let (start_key, prev_delta_writer) =
|
||||
self.inner.replace((key, next_delta_writer)).unwrap();
|
||||
self.batches.add_unfinished_delta_writer(
|
||||
@@ -400,11 +396,11 @@ impl<'a> SplitDeltaLayerWriter<'a> {
|
||||
);
|
||||
} else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
|
||||
// We have to produce a very large file b/c a key is updated too often.
|
||||
return Err(PutError::Other(anyhow::anyhow!(
|
||||
anyhow::bail!(
|
||||
"a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced",
|
||||
key,
|
||||
inner.estimated_size()
|
||||
)));
|
||||
);
|
||||
}
|
||||
}
|
||||
self.last_key_written = key;
|
||||
|
||||
@@ -55,7 +55,6 @@ use utils::bin_ser::SerializeError;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::errors::PutError;
|
||||
use super::{
|
||||
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
||||
ValuesReconstructState,
|
||||
@@ -478,15 +477,12 @@ impl DeltaLayerWriterInner {
|
||||
lsn: Lsn,
|
||||
val: Value,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
) -> anyhow::Result<()> {
|
||||
let (_, res) = self
|
||||
.put_value_bytes(
|
||||
key,
|
||||
lsn,
|
||||
Value::ser(&val)
|
||||
.map_err(anyhow::Error::new)
|
||||
.map_err(PutError::Other)?
|
||||
.slice_len(),
|
||||
Value::ser(&val)?.slice_len(),
|
||||
val.will_init(),
|
||||
ctx,
|
||||
)
|
||||
@@ -501,7 +497,7 @@ impl DeltaLayerWriterInner {
|
||||
val: FullSlice<Buf>,
|
||||
will_init: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), PutError>)
|
||||
) -> (FullSlice<Buf>, anyhow::Result<()>)
|
||||
where
|
||||
Buf: IoBuf + Send,
|
||||
{
|
||||
@@ -517,24 +513,19 @@ impl DeltaLayerWriterInner {
|
||||
.blob_writer
|
||||
.write_blob_maybe_compressed(val, ctx, compression)
|
||||
.await;
|
||||
let res = res.map_err(PutError::WriteBlob);
|
||||
let off = match res {
|
||||
Ok((off, _)) => off,
|
||||
Err(e) => return (val, Err(e)),
|
||||
Err(e) => return (val, Err(anyhow::anyhow!(e))),
|
||||
};
|
||||
|
||||
let blob_ref = BlobRef::new(off, will_init);
|
||||
|
||||
let delta_key = DeltaKey::from_key_lsn(&key, lsn);
|
||||
let res = self
|
||||
.tree
|
||||
.append(&delta_key.0, blob_ref.0)
|
||||
.map_err(anyhow::Error::new)
|
||||
.map_err(PutError::Other);
|
||||
let res = self.tree.append(&delta_key.0, blob_ref.0);
|
||||
|
||||
self.num_keys += 1;
|
||||
|
||||
(val, res)
|
||||
(val, res.map_err(|e| anyhow::anyhow!(e)))
|
||||
}
|
||||
|
||||
fn size(&self) -> u64 {
|
||||
@@ -703,7 +694,7 @@ impl DeltaLayerWriter {
|
||||
lsn: Lsn,
|
||||
val: Value,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
) -> anyhow::Result<()> {
|
||||
self.inner
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
@@ -718,7 +709,7 @@ impl DeltaLayerWriter {
|
||||
val: FullSlice<Buf>,
|
||||
will_init: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), PutError>)
|
||||
) -> (FullSlice<Buf>, anyhow::Result<()>)
|
||||
where
|
||||
Buf: IoBuf + Send,
|
||||
{
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
use crate::tenant::blob_io::WriteBlobError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum PutError {
|
||||
#[error(transparent)]
|
||||
WriteBlob(WriteBlobError),
|
||||
#[error(transparent)]
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
|
||||
impl PutError {
|
||||
pub fn is_cancel(&self) -> bool {
|
||||
match self {
|
||||
PutError::WriteBlob(e) => e.is_cancel(),
|
||||
PutError::Other(_) => false,
|
||||
}
|
||||
}
|
||||
pub fn into_anyhow(self) -> anyhow::Error {
|
||||
match self {
|
||||
PutError::WriteBlob(e) => e.into_anyhow(),
|
||||
PutError::Other(e) => e,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -53,7 +53,6 @@ use utils::bin_ser::SerializeError;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::errors::PutError;
|
||||
use super::layer_name::ImageLayerName;
|
||||
use super::{
|
||||
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
||||
@@ -843,14 +842,8 @@ impl ImageLayerWriterInner {
|
||||
key: Key,
|
||||
img: Bytes,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
if !self.key_range.contains(&key) {
|
||||
return Err(PutError::Other(anyhow::anyhow!(
|
||||
"key {:?} not in range {:?}",
|
||||
key,
|
||||
self.key_range
|
||||
)));
|
||||
}
|
||||
) -> anyhow::Result<()> {
|
||||
ensure!(self.key_range.contains(&key));
|
||||
let compression = self.conf.image_compression;
|
||||
let uncompressed_len = img.len() as u64;
|
||||
self.uncompressed_bytes += uncompressed_len;
|
||||
@@ -860,7 +853,7 @@ impl ImageLayerWriterInner {
|
||||
.write_blob_maybe_compressed(img.slice_len(), ctx, compression)
|
||||
.await;
|
||||
// TODO: re-use the buffer for `img` further upstack
|
||||
let (off, compression_info) = res.map_err(PutError::WriteBlob)?;
|
||||
let (off, compression_info) = res?;
|
||||
if compression_info.compressed_size.is_some() {
|
||||
// The image has been considered for compression at least
|
||||
self.uncompressed_bytes_eligible += uncompressed_len;
|
||||
@@ -872,10 +865,7 @@ impl ImageLayerWriterInner {
|
||||
|
||||
let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
|
||||
key.write_to_byte_slice(&mut keybuf);
|
||||
self.tree
|
||||
.append(&keybuf, off)
|
||||
.map_err(anyhow::Error::new)
|
||||
.map_err(PutError::Other)?;
|
||||
self.tree.append(&keybuf, off)?;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
@@ -1095,7 +1085,7 @@ impl ImageLayerWriter {
|
||||
key: Key,
|
||||
img: Bytes,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PutError> {
|
||||
) -> anyhow::Result<()> {
|
||||
self.inner.as_mut().unwrap().put_image(key, img, ctx).await
|
||||
}
|
||||
|
||||
|
||||
@@ -340,7 +340,7 @@ pub(crate) fn log_compaction_error(
|
||||
} else {
|
||||
match level {
|
||||
Level::ERROR if degrade_to_warning => warn!("Compaction failed and discarded: {err:#}"),
|
||||
Level::ERROR => error!("Compaction failed: {err:?}"),
|
||||
Level::ERROR => error!("Compaction failed: {err:#}"),
|
||||
Level::INFO => info!("Compaction failed: {err:#}"),
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
|
||||
@@ -987,16 +987,6 @@ impl From<PageReconstructError> for CreateImageLayersError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<super::storage_layer::errors::PutError> for CreateImageLayersError {
|
||||
fn from(e: super::storage_layer::errors::PutError) -> Self {
|
||||
if e.is_cancel() {
|
||||
CreateImageLayersError::Cancelled
|
||||
} else {
|
||||
CreateImageLayersError::Other(e.into_anyhow())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GetVectoredError> for CreateImageLayersError {
|
||||
fn from(e: GetVectoredError) -> Self {
|
||||
match e {
|
||||
@@ -2127,14 +2117,22 @@ impl Timeline {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// Regardless of whether we're going to try_freeze_and_flush
|
||||
// or not, stop ingesting any more data.
|
||||
// or not, stop ingesting any more data. Walreceiver only provides
|
||||
// cancellation but no "wait until gone", because it uses the Timeline::gate.
|
||||
// So, only after the self.gate.close() below will we know for sure that
|
||||
// no walreceiver tasks are left.
|
||||
// For `try_freeze_and_flush=true`, this means that we might still be ingesting
|
||||
// data during the call to `self.freeze_and_flush()` below.
|
||||
// That's not ideal, but, we don't have the concept of a ChildGuard,
|
||||
// which is what we'd need to properly model early shutdown of the walreceiver
|
||||
// task sub-tree before the other Timeline task sub-trees.
|
||||
let walreceiver = self.walreceiver.lock().unwrap().take();
|
||||
tracing::debug!(
|
||||
is_some = walreceiver.is_some(),
|
||||
"Waiting for WalReceiverManager..."
|
||||
);
|
||||
if let Some(walreceiver) = walreceiver {
|
||||
walreceiver.shutdown().await;
|
||||
walreceiver.cancel();
|
||||
}
|
||||
// ... and inform any waiters for newer LSNs that there won't be any.
|
||||
self.last_record_lsn.shutdown();
|
||||
@@ -5925,16 +5923,6 @@ impl From<layer_manager::Shutdown> for CompactionError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<super::storage_layer::errors::PutError> for CompactionError {
|
||||
fn from(e: super::storage_layer::errors::PutError) -> Self {
|
||||
if e.is_cancel() {
|
||||
CompactionError::ShuttingDown
|
||||
} else {
|
||||
CompactionError::Other(e.into_anyhow())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(serde::Serialize)]
|
||||
struct RecordedDuration(#[serde_as(as = "serde_with::DurationMicroSeconds")] Duration);
|
||||
|
||||
@@ -1277,8 +1277,6 @@ impl Timeline {
|
||||
return Ok(CompactionOutcome::YieldForL0);
|
||||
}
|
||||
|
||||
let gc_cutoff = *self.applied_gc_cutoff_lsn.read();
|
||||
|
||||
// 2. Repartition and create image layers if necessary
|
||||
match self
|
||||
.repartition(
|
||||
@@ -1289,7 +1287,7 @@ impl Timeline {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(((dense_partitioning, sparse_partitioning), lsn)) if lsn >= gc_cutoff => {
|
||||
Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
|
||||
// Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
|
||||
let image_ctx = RequestContextBuilder::from(ctx)
|
||||
.access_stats_behavior(AccessStatsBehavior::Skip)
|
||||
@@ -1343,10 +1341,6 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(_) => {
|
||||
info!("skipping repartitioning due to image compaction LSN being below GC cutoff");
|
||||
}
|
||||
|
||||
// Suppress errors when cancelled.
|
||||
Err(_) if self.cancel.is_cancelled() => {}
|
||||
Err(err) if err.is_cancel() => {}
|
||||
@@ -2210,7 +2204,8 @@ impl Timeline {
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.put_value(key, lsn, value, ctx)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
} else {
|
||||
let owner = self.shard_identity.get_shard_number(&key);
|
||||
|
||||
@@ -3612,13 +3607,6 @@ impl Timeline {
|
||||
last_key = Some(key);
|
||||
}
|
||||
accumulated_values.push((key, lsn, val));
|
||||
|
||||
if accumulated_values.len() >= 65536 {
|
||||
// Assume all of them are images, that would be 512MB of data in memory for a single key.
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"too many values for a single key, giving up gc-compaction"
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
let last_key: &mut Key = last_key.as_mut().unwrap();
|
||||
stat.on_unique_key_visited(); // TODO: adjust statistics for partial compaction
|
||||
|
||||
@@ -149,7 +149,14 @@ pub async fn doit(
|
||||
}
|
||||
.await?;
|
||||
|
||||
flow::run(timeline.clone(), control_file, storage.clone(), ctx).await?;
|
||||
flow::run(
|
||||
timeline.clone(),
|
||||
base_lsn,
|
||||
control_file,
|
||||
storage.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Communicate that shard is done.
|
||||
|
||||
@@ -34,9 +34,7 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, ensure};
|
||||
use bytes::Bytes;
|
||||
use futures::stream::FuturesOrdered;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::TimelineImportConfig;
|
||||
use pageserver_api::key::{
|
||||
CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, Key, TWOPHASEDIR_KEY, rel_block_to_key,
|
||||
rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
|
||||
@@ -48,9 +46,8 @@ use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::relfile_utils::parse_relfilename;
|
||||
use postgres_ffi::{BLCKSZ, pg_constants};
|
||||
use remote_storage::RemotePath;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::{debug, instrument};
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::{Instrument, debug, info_span, instrument};
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -66,40 +63,38 @@ use crate::tenant::storage_layer::{ImageLayerWriter, Layer};
|
||||
|
||||
pub async fn run(
|
||||
timeline: Arc<Timeline>,
|
||||
pgdata_lsn: Lsn,
|
||||
control_file: ControlFile,
|
||||
storage: RemoteStorageWrapper,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let planner = Planner {
|
||||
Flow {
|
||||
timeline,
|
||||
pgdata_lsn,
|
||||
control_file,
|
||||
storage: storage.clone(),
|
||||
shard: timeline.shard_identity,
|
||||
tasks: Vec::default(),
|
||||
};
|
||||
|
||||
let import_config = &timeline.conf.timeline_import_config;
|
||||
let plan = planner.plan(import_config).await?;
|
||||
plan.execute(timeline, import_config, ctx).await
|
||||
tasks: Vec::new(),
|
||||
storage,
|
||||
}
|
||||
.run(ctx)
|
||||
.await
|
||||
}
|
||||
|
||||
struct Planner {
|
||||
struct Flow {
|
||||
timeline: Arc<Timeline>,
|
||||
pgdata_lsn: Lsn,
|
||||
control_file: ControlFile,
|
||||
storage: RemoteStorageWrapper,
|
||||
shard: ShardIdentity,
|
||||
tasks: Vec<AnyImportTask>,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
struct Plan {
|
||||
jobs: Vec<ChunkProcessingJob>,
|
||||
}
|
||||
|
||||
impl Planner {
|
||||
/// Creates an import plan
|
||||
///
|
||||
/// This function is and must remain pure: given the same input, it will generate the same import plan.
|
||||
async fn plan(mut self, import_config: &TimelineImportConfig) -> anyhow::Result<Plan> {
|
||||
impl Flow {
|
||||
/// Perform the ingestion into [`Self::timeline`].
|
||||
/// Assumes the timeline is empty (= no layers).
|
||||
pub async fn run(mut self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
|
||||
|
||||
self.pgdata_lsn = pgdata_lsn;
|
||||
|
||||
let datadir = PgDataDir::new(&self.storage).await?;
|
||||
|
||||
// Import dbdir (00:00:00 keyspace)
|
||||
@@ -120,7 +115,7 @@ impl Planner {
|
||||
}
|
||||
|
||||
// Import SLRUs
|
||||
if self.shard.is_shard_zero() {
|
||||
if self.timeline.tenant_shard_id.is_shard_zero() {
|
||||
// pg_xact (01:00 keyspace)
|
||||
self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
|
||||
.await?;
|
||||
@@ -171,16 +166,14 @@ impl Planner {
|
||||
let mut last_end_key = Key::MIN;
|
||||
let mut current_chunk = Vec::new();
|
||||
let mut current_chunk_size: usize = 0;
|
||||
let mut jobs = Vec::new();
|
||||
let mut parallel_jobs = Vec::new();
|
||||
for task in std::mem::take(&mut self.tasks).into_iter() {
|
||||
if current_chunk_size + task.total_size()
|
||||
> import_config.import_job_soft_size_limit.into()
|
||||
{
|
||||
if current_chunk_size + task.total_size() > 1024 * 1024 * 1024 {
|
||||
let key_range = last_end_key..task.key_range().start;
|
||||
jobs.push(ChunkProcessingJob::new(
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
key_range.clone(),
|
||||
std::mem::take(&mut current_chunk),
|
||||
pgdata_lsn,
|
||||
&self,
|
||||
));
|
||||
last_end_key = key_range.end;
|
||||
current_chunk_size = 0;
|
||||
@@ -188,13 +181,45 @@ impl Planner {
|
||||
current_chunk_size += task.total_size();
|
||||
current_chunk.push(task);
|
||||
}
|
||||
jobs.push(ChunkProcessingJob::new(
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
last_end_key..Key::MAX,
|
||||
current_chunk,
|
||||
pgdata_lsn,
|
||||
&self,
|
||||
));
|
||||
|
||||
Ok(Plan { jobs })
|
||||
// Start all jobs simultaneosly
|
||||
let mut work = JoinSet::new();
|
||||
// TODO: semaphore?
|
||||
for job in parallel_jobs {
|
||||
let ctx: RequestContext =
|
||||
ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);
|
||||
work.spawn(async move { job.run(&ctx).await }.instrument(info_span!("parallel_job")));
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
while let Some(result) = work.join_next().await {
|
||||
match result {
|
||||
Ok(res) => {
|
||||
results.push(res);
|
||||
}
|
||||
Err(_joinset_err) => {
|
||||
results.push(Err(anyhow::anyhow!(
|
||||
"parallel job panicked or cancelled, check pageserver logs"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.iter().all(|r| r.is_ok()) {
|
||||
Ok(())
|
||||
} else {
|
||||
let mut msg = String::new();
|
||||
for result in results {
|
||||
if let Err(err) = result {
|
||||
msg.push_str(&format!("{err:?}\n\n"));
|
||||
}
|
||||
}
|
||||
bail!("Some parallel jobs failed:\n\n{msg}");
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(dboid=%db.dboid, tablespace=%db.spcnode, path=%db.path))]
|
||||
@@ -241,7 +266,7 @@ impl Planner {
|
||||
let end_key = rel_block_to_key(file.rel_tag, start_blk + (len / 8192) as u32);
|
||||
self.tasks
|
||||
.push(AnyImportTask::RelBlocks(ImportRelBlocksTask::new(
|
||||
self.shard,
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&file.path,
|
||||
self.storage.clone(),
|
||||
@@ -264,7 +289,7 @@ impl Planner {
|
||||
}
|
||||
|
||||
async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {
|
||||
assert!(self.shard.is_shard_zero());
|
||||
assert!(self.timeline.tenant_shard_id.is_shard_zero());
|
||||
|
||||
let segments = self.storage.listfilesindir(path).await?;
|
||||
let segments: Vec<(String, u32, usize)> = segments
|
||||
@@ -319,68 +344,6 @@ impl Planner {
|
||||
}
|
||||
}
|
||||
|
||||
impl Plan {
|
||||
async fn execute(
|
||||
self,
|
||||
timeline: Arc<Timeline>,
|
||||
import_config: &TimelineImportConfig,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut work = FuturesOrdered::new();
|
||||
let semaphore = Arc::new(Semaphore::new(import_config.import_job_concurrency.into()));
|
||||
|
||||
let jobs_in_plan = self.jobs.len();
|
||||
|
||||
let mut jobs = self.jobs.into_iter().enumerate().peekable();
|
||||
let mut results = Vec::new();
|
||||
|
||||
// Run import jobs concurrently up to the limit specified by the pageserver configuration.
|
||||
// Note that we process completed futures in the oreder of insertion. This will be the
|
||||
// building block for resuming imports across pageserver restarts or tenant migrations.
|
||||
while results.len() < jobs_in_plan {
|
||||
tokio::select! {
|
||||
permit = semaphore.clone().acquire_owned(), if jobs.peek().is_some() => {
|
||||
let permit = permit.expect("never closed");
|
||||
let (job_idx, job) = jobs.next().expect("we peeked");
|
||||
let job_timeline = timeline.clone();
|
||||
let ctx = ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);
|
||||
|
||||
work.push_back(tokio::task::spawn(async move {
|
||||
let _permit = permit;
|
||||
let res = job.run(job_timeline, &ctx).await;
|
||||
(job_idx, res)
|
||||
}));
|
||||
},
|
||||
maybe_complete_job_idx = work.next() => {
|
||||
match maybe_complete_job_idx {
|
||||
Some(Ok((_job_idx, res))) => {
|
||||
results.push(res);
|
||||
},
|
||||
Some(Err(_)) => {
|
||||
results.push(Err(anyhow::anyhow!(
|
||||
"parallel job panicked or cancelled, check pageserver logs"
|
||||
)));
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.iter().all(|r| r.is_ok()) {
|
||||
Ok(())
|
||||
} else {
|
||||
let mut msg = String::new();
|
||||
for result in results {
|
||||
if let Err(err) = result {
|
||||
msg.push_str(&format!("{err:?}\n\n"));
|
||||
}
|
||||
}
|
||||
bail!("Some parallel jobs failed:\n\n{msg}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// dbdir iteration tools
|
||||
//
|
||||
@@ -750,6 +713,7 @@ impl From<ImportSlruBlocksTask> for AnyImportTask {
|
||||
}
|
||||
|
||||
struct ChunkProcessingJob {
|
||||
timeline: Arc<Timeline>,
|
||||
range: Range<Key>,
|
||||
tasks: Vec<AnyImportTask>,
|
||||
|
||||
@@ -757,24 +721,25 @@ struct ChunkProcessingJob {
|
||||
}
|
||||
|
||||
impl ChunkProcessingJob {
|
||||
fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, pgdata_lsn: Lsn) -> Self {
|
||||
assert!(pgdata_lsn.is_valid());
|
||||
fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, env: &Flow) -> Self {
|
||||
assert!(env.pgdata_lsn.is_valid());
|
||||
Self {
|
||||
timeline: env.timeline.clone(),
|
||||
range,
|
||||
tasks,
|
||||
pgdata_lsn,
|
||||
pgdata_lsn: env.pgdata_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(self, timeline: Arc<Timeline>, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
async fn run(self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let mut writer = ImageLayerWriter::new(
|
||||
timeline.conf,
|
||||
timeline.timeline_id,
|
||||
timeline.tenant_shard_id,
|
||||
self.timeline.conf,
|
||||
self.timeline.timeline_id,
|
||||
self.timeline.tenant_shard_id,
|
||||
&self.range,
|
||||
self.pgdata_lsn,
|
||||
&timeline.gate,
|
||||
timeline.cancel.clone(),
|
||||
&self.timeline.gate,
|
||||
self.timeline.cancel.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -786,20 +751,24 @@ impl ChunkProcessingJob {
|
||||
|
||||
let resident_layer = if nimages > 0 {
|
||||
let (desc, path) = writer.finish(ctx).await?;
|
||||
Layer::finish_creating(timeline.conf, &timeline, desc, &path)?
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?
|
||||
} else {
|
||||
// dropping the writer cleans up
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// this is sharing the same code as create_image_layers
|
||||
let mut guard = timeline.layers.write().await;
|
||||
let mut guard = self.timeline.layers.write().await;
|
||||
guard
|
||||
.open_mut()?
|
||||
.track_new_image_layers(&[resident_layer.clone()], &timeline.metrics);
|
||||
.track_new_image_layers(&[resident_layer.clone()], &self.timeline.metrics);
|
||||
crate::tenant::timeline::drop_wlock(guard);
|
||||
|
||||
timeline
|
||||
// Schedule the layer for upload but don't add barriers such as
|
||||
// wait for completion or index upload, so we don't inhibit upload parallelism.
|
||||
// TODO: limit upload parallelism somehow (e.g. by limiting concurrency of jobs?)
|
||||
// TODO: or regulate parallelism by upload queue depth? Prob should happen at a higher level.
|
||||
self.timeline
|
||||
.remote_client
|
||||
.schedule_layer_file_upload(resident_layer)?;
|
||||
|
||||
|
||||
@@ -63,7 +63,6 @@ pub struct WalReceiver {
|
||||
/// All task spawned by [`WalReceiver::start`] and its children are sensitive to this token.
|
||||
/// It's a child token of [`Timeline`] so that timeline shutdown can cancel WalReceiver tasks early for `freeze_and_flush=true`.
|
||||
cancel: CancellationToken,
|
||||
task: tokio::task::JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl WalReceiver {
|
||||
@@ -80,7 +79,7 @@ impl WalReceiver {
|
||||
let loop_status = Arc::new(std::sync::RwLock::new(None));
|
||||
let manager_status = Arc::clone(&loop_status);
|
||||
let cancel = timeline.cancel.child_token();
|
||||
let task = WALRECEIVER_RUNTIME.spawn({
|
||||
WALRECEIVER_RUNTIME.spawn({
|
||||
let cancel = cancel.clone();
|
||||
async move {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -121,25 +120,14 @@ impl WalReceiver {
|
||||
Self {
|
||||
manager_status,
|
||||
cancel,
|
||||
task,
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip_all, level = tracing::Level::DEBUG)]
|
||||
pub async fn shutdown(self) {
|
||||
pub fn cancel(&self) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
debug!("cancelling walreceiver tasks");
|
||||
self.cancel.cancel();
|
||||
match self.task.await {
|
||||
Ok(()) => debug!("Shutdown success"),
|
||||
Err(je) if je.is_cancelled() => unreachable!("not used"),
|
||||
Err(je) if je.is_panic() => {
|
||||
// already logged by panic hook
|
||||
}
|
||||
Err(je) => {
|
||||
error!("shutdown walreceiver task join error: {je}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {
|
||||
|
||||
@@ -111,17 +111,13 @@ pub(crate) fn get() -> IoEngine {
|
||||
|
||||
use std::os::unix::prelude::FileExt;
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
#[cfg(target_os = "linux")]
|
||||
use {std::time::Duration, tracing::info};
|
||||
|
||||
use super::owned_buffers_io::io_buf_ext::FullSlice;
|
||||
use super::owned_buffers_io::slice::SliceMutExt;
|
||||
use super::{FileGuard, Metadata};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub(super) fn epoll_uring_error_to_std(
|
||||
e: tokio_epoll_uring::Error<std::io::Error>,
|
||||
) -> std::io::Error {
|
||||
fn epoll_uring_error_to_std(e: tokio_epoll_uring::Error<std::io::Error>) -> std::io::Error {
|
||||
match e {
|
||||
tokio_epoll_uring::Error::Op(e) => e,
|
||||
tokio_epoll_uring::Error::System(system) => {
|
||||
@@ -153,11 +149,7 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
let system = tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let (resources, res) =
|
||||
retry_ecanceled_once((file_guard, slice), |(file_guard, slice)| async {
|
||||
system.read(file_guard, offset, slice).await
|
||||
})
|
||||
.await;
|
||||
let (resources, res) = system.read(file_guard, offset, slice).await;
|
||||
(resources, res.map_err(epoll_uring_error_to_std))
|
||||
}
|
||||
}
|
||||
@@ -172,10 +164,7 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
let system = tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {
|
||||
system.fsync(file_guard).await
|
||||
})
|
||||
.await;
|
||||
let (resources, res) = system.fsync(file_guard).await;
|
||||
(resources, res.map_err(epoll_uring_error_to_std))
|
||||
}
|
||||
}
|
||||
@@ -193,10 +182,7 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
let system = tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {
|
||||
system.fdatasync(file_guard).await
|
||||
})
|
||||
.await;
|
||||
let (resources, res) = system.fdatasync(file_guard).await;
|
||||
(resources, res.map_err(epoll_uring_error_to_std))
|
||||
}
|
||||
}
|
||||
@@ -215,10 +201,7 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
let system = tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let (resources, res) = retry_ecanceled_once(file_guard, |file_guard| async {
|
||||
system.statx(file_guard).await
|
||||
})
|
||||
.await;
|
||||
let (resources, res) = system.statx(file_guard).await;
|
||||
(
|
||||
resources,
|
||||
res.map_err(epoll_uring_error_to_std).map(Metadata::from),
|
||||
@@ -241,7 +224,6 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
// TODO: ftruncate op for tokio-epoll-uring
|
||||
// Don't forget to use retry_ecanceled_once
|
||||
let res = file_guard.with_std_file(|std_file| std_file.set_len(len));
|
||||
(file_guard, res)
|
||||
}
|
||||
@@ -263,11 +245,8 @@ impl IoEngine {
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
let system = tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let ((file_guard, slice), res) = retry_ecanceled_once(
|
||||
(file_guard, buf.into_raw_slice()),
|
||||
async |(file_guard, buf)| system.write(file_guard, offset, buf).await,
|
||||
)
|
||||
.await;
|
||||
let ((file_guard, slice), res) =
|
||||
system.write(file_guard, offset, buf.into_raw_slice()).await;
|
||||
(
|
||||
(file_guard, FullSlice::must_new(slice)),
|
||||
res.map_err(epoll_uring_error_to_std),
|
||||
@@ -303,56 +282,6 @@ impl IoEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/// We observe in tests that stop pageserver with SIGTERM immediately after it was ingesting data,
|
||||
/// occasionally buffered writers fail (and get retried by BufferedWriter) with ECANCELED.
|
||||
/// The problem is believed to be a race condition in how io_uring handles punted async work (io-wq) and signals.
|
||||
/// Investigation ticket: <https://github.com/neondatabase/neon/issues/11446>
|
||||
///
|
||||
/// This function retries the operation once if it fails with ECANCELED.
|
||||
/// ONLY USE FOR IDEMPOTENT [`super::VirtualFile`] operations.
|
||||
#[cfg(target_os = "linux")]
|
||||
pub(super) async fn retry_ecanceled_once<F, Fut, T, V>(
|
||||
resources: T,
|
||||
f: F,
|
||||
) -> (T, Result<V, tokio_epoll_uring::Error<std::io::Error>>)
|
||||
where
|
||||
F: Fn(T) -> Fut,
|
||||
Fut: std::future::Future<Output = (T, Result<V, tokio_epoll_uring::Error<std::io::Error>>)>,
|
||||
T: Send,
|
||||
V: Send,
|
||||
{
|
||||
let (resources, res) = f(resources).await;
|
||||
let Err(e) = res else {
|
||||
return (resources, res);
|
||||
};
|
||||
let tokio_epoll_uring::Error::Op(err) = e else {
|
||||
return (resources, Err(e));
|
||||
};
|
||||
if err.raw_os_error() != Some(nix::libc::ECANCELED) {
|
||||
return (resources, Err(tokio_epoll_uring::Error::Op(err)));
|
||||
}
|
||||
{
|
||||
static RATE_LIMIT: std::sync::Mutex<utils::rate_limit::RateLimit> =
|
||||
std::sync::Mutex::new(utils::rate_limit::RateLimit::new(Duration::from_secs(1)));
|
||||
let mut guard = RATE_LIMIT.lock().unwrap();
|
||||
guard.call2(|rate_limit_stats| {
|
||||
info!(
|
||||
%rate_limit_stats, "ECANCELED observed, assuming it is due to a signal being received by the submitting thread, retrying after a delay; this message is rate-limited"
|
||||
);
|
||||
});
|
||||
drop(guard);
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100)).await; // something big enough to beat even heavily overcommitted CI runners
|
||||
let (resources, res) = f(resources).await;
|
||||
(resources, res)
|
||||
}
|
||||
|
||||
pub(super) fn panic_operation_must_be_idempotent() {
|
||||
panic!(
|
||||
"unsupported; io_engine may retry operations internally and thus needs them to be idempotent (retry_ecanceled_once)"
|
||||
)
|
||||
}
|
||||
|
||||
pub enum FeatureTestResult {
|
||||
PlatformPreferred(IoEngineKind),
|
||||
Worse {
|
||||
|
||||
@@ -110,23 +110,18 @@ impl OpenOptions {
|
||||
self
|
||||
}
|
||||
|
||||
/// Don't use, `O_APPEND` is not supported.
|
||||
pub fn append(&mut self, _append: bool) {
|
||||
super::io_engine::panic_operation_must_be_idempotent();
|
||||
}
|
||||
|
||||
pub(in crate::virtual_file) async fn open(&self, path: &Path) -> std::io::Result<OwnedFd> {
|
||||
match &self.inner {
|
||||
Inner::StdFs(x) => x.open(path).map(|file| file.into()),
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
let system = super::io_engine::tokio_epoll_uring_ext::thread_local_system().await;
|
||||
let (_, res) = super::io_engine::retry_ecanceled_once((), |()| async {
|
||||
let res = system.open(path, x).await;
|
||||
((), res)
|
||||
system.open(path, x).await.map_err(|e| match e {
|
||||
tokio_epoll_uring::Error::Op(e) => e,
|
||||
tokio_epoll_uring::Error::System(system) => {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, system)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
res.map_err(super::io_engine::epoll_uring_error_to_std)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -145,9 +140,6 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub fn custom_flags(mut self, flags: i32) -> Self {
|
||||
if flags & nix::libc::O_APPEND != 0 {
|
||||
super::io_engine::panic_operation_must_be_idempotent();
|
||||
}
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
let _ = x.custom_flags(flags);
|
||||
|
||||
@@ -247,19 +247,6 @@ pub enum FlushTaskError {
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
impl FlushTaskError {
|
||||
pub fn is_cancel(&self) -> bool {
|
||||
match self {
|
||||
FlushTaskError::Cancelled => true,
|
||||
}
|
||||
}
|
||||
pub fn into_anyhow(self) -> anyhow::Error {
|
||||
match self {
|
||||
FlushTaskError::Cancelled => anyhow::anyhow!(self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Buf, W> FlushBackgroundTask<Buf, W>
|
||||
where
|
||||
Buf: IoBufAligned + Send + Sync,
|
||||
|
||||
@@ -425,12 +425,15 @@ compact_prefetch_buffers(void)
|
||||
* point inside and outside PostgreSQL.
|
||||
*
|
||||
* This still does throw errors when it receives malformed responses from PS.
|
||||
*
|
||||
* When we're not called from CHECK_FOR_INTERRUPTS (indicated by
|
||||
* IsHandlingInterrupts) we also report we've ended prefetch receive work,
|
||||
* just in case state tracking was lost due to an error in the sync getPage
|
||||
* response code.
|
||||
*/
|
||||
void
|
||||
communicator_prefetch_pump_state(void)
|
||||
communicator_prefetch_pump_state(bool IsHandlingInterrupts)
|
||||
{
|
||||
START_PREFETCH_RECEIVE_WORK();
|
||||
|
||||
while (MyPState->ring_receive != MyPState->ring_flush)
|
||||
{
|
||||
NeonResponse *response;
|
||||
@@ -479,7 +482,9 @@ communicator_prefetch_pump_state(void)
|
||||
}
|
||||
}
|
||||
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
/* We never pump the prefetch state while handling other pages */
|
||||
if (!IsHandlingInterrupts)
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
|
||||
communicator_reconfigure_timeout_if_needed();
|
||||
}
|
||||
@@ -667,10 +672,9 @@ prefetch_wait_for(uint64 ring_index)
|
||||
|
||||
Assert(MyPState->ring_unused > ring_index);
|
||||
|
||||
START_PREFETCH_RECEIVE_WORK();
|
||||
|
||||
while (MyPState->ring_receive <= ring_index)
|
||||
{
|
||||
START_PREFETCH_RECEIVE_WORK();
|
||||
entry = GetPrfSlot(MyPState->ring_receive);
|
||||
|
||||
Assert(entry->status == PRFS_REQUESTED);
|
||||
@@ -679,18 +683,17 @@ prefetch_wait_for(uint64 ring_index)
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
}
|
||||
|
||||
if (result)
|
||||
{
|
||||
/* Check that slot is actually received (srver can be disconnected in prefetch_pump_state called from CHECK_FOR_INTERRUPTS */
|
||||
PrefetchRequest *slot = GetPrfSlot(ring_index);
|
||||
result = slot->status == PRFS_RECEIVED;
|
||||
return slot->status == PRFS_RECEIVED;
|
||||
}
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
|
||||
return result;
|
||||
return false;
|
||||
;
|
||||
}
|
||||
|
||||
@@ -717,7 +720,6 @@ prefetch_read(PrefetchRequest *slot)
|
||||
Assert(slot->status == PRFS_REQUESTED);
|
||||
Assert(slot->response == NULL);
|
||||
Assert(slot->my_ring_index == MyPState->ring_receive);
|
||||
Assert(readpage_reentrant_guard);
|
||||
|
||||
if (slot->status != PRFS_REQUESTED ||
|
||||
slot->response != NULL ||
|
||||
@@ -800,7 +802,6 @@ communicator_prefetch_receive(BufferTag tag)
|
||||
PrfHashEntry *entry;
|
||||
PrefetchRequest hashkey;
|
||||
|
||||
Assert(readpage_reentrant_guard);
|
||||
hashkey.buftag = tag;
|
||||
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
|
||||
if (entry != NULL && prefetch_wait_for(entry->slot->my_ring_index))
|
||||
@@ -820,12 +821,8 @@ communicator_prefetch_receive(BufferTag tag)
|
||||
void
|
||||
prefetch_on_ps_disconnect(void)
|
||||
{
|
||||
bool save_readpage_reentrant_guard = readpage_reentrant_guard;
|
||||
MyPState->ring_flush = MyPState->ring_unused;
|
||||
|
||||
/* Prohibit callig of prefetch_pump_state */
|
||||
START_PREFETCH_RECEIVE_WORK();
|
||||
|
||||
while (MyPState->ring_receive < MyPState->ring_unused)
|
||||
{
|
||||
PrefetchRequest *slot;
|
||||
@@ -854,9 +851,6 @@ prefetch_on_ps_disconnect(void)
|
||||
MyNeonCounters->getpage_prefetch_discards_total += 1;
|
||||
}
|
||||
|
||||
/* Restore guard */
|
||||
readpage_reentrant_guard = save_readpage_reentrant_guard;
|
||||
|
||||
/*
|
||||
* We can have gone into retry due to network error, so update stats with
|
||||
* the latest available
|
||||
@@ -2515,7 +2509,7 @@ communicator_processinterrupts(void)
|
||||
if (timeout_signaled)
|
||||
{
|
||||
if (!readpage_reentrant_guard && readahead_getpage_pull_timeout_ms > 0)
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(true);
|
||||
|
||||
timeout_signaled = false;
|
||||
communicator_reconfigure_timeout_if_needed();
|
||||
|
||||
@@ -44,7 +44,7 @@ extern int communicator_read_slru_segment(SlruKind kind, int64 segno,
|
||||
void *buffer);
|
||||
|
||||
extern void communicator_reconfigure_timeout_if_needed(void);
|
||||
extern void communicator_prefetch_pump_state(void);
|
||||
extern void communicator_prefetch_pump_state(bool IsHandlingInterrupts);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1179,7 +1179,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
blocknum += iterblocks;
|
||||
}
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -1218,7 +1218,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
|
||||
communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -1262,7 +1262,7 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
*/
|
||||
neon_log(SmgrTrace, "writeback noop");
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -1315,7 +1315,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
|
||||
}
|
||||
|
||||
/* Try to read PS results if they are available */
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);
|
||||
|
||||
@@ -1339,7 +1339,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
|
||||
/*
|
||||
* Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
|
||||
*/
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
|
||||
@@ -1449,7 +1449,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
nblocks, PG_IOV_MAX);
|
||||
|
||||
/* Try to read PS results if they are available */
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,
|
||||
request_lsns, nblocks);
|
||||
@@ -1480,7 +1480,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
/*
|
||||
* Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
|
||||
*/
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (forknum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
|
||||
@@ -1665,7 +1665,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
|
||||
|
||||
lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -1727,7 +1727,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
@@ -1902,7 +1902,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
|
||||
neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop");
|
||||
|
||||
communicator_prefetch_pump_state();
|
||||
communicator_prefetch_pump_state(false);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
|
||||
@@ -12,9 +12,9 @@ use tracing::{debug, warn};
|
||||
use crate::auth::password_hack::parse_endpoint_param;
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::{ReportableError, UserFacingError};
|
||||
use crate::metrics::{Metrics, SniGroup, SniKind};
|
||||
use crate::metrics::{Metrics, SniKind};
|
||||
use crate::proxy::NeonOptions;
|
||||
use crate::serverless::{AUTH_BROKER_SNI, SERVERLESS_DRIVER_SNI};
|
||||
use crate::serverless::SERVERLESS_DRIVER_SNI;
|
||||
use crate::types::{EndpointId, RoleName};
|
||||
|
||||
#[derive(Debug, Error, PartialEq, Eq, Clone)]
|
||||
@@ -65,7 +65,7 @@ pub(crate) fn endpoint_sni(sni: &str, common_names: &HashSet<String>) -> Option<
|
||||
if !common_names.contains(common_name) {
|
||||
return None;
|
||||
}
|
||||
if subdomain == SERVERLESS_DRIVER_SNI || subdomain == AUTH_BROKER_SNI {
|
||||
if subdomain == SERVERLESS_DRIVER_SNI {
|
||||
return None;
|
||||
}
|
||||
Some(EndpointId::from(subdomain))
|
||||
@@ -128,23 +128,22 @@ impl ComputeUserInfoMaybeEndpoint {
|
||||
|
||||
let metrics = Metrics::get();
|
||||
debug!(%user, "credentials");
|
||||
|
||||
let protocol = ctx.protocol();
|
||||
let kind = if sni.is_some() {
|
||||
if sni.is_some() {
|
||||
debug!("Connection with sni");
|
||||
SniKind::Sni
|
||||
metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni);
|
||||
} else if endpoint.is_some() {
|
||||
metrics
|
||||
.proxy
|
||||
.accepted_connections_by_sni
|
||||
.inc(SniKind::NoSni);
|
||||
debug!("Connection without sni");
|
||||
SniKind::NoSni
|
||||
} else {
|
||||
metrics
|
||||
.proxy
|
||||
.accepted_connections_by_sni
|
||||
.inc(SniKind::PasswordHack);
|
||||
debug!("Connection with password hack");
|
||||
SniKind::PasswordHack
|
||||
};
|
||||
|
||||
metrics
|
||||
.proxy
|
||||
.accepted_connections_by_sni
|
||||
.inc(SniGroup { protocol, kind });
|
||||
}
|
||||
|
||||
let options = NeonOptions::parse_params(params);
|
||||
|
||||
|
||||
@@ -115,8 +115,8 @@ pub struct ProxyMetrics {
|
||||
#[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
|
||||
pub allowed_vpc_endpoint_ids: Histogram<10>,
|
||||
|
||||
/// Number of connections, by the method we used to determine the endpoint.
|
||||
pub accepted_connections_by_sni: CounterVec<SniSet>,
|
||||
/// Number of connections (per sni).
|
||||
pub accepted_connections_by_sni: CounterVec<StaticLabelSet<SniKind>>,
|
||||
|
||||
/// Number of connection failures (per kind).
|
||||
pub connection_failures_total: CounterVec<StaticLabelSet<ConnectionFailureKind>>,
|
||||
@@ -342,20 +342,11 @@ pub enum LatencyExclusions {
|
||||
ClientCplaneComputeRetry,
|
||||
}
|
||||
|
||||
#[derive(LabelGroup)]
|
||||
#[label(set = SniSet)]
|
||||
pub struct SniGroup {
|
||||
pub protocol: Protocol,
|
||||
pub kind: SniKind,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
#[label(singleton = "kind")]
|
||||
pub enum SniKind {
|
||||
/// Domain name based routing. SNI for libpq/websockets. Host for HTTP
|
||||
Sni,
|
||||
/// Metadata based routing. `options` for libpq/websockets. Header for HTTP
|
||||
NoSni,
|
||||
/// Metadata based routing, using the password field.
|
||||
PasswordHack,
|
||||
}
|
||||
|
||||
|
||||
@@ -56,7 +56,6 @@ use crate::serverless::backend::PoolingBackend;
|
||||
use crate::serverless::http_util::{api_error_into_response, json_response};
|
||||
|
||||
pub(crate) const SERVERLESS_DRIVER_SNI: &str = "api";
|
||||
pub(crate) const AUTH_BROKER_SNI: &str = "apiauth";
|
||||
|
||||
pub async fn task_main(
|
||||
config: &'static ProxyConfig,
|
||||
|
||||
@@ -38,7 +38,7 @@ use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig};
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::{ErrorKind, ReportableError, UserFacingError};
|
||||
use crate::http::{ReadBodyError, read_body_with_limit};
|
||||
use crate::metrics::{HttpDirection, Metrics, SniGroup, SniKind};
|
||||
use crate::metrics::{HttpDirection, Metrics};
|
||||
use crate::proxy::{NeonOptions, run_until_cancelled};
|
||||
use crate::serverless::backend::HttpConnError;
|
||||
use crate::types::{DbName, RoleName};
|
||||
@@ -227,32 +227,6 @@ fn get_conn_info(
|
||||
}
|
||||
}
|
||||
|
||||
// check the URL that was used, for metrics
|
||||
{
|
||||
let host_endpoint = headers
|
||||
// get the host header
|
||||
.get("host")
|
||||
// extract the domain
|
||||
.and_then(|h| {
|
||||
let (host, _port) = h.to_str().ok()?.split_once(':')?;
|
||||
Some(host)
|
||||
})
|
||||
// get the endpoint prefix
|
||||
.map(|h| h.split_once('.').map_or(h, |(prefix, _)| prefix));
|
||||
|
||||
let kind = if host_endpoint == Some(&*endpoint) {
|
||||
SniKind::Sni
|
||||
} else {
|
||||
SniKind::NoSni
|
||||
};
|
||||
|
||||
let protocol = ctx.protocol();
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.accepted_connections_by_sni
|
||||
.inc(SniGroup { protocol, kind });
|
||||
}
|
||||
|
||||
ctx.set_user_agent(
|
||||
headers
|
||||
.get(hyper::header::USER_AGENT)
|
||||
|
||||
@@ -121,20 +121,6 @@ impl Client {
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn switch_timeline_membership(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineMembershipSwitchRequest,
|
||||
) -> Result<models::TimelineMembershipSwitchResponse> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/membership",
|
||||
self.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
);
|
||||
let resp = self.put(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn delete_tenant(&self, tenant_id: TenantId) -> Result<models::TenantDeleteResult> {
|
||||
let uri = format!("{}/v1/tenant/{}", self.mgmt_api_endpoint, tenant_id);
|
||||
let resp = self
|
||||
|
||||
@@ -243,7 +243,8 @@ async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
|
||||
let resp =
|
||||
pull_timeline::handle_request(data, conf.sk_auth_token.clone(), ca_certs, global_timelines)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::OK, resp)
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ use bytes::Bytes;
|
||||
use camino::Utf8PathBuf;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{SinkExt, StreamExt, TryStreamExt};
|
||||
use http_utils::error::ApiError;
|
||||
use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo};
|
||||
use reqwest::Certificate;
|
||||
use safekeeper_api::Term;
|
||||
@@ -31,7 +30,7 @@ use utils::pausable_failpoint;
|
||||
|
||||
use crate::control_file::CONTROL_FILE_NAME;
|
||||
use crate::state::{EvictionState, TimelinePersistentState};
|
||||
use crate::timeline::{Timeline, TimelineError, WalResidentTimeline};
|
||||
use crate::timeline::{Timeline, WalResidentTimeline};
|
||||
use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline};
|
||||
use crate::wal_storage::open_wal_file;
|
||||
use crate::{GlobalTimelines, debug_dump, wal_backup};
|
||||
@@ -396,7 +395,7 @@ pub async fn handle_request(
|
||||
sk_auth_token: Option<SecretString>,
|
||||
ssl_ca_certs: Vec<Certificate>,
|
||||
global_timelines: Arc<GlobalTimelines>,
|
||||
) -> Result<PullTimelineResponse, ApiError> {
|
||||
) -> Result<PullTimelineResponse> {
|
||||
let existing_tli = global_timelines.get(TenantTimelineId::new(
|
||||
request.tenant_id,
|
||||
request.timeline_id,
|
||||
@@ -412,9 +411,7 @@ pub async fn handle_request(
|
||||
for ssl_ca_cert in ssl_ca_certs {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client
|
||||
.build()
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
let http_client = http_client.build()?;
|
||||
|
||||
let http_hosts = request.http_hosts.clone();
|
||||
|
||||
@@ -446,10 +443,10 @@ pub async fn handle_request(
|
||||
// offline and C comes online. Then we want a pull on C with A and B as hosts to work.
|
||||
let min_required_successful = (http_hosts.len() - 1).max(1);
|
||||
if statuses.len() < min_required_successful {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
bail!(
|
||||
"only got {} successful status responses. required: {min_required_successful}",
|
||||
statuses.len()
|
||||
)));
|
||||
)
|
||||
}
|
||||
|
||||
// Find the most advanced safekeeper
|
||||
@@ -468,7 +465,7 @@ pub async fn handle_request(
|
||||
assert!(status.tenant_id == request.tenant_id);
|
||||
assert!(status.timeline_id == request.timeline_id);
|
||||
|
||||
match pull_timeline(
|
||||
pull_timeline(
|
||||
status,
|
||||
safekeeper_host,
|
||||
sk_auth_token,
|
||||
@@ -476,21 +473,6 @@ pub async fn handle_request(
|
||||
global_timelines,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => {
|
||||
match e.downcast_ref::<TimelineError>() {
|
||||
Some(TimelineError::AlreadyExists(_)) => Ok(PullTimelineResponse {
|
||||
safekeeper_host: None,
|
||||
}),
|
||||
Some(TimelineError::CreationInProgress(_)) => {
|
||||
// We don't return success here because creation might still fail.
|
||||
Err(ApiError::Conflict("Creation in progress".to_owned()))
|
||||
}
|
||||
_ => Err(ApiError::InternalServerError(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn pull_timeline(
|
||||
|
||||
@@ -98,23 +98,6 @@ impl SafekeeperClient {
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn switch_timeline_membership(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineMembershipSwitchRequest,
|
||||
) -> Result<models::TimelineMembershipSwitchResponse> {
|
||||
measured_request!(
|
||||
"switch_timeline_membership",
|
||||
crate::metrics::Method::Put,
|
||||
&self.node_id_label,
|
||||
self.inner
|
||||
.switch_timeline_membership(tenant_id, timeline_id, req)
|
||||
.await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn delete_tenant(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
|
||||
@@ -3886,10 +3886,10 @@ impl Service {
|
||||
|
||||
None
|
||||
} else if safekeepers {
|
||||
// Note that for imported timelines, we do not create the timeline on the safekeepers
|
||||
// straight away. Instead, we do it once the import finalized such that we know what
|
||||
// start LSN to provide for the safekeepers. This is done in
|
||||
// [`Self::finalize_timeline_import`].
|
||||
// Note that we do not support creating the timeline on the safekeepers
|
||||
// for imported timelines. The `start_lsn` of the timeline is not known
|
||||
// until the import finshes.
|
||||
// https://github.com/neondatabase/neon/issues/11569
|
||||
let res = self
|
||||
.tenant_timeline_create_safekeepers(tenant_id, &timeline_info)
|
||||
.instrument(tracing::info_span!("timeline_create_safekeepers", %tenant_id, timeline_id=%timeline_info.timeline_id))
|
||||
@@ -3966,22 +3966,11 @@ impl Service {
|
||||
let active = self.timeline_active_on_all_shards(&import).await?;
|
||||
|
||||
match active {
|
||||
Some(timeline_info) => {
|
||||
true => {
|
||||
tracing::info!("Timeline became active on all shards");
|
||||
|
||||
if self.config.timelines_onto_safekeepers {
|
||||
// Now that we know the start LSN of this timeline, create it on the
|
||||
// safekeepers.
|
||||
self.tenant_timeline_create_safekeepers_until_success(
|
||||
import.tenant_id,
|
||||
timeline_info,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
None => {
|
||||
false => {
|
||||
tracing::info!("Timeline not active on all shards yet");
|
||||
|
||||
tokio::select! {
|
||||
@@ -4015,6 +4004,9 @@ impl Service {
|
||||
.range_mut(TenantShardId::tenant_range(import.tenant_id))
|
||||
.for_each(|(_id, shard)| shard.importing = TimelineImportState::Idle);
|
||||
|
||||
// TODO(vlad): Timeline creations in import mode do not return a correct initdb lsn,
|
||||
// so we can't create the timeline on the safekeepers. Fix by moving creation here.
|
||||
// https://github.com/neondatabase/neon/issues/11569
|
||||
tracing::info!(%import_failed, "Timeline import complete");
|
||||
|
||||
Ok(())
|
||||
@@ -4029,16 +4021,10 @@ impl Service {
|
||||
.await;
|
||||
}
|
||||
|
||||
/// If the timeline is active on all shards, returns the [`TimelineInfo`]
|
||||
/// collected from shard 0.
|
||||
///
|
||||
/// An error is returned if the shard layout has changed during the import.
|
||||
/// This is guarded against within the storage controller and the pageserver,
|
||||
/// and, therefore, unexpected.
|
||||
async fn timeline_active_on_all_shards(
|
||||
self: &Arc<Self>,
|
||||
import: &TimelineImport,
|
||||
) -> anyhow::Result<Option<TimelineInfo>> {
|
||||
) -> anyhow::Result<bool> {
|
||||
let targets = {
|
||||
let locked = self.inner.read().unwrap();
|
||||
let mut targets = Vec::new();
|
||||
@@ -4062,17 +4048,13 @@ impl Service {
|
||||
.expect("Pageservers may not be deleted while referenced");
|
||||
targets.push((*tenant_shard_id, node.clone()));
|
||||
} else {
|
||||
return Ok(None);
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
targets
|
||||
};
|
||||
|
||||
if targets.is_empty() {
|
||||
anyhow::bail!("No shards found to finalize import for");
|
||||
}
|
||||
|
||||
let results = self
|
||||
.tenant_for_shards_api(
|
||||
targets,
|
||||
@@ -4088,17 +4070,10 @@ impl Service {
|
||||
)
|
||||
.await;
|
||||
|
||||
let all_active = results.iter().all(|res| match res {
|
||||
Ok(results.into_iter().all(|res| match res {
|
||||
Ok(info) => info.state == TimelineState::Active,
|
||||
Err(_) => false,
|
||||
});
|
||||
|
||||
if all_active {
|
||||
// Both unwraps are validated above
|
||||
Ok(Some(results.into_iter().next().unwrap().unwrap()))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_timeline_archival_config(
|
||||
@@ -8510,7 +8485,7 @@ impl Service {
|
||||
// By default, live migrations are generous about the wait time for getting
|
||||
// the secondary location up to speed. When draining, give up earlier in order
|
||||
// to not stall the operation when a cold secondary is encountered.
|
||||
const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(20);
|
||||
const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
|
||||
.secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
|
||||
@@ -8843,7 +8818,7 @@ impl Service {
|
||||
node_id: NodeId,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), OperationError> {
|
||||
const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(20);
|
||||
const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
|
||||
.secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
|
||||
|
||||
@@ -323,42 +323,6 @@ impl Service {
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_timeline_create_safekeepers_until_success(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: TenantId,
|
||||
timeline_info: TimelineInfo,
|
||||
) -> anyhow::Result<()> {
|
||||
const BACKOFF: Duration = Duration::from_secs(5);
|
||||
|
||||
loop {
|
||||
if self.cancel.is_cancelled() {
|
||||
anyhow::bail!("Shut down requested while finalizing import");
|
||||
}
|
||||
|
||||
let res = self
|
||||
.tenant_timeline_create_safekeepers(tenant_id, &timeline_info)
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(_) => {
|
||||
tracing::info!("Timeline created on safekeepers");
|
||||
break;
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!("Failed to create timeline on safekeepers: {err}");
|
||||
tokio::select! {
|
||||
_ = self.cancel.cancelled() => {
|
||||
anyhow::bail!("Shut down requested while finalizing import");
|
||||
},
|
||||
_ = tokio::time::sleep(BACKOFF) => {}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Directly insert the timeline into the database without reconciling it with safekeepers.
|
||||
///
|
||||
/// Useful if the timeline already exists on the specified safekeepers,
|
||||
|
||||
@@ -24,6 +24,7 @@ pub struct SnapshotDownloader {
|
||||
remote_client: GenericRemoteStorage,
|
||||
#[allow(dead_code)]
|
||||
target: RootTarget,
|
||||
bucket_config: BucketConfig,
|
||||
tenant_id: TenantId,
|
||||
output_path: Utf8PathBuf,
|
||||
concurrency: usize,
|
||||
@@ -42,6 +43,7 @@ impl SnapshotDownloader {
|
||||
Ok(Self {
|
||||
remote_client,
|
||||
target,
|
||||
bucket_config,
|
||||
tenant_id,
|
||||
output_path,
|
||||
concurrency,
|
||||
@@ -216,9 +218,11 @@ impl SnapshotDownloader {
|
||||
}
|
||||
|
||||
pub async fn download(&self) -> anyhow::Result<()> {
|
||||
let (remote_client, target) =
|
||||
init_remote(self.bucket_config.clone(), NodeKind::Pageserver).await?;
|
||||
|
||||
// Generate a stream of TenantShardId
|
||||
let shards =
|
||||
stream_tenant_shards(&self.remote_client, &self.target, self.tenant_id).await?;
|
||||
let shards = stream_tenant_shards(&remote_client, &target, self.tenant_id).await?;
|
||||
let shards: Vec<TenantShardId> = shards.try_collect().await?;
|
||||
|
||||
// Only read from shards that have the highest count: avoids redundantly downloading
|
||||
@@ -236,8 +240,7 @@ impl SnapshotDownloader {
|
||||
|
||||
for shard in shards.into_iter().filter(|s| s.shard_count == shard_count) {
|
||||
// Generate a stream of TenantTimelineId
|
||||
let timelines =
|
||||
stream_tenant_timelines(&self.remote_client, &self.target, shard).await?;
|
||||
let timelines = stream_tenant_timelines(&remote_client, &target, shard).await?;
|
||||
|
||||
// Generate a stream of S3TimelineBlobData
|
||||
async fn load_timeline_index(
|
||||
@@ -248,8 +251,8 @@ impl SnapshotDownloader {
|
||||
let data = list_timeline_blobs(remote_client, ttid, target).await?;
|
||||
Ok((ttid, data))
|
||||
}
|
||||
let timelines = timelines
|
||||
.map_ok(|ttid| load_timeline_index(&self.remote_client, &self.target, ttid));
|
||||
let timelines =
|
||||
timelines.map_ok(|ttid| load_timeline_index(&remote_client, &target, ttid));
|
||||
let mut timelines = std::pin::pin!(timelines.try_buffered(8));
|
||||
|
||||
while let Some(i) = timelines.next().await {
|
||||
|
||||
@@ -557,7 +557,7 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
endpoint_id: str,
|
||||
safekeepers_generation: int | None = None,
|
||||
safekeepers: list[int] | None = None,
|
||||
remote_ext_base_url: str | None = None,
|
||||
remote_ext_config: str | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
allow_multiple: bool = False,
|
||||
create_test_user: bool = False,
|
||||
@@ -572,8 +572,8 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
extra_env_vars = env or {}
|
||||
if basebackup_request_tries is not None:
|
||||
extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
|
||||
if remote_ext_base_url is not None:
|
||||
args.extend(["--remote-ext-base-url", remote_ext_base_url])
|
||||
if remote_ext_config is not None:
|
||||
args.extend(["--remote-ext-config", remote_ext_config])
|
||||
|
||||
if safekeepers_generation is not None:
|
||||
args.extend(["--safekeepers-generation", str(safekeepers_generation)])
|
||||
|
||||
@@ -1299,6 +1299,13 @@ class NeonEnv:
|
||||
for key, value in override.items():
|
||||
ps_cfg[key] = value
|
||||
|
||||
if self.pageserver_virtual_file_io_mode is not None:
|
||||
# TODO(christian): https://github.com/neondatabase/neon/issues/11598
|
||||
if not config.test_may_use_compatibility_snapshot_binaries:
|
||||
ps_cfg["virtual_file_io_mode"] = self.pageserver_virtual_file_io_mode
|
||||
else:
|
||||
log.info("ignoring virtual_file_io_mode parametrization for compatibility test")
|
||||
|
||||
if self.pageserver_wal_receiver_protocol is not None:
|
||||
key, value = PageserverWalReceiverProtocol.to_config_key_value(
|
||||
self.pageserver_wal_receiver_protocol
|
||||
@@ -1402,6 +1409,30 @@ class NeonEnv:
|
||||
for f in futs:
|
||||
f.result()
|
||||
|
||||
# Last step: register safekeepers at the storage controller
|
||||
if (
|
||||
self.storage_controller_config is not None
|
||||
and self.storage_controller_config.get("timelines_onto_safekeepers") is True
|
||||
):
|
||||
for sk_id, sk in enumerate(self.safekeepers):
|
||||
# 0 is an invalid safekeeper id
|
||||
sk_id = sk_id + 1
|
||||
body = {
|
||||
"id": sk_id,
|
||||
"created_at": "2023-10-25T09:11:25Z",
|
||||
"updated_at": "2024-08-28T11:32:43Z",
|
||||
"region_id": "aws-us-east-2",
|
||||
"host": "127.0.0.1",
|
||||
"port": sk.port.pg,
|
||||
"http_port": sk.port.http,
|
||||
"https_port": None,
|
||||
"version": 5957,
|
||||
"availability_zone_id": f"us-east-2b-{sk_id}",
|
||||
}
|
||||
|
||||
self.storage_controller.on_safekeeper_deploy(sk_id, body)
|
||||
self.storage_controller.safekeeper_scheduling_policy(sk_id, "Active")
|
||||
|
||||
self.endpoint_storage.start(timeout_in_seconds=timeout_in_seconds)
|
||||
|
||||
def stop(self, immediate=False, ps_assert_metric_no_errors=False, fail_on_endpoint_errors=True):
|
||||
@@ -3835,7 +3866,7 @@ class NeonAuthBroker:
|
||||
external_http_port: int,
|
||||
auth_backend: NeonAuthBroker.ProxyV1,
|
||||
):
|
||||
self.domain = "local.neon.build" # resolves to 127.0.0.1
|
||||
self.domain = "apiauth.local.neon.build" # resolves to 127.0.0.1
|
||||
self.host = "127.0.0.1"
|
||||
self.http_port = http_port
|
||||
self.external_http_port = external_http_port
|
||||
@@ -3852,7 +3883,7 @@ class NeonAuthBroker:
|
||||
# generate key of it doesn't exist
|
||||
crt_path = self.test_output_dir / "proxy.crt"
|
||||
key_path = self.test_output_dir / "proxy.key"
|
||||
generate_proxy_tls_certs(f"apiauth.{self.domain}", key_path, crt_path)
|
||||
generate_proxy_tls_certs("apiauth.local.neon.build", key_path, crt_path)
|
||||
|
||||
args = [
|
||||
str(self.neon_binpath / "proxy"),
|
||||
@@ -3896,10 +3927,10 @@ class NeonAuthBroker:
|
||||
|
||||
log.info(f"Executing http query: {query}")
|
||||
|
||||
connstr = f"postgresql://{user}@ep-foo-bar-1234.{self.domain}/postgres"
|
||||
connstr = f"postgresql://{user}@{self.domain}/postgres"
|
||||
async with httpx.AsyncClient(verify=str(self.test_output_dir / "proxy.crt")) as client:
|
||||
response = await client.post(
|
||||
f"https://apiauth.{self.domain}:{self.external_http_port}/sql",
|
||||
f"https://{self.domain}:{self.external_http_port}/sql",
|
||||
json={"query": query, "params": args},
|
||||
headers={
|
||||
"Neon-Connection-String": connstr,
|
||||
@@ -4195,7 +4226,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
|
||||
def start(
|
||||
self,
|
||||
remote_ext_base_url: str | None = None,
|
||||
remote_ext_config: str | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
safekeeper_generation: int | None = None,
|
||||
safekeepers: list[int] | None = None,
|
||||
@@ -4221,7 +4252,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
self.endpoint_id,
|
||||
safekeepers_generation=safekeeper_generation,
|
||||
safekeepers=self.active_safekeepers,
|
||||
remote_ext_base_url=remote_ext_base_url,
|
||||
remote_ext_config=remote_ext_config,
|
||||
pageserver_id=pageserver_id,
|
||||
allow_multiple=allow_multiple,
|
||||
create_test_user=create_test_user,
|
||||
@@ -4436,7 +4467,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
hot_standby: bool = False,
|
||||
lsn: Lsn | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
remote_ext_base_url: str | None = None,
|
||||
remote_ext_config: str | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
allow_multiple: bool = False,
|
||||
basebackup_request_tries: int | None = None,
|
||||
@@ -4455,7 +4486,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
pageserver_id=pageserver_id,
|
||||
allow_multiple=allow_multiple,
|
||||
).start(
|
||||
remote_ext_base_url=remote_ext_base_url,
|
||||
remote_ext_config=remote_ext_config,
|
||||
pageserver_id=pageserver_id,
|
||||
allow_multiple=allow_multiple,
|
||||
basebackup_request_tries=basebackup_request_tries,
|
||||
@@ -4539,7 +4570,7 @@ class EndpointFactory:
|
||||
lsn: Lsn | None = None,
|
||||
hot_standby: bool = False,
|
||||
config_lines: list[str] | None = None,
|
||||
remote_ext_base_url: str | None = None,
|
||||
remote_ext_config: str | None = None,
|
||||
pageserver_id: int | None = None,
|
||||
basebackup_request_tries: int | None = None,
|
||||
) -> Endpoint:
|
||||
@@ -4559,7 +4590,7 @@ class EndpointFactory:
|
||||
hot_standby=hot_standby,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
remote_ext_base_url=remote_ext_base_url,
|
||||
remote_ext_config=remote_ext_config,
|
||||
pageserver_id=pageserver_id,
|
||||
basebackup_request_tries=basebackup_request_tries,
|
||||
)
|
||||
@@ -4613,10 +4644,7 @@ class EndpointFactory:
|
||||
return self
|
||||
|
||||
def new_replica(
|
||||
self,
|
||||
origin: Endpoint,
|
||||
endpoint_id: str | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
self, origin: Endpoint, endpoint_id: str, config_lines: list[str] | None = None
|
||||
):
|
||||
branch_name = origin.branch_name
|
||||
assert origin in self.endpoints
|
||||
@@ -4632,10 +4660,7 @@ class EndpointFactory:
|
||||
)
|
||||
|
||||
def new_replica_start(
|
||||
self,
|
||||
origin: Endpoint,
|
||||
endpoint_id: str | None = None,
|
||||
config_lines: list[str] | None = None,
|
||||
self, origin: Endpoint, endpoint_id: str, config_lines: list[str] | None = None
|
||||
):
|
||||
branch_name = origin.branch_name
|
||||
assert origin in self.endpoints
|
||||
@@ -5452,13 +5477,6 @@ def wait_for_last_flush_lsn(
|
||||
|
||||
if last_flush_lsn is None:
|
||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
# The last_flush_lsn may not correspond to a record boundary.
|
||||
# For example, if the compute flushed WAL on a page boundary,
|
||||
# the remaining part of the record might not be flushed for a long time.
|
||||
# This would prevent the pageserver from reaching last_flush_lsn promptly.
|
||||
# To ensure the rest of the record reaches the pageserver quickly,
|
||||
# we forcibly flush the WAL by using CHECKPOINT.
|
||||
endpoint.safe_psql("CHECKPOINT")
|
||||
|
||||
results = []
|
||||
for tenant_shard_id, pageserver in shards:
|
||||
|
||||
@@ -122,10 +122,6 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [
|
||||
".*Call to node.*management API.*failed.*Timeout.*",
|
||||
".*Failed to update node .+ after heartbeat round.*error sending request for url.*",
|
||||
".*background_reconcile: failed to fetch top tenants:.*client error \\(Connect\\).*",
|
||||
# Many tests will take safekeepers offline
|
||||
".*Call to safekeeper.*management API.*failed.*receive body.*",
|
||||
".*Call to safekeeper.*management API.*failed.*ReceiveBody.*",
|
||||
".*Call to safekeeper.*management API.*failed.*Timeout.*",
|
||||
# Many tests will start up with a node offline
|
||||
".*startup_reconcile: Could not scan node.*",
|
||||
# Tests run in dev mode
|
||||
|
||||
@@ -544,69 +544,3 @@ def test_drop_role_with_table_privileges_from_non_neon_superuser(neon_simple_env
|
||||
)
|
||||
role = cursor.fetchone()
|
||||
assert role is None
|
||||
|
||||
|
||||
def test_db_with_custom_settings(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test that compute_ctl can work with databases that have some custom settings.
|
||||
For example, role=some_other_role, default_transaction_read_only=on,
|
||||
search_path=non_public_schema, statement_timeout=1 (1ms).
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
TEST_ROLE = "some_other_role"
|
||||
TEST_DB = "db_with_custom_settings"
|
||||
TEST_SCHEMA = "non_public_schema"
|
||||
|
||||
endpoint.respec_deep(
|
||||
**{
|
||||
"spec": {
|
||||
"skip_pg_catalog_updates": False,
|
||||
"cluster": {
|
||||
"databases": [
|
||||
{
|
||||
"name": TEST_DB,
|
||||
"owner": TEST_ROLE,
|
||||
}
|
||||
],
|
||||
"roles": [
|
||||
{
|
||||
"name": TEST_ROLE,
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
endpoint.reconfigure()
|
||||
|
||||
with endpoint.cursor(dbname=TEST_DB) as cursor:
|
||||
cursor.execute(f"CREATE SCHEMA {TEST_SCHEMA}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET role = {TEST_ROLE}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET default_transaction_read_only = on")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET search_path = {TEST_SCHEMA}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET statement_timeout = 1")
|
||||
|
||||
with endpoint.cursor(dbname=TEST_DB) as cursor:
|
||||
cursor.execute("SELECT current_role")
|
||||
role = cursor.fetchone()
|
||||
assert role is not None
|
||||
assert role[0] == TEST_ROLE
|
||||
|
||||
cursor.execute("SHOW default_transaction_read_only")
|
||||
default_transaction_read_only = cursor.fetchone()
|
||||
assert default_transaction_read_only is not None
|
||||
assert default_transaction_read_only[0] == "on"
|
||||
|
||||
cursor.execute("SHOW search_path")
|
||||
search_path = cursor.fetchone()
|
||||
assert search_path is not None
|
||||
assert search_path[0] == TEST_SCHEMA
|
||||
|
||||
# Do not check statement_timeout, because we force it to 2min
|
||||
# in `endpoint.cursor()` fixture.
|
||||
|
||||
endpoint.reconfigure()
|
||||
|
||||
@@ -221,7 +221,7 @@ def test_remote_extensions(
|
||||
|
||||
endpoint.create_remote_extension_spec(spec)
|
||||
|
||||
endpoint.start(remote_ext_base_url=extensions_endpoint)
|
||||
endpoint.start(remote_ext_config=extensions_endpoint)
|
||||
|
||||
with endpoint.connect() as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -249,7 +249,7 @@ def test_remote_extensions(
|
||||
# Remove the extension files to force a redownload of the extension.
|
||||
extension.remove(test_output_dir, pg_version)
|
||||
|
||||
endpoint.start(remote_ext_base_url=extensions_endpoint)
|
||||
endpoint.start(remote_ext_config=extensions_endpoint)
|
||||
|
||||
# Test that ALTER EXTENSION UPDATE statements also fetch remote extensions.
|
||||
with endpoint.connect() as conn:
|
||||
|
||||
@@ -24,7 +24,6 @@ from fixtures.utils import (
|
||||
skip_in_debug_build,
|
||||
wait_until,
|
||||
)
|
||||
from fixtures.workload import Workload
|
||||
from mypy_boto3_kms import KMSClient
|
||||
from mypy_boto3_kms.type_defs import EncryptResponseTypeDef
|
||||
from mypy_boto3_s3 import S3Client
|
||||
@@ -98,10 +97,6 @@ def test_pgdata_import_smoke(
|
||||
f"http://{cplane_mgmt_api_server.host}:{cplane_mgmt_api_server.port}/storage/api/v1/"
|
||||
)
|
||||
|
||||
if neon_env_builder.storage_controller_config is None:
|
||||
neon_env_builder.storage_controller_config = {}
|
||||
neon_env_builder.storage_controller_config["timelines_onto_safekeepers"] = True
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# The test needs LocalFs support, which is only built in testing mode.
|
||||
@@ -291,28 +286,34 @@ def test_pgdata_import_smoke(
|
||||
#
|
||||
# validate that we can write
|
||||
#
|
||||
workload = Workload(env, tenant_id, timeline_id, branch_name=import_branch_name)
|
||||
workload.init()
|
||||
workload.write_rows(64)
|
||||
workload.validate()
|
||||
rw_endpoint = env.endpoints.create_start(
|
||||
branch_name=import_branch_name,
|
||||
endpoint_id="rw",
|
||||
tenant_id=tenant_id,
|
||||
config_lines=ep_config,
|
||||
)
|
||||
rw_endpoint.safe_psql("create table othertable(values text)")
|
||||
rw_lsn = Lsn(rw_endpoint.safe_psql_scalar("select pg_current_wal_flush_lsn()"))
|
||||
|
||||
rw_lsn = Lsn(workload.endpoint().safe_psql_scalar("select pg_current_wal_flush_lsn()"))
|
||||
# TODO: consider using `class Workload` here
|
||||
# to do compaction and whatnot?
|
||||
|
||||
#
|
||||
# validate that we can branch (important use case)
|
||||
#
|
||||
|
||||
# ... at the tip
|
||||
child_timeline_id = env.create_branch(
|
||||
_ = env.create_branch(
|
||||
new_branch_name="br-tip",
|
||||
ancestor_branch_name=import_branch_name,
|
||||
tenant_id=tenant_id,
|
||||
ancestor_start_lsn=rw_lsn,
|
||||
)
|
||||
child_workload = workload.branch(timeline_id=child_timeline_id, branch_name="br-tip")
|
||||
child_workload.validate()
|
||||
|
||||
validate_vanilla_equivalence(child_workload.endpoint())
|
||||
br_tip_endpoint = env.endpoints.create_start(
|
||||
branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id, config_lines=ep_config
|
||||
)
|
||||
validate_vanilla_equivalence(br_tip_endpoint)
|
||||
br_tip_endpoint.safe_psql("select * from othertable")
|
||||
|
||||
# ... at the initdb lsn
|
||||
_ = env.create_branch(
|
||||
@@ -329,7 +330,7 @@ def test_pgdata_import_smoke(
|
||||
)
|
||||
validate_vanilla_equivalence(br_initdb_endpoint)
|
||||
with pytest.raises(psycopg2.errors.UndefinedTable):
|
||||
br_initdb_endpoint.safe_psql(f"select * from {workload.table}")
|
||||
br_initdb_endpoint.safe_psql("select * from othertable")
|
||||
|
||||
|
||||
@run_only_on_default_postgres(reason="PG version is irrelevant here")
|
||||
@@ -640,55 +641,6 @@ def test_fast_import_binary(
|
||||
assert res[0][0] == 10
|
||||
|
||||
|
||||
def test_fast_import_event_triggers(
|
||||
test_output_dir,
|
||||
vanilla_pg: VanillaPostgres,
|
||||
port_distributor: PortDistributor,
|
||||
fast_import: FastImport,
|
||||
):
|
||||
vanilla_pg.start()
|
||||
vanilla_pg.safe_psql("""
|
||||
CREATE FUNCTION test_event_trigger_for_drops()
|
||||
RETURNS event_trigger LANGUAGE plpgsql AS $$
|
||||
DECLARE
|
||||
obj record;
|
||||
BEGIN
|
||||
FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects()
|
||||
LOOP
|
||||
RAISE NOTICE '% dropped object: % %.% %',
|
||||
tg_tag,
|
||||
obj.object_type,
|
||||
obj.schema_name,
|
||||
obj.object_name,
|
||||
obj.object_identity;
|
||||
END LOOP;
|
||||
END
|
||||
$$;
|
||||
|
||||
CREATE EVENT TRIGGER test_event_trigger_for_drops
|
||||
ON sql_drop
|
||||
EXECUTE PROCEDURE test_event_trigger_for_drops();
|
||||
""")
|
||||
|
||||
pg_port = port_distributor.get_port()
|
||||
p = fast_import.run_pgdata(pg_port=pg_port, source_connection_string=vanilla_pg.connstr())
|
||||
assert p.returncode == 0
|
||||
|
||||
vanilla_pg.stop()
|
||||
|
||||
pgbin = PgBin(test_output_dir, fast_import.pg_distrib_dir, fast_import.pg_version)
|
||||
with VanillaPostgres(
|
||||
fast_import.workdir / "pgdata", pgbin, pg_port, False
|
||||
) as new_pgdata_vanilla_pg:
|
||||
new_pgdata_vanilla_pg.start()
|
||||
|
||||
# database name and user are hardcoded in fast_import binary, and they are different from normal vanilla postgres
|
||||
conn = PgProtocol(dsn=f"postgresql://cloud_admin@localhost:{pg_port}/neondb")
|
||||
res = conn.safe_psql("SELECT count(*) FROM pg_event_trigger;")
|
||||
log.info(f"Result: {res}")
|
||||
assert res[0][0] == 0, f"Neon does not support importing event triggers, got: {res[0][0]}"
|
||||
|
||||
|
||||
def test_fast_import_restore_to_connstring(
|
||||
test_output_dir,
|
||||
vanilla_pg: VanillaPostgres,
|
||||
|
||||
@@ -39,10 +39,3 @@ def test_role_grants(neon_simple_env: NeonEnv):
|
||||
res = cur.fetchall()
|
||||
|
||||
assert res == [(1,)], "select should not succeed"
|
||||
|
||||
# confirm that replicas can also ensure the grants are correctly set.
|
||||
replica = env.endpoints.new_replica_start(endpoint)
|
||||
replica_client = replica.http_client()
|
||||
replica_client.set_role_grants(
|
||||
"test_role_grants", "test_role", "test_schema", ["CREATE", "USAGE"]
|
||||
)
|
||||
|
||||
@@ -1822,7 +1822,7 @@ def test_timeline_detach_with_aux_files_with_detach_v1(
|
||||
endpoint2.safe_psql(
|
||||
"SELECT pg_create_logical_replication_slot('test_slot_restore', 'pgoutput')"
|
||||
)
|
||||
lsn3 = wait_for_last_flush_lsn(env, endpoint2, env.initial_tenant, branch_timeline_id)
|
||||
lsn3 = wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, branch_timeline_id)
|
||||
assert set(http.list_aux_files(env.initial_tenant, branch_timeline_id, lsn1).keys()) == set([])
|
||||
assert set(http.list_aux_files(env.initial_tenant, branch_timeline_id, lsn3).keys()) == set(
|
||||
["pg_replslot/test_slot_restore/state"]
|
||||
@@ -1839,7 +1839,7 @@ def test_timeline_detach_with_aux_files_with_detach_v1(
|
||||
assert all_reparented == set([])
|
||||
|
||||
# We need to ensure all safekeeper data are ingested before checking aux files: the API does not wait for LSN.
|
||||
wait_for_last_flush_lsn(env, endpoint2, env.initial_tenant, branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, branch_timeline_id)
|
||||
assert set(http.list_aux_files(env.initial_tenant, env.initial_timeline, lsn2).keys()) == set(
|
||||
["pg_replslot/test_slot_parent_1/state", "pg_replslot/test_slot_parent_2/state"]
|
||||
), "main branch unaffected"
|
||||
|
||||
2
vendor/postgres-v14
vendored
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: 06b405bc98...c8dab02bfc
2
vendor/postgres-v15
vendored
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 72f83df76c...b838c8969b
2
vendor/postgres-v16
vendored
2
vendor/postgres-v16
vendored
Submodule vendor/postgres-v16 updated: d72d76f2cd...05ddf212e2
2
vendor/postgres-v17
vendored
2
vendor/postgres-v17
vendored
Submodule vendor/postgres-v17 updated: 0d59c91c1a...eab3a37834
8
vendor/revisions.json
vendored
8
vendor/revisions.json
vendored
@@ -1,18 +1,18 @@
|
||||
{
|
||||
"v17": [
|
||||
"17.4",
|
||||
"0d59c91c1a23e667f1d1169d5f040b3fa0a0ab44"
|
||||
"eab3a37834cac6ec0719bf817ac918a201712d66"
|
||||
],
|
||||
"v16": [
|
||||
"16.8",
|
||||
"d72d76f2cdee4194dd052ce099e9784aca7c794a"
|
||||
"05ddf212e2e07b788b5c8b88bdcf98630941f6ae"
|
||||
],
|
||||
"v15": [
|
||||
"15.12",
|
||||
"72f83df76c61ce18d81bd371f0afd2a43d59c052"
|
||||
"b838c8969b7c63f3e637a769656f5f36793b797c"
|
||||
],
|
||||
"v14": [
|
||||
"14.17",
|
||||
"06b405bc982fd53522689aa4acbfd9c44b7993cf"
|
||||
"c8dab02bfc003ae7bd59096919042d7840f3c194"
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user