Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-17 18:32:56 +00:00)
Compare commits: initdb-cac ... spec-forma

8 Commits
| Author | SHA1 | Date |
|---|---|---|
| | b1fb59ef6e | |
| | 51f3128657 | |
| | 3fc82798dd | |
| | 237c4a361e | |
| | 50e38f5196 | |
| | 87230bf54c | |
| | d11ebbaddf | |
| | e90f727131 | |
Cargo.lock (generated): 4 changed lines
@@ -850,6 +850,7 @@ dependencies = [
 "serde",
 "serde_json",
 "serde_with",
 "utils",
 "workspace_hack",
]

@@ -879,6 +880,7 @@ dependencies = [
 "tracing-subscriber",
 "tracing-utils",
 "url",
 "utils",
 "workspace_hack",
]

@@ -920,8 +922,10 @@ name = "control_plane"
version = "0.1.0"
dependencies = [
 "anyhow",
 "chrono",
 "clap 4.1.4",
 "comfy-table",
 "compute_api",
 "git-version",
 "nix",
 "once_cell",
README.md: 22 changed lines
@@ -147,15 +147,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one

# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
> ./target/debug/neon_local endpoint start main
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

# check list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
```

2. Now, it is possible to connect to postgres and run some queries:

@@ -184,14 +184,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

# check the new list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
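
The README walkthrough then connects to the started endpoint and runs queries. As a quick illustration only, a smoke test with the `postgres` crate (the connection parameters are the ones printed by `neon_local endpoint start main` above; the query and program structure are just an example, not part of the repository):

```rust
use postgres::{Client, NoTls};

fn main() -> Result<(), postgres::Error> {
    // Connection string as printed by `neon_local endpoint start main`.
    let mut client = Client::connect(
        "host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres",
        NoTls,
    )?;

    // Run a trivial query to confirm the endpoint is serving traffic.
    let row = client.query_one("SELECT 1::int", &[])?;
    let one: i32 = row.get(0);
    println!("got {one}");
    Ok(())
}
```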
@@ -28,4 +28,5 @@ tracing-utils.workspace = true
url.workspace = true

compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true
@@ -44,8 +44,9 @@ use tracing::{error, info};
use url::Url;

use compute_api::responses::ComputeStatus;
use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2};

use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_tools::compute::{ComputeNode, ComputeState};
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;

@@ -57,6 +58,9 @@ fn main() -> Result<()> {

    let matches = cli().get_matches();

    let http_port = *matches
        .get_one::<u16>("http-port")
        .expect("http-port is required");
    let pgdata = matches
        .get_one::<String>("pgdata")
        .expect("PGDATA path is required");

@@ -72,7 +76,7 @@ fn main() -> Result<()> {
    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

    let mut spec = None;
    let mut spec: Option<ComputeSpecAnyVersion> = None;
    let mut live_config_allowed = false;
    match spec_json {
        // First, try to get cluster spec from the cli argument

@@ -106,8 +110,9 @@ fn main() -> Result<()> {
    let mut new_state = ComputeState::new();
    let spec_set;
    if let Some(spec) = spec {
        let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
        new_state.pspec = Some(pspec);
        // Parse the spec file, upgrading it from older format if necessary
        let spec: ComputeSpecV2 = ComputeSpecV2::try_from(spec)?;
        new_state.spec = Some(spec);
        spec_set = true;
    } else {
        spec_set = false;

@@ -125,7 +130,8 @@ fn main() -> Result<()> {

    // Launch http service first, so we were able to serve control-plane
    // requests, while configuration is still in progress.
    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
    let _http_handle =
        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

    if !spec_set {
        // No spec provided, hang waiting for it.

@@ -144,8 +150,8 @@ fn main() -> Result<()> {

    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
    let pspec = state.pspec.as_ref().expect("spec must be set");
    let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
    let spec = state.spec.as_ref().expect("spec must be set");
    let startup_tracing_context = spec.startup_tracing_context.clone();
    state.status = ComputeStatus::Init;
    compute.state_changed.notify_all();
    drop(state);

@@ -245,6 +251,14 @@ fn cli() -> clap::Command {
    let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
    clap::Command::new("compute_ctl")
        .version(version)
        .arg(
            Arg::new("http-port")
                .long("http-port")
                .value_name("HTTP_PORT")
                .default_value("3080")
                .value_parser(clap::value_parser!(u16))
                .required(false),
        )
        .arg(
            Arg::new("connstr")
                .short('C')
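
compute_ctl now keeps the raw document as a `ComputeSpecAnyVersion` and upgrades it to `ComputeSpecV2` via `try_from` before use. A minimal sketch of that pattern, assuming a hypothetical `format_version` discriminator and a far smaller field set than the real `compute_api` types:

```rust
use anyhow::{bail, Result};
use serde::Deserialize;
use serde_json::Value;

// Illustrative stand-ins only; the real structs live in the `compute_api` crate.
pub struct ComputeSpecAnyVersion(pub Value);

#[derive(Deserialize)]
pub struct ComputeSpecV2 {
    pub tenant_id: String,
    pub timeline_id: String,
    pub pageserver_connstring: String,
}

impl TryFrom<ComputeSpecAnyVersion> for ComputeSpecV2 {
    type Error = anyhow::Error;

    fn try_from(any: ComputeSpecAnyVersion) -> Result<Self> {
        // Dispatch on a (hypothetical) version marker; an older document would be
        // upgraded field by field here before deserializing as V2.
        match any.0.get("format_version").and_then(Value::as_u64) {
            Some(2) => Ok(serde_json::from_value(any.0)?),
            Some(v) => bail!("don't know how to upgrade spec format_version {v}"),
            None => bail!("spec document carries no format_version"),
        }
    }
}
```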
@@ -26,9 +26,10 @@ use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use tokio_postgres;
use tracing::{info, instrument, warn};
use utils::lsn::Lsn;

use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::ComputeSpec;
use compute_api::spec::ComputeSpecV2;

use crate::checker::create_writability_check_data;
use crate::config;

@@ -69,7 +70,7 @@ pub struct ComputeState {
    /// Timestamp of the last Postgres activity
    pub last_active: DateTime<Utc>,
    pub error: Option<String>,
    pub pspec: Option<ParsedSpec>,
    pub spec: Option<ComputeSpecV2>,
    pub metrics: ComputeMetrics,
}

@@ -79,7 +80,7 @@ impl ComputeState {
            status: ComputeStatus::Empty,
            last_active: Utc::now(),
            error: None,
            pspec: None,
            spec: None,
            metrics: ComputeMetrics::default(),
        }
    }

@@ -91,45 +92,6 @@ impl Default for ComputeState {
    }
}

#[derive(Clone, Debug)]
pub struct ParsedSpec {
    pub spec: ComputeSpec,
    pub tenant: String,
    pub timeline: String,
    pub pageserver_connstr: String,
    pub storage_auth_token: Option<String>,
}

impl TryFrom<ComputeSpec> for ParsedSpec {
    type Error = String;
    fn try_from(spec: ComputeSpec) -> Result<Self, String> {
        let pageserver_connstr = spec
            .cluster
            .settings
            .find("neon.pageserver_connstring")
            .ok_or("pageserver connstr should be provided")?;
        let storage_auth_token = spec.storage_auth_token.clone();
        let tenant = spec
            .cluster
            .settings
            .find("neon.tenant_id")
            .ok_or("tenant id should be provided")?;
        let timeline = spec
            .cluster
            .settings
            .find("neon.timeline_id")
            .ok_or("tenant id should be provided")?;

        Ok(ParsedSpec {
            spec,
            pageserver_connstr,
            storage_auth_token,
            tenant,
            timeline,
        })
    }
}

impl ComputeNode {
    pub fn set_status(&self, status: ComputeStatus) {
        let mut state = self.state.lock().unwrap();

@@ -155,11 +117,11 @@ impl ComputeNode {
    // Get basebackup from the libpq connection to pageserver using `connstr` and
    // unarchive it to `pgdata` directory overriding all its previous content.
    #[instrument(skip(self, compute_state))]
    fn get_basebackup(&self, compute_state: &ComputeState, lsn: &str) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
    fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.spec.as_ref().expect("spec must be set");
        let start_time = Utc::now();

        let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
        let mut config = postgres::Config::from_str(&spec.pageserver_connstring)?;

        // Use the storage auth token from the config file, if given.
        // Note: this overrides any password set in the connection string.

@@ -172,8 +134,8 @@ impl ComputeNode {

        let mut client = config.connect(NoTls)?;
        let basebackup_cmd = match lsn {
            "0/0" => format!("basebackup {} {}", &spec.tenant, &spec.timeline), // First start of the compute
            _ => format!("basebackup {} {} {}", &spec.tenant, &spec.timeline, lsn),
            Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute
            _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
        };
        let copyreader = client.copy_out(basebackup_cmd.as_str())?;

@@ -197,7 +159,7 @@ impl ComputeNode {
    // Run `postgres` in a special mode with `--sync-safekeepers` argument
    // and return the reported LSN back to the caller.
    #[instrument(skip(self, storage_auth_token))]
    fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<String> {
    fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
        let start_time = Utc::now();

        let sync_handle = Command::new(&self.pgbin)

@@ -234,7 +196,7 @@ impl ComputeNode {
            .unwrap()
            .as_millis() as u64;

        let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
        let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?;

        Ok(lsn)
    }
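
`sync_safekeepers` now returns a typed `Lsn` and `get_basebackup` accepts one, with `Lsn(0)` replacing the old `"0/0"` string sentinel. A self-contained sketch of such an LSN representation, assuming the usual Postgres `hi/lo` hexadecimal text form (the real type comes from the `utils` crate; this is not its implementation):

```rust
use std::str::FromStr;

/// Minimal stand-in for a WAL position: a 64-bit value printed and parsed
/// in the familiar Postgres "hi/lo" hex form, e.g. "0/16B5BA8".
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Lsn(u64);

impl FromStr for Lsn {
    type Err = std::num::ParseIntError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split "0/16B5BA8" into high and low 32-bit words; a bare value is
        // treated as the low word (an assumption made for this sketch).
        let (hi, lo) = s.split_once('/').unwrap_or(("0", s));
        Ok(Lsn((u64::from_str_radix(hi, 16)? << 32) | u64::from_str_radix(lo, 16)?))
    }
}

fn main() {
    let lsn: Lsn = "0/16B5BA8".parse().unwrap();
    assert_eq!(lsn, Lsn(0x16B5BA8));
    // Lsn(0) plays the role the "0/0" string used to play: "no LSN yet".
    assert_ne!(lsn, Lsn(0));
}
```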
@@ -243,27 +205,34 @@ impl ComputeNode {
    /// safekeepers sync, basebackup, etc.
    #[instrument(skip(self, compute_state))]
    pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
        let spec = compute_state.spec.as_ref().expect("spec must be set");
        let pgdata_path = Path::new(&self.pgdata);

        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;
        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;

        info!("starting safekeepers syncing");
        let lsn = self
            .sync_safekeepers(pspec.storage_auth_token.clone())
            .with_context(|| "failed to sync safekeepers")?;
        info!("safekeepers synced at LSN {}", lsn);
        let lsn = if let Some(lsn) = spec.lsn {
            // Read-only node, anchored at 'lsn'
            lsn
        } else {
            // Primary that continues to write at end of the timeline
            info!("starting safekeepers syncing");
            let last_lsn = self
                .sync_safekeepers(spec.storage_auth_token.clone())
                .with_context(|| "failed to sync safekeepers")?;
            info!("safekeepers synced at LSN {}", last_lsn);
            last_lsn
        };

        info!(
            "getting basebackup@{} from pageserver {}",
            lsn, &pspec.pageserver_connstr
            lsn, &spec.pageserver_connstring
        );
        self.get_basebackup(compute_state, &lsn).with_context(|| {
        self.get_basebackup(compute_state, lsn).with_context(|| {
            format!(
                "failed to get basebackup@{} from pageserver {}",
                lsn, &pspec.pageserver_connstr
                lsn, &spec.pageserver_connstring
            )
        })?;

@@ -331,7 +300,7 @@ impl ComputeNode {
        };

        // Proceed with post-startup configuration. Note, that order of operations is important.
        let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
        let spec = &compute_state.spec.as_ref().expect("spec must be set");
        handle_roles(spec, &mut client)?;
        handle_databases(spec, &mut client)?;
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;

@@ -342,10 +311,7 @@ impl ComputeNode {
        // 'Close' connection
        drop(client);

        info!(
            "finished configuration of compute for project {}",
            spec.cluster.cluster_id
        );
        info!("finished configuration of compute");

        Ok(())
    }

@@ -353,13 +319,13 @@ impl ComputeNode {
    #[instrument(skip(self))]
    pub fn start_compute(&self) -> Result<std::process::Child> {
        let compute_state = self.state.lock().unwrap().clone();
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
        let spec = compute_state.spec.as_ref().expect("spec must be set");
        info!(
            "starting compute for project {}, operation {}, tenant {}, timeline {}",
            spec.spec.cluster.cluster_id,
            spec.spec.operation_uuid.as_ref().unwrap(),
            spec.tenant,
            spec.timeline,
            spec.project_id.as_deref().unwrap_or("None"),
            spec.operation_uuid.as_deref().unwrap_or("None"),
            spec.tenant_id,
            spec.timeline_id,
        );

        self.prepare_pgdata(&compute_state)?;
@@ -5,8 +5,8 @@ use std::path::Path;

use anyhow::Result;

use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::ComputeSpec;
use crate::pg_helpers::escape_conf_value;
use compute_api::spec::ComputeSpecV2;

/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.

@@ -32,20 +32,54 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
}

/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpecV2) -> Result<()> {
    // File::create() destroys the file content if it exists.
    let mut postgres_conf = File::create(path)?;
    let mut file = File::create(path)?;

    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;

    Ok(())
}

// Write Postgres config block wrapped with generated comment section
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
    writeln!(file, "# Managed by compute_ctl: begin")?;
    writeln!(file, "{}", buf)?;
    writeln!(file, "# Managed by compute_ctl: end")?;
    // Write the postgresql.conf content from the spec file as is.
    if let Some(conf) = &spec.postgresql_conf {
        writeln!(file, "{}", conf)?;
    }

    // Append any extra options from the spec file
    if let Some(settings) = &spec.settings {
        writeln!(file, "\n# Extra settings from spec document")?;

        for setting in settings {
            if let Some(value) = &setting.value {
                let escaped_value: String = value.replace('\'', "''").replace('\\', "\\\\");
                writeln!(file, "{} = '{}'", setting.name, escaped_value)?;
            } else {
                // If there is no value, then just append the line verbatim
                writeln!(file, "{}", setting.name)?;
            }
        }
    }

    // Append options for connecting to storage
    writeln!(file, "\n# Neon storage settings")?;
    writeln!(
        file,
        "neon.pageserver_connstring='{}'",
        escape_conf_value(&spec.pageserver_connstring)
    )?;
    if !spec.safekeeper_connstrings.is_empty() {
        writeln!(
            file,
            "neon.safekeepers='{}'",
            escape_conf_value(&spec.safekeeper_connstrings.join(","))
        )?;
    }
    writeln!(
        file,
        "neon.tenant_id='{}'",
        escape_conf_value(&spec.tenant_id.to_string())
    )?;
    writeln!(
        file,
        "neon.timeline_id='{}'",
        escape_conf_value(&spec.timeline_id.to_string())
    )?;

    Ok(())
}
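
`write_postgres_conf` now writes each spec setting as `name = 'value'` with quotes and backslashes doubled by `escape_conf_value`. A tiny standalone check of that escaping, reproducing the `test.escaping` line expected by the pg_helpers test fixture further down in this diff:

```rust
/// Double single quotes and backslashes so the value can sit safely inside
/// a single-quoted postgresql.conf string.
fn escape_conf_value(s: &str) -> String {
    s.replace('\'', "''").replace('\\', "\\\\")
}

fn main() {
    let raw = r#"here's a backslash \ and a quote ' and a double-quote " hooray"#;
    // Prints: test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
    println!("test.escaping = '{}'", escape_conf_value(raw));
}
```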
@@ -3,9 +3,10 @@ use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;

use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use crate::compute::{ComputeNode, ComputeState};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use compute_api::spec::ComputeSpecV2;

use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};

@@ -18,8 +19,8 @@ use tracing_utils::http::OtelName;

fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
    ComputeStatusResponse {
        tenant: state.pspec.as_ref().map(|pspec| pspec.tenant.clone()),
        timeline: state.pspec.as_ref().map(|pspec| pspec.timeline.clone()),
        tenant: state.spec.as_ref().map(|spec| spec.tenant_id.to_string()),
        timeline: state.spec.as_ref().map(|spec| spec.timeline_id.to_string()),
        status: state.status,
        last_active: state.last_active,
        error: state.error.clone(),

@@ -134,11 +135,9 @@ async fn handle_configure_request(
    let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
    let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
    if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
        let spec = request.spec;

        let parsed_spec = match ParsedSpec::try_from(spec) {
        let specv2 = match ComputeSpecV2::try_from(request.spec) {
            Ok(ps) => ps,
            Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)),
            Err(err) => return Err((err.to_string(), StatusCode::PRECONDITION_FAILED)),
        };

        // XXX: wrap state update under lock in code blocks. Otherwise,

@@ -156,7 +155,7 @@ async fn handle_configure_request(
            );
            return Err((msg, StatusCode::PRECONDITION_FAILED));
        }
        state.pspec = Some(parsed_spec);
        state.spec = Some(specv2);
        state.status = ComputeStatus::ConfigurationPending;
        compute.state_changed.notify_all();
        drop(state);

@@ -210,8 +209,8 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {

// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main]
async fn serve(state: Arc<ComputeNode>) {
    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
async fn serve(port: u16, state: Arc<ComputeNode>) {
    let addr = SocketAddr::from(([0, 0, 0, 0], port));

    let make_service = make_service_fn(move |_conn| {
        let state = state.clone();

@@ -246,10 +245,10 @@ async fn serve(state: Arc<ComputeNode>) {
}

/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
        .name("http-endpoint".into())
        .spawn(move || serve(state))?)
        .spawn(move || serve(port, state))?)
}
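
`launch_http_server` and `serve` now take the listening port as a parameter instead of hard-coding 3080. The thread-per-server shape, reduced to plain `std` networking purely to show how the port is threaded through (the real `serve` runs a hyper server):

```rust
use std::io::Write;
use std::net::TcpListener;
use std::thread;

/// Port-parameterized launch pattern: bind on the given port inside a named
/// thread and hand the JoinHandle back to the caller.
fn launch_server(port: u16) -> std::io::Result<thread::JoinHandle<()>> {
    thread::Builder::new()
        .name("http-endpoint".into())
        .spawn(move || {
            let listener = TcpListener::bind(("0.0.0.0", port)).expect("failed to bind");
            for mut stream in listener.incoming().flatten() {
                // Minimal fixed response, enough to prove the port wiring works.
                let _ = stream.write_all(b"HTTP/1.1 200 OK\r\ncontent-length: 2\r\n\r\nok");
            }
        })
}
```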
@@ -23,13 +23,12 @@ fn escape_literal(s: &str) -> String {

/// Escape a string so that it can be used in postgresql.conf.
/// Same as escape_literal, currently.
fn escape_conf_value(s: &str) -> String {
pub fn escape_conf_value(s: &str) -> String {
    s.replace('\'', "''").replace('\\', "\\\\")
}

trait GenericOptionExt {
    fn to_pg_option(&self) -> String;
    fn to_pg_setting(&self) -> String;
}

impl GenericOptionExt for GenericOption {

@@ -44,23 +43,10 @@ impl GenericOptionExt for GenericOption {
            self.name.to_owned()
        }
    }

    /// Represent `GenericOption` as configuration option.
    fn to_pg_setting(&self) -> String {
        if let Some(val) = &self.value {
            match self.vartype.as_ref() {
                "string" => format!("{} = '{}'", self.name, escape_conf_value(val)),
                _ => format!("{} = {}", self.name, val),
            }
        } else {
            self.name.to_owned()
        }
    }
}

pub trait PgOptionsSerialize {
    fn as_pg_options(&self) -> String;
    fn as_pg_settings(&self) -> String;
}

impl PgOptionsSerialize for GenericOptions {

@@ -76,20 +62,6 @@ impl PgOptionsSerialize for GenericOptions {
            "".to_string()
        }
    }

    /// Serialize an optional collection of `GenericOption`'s to
    /// `postgresql.conf` compatible format.
    fn as_pg_settings(&self) -> String {
        if let Some(ops) = &self {
            ops.iter()
                .map(|op| op.to_pg_setting())
                .collect::<Vec<String>>()
                .join("\n")
                + "\n" // newline after last setting
        } else {
            "".to_string()
        }
    }
}

pub trait GenericOptionsSearch {
@@ -1,3 +1,4 @@
//! Functions to reconciliate Postgres cluster with the spec file
use std::path::Path;
use std::str::FromStr;

@@ -10,11 +11,14 @@ use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;

use compute_api::spec::{ComputeSpec, Database, PgIdent, Role};
use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2, Database, PgIdent, Role};

/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
/// env variable is set, it will be used for authorization.
pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result<ComputeSpec> {
pub fn get_spec_from_control_plane(
    base_uri: &str,
    compute_id: &str,
) -> Result<ComputeSpecAnyVersion> {
    let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
        Ok(v) => v,

@@ -26,19 +30,18 @@ pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result<C
    // - network error, then retry
    // - no spec for compute yet, then wait
    // - compute id is unknown or any other error, then bail out
    let spec = reqwest::blocking::Client::new()
    let json = reqwest::blocking::Client::new()
        .get(cp_uri)
        .header("Authorization", jwt)
        .send()?
        .json()?;

    Ok(spec)
    Ok(ComputeSpecAnyVersion(json))
}

/// It takes cluster specification and does the following:
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
/// - Update `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
pub fn handle_configuration(spec: &ComputeSpecV2, pgdata_path: &Path) -> Result<()> {
    // File `postgresql.conf` is no longer included into `basebackup`, so just
    // always write all config into it creating new file.
    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;

@@ -66,7 +69,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
    let mut xact = client.transaction()?;
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

@@ -122,7 +125,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

    info!("cluster spec roles:");
    for role in &spec.cluster.roles {
    for role in &spec.roles {
        let name = &role.name;
        // XXX: with a limited number of roles it is fine, but consider making it a HashMap
        let pg_role = existing_roles.iter().find(|r| r.name == *name);

@@ -207,7 +210,11 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

/// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)]
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
pub fn handle_role_deletions(
    spec: &ComputeSpecV2,
    connstr: &str,
    client: &mut Client,
) -> Result<()> {
    if let Some(ops) = &spec.delta_operations {
        // First, reassign all dependent objects to db owners.
        info!("reassigning dependent objects of to-be-deleted roles");

@@ -249,8 +256,8 @@ pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Cli
}

// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
    for db in &spec.cluster.databases {
fn reassign_owned_objects(spec: &ComputeSpecV2, connstr: &str, role_name: &PgIdent) -> Result<()> {
    for db in &spec.databases {
        if db.owner != *role_name {
            let mut conf = Config::from_str(connstr)?;
            conf.dbname(&db.name);

@@ -284,7 +291,7 @@ fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent
/// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    // Print a list of existing Postgres databases (only in debug mode)

@@ -332,7 +339,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    info!("cluster spec databases:");
    for db in &spec.cluster.databases {
    for db in &spec.databases {
        let name = &db.name;

        // XXX: with a limited number of databases it is fine, but consider making it a HashMap

@@ -397,7 +404,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -> Result<()> {
    info!("cluster spec grants:");

    // We now have a separate `web_access` role to connect to the database

@@ -407,13 +414,12 @@ pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) ->
    // XXX: later we should stop messing with Postgres ACL in such horrible
    // ways.
    let roles = spec
        .cluster
        .roles
        .iter()
        .map(|r| r.name.pg_quote())
        .collect::<Vec<_>>();

    for db in &spec.cluster.databases {
    for db in &spec.databases {
        let dbname = &db.name;

        let query: String = format!(

@@ -429,7 +435,7 @@ pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) ->
    // Do some per-database access adjustments. We'd better do this at db creation time,
    // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
    // atomically.
    for db in &spec.cluster.databases {
    for db in &spec.databases {
        let mut conf = Config::from_str(connstr)?;
        conf.dbname(&db.name);

@@ -499,14 +505,11 @@ pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) ->

/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
        if libs.contains("pg_stat_statements") {
            // Create extension only if this compute really needs it
            let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
            info!("creating system extensions with query: {}", query);
            client.simple_query(query)?;
        }
pub fn handle_extensions(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
    for extension in &spec.extensions {
        let query = format!("CREATE EXTENSION IF NOT EXISTS {}", extension.pg_quote());
        info!("creating system extensions with query: {}", query);
        client.simple_query(&query)?;
    }

    Ok(())
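
`handle_extensions` now iterates `spec.extensions` and splices each name into the SQL via `pg_quote()`. A rough sketch of what identifier quoting like this usually does, with a hypothetical standalone helper rather than the crate's actual implementation:

```rust
/// Quote a Postgres identifier: wrap it in double quotes and double any
/// embedded double quotes, so it is safe to splice into a statement such as
/// `CREATE EXTENSION IF NOT EXISTS ...`.
fn pg_quote(ident: &str) -> String {
    format!("\"{}\"", ident.replace('"', "\"\""))
}

fn main() {
    for ext in ["pg_stat_statements", "weird\"name"] {
        println!("CREATE EXTENSION IF NOT EXISTS {}", pg_quote(ext));
    }
}
```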
@@ -1,57 +1,24 @@
#[cfg(test)]
mod pg_helpers_tests {
    use std::fs::File;

    use compute_api::spec::{ComputeSpec, GenericOption, GenericOptions, PgIdent};
    use anyhow::Result;
    use compute_api::spec::{ComputeSpecV2, GenericOption, GenericOptions, PgIdent};
    use compute_tools::pg_helpers::*;

    #[test]
    fn params_serialize() {
        let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap();
        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
    fn params_serialize() -> Result<()> {
        let spec_v1_str =
            std::fs::read_to_string("../libs/compute_api/tests/spec-v1.json").unwrap();
        let spec = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;

        assert_eq!(
            spec.cluster.databases.first().unwrap().to_pg_options(),
            spec.databases.first().unwrap().to_pg_options(),
            "LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \"alexk\""
        );
        assert_eq!(
            spec.cluster.roles.first().unwrap().to_pg_options(),
            spec.roles.first().unwrap().to_pg_options(),
            "LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
        );
    }

    #[test]
    fn settings_serialize() {
        let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap();
        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),
            r#"fsync = off
wal_level = replica
hot_standby = on
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on
shared_buffers = 32768
port = 55432
max_connections = 100
max_wal_senders = 10
listen_addresses = '0.0.0.0'
wal_sender_timeout = 0
password_encryption = md5
maintenance_work_mem = 65536
max_parallel_workers = 8
max_worker_processes = 8
neon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'
max_replication_slots = 10
neon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'
shared_preload_libraries = 'neon'
synchronous_standby_names = 'walproposer'
neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
"#
        );
        Ok(())
    }

    #[test]
@@ -6,6 +6,7 @@ license.workspace = true

[dependencies]
anyhow.workspace = true
chrono.workspace = true
clap.workspace = true
comfy-table.workspace = true
git-version.workspace = true

@@ -26,6 +27,7 @@ url.workspace = true
pageserver_api.workspace = true
postgres_backend.workspace = true
safekeeper_api.workspace = true
compute_api.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true
@@ -7,7 +7,7 @@
//!
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use control_plane::compute::ComputeControlPlane;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::LocalEnv;
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;

@@ -106,8 +106,8 @@ fn main() -> Result<()> {
        "start" => handle_start_all(sub_args, &env),
        "stop" => handle_stop_all(sub_args, &env),
        "pageserver" => handle_pageserver(sub_args, &env),
        "pg" => handle_pg(sub_args, &env),
        "safekeeper" => handle_safekeeper(sub_args, &env),
        "endpoint" => handle_endpoint(sub_args, &env),
        _ => bail!("unexpected subcommand {sub_name}"),
    };

@@ -470,10 +470,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            let mut cplane = ComputeControlPlane::load(env.clone())?;
            println!("Importing timeline into pageserver ...");
            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
            println!("Creating node for imported timeline ...");
            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;

            cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?;
            println!("Creating endpoint for imported timeline ...");
            cplane.new_endpoint(name, tenant_id, timeline_id, None, None, None, pg_version)?;
            println!("Done");
        }
        Some(("branch", branch_match)) => {

@@ -521,10 +521,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
    Ok(())
}

fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let (sub_name, sub_args) = match pg_match.subcommand() {
        Some(pg_subcommand_data) => pg_subcommand_data,
        None => bail!("no pg subcommand provided"),
fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let (sub_name, sub_args) = match ep_match.subcommand() {
        Some(ep_subcommand_data) => ep_subcommand_data,
        None => bail!("no endpoint subcommand provided"),
    };

    let mut cplane = ComputeControlPlane::load(env.clone())?;

@@ -546,7 +546,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
            table.load_preset(comfy_table::presets::NOTHING);

            table.set_header([
                "NODE",
                "ENDPOINT",
                "ADDRESS",
                "TIMELINE",
                "BRANCH NAME",

@@ -554,39 +554,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                "STATUS",
            ]);

            for ((_, node_name), node) in cplane
                .nodes
            for (endpoint_id, endpoint) in cplane
                .endpoints
                .iter()
                .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
                .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
            {
                let lsn_str = match node.lsn {
                let lsn_str = match endpoint.lsn {
                    None => {
                        // -> primary node
                        // -> primary endpoint
                        // Use the LSN at the end of the timeline.
                        timeline_infos
                            .get(&node.timeline_id)
                            .get(&endpoint.timeline_id)
                            .map(|bi| bi.last_record_lsn.to_string())
                            .unwrap_or_else(|| "?".to_string())
                    }
                    Some(lsn) => {
                        // -> read-only node
                        // Use the node's LSN.
                        // -> read-only endpoint
                        // Use the endpoint's LSN.
                        lsn.to_string()
                    }
                };

                let branch_name = timeline_name_mappings
                    .get(&TenantTimelineId::new(tenant_id, node.timeline_id))
                    .get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id))
                    .map(|name| name.as_str())
                    .unwrap_or("?");

                table.add_row([
                    node_name.as_str(),
                    &node.address.to_string(),
                    &node.timeline_id.to_string(),
                    endpoint_id.as_str(),
                    &endpoint.pg_address.to_string(),
                    &endpoint.timeline_id.to_string(),
                    branch_name,
                    lsn_str.as_str(),
                    node.status(),
                    endpoint.status(),
                ]);
            }

@@ -597,10 +597,10 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                .get_one::<String>("branch-name")
                .map(|s| s.as_str())
                .unwrap_or(DEFAULT_BRANCH_NAME);
            let node_name = sub_args
                .get_one::<String>("node")
                .map(|node_name| node_name.to_string())
                .unwrap_or_else(|| format!("{branch_name}_node"));
            let endpoint_id = sub_args
                .get_one::<String>("endpoint_id")
                .map(String::to_string)
                .unwrap_or_else(|| format!("ep-{branch_name}"));

            let lsn = sub_args
                .get_one::<String>("lsn")

@@ -611,22 +611,46 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                .get_branch_timeline_id(branch_name, tenant_id)
                .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;

            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();

            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
            let pg_version = sub_args
                .get_one::<u32>("pg-version")
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

            cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
            cplane.new_endpoint(
                &endpoint_id,
                tenant_id,
                timeline_id,
                lsn,
                pg_port,
                http_port,
                pg_version,
            )?;
        }
        "start" => {
            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
            let node_name = sub_args
                .get_one::<String>("node")
                .ok_or_else(|| anyhow!("No node name was provided to start"))?;
            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
            let endpoint_id = sub_args
                .get_one::<String>("endpoint_id")
                .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;

            let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
            // If --safekeepers argument is given, use only the listed safekeeper nodes.
            let safekeepers =
                if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
                    let mut safekeepers: Vec<NodeId> = Vec::new();
                    for sk_id in safekeepers_str.split(',').map(str::trim) {
                        let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
                            anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
                        })?);
                        safekeepers.push(sk_id);
                    }
                    safekeepers
                } else {
                    env.safekeepers.iter().map(|sk| sk.id).collect()
                };

            let endpoint = cplane.endpoints.get(endpoint_id.as_str());

            let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
                let claims = Claims::new(Some(tenant_id), Scope::Tenant);

@@ -636,9 +660,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                None
            };

            if let Some(node) = node {
                println!("Starting existing postgres {node_name}...");
                node.start(&auth_token)?;
            if let Some(endpoint) = endpoint {
                println!("Starting existing endpoint {endpoint_id}...");
                endpoint.start(&auth_token, safekeepers)?;
            } else {
                let branch_name = sub_args
                    .get_one::<String>("branch-name")

@@ -663,27 +687,34 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                // start --port X
                // stop
                // start <-- will also use port X even without explicit port argument
                println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
                println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");

                let node =
                    cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
                node.start(&auth_token)?;
                let ep = cplane.new_endpoint(
                    endpoint_id,
                    tenant_id,
                    timeline_id,
                    lsn,
                    pg_port,
                    http_port,
                    pg_version,
                )?;
                ep.start(&auth_token, safekeepers)?;
            }
        }
        "stop" => {
            let node_name = sub_args
                .get_one::<String>("node")
                .ok_or_else(|| anyhow!("No node name was provided to stop"))?;
            let endpoint_id = sub_args
                .get_one::<String>("endpoint_id")
                .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?;
            let destroy = sub_args.get_flag("destroy");

            let node = cplane
                .nodes
                .get(&(tenant_id, node_name.to_string()))
                .with_context(|| format!("postgres {node_name} is not found"))?;
            node.stop(destroy)?;
            let endpoint = cplane
                .endpoints
                .get(endpoint_id.as_str())
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
            endpoint.stop(destroy)?;
        }

        _ => bail!("Unexpected pg subcommand '{sub_name}'"),
        _ => bail!("Unexpected endpoint subcommand '{sub_name}'"),
    }

    Ok(())
@@ -802,7 +833,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}

fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
    // Postgres nodes are not started automatically
    // Endpoints are not started automatically

    broker::start_broker_process(env)?;

@@ -836,10 +867,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
    let pageserver = PageServerNode::from_env(env);

    // Stop all compute nodes
    // Stop all endpoints
    match ComputeControlPlane::load(env.clone()) {
        Ok(cplane) => {
            for (_k, node) in cplane.nodes {
            for (_k, node) in cplane.endpoints {
                if let Err(e) = node.stop(false) {
                    eprintln!("postgres stop failed: {e:#}");
                }

@@ -872,7 +903,9 @@ fn cli() -> Command {
        .help("Name of the branch to be created or used as an alias for other services")
        .required(false);

    let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
    let endpoint_id_arg = Arg::new("endpoint_id")
        .help("Postgres endpoint id")
        .required(false);

    let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);

@@ -893,11 +926,22 @@ fn cli() -> Command {
        .value_parser(value_parser!(u32))
        .default_value(DEFAULT_PG_VERSION);

    let port_arg = Arg::new("port")
        .long("port")
    let pg_port_arg = Arg::new("pg-port")
        .long("pg-port")
        .required(false)
        .value_parser(value_parser!(u16))
        .value_name("port");
        .value_name("pg-port");

    let http_port_arg = Arg::new("http-port")
        .long("http-port")
        .required(false)
        .value_parser(value_parser!(u16))
        .value_name("http-port");

    let safekeepers_arg = Arg::new("safekeepers")
        .long("safekeepers")
        .required(false)
        .value_name("safekeepers");

    let stop_mode_arg = Arg::new("stop-mode")
        .short('m')

@@ -1026,37 +1070,40 @@ fn cli() -> Command {
            )
        )
        .subcommand(
            Command::new("pg")
            Command::new("endpoint")
                .arg_required_else_help(true)
                .about("Manage postgres instances")
                .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
                .subcommand(Command::new("create")
                    .about("Create a postgres compute node")
                    .arg(pg_node_arg.clone())
                    .about("Create a compute endpoint")
                    .arg(endpoint_id_arg.clone())
                    .arg(branch_name_arg.clone())
                    .arg(tenant_id_arg.clone())
                    .arg(lsn_arg.clone())
                    .arg(port_arg.clone())
                    .arg(pg_port_arg.clone())
                    .arg(http_port_arg.clone())
                    .arg(
                        Arg::new("config-only")
                            .help("Don't do basebackup, create compute node with only config files")
                            .help("Don't do basebackup, create endpoint directory with only config files")
                            .long("config-only")
                            .required(false))
                    .arg(pg_version_arg.clone())
                )
                .subcommand(Command::new("start")
                    .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
                    .arg(pg_node_arg.clone())
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
                    .arg(endpoint_id_arg.clone())
                    .arg(tenant_id_arg.clone())
                    .arg(branch_name_arg)
                    .arg(timeline_id_arg)
                    .arg(lsn_arg)
                    .arg(port_arg)
                    .arg(pg_port_arg)
                    .arg(http_port_arg)
                    .arg(pg_version_arg)
                    .arg(safekeepers_arg)
                )
                .subcommand(
                    Command::new("stop")
                    .arg(pg_node_arg)
                    .arg(endpoint_id_arg)
                    .arg(tenant_id_arg)
                    .arg(
                        Arg::new("destroy")
@@ -1,3 +1,9 @@
//! Code to manage the storage broker
//!
//! In the local test environment, the data for each safekeeper is stored in
//!
//! .neon/safekeepers/<safekeeper id>
//!
use anyhow::Context;

use std::path::PathBuf;
@@ -1,508 +0,0 @@
use std::collections::BTreeMap;
use std::fs::{self, File};
use std::io::Write;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;

use anyhow::{Context, Result};
use utils::{
    id::{TenantId, TimelineId},
    lsn::Lsn,
};

use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
use crate::pageserver::PageServerNode;
use crate::postgresql_conf::PostgresConf;

//
// ComputeControlPlane
//
pub struct ComputeControlPlane {
    base_port: u16,
    pageserver: Arc<PageServerNode>,
    pub nodes: BTreeMap<(TenantId, String), Arc<PostgresNode>>,
    env: LocalEnv,
}

impl ComputeControlPlane {
    // Load current nodes with ports from data directories on disk
    // Directory structure has the following layout:
    // pgdatadirs
    // |- tenants
    // | |- <tenant_id>
    // | | |- <node name>
    pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
        let pageserver = Arc::new(PageServerNode::from_env(&env));

        let mut nodes = BTreeMap::default();
        let pgdatadirspath = &env.pg_data_dirs_path();

        for tenant_dir in fs::read_dir(pgdatadirspath)
            .with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
        {
            let tenant_dir = tenant_dir?;
            for timeline_dir in fs::read_dir(tenant_dir.path())
                .with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
            {
                let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
                nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node));
            }
        }

        Ok(ComputeControlPlane {
            base_port: 55431,
            pageserver,
            nodes,
            env,
        })
    }

    fn get_port(&mut self) -> u16 {
        1 + self
            .nodes
            .values()
            .map(|node| node.address.port())
            .max()
            .unwrap_or(self.base_port)
    }

    pub fn new_node(
        &mut self,
        tenant_id: TenantId,
        name: &str,
        timeline_id: TimelineId,
        lsn: Option<Lsn>,
        port: Option<u16>,
        pg_version: u32,
    ) -> Result<Arc<PostgresNode>> {
        let port = port.unwrap_or_else(|| self.get_port());
        let node = Arc::new(PostgresNode {
            name: name.to_owned(),
            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
            env: self.env.clone(),
            pageserver: Arc::clone(&self.pageserver),
            timeline_id,
            lsn,
            tenant_id,
            pg_version,
        });

        node.create_pgdata()?;
        node.setup_pg_conf()?;

        self.nodes
            .insert((tenant_id, node.name.clone()), Arc::clone(&node));

        Ok(node)
    }
}
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PostgresNode {
|
||||
pub address: SocketAddr,
|
||||
name: String,
|
||||
pub env: LocalEnv,
|
||||
pageserver: Arc<PageServerNode>,
|
||||
pub timeline_id: TimelineId,
|
||||
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
|
||||
pub tenant_id: TenantId,
|
||||
pg_version: u32,
|
||||
}
|
||||
|
||||
impl PostgresNode {
|
||||
fn from_dir_entry(
|
||||
entry: std::fs::DirEntry,
|
||||
env: &LocalEnv,
|
||||
pageserver: &Arc<PageServerNode>,
|
||||
) -> Result<PostgresNode> {
|
||||
if !entry.file_type()?.is_dir() {
|
||||
anyhow::bail!(
|
||||
"PostgresNode::from_dir_entry failed: '{}' is not a directory",
|
||||
entry.path().display()
|
||||
);
|
||||
}
|
||||
|
||||
// parse data directory name
|
||||
let fname = entry.file_name();
|
||||
let name = fname.to_str().unwrap().to_string();
|
||||
|
||||
// Read config file into memory
|
||||
let cfg_path = entry.path().join("postgresql.conf");
|
||||
let cfg_path_str = cfg_path.to_string_lossy();
|
||||
let mut conf_file = File::open(&cfg_path)
|
||||
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
|
||||
let conf = PostgresConf::read(&mut conf_file)
|
||||
.with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
|
||||
|
||||
// Read a few options from the config file
|
||||
let context = format!("in config file {}", cfg_path_str);
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
|
||||
let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
|
||||
|
||||
// Read postgres version from PG_VERSION file to determine which postgres version binary to use.
|
||||
// If it doesn't exist, assume broken data directory and use default pg version.
|
||||
let pg_version_path = entry.path().join("PG_VERSION");
|
||||
|
||||
let pg_version_str =
|
||||
fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
|
||||
let pg_version = u32::from_str(&pg_version_str)?;
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
let recovery_target_lsn: Option<Lsn> =
|
||||
conf.parse_field_optional("recovery_target_lsn", &context)?;
|
||||
|
||||
// ok now
|
||||
Ok(PostgresNode {
|
||||
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
|
||||
name,
|
||||
env: env.clone(),
|
||||
pageserver: Arc::clone(pageserver),
|
||||
timeline_id,
|
||||
lsn: recovery_target_lsn,
|
||||
tenant_id,
|
||||
pg_version,
|
||||
})
|
||||
}
|
||||
|
||||
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
|
||||
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
|
||||
let mut cmd = Command::new(pg_path);
|
||||
|
||||
cmd.arg("--sync-safekeepers")
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
)
|
||||
.env("PGDATA", self.pgdata().to_str().unwrap())
|
||||
.stdout(Stdio::piped())
|
||||
// Comment this to avoid capturing stderr (useful if command hangs)
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("NEON_AUTH_TOKEN", token);
|
||||
}
|
||||
|
||||
let sync_handle = cmd
|
||||
.spawn()
|
||||
.expect("postgres --sync-safekeepers failed to start");
|
||||
|
||||
let sync_output = sync_handle
|
||||
.wait_with_output()
|
||||
.expect("postgres --sync-safekeepers failed");
|
||||
if !sync_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"sync-safekeepers failed: '{}'",
|
||||
String::from_utf8_lossy(&sync_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
|
||||
println!("Safekeepers synced on {}", lsn);
|
||||
Ok(lsn)
|
||||
}
|
||||
|
||||
/// Get basebackup from the pageserver as a tar archive and extract it
|
||||
/// to the `self.pgdata()` directory.
|
||||
fn do_basebackup(&self, lsn: Option<Lsn>) -> Result<()> {
|
||||
println!(
|
||||
"Extracting base backup to create postgres instance: path={} port={}",
|
||||
self.pgdata().display(),
|
||||
self.address.port()
|
||||
);
|
||||
|
||||
let sql = if let Some(lsn) = lsn {
|
||||
format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn)
|
||||
} else {
|
||||
format!("basebackup {} {}", self.tenant_id, self.timeline_id)
|
||||
};
|
||||
|
||||
let mut client = self
|
||||
.pageserver
|
||||
.page_server_psql_client()
|
||||
.context("connecting to page server failed")?;
|
||||
|
||||
let copyreader = client
|
||||
.copy_out(sql.as_str())
|
||||
.context("page server 'basebackup' command failed")?;
|
||||
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
//
|
||||
// Set `ignore_zeros` so that unpack() reads all the Copy data and
|
||||
// doesn't stop at the end-of-archive marker. Otherwise, if the server
|
||||
// sends an Error after finishing the tarball, we will not notice it.
|
||||
let mut ar = tar::Archive::new(copyreader);
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata())
|
||||
.context("extracting base backup failed")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_pgdata(&self) -> Result<()> {
|
||||
fs::create_dir_all(self.pgdata()).with_context(|| {
|
||||
format!(
|
||||
"could not create data directory {}",
|
||||
self.pgdata().display()
|
||||
)
|
||||
})?;
|
||||
fs::set_permissions(self.pgdata().as_path(), fs::Permissions::from_mode(0o700))
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"could not set permissions in data directory {}",
|
||||
self.pgdata().display()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Write postgresql.conf with default configuration
|
||||
// and PG_VERSION file to the data directory of a new node.
|
||||
fn setup_pg_conf(&self) -> Result<()> {
|
||||
let mut conf = PostgresConf::new();
|
||||
conf.append("max_wal_senders", "10");
|
||||
conf.append("wal_log_hints", "off");
|
||||
conf.append("max_replication_slots", "10");
|
||||
conf.append("hot_standby", "on");
|
||||
conf.append("shared_buffers", "1MB");
|
||||
conf.append("fsync", "off");
|
||||
conf.append("max_connections", "100");
|
||||
conf.append("wal_level", "replica");
|
||||
// wal_sender_timeout is the maximum time to wait for WAL replication.
|
||||
// It also defines how often the walreceiver will send a feedback message to the wal sender.
|
||||
conf.append("wal_sender_timeout", "5s");
|
||||
conf.append("listen_addresses", &self.address.ip().to_string());
|
||||
conf.append("port", &self.address.port().to_string());
|
||||
conf.append("wal_keep_size", "0");
|
||||
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
|
||||
conf.append("restart_after_crash", "off");
|
||||
|
||||
// Configure the node to fetch pages from pageserver
|
||||
let pageserver_connstr = {
|
||||
let config = &self.pageserver.pg_connection_config;
|
||||
let (host, port) = (config.host(), config.port());
|
||||
|
||||
// NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
|
||||
format!("postgresql://no_user@{host}:{port}")
|
||||
};
|
||||
conf.append("shared_preload_libraries", "neon");
|
||||
conf.append_line("");
|
||||
conf.append("neon.pageserver_connstring", &pageserver_connstr);
|
||||
conf.append("neon.tenant_id", &self.tenant_id.to_string());
|
||||
conf.append("neon.timeline_id", &self.timeline_id.to_string());
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
|
||||
conf.append_line("");
|
||||
// Configure backpressure
|
||||
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
|
||||
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
|
||||
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
|
||||
// Actually latency should be much smaller (better if < 1sec). But we assume that recently
|
||||
// updated pages are not requested from the pageserver.
|
||||
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
|
||||
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
|
||||
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
|
||||
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
|
||||
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
|
||||
// To be able to restore database in case of pageserver node crash, safekeeper should not
|
||||
// remove WAL beyond this point. Too large a lag can cause space exhaustion in safekeepers
|
||||
// (if they are not able to upload WAL to S3).
|
||||
conf.append("max_replication_write_lag", "15MB");
|
||||
conf.append("max_replication_flush_lag", "10GB");
|
||||
|
||||
if !self.env.safekeepers.is_empty() {
|
||||
// Configure the node to connect to the safekeepers
|
||||
conf.append("synchronous_standby_names", "walproposer");
|
||||
|
||||
let safekeepers = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.map(|sk| format!("localhost:{}", sk.pg_port))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
conf.append("neon.safekeepers", &safekeepers);
|
||||
} else {
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
// so set a more relaxed synchronous_commit.
|
||||
conf.append("synchronous_commit", "remote_write");
|
||||
|
||||
// Configure the node to stream WAL directly to the pageserver
|
||||
// This isn't really a supported configuration, but can be useful for
|
||||
// testing.
|
||||
conf.append("synchronous_standby_names", "pageserver");
|
||||
}
|
||||
|
||||
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
|
||||
file.write_all(conf.to_string().as_bytes())?;
|
||||
|
||||
let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
|
||||
file.write_all(self.pg_version.to_string().as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
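// Decide which LSN to request the basebackup at (the node's configured LSN, or
// the LSN the safekeepers agree on), then fetch and extract it into pgdata.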
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
|
||||
let backup_lsn = if let Some(lsn) = self.lsn {
|
||||
Some(lsn)
|
||||
} else if !self.env.safekeepers.is_empty() {
|
||||
// LSN 0 means that this is bootstrap and we only need to download the
// latest data from the pageserver. That is a bit clumsy, but the whole
// bootstrap procedure is still evolving quite actively, so let's revisit
// this once things are more stable (TODO).
|
||||
let lsn = self.sync_safekeepers(auth_token, self.pg_version)?;
|
||||
if lsn == Lsn(0) {
|
||||
None
|
||||
} else {
|
||||
Some(lsn)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.do_basebackup(backup_lsn)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn pgdata(&self) -> PathBuf {
|
||||
self.env.pg_data_dir(&self.tenant_id, &self.name)
|
||||
}
|
||||
|
||||
pub fn status(&self) -> &str {
|
||||
let timeout = Duration::from_millis(300);
|
||||
let has_pidfile = self.pgdata().join("postmaster.pid").exists();
|
||||
let can_connect = TcpStream::connect_timeout(&self.address, timeout).is_ok();
|
||||
|
||||
match (has_pidfile, can_connect) {
|
||||
(true, true) => "running",
|
||||
(false, false) => "stopped",
|
||||
(true, false) => "crashed",
|
||||
(false, true) => "running, no pidfile",
|
||||
}
|
||||
}
|
||||
|
||||
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
|
||||
let mut cmd = Command::new(&pg_ctl_path);
|
||||
cmd.args(
|
||||
[
|
||||
&[
|
||||
"-D",
|
||||
self.pgdata().to_str().unwrap(),
|
||||
"-l",
|
||||
self.pgdata().join("pg.log").to_str().unwrap(),
|
||||
"-w", //wait till pg_ctl actually does what was asked
|
||||
],
|
||||
args,
|
||||
]
|
||||
.concat(),
|
||||
)
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
);
|
||||
|
||||
// Pass authentication token used for the connections to pageserver and safekeepers
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("NEON_AUTH_TOKEN", token);
|
||||
}
|
||||
|
||||
let pg_ctl = cmd
|
||||
.output()
|
||||
.context(format!("{} failed", pg_ctl_path.display()))?;
|
||||
if !pg_ctl.status.success() {
|
||||
anyhow::bail!(
|
||||
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
|
||||
pg_ctl.status,
|
||||
String::from_utf8_lossy(&pg_ctl.stdout),
|
||||
String::from_utf8_lossy(&pg_ctl.stderr),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
|
||||
// Bail if the node is already running.
|
||||
if self.status() == "running" {
|
||||
anyhow::bail!("The node is already running");
|
||||
}
|
||||
|
||||
// 1. We always start the compute node from scratch, so if an old
//    data directory exists, preserve 'postgresql.conf' and drop the directory.
|
||||
let postgresql_conf_path = self.pgdata().join("postgresql.conf");
|
||||
let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
|
||||
format!(
|
||||
"failed to read config file in {}",
|
||||
postgresql_conf_path.to_str().unwrap()
|
||||
)
|
||||
})?;
|
||||
fs::remove_dir_all(self.pgdata())?;
|
||||
self.create_pgdata()?;
|
||||
|
||||
// 2. Bring back config files
|
||||
fs::write(&postgresql_conf_path, postgresql_conf)?;
|
||||
|
||||
// 3. Load basebackup
|
||||
self.load_basebackup(auth_token)?;
|
||||
|
||||
if self.lsn.is_some() {
|
||||
File::create(self.pgdata().join("standby.signal"))?;
|
||||
}
|
||||
|
||||
// 4. Finally start the compute node postgres
|
||||
println!("Starting postgres node at '{}'", self.connstr());
|
||||
self.pg_ctl(&["start"], auth_token)
|
||||
}
|
||||
|
||||
pub fn stop(&self, destroy: bool) -> Result<()> {
|
||||
// If we are going to destroy data directory,
|
||||
// use immediate shutdown mode, otherwise,
|
||||
// shutdown gracefully to leave the data directory sane.
|
||||
//
|
||||
// The compute node always starts from scratch, so stopping
// without destroy is only used for testing and debugging.
|
||||
//
|
||||
if destroy {
|
||||
self.pg_ctl(&["-m", "immediate", "stop"], &None)?;
|
||||
println!(
|
||||
"Destroying postgres data directory '{}'",
|
||||
self.pgdata().to_str().unwrap()
|
||||
);
|
||||
fs::remove_dir_all(self.pgdata())?;
|
||||
} else {
|
||||
self.pg_ctl(&["stop"], &None)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn connstr(&self) -> String {
|
||||
format!(
|
||||
"host={} port={} user={} dbname={}",
|
||||
self.address.ip(),
|
||||
self.address.port(),
|
||||
"cloud_admin",
|
||||
"postgres"
|
||||
)
|
||||
}
|
||||
}
|
||||
control_plane/src/endpoint.rs (new file, 568 lines)
@@ -0,0 +1,568 @@
|
||||
//! Code to manage compute endpoints
|
||||
//!
|
||||
//! In the local test environment, the data for each endpoint is stored in
|
||||
//!
|
||||
//! .neon/endpoints/<endpoint id>
|
||||
//!
|
||||
//! Some basic information about the endpoint, like the tenant and timeline IDs,
|
||||
//! are stored in the `endpoint.json` file. The `endpoint.json` file is created
|
||||
//! when the endpoint is created, and doesn't change afterwards.
|
||||
//!
|
||||
//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
|
||||
//! started, we launch `compute_ctl`. It synchronizes the safekeepers, downloads
//! the basebackup from the pageserver to initialize the data directory, and
|
||||
//! finally launches the PostgreSQL process. It watches the PostgreSQL process
|
||||
//! until it exits.
|
||||
//!
|
||||
//! When an endpoint is created, a `postgresql.conf` file is also created in
|
||||
//! the endpoint's directory. The file can be modified before starting PostgreSQL.
|
||||
//! However, the `postgresql.conf` file in the endpoint directory is not used directly
|
||||
//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another
|
||||
//! copy of it in the data directory.
|
||||
//!
|
||||
//! Directory contents:
|
||||
//!
|
||||
//! ```ignore
|
||||
//! .neon/endpoints/main/
|
||||
//! compute.log - log output of `compute_ctl` and `postgres`
|
||||
//! endpoint.json - serialized `EndpointConf` struct
|
||||
//! postgresql.conf - postgresql settings
|
||||
//! spec.json - passed to `compute_ctl`
|
||||
//! pgdata/
|
||||
//! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
|
||||
//! zenith.signal
|
||||
//! <other PostgreSQL files>
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::BTreeMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::pageserver::PageServerNode;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
|
||||
use compute_api::responses::{ComputeState, ComputeStatus};
|
||||
use compute_api::spec::ComputeSpecV2;
|
||||
|
||||
// contents of an endpoint.json file
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct EndpointConf {
|
||||
endpoint_id: String,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
timeline_id: TimelineId,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
lsn: Option<Lsn>,
|
||||
pg_port: u16,
|
||||
http_port: u16,
|
||||
pg_version: u32,
|
||||
}
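// For illustration only, a serialized endpoint.json matching this struct might
// look like the following sketch (the IDs and ports are example values, not
// taken from any real endpoint):
//
//     {
//       "endpoint_id": "main",
//       "tenant_id": "3d1f7595b468230304e0b73cecbcb081",
//       "timeline_id": "7f2aff2a1042b93a2617f44851638422",
//       "lsn": null,
//       "pg_port": 55432,
//       "http_port": 55433,
//       "pg_version": 14
//     }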
|
||||
|
||||
//
|
||||
// ComputeControlPlane
|
||||
//
|
||||
pub struct ComputeControlPlane {
|
||||
base_port: u16,
|
||||
|
||||
// endpoint ID is the key
|
||||
pub endpoints: BTreeMap<String, Arc<Endpoint>>,
|
||||
|
||||
env: LocalEnv,
|
||||
pageserver: Arc<PageServerNode>,
|
||||
}
|
||||
|
||||
impl ComputeControlPlane {
|
||||
// Load current endpoints from the endpoints/ subdirectories
|
||||
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
|
||||
let pageserver = Arc::new(PageServerNode::from_env(&env));
|
||||
|
||||
let mut endpoints = BTreeMap::default();
|
||||
for endpoint_dir in std::fs::read_dir(env.endpoints_path())
|
||||
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
|
||||
{
|
||||
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
|
||||
endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
|
||||
}
|
||||
|
||||
Ok(ComputeControlPlane {
|
||||
base_port: 55431,
|
||||
endpoints,
|
||||
env,
|
||||
pageserver,
|
||||
})
|
||||
}
|
||||
|
||||
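// Pick a port for a new endpoint: one past the highest pg/http port already in
// use by any endpoint (or one past base_port if there are no endpoints yet).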
fn get_port(&mut self) -> u16 {
|
||||
1 + self
|
||||
.endpoints
|
||||
.values()
|
||||
.map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port()))
|
||||
.max()
|
||||
.unwrap_or(self.base_port)
|
||||
}
|
||||
|
||||
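// Create a new endpoint: allocate ports if none were given, write endpoint.json
// and an initial postgresql.conf under .neon/endpoints/<endpoint id>/, and
// register the endpoint in this control plane instance.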
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new_endpoint(
|
||||
&mut self,
|
||||
endpoint_id: &str,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Option<Lsn>,
|
||||
pg_port: Option<u16>,
|
||||
http_port: Option<u16>,
|
||||
pg_version: u32,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
||||
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
|
||||
let ep = Arc::new(Endpoint {
|
||||
endpoint_id: endpoint_id.to_owned(),
|
||||
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
|
||||
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
|
||||
env: self.env.clone(),
|
||||
pageserver: Arc::clone(&self.pageserver),
|
||||
timeline_id,
|
||||
lsn,
|
||||
tenant_id,
|
||||
pg_version,
|
||||
});
|
||||
|
||||
ep.create_endpoint_dir()?;
|
||||
std::fs::write(
|
||||
ep.endpoint_path().join("endpoint.json"),
|
||||
serde_json::to_string_pretty(&EndpointConf {
|
||||
endpoint_id: endpoint_id.to_string(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
lsn,
|
||||
http_port,
|
||||
pg_port,
|
||||
pg_version,
|
||||
})?,
|
||||
)?;
|
||||
std::fs::write(
|
||||
ep.endpoint_path().join("postgresql.conf"),
|
||||
ep.setup_pg_conf()?.to_string(),
|
||||
)?;
|
||||
|
||||
self.endpoints
|
||||
.insert(ep.endpoint_id.clone(), Arc::clone(&ep));
|
||||
|
||||
Ok(ep)
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Endpoint {
|
||||
/// used as the directory name
|
||||
endpoint_id: String,
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
// Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
|
||||
pub lsn: Option<Lsn>,
|
||||
|
||||
// port and address of the Postgres server and `compute_ctl`'s HTTP API
|
||||
pub pg_address: SocketAddr,
|
||||
pub http_address: SocketAddr,
|
||||
pg_version: u32,
|
||||
|
||||
// These are not part of the endpoint as such, but the environment
|
||||
// the endpoint runs in.
|
||||
pub env: LocalEnv,
|
||||
pageserver: Arc<PageServerNode>,
|
||||
}
|
||||
|
||||
impl Endpoint {
|
||||
fn from_dir_entry(
|
||||
entry: std::fs::DirEntry,
|
||||
env: &LocalEnv,
|
||||
pageserver: &Arc<PageServerNode>,
|
||||
) -> Result<Endpoint> {
|
||||
if !entry.file_type()?.is_dir() {
|
||||
anyhow::bail!(
|
||||
"Endpoint::from_dir_entry failed: '{}' is not a directory",
|
||||
entry.path().display()
|
||||
);
|
||||
}
|
||||
|
||||
// parse data directory name
|
||||
let fname = entry.file_name();
|
||||
let endpoint_id = fname.to_str().unwrap().to_string();
|
||||
|
||||
// Read the endpoint.json file
|
||||
let conf: EndpointConf =
|
||||
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
|
||||
|
||||
Ok(Endpoint {
|
||||
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
|
||||
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
|
||||
endpoint_id,
|
||||
env: env.clone(),
|
||||
pageserver: Arc::clone(pageserver),
|
||||
timeline_id: conf.timeline_id,
|
||||
lsn: conf.lsn,
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
})
|
||||
}
|
||||
|
||||
fn create_endpoint_dir(&self) -> Result<()> {
|
||||
std::fs::create_dir_all(self.endpoint_path()).with_context(|| {
|
||||
format!(
|
||||
"could not create endpoint directory {}",
|
||||
self.endpoint_path().display()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Generate postgresql.conf with default configuration
|
||||
fn setup_pg_conf(&self) -> Result<PostgresConf> {
|
||||
let mut conf = PostgresConf::new();
|
||||
conf.append("max_wal_senders", "10");
|
||||
conf.append("wal_log_hints", "off");
|
||||
conf.append("max_replication_slots", "10");
|
||||
conf.append("hot_standby", "on");
|
||||
conf.append("shared_buffers", "1MB");
|
||||
conf.append("fsync", "off");
|
||||
conf.append("max_connections", "100");
|
||||
conf.append("wal_level", "replica");
|
||||
// wal_sender_timeout is the maximum time to wait for WAL replication.
|
||||
// It also defines how often the walreceiver will send a feedback message to the wal sender.
|
||||
conf.append("wal_sender_timeout", "5s");
|
||||
conf.append("listen_addresses", &self.pg_address.ip().to_string());
|
||||
conf.append("port", &self.pg_address.port().to_string());
|
||||
conf.append("wal_keep_size", "0");
|
||||
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
|
||||
conf.append("restart_after_crash", "off");
|
||||
|
||||
// Load the 'neon' extension
|
||||
conf.append("shared_preload_libraries", "neon");
|
||||
conf.append_line("");
|
||||
|
||||
// Configure backpressure
|
||||
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
|
||||
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
|
||||
// so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
|
||||
// Actually latency should be much smaller (better if < 1sec). But we assume that recently
|
||||
// updated pages are not requested from the pageserver.
|
||||
// - Replication flush lag depends on speed of persisting data by checkpointer (creation of
|
||||
// delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
|
||||
// remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
|
||||
// recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
|
||||
// - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
|
||||
// To be able to restore database in case of pageserver node crash, safekeeper should not
|
||||
// remove WAL beyond this point. Too large a lag can cause space exhaustion in safekeepers
|
||||
// (if they are not able to upload WAL to S3).
|
||||
conf.append("max_replication_write_lag", "15MB");
|
||||
conf.append("max_replication_flush_lag", "10GB");
|
||||
|
||||
if !self.env.safekeepers.is_empty() {
|
||||
// Configure Postgres to connect to the safekeepers
|
||||
conf.append("synchronous_standby_names", "walproposer");
|
||||
} else {
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
// so set a more relaxed synchronous_commit.
|
||||
conf.append("synchronous_commit", "remote_write");
|
||||
|
||||
// Configure the node to stream WAL directly to the pageserver
|
||||
// This isn't really a supported configuration, but can be useful for
|
||||
// testing.
|
||||
conf.append("synchronous_standby_names", "pageserver");
|
||||
}
|
||||
|
||||
Ok(conf)
|
||||
}
|
||||
|
||||
pub fn endpoint_path(&self) -> PathBuf {
|
||||
self.env.endpoints_path().join(&self.endpoint_id)
|
||||
}
|
||||
|
||||
pub fn pgdata(&self) -> PathBuf {
|
||||
self.endpoint_path().join("pgdata")
|
||||
}
|
||||
|
||||
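// Best-effort status check: look for a postmaster.pid file and try a short TCP
// connect to the endpoint's Postgres port, then combine the two observations.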
pub fn status(&self) -> &str {
|
||||
let timeout = Duration::from_millis(300);
|
||||
let has_pidfile = self.pgdata().join("postmaster.pid").exists();
|
||||
let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();
|
||||
|
||||
match (has_pidfile, can_connect) {
|
||||
(true, true) => "running",
|
||||
(false, false) => "stopped",
|
||||
(true, false) => "crashed",
|
||||
(false, true) => "running, no pidfile",
|
||||
}
|
||||
}
|
||||
|
||||
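// Run a pg_ctl subcommand against this endpoint's data directory, using the
// binaries and libraries of the matching Postgres version.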
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
|
||||
let mut cmd = Command::new(&pg_ctl_path);
|
||||
cmd.args(
|
||||
[
|
||||
&[
|
||||
"-D",
|
||||
self.pgdata().to_str().unwrap(),
|
||||
"-l", // FIXME: does this make sense when we don't use pg_ctl start ?
|
||||
self.endpoint_path().join("pg.log").to_str().unwrap(),
|
||||
"-w", //wait till pg_ctl actually does what was asked
|
||||
],
|
||||
args,
|
||||
]
|
||||
.concat(),
|
||||
)
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
);
|
||||
|
||||
// Pass authentication token used for the connections to pageserver and safekeepers
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("NEON_AUTH_TOKEN", token);
|
||||
}
|
||||
|
||||
let pg_ctl = cmd
|
||||
.output()
|
||||
.context(format!("{} failed", pg_ctl_path.display()))?;
|
||||
if !pg_ctl.status.success() {
|
||||
anyhow::bail!(
|
||||
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
|
||||
pg_ctl.status,
|
||||
String::from_utf8_lossy(&pg_ctl.stdout),
|
||||
String::from_utf8_lossy(&pg_ctl.stderr),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn start(&self, auth_token: &Option<String>, safekeepers: Vec<NodeId>) -> Result<()> {
|
||||
if self.status() == "running" {
|
||||
anyhow::bail!("The endpoint is already running");
|
||||
}
|
||||
|
||||
// Slurp the endpoints/<endpoint id>/postgresql.conf file into
|
||||
// memory. We will include it in the spec file that we pass to
|
||||
// `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf
|
||||
// in the data directory.
|
||||
let postgresql_conf_path = self.endpoint_path().join("postgresql.conf");
|
||||
let postgresql_conf = match std::fs::read(&postgresql_conf_path) {
|
||||
Ok(content) => String::from_utf8(content)?,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => "".to_string(),
|
||||
Err(e) => {
|
||||
return Err(anyhow::Error::new(e).context(format!(
|
||||
"failed to read config file in {}",
|
||||
postgresql_conf_path.to_str().unwrap()
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
// We always start the compute node from scratch, so if the Postgres
|
||||
// data dir exists from a previous launch, remove it first.
|
||||
if self.pgdata().exists() {
|
||||
std::fs::remove_dir_all(self.pgdata())?;
|
||||
}
|
||||
|
||||
let pageserver_connstring = {
|
||||
let config = &self.pageserver.pg_connection_config;
|
||||
let (host, port) = (config.host(), config.port());
|
||||
|
||||
// NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
|
||||
format!("postgresql://no_user@{host}:{port}")
|
||||
};
|
||||
let mut safekeeper_connstrings = Vec::new();
|
||||
for sk_id in safekeepers {
|
||||
let sk = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.find(|node| node.id == sk_id)
|
||||
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
|
||||
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.pg_port));
|
||||
}
|
||||
|
||||
// Create spec file
|
||||
let spec = ComputeSpecV2 {
|
||||
format_version: 2,
|
||||
|
||||
project_id: None,
|
||||
endpoint_id: Some(self.endpoint_id.clone()),
|
||||
operation_uuid: None,
|
||||
|
||||
startup_tracing_context: None,
|
||||
|
||||
tenant_id: self.tenant_id,
|
||||
timeline_id: self.timeline_id,
|
||||
lsn: self.lsn,
|
||||
pageserver_connstring,
|
||||
safekeeper_connstrings,
|
||||
storage_auth_token: auth_token.clone(),
|
||||
|
||||
postgresql_conf: Some(postgresql_conf),
|
||||
settings: None,
|
||||
|
||||
roles: vec![],
|
||||
databases: vec![],
|
||||
extensions: vec![],
|
||||
delta_operations: None,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
// Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it.
|
||||
let logfile = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(self.endpoint_path().join("compute.log"))?;
|
||||
|
||||
// Launch compute_ctl
|
||||
println!("Starting postgres node at '{}'", self.connstr());
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
cmd.args(["--http-port", &self.http_address.port().to_string()])
|
||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||
.args(["--connstr", &self.connstr()])
|
||||
.args([
|
||||
"--spec-path",
|
||||
self.endpoint_path().join("spec.json").to_str().unwrap(),
|
||||
])
|
||||
.args([
|
||||
"--pgbin",
|
||||
self.env
|
||||
.pg_bin_dir(self.pg_version)?
|
||||
.join("postgres")
|
||||
.to_str()
|
||||
.unwrap(),
|
||||
])
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stderr(logfile.try_clone()?)
|
||||
.stdout(logfile);
|
||||
let _child = cmd.spawn()?;
|
||||
|
||||
// Wait for it to start
|
||||
let mut attempt = 0;
|
||||
const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
|
||||
const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
|
||||
loop {
|
||||
attempt += 1;
|
||||
match self.get_status() {
|
||||
Ok(state) => {
|
||||
match state.status {
|
||||
ComputeStatus::Init => {
|
||||
if attempt == MAX_ATTEMPTS {
|
||||
bail!("compute startup timed out; still in Init state");
|
||||
}
|
||||
// keep retrying
|
||||
}
|
||||
ComputeStatus::Running => {
|
||||
// All good!
|
||||
break;
|
||||
}
|
||||
ComputeStatus::Failed => {
|
||||
bail!(
|
||||
"compute startup failed: {}",
|
||||
state
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap_or("<no error from compute_ctl>")
|
||||
);
|
||||
}
|
||||
ComputeStatus::Empty | ComputeStatus::ConfigurationPending => {
|
||||
bail!("unexpected compute status: {:?}", state.status)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt == MAX_ATTEMPTS {
|
||||
return Err(e)
    .context("timed out waiting to connect to compute_ctl HTTP");
|
||||
}
|
||||
}
|
||||
}
|
||||
std::thread::sleep(ATTEMPT_INTERVAL);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Call the /status HTTP API
|
||||
pub fn get_status(&self) -> Result<ComputeState> {
|
||||
let client = reqwest::blocking::Client::new();
|
||||
|
||||
let response = client
|
||||
.request(
|
||||
reqwest::Method::GET,
|
||||
format!(
|
||||
"http://{}:{}/status",
|
||||
self.http_address.ip(),
|
||||
self.http_address.port()
|
||||
),
|
||||
)
|
||||
.send()?;
|
||||
|
||||
// Interpret the response
|
||||
let status = response.status();
|
||||
if !(status.is_client_error() || status.is_server_error()) {
|
||||
Ok(response.json()?)
|
||||
} else {
|
||||
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
|
||||
let url = response.url().to_owned();
|
||||
let msg = match response.text() {
|
||||
Ok(err_body) => format!("Error: {}", err_body),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
};
|
||||
Err(anyhow::anyhow!(msg))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop(&self, destroy: bool) -> Result<()> {
|
||||
// If we are going to destroy data directory,
|
||||
// use immediate shutdown mode, otherwise,
|
||||
// shutdown gracefully to leave the data directory sane.
|
||||
//
|
||||
// Postgres is always started from scratch, so stopping
// without destroy is only used for testing and debugging.
|
||||
//
|
||||
if destroy {
|
||||
self.pg_ctl(&["-m", "immediate", "stop"], &None)?;
|
||||
println!(
|
||||
"Destroying postgres data directory '{}'",
|
||||
self.pgdata().to_str().unwrap()
|
||||
);
|
||||
std::fs::remove_dir_all(self.endpoint_path())?;
|
||||
} else {
|
||||
self.pg_ctl(&["stop"], &None)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn connstr(&self) -> String {
|
||||
format!(
|
||||
"postgresql://{}@{}:{}/{}",
|
||||
"cloud_admin",
|
||||
self.pg_address.ip(),
|
||||
self.pg_address.port(),
|
||||
"postgres"
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
mod background_process;
|
||||
pub mod broker;
|
||||
pub mod compute;
|
||||
pub mod endpoint;
|
||||
pub mod local_env;
|
||||
pub mod pageserver;
|
||||
pub mod postgresql_conf;
|
||||
|
||||
@@ -37,7 +37,7 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
// compute nodes).
|
||||
// compute endpoints).
|
||||
//
|
||||
// This is not stored in the config file. Rather, this is the path where the
|
||||
// config file itself is. It is read from the NEON_REPO_DIR env variable or
|
||||
@@ -200,14 +200,8 @@ impl LocalEnv {
|
||||
self.neon_distrib_dir.join("storage_broker")
|
||||
}
|
||||
|
||||
pub fn pg_data_dirs_path(&self) -> PathBuf {
|
||||
self.base_data_dir.join("pgdatadirs").join("tenants")
|
||||
}
|
||||
|
||||
pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf {
|
||||
self.pg_data_dirs_path()
|
||||
.join(tenant_id.to_string())
|
||||
.join(branch_name)
|
||||
pub fn endpoints_path(&self) -> PathBuf {
|
||||
self.base_data_dir.join("endpoints")
|
||||
}
|
||||
|
||||
// TODO: move pageserver files into ./pageserver
|
||||
@@ -427,7 +421,7 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
fs::create_dir_all(self.pg_data_dirs_path())?;
|
||||
fs::create_dir_all(self.endpoints_path())?;
|
||||
|
||||
for safekeeper in &self.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
//! Code to manage pageservers
|
||||
//!
|
||||
//! In the local test environment, the pageserver stores its data directly in
|
||||
//!
|
||||
//! .neon/
|
||||
//!
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
//! Code to manage safekeepers
|
||||
//!
|
||||
//! In the local test environment, the data for each safekeeper is stored in
|
||||
//!
|
||||
//! .neon/safekeepers/<safekeeper id>
|
||||
//!
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Child;
|
||||
|
||||
@@ -10,5 +10,6 @@ chrono.workspace = true
|
||||
serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
utils.workspace = true
|
||||
|
||||
workspace_hack.workspace = true
|
||||
|
||||
libs/compute_api/src/models.rs (new file, 32 lines)
@@ -0,0 +1,32 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
use crate::rfc3339_serialize;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Response of the /status API
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ComputeState {
|
||||
pub status: ComputeStatus,
|
||||
/// Timestamp of the last Postgres activity
|
||||
#[serde(serialize_with = "rfc3339_serialize")]
|
||||
pub last_active: DateTime<Utc>,
|
||||
pub error: Option<String>,
|
||||
}
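// For illustration only, a /status response serialized from this struct might
// look roughly like the following (values made up):
//
//     {"status": "running", "last_active": "2023-03-25T21:36:16Z", "error": null}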
|
||||
|
||||
#[derive(Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
Init,
|
||||
Running,
|
||||
Failed,
|
||||
}
|
||||
|
||||
/// Response of the /metrics.json API
|
||||
#[derive(Clone, Default, Serialize)]
|
||||
pub struct ComputeMetrics {
|
||||
pub sync_safekeepers_ms: u64,
|
||||
pub basebackup_ms: u64,
|
||||
pub config_ms: u64,
|
||||
pub total_startup_ms: u64,
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
|
||||
use crate::spec::ComputeSpec;
|
||||
use crate::spec::ComputeSpecAnyVersion;
|
||||
use serde::Deserialize;
|
||||
|
||||
/// Request of the /configure API
|
||||
@@ -10,5 +10,5 @@ use serde::Deserialize;
|
||||
/// `spec` into a struct initially to be more flexible in the future.
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ConfigurationRequest {
|
||||
pub spec: ComputeSpec,
|
||||
pub spec: ComputeSpecAnyVersion,
|
||||
}
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Serialize, Serializer};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[derive(Serialize, Debug, Deserialize)]
|
||||
pub struct GenericAPIError {
|
||||
pub error: String,
|
||||
}
|
||||
|
||||
/// Response of the /status API
|
||||
#[derive(Serialize, Debug)]
|
||||
#[derive(Serialize, Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ComputeStatusResponse {
|
||||
pub tenant: Option<String>,
|
||||
@@ -20,7 +20,7 @@ pub struct ComputeStatusResponse {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ComputeState {
|
||||
pub status: ComputeStatus,
|
||||
@@ -30,7 +30,7 @@ pub struct ComputeState {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
// Spec wasn't provided at start, waiting for it to be
|
||||
|
||||
@@ -3,8 +3,13 @@
|
||||
//! The spec.json file is used to pass information to 'compute_ctl'. It contains
|
||||
//! all the information needed to start up the right version of PostgreSQL,
|
||||
//! and connect it to the storage nodes.
|
||||
use serde::Deserialize;
|
||||
use anyhow::anyhow;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// String type alias representing Postgres identifier and
|
||||
/// intended to be used for DB / role names.
|
||||
@@ -12,28 +17,191 @@ pub type PgIdent = String;
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[derive(Clone, Debug, Default, Deserialize)]
|
||||
pub struct ComputeSpec {
|
||||
pub format_version: f32,
|
||||
pub timestamp: String,
|
||||
pub operation_uuid: Option<String>,
|
||||
/// Expected cluster state at the end of transition process.
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
#[serde_as]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct ComputeSpecV2 {
|
||||
pub format_version: u64,
|
||||
|
||||
// For debugging purposes only
|
||||
pub project_id: Option<String>,
|
||||
pub endpoint_id: Option<String>,
|
||||
pub operation_uuid: Option<String>,
|
||||
|
||||
/// W3C trace context of the launch operation, for OpenTelemetry tracing
|
||||
pub startup_tracing_context: Option<HashMap<String, String>>,
|
||||
|
||||
// Information needed to connect to the storage layer.
|
||||
//
|
||||
// `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
|
||||
//
|
||||
// If Lsn == None, this is a primary endpoint that continues writing WAL at
|
||||
// the end of the timeline. If 'lsn' is set, this is a read-only node
|
||||
// "anchored" at that LSN. 'safekeeper_connstrings' must be non-empty for a
|
||||
// primary.
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub lsn: Option<Lsn>,
|
||||
pub pageserver_connstring: String,
|
||||
pub safekeeper_connstrings: Vec<String>,
|
||||
|
||||
/// If set, 'storage_auth_token' is used as the password to authenticate to
|
||||
/// the pageserver and safekeepers.
|
||||
pub storage_auth_token: Option<String>,
|
||||
|
||||
/// Contents of postgresql.conf file
|
||||
pub postgresql_conf: Option<String>,
|
||||
|
||||
/// Extra settings to append to the postgresql.conf
|
||||
pub settings: GenericOptions,
|
||||
|
||||
// Expected cluster state at the end of transition process.
|
||||
pub roles: Vec<Role>,
|
||||
pub databases: Vec<Database>,
|
||||
pub extensions: Vec<PgIdent>,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct FormatVersionOnly {
|
||||
format_version: u64,
|
||||
}
|
||||
|
||||
impl TryFrom<ComputeSpecAnyVersion> for ComputeSpecV2 {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(input: ComputeSpecAnyVersion) -> Result<ComputeSpecV2, anyhow::Error> {
|
||||
// First check the 'format_version' field
|
||||
match serde_json::from_value::<FormatVersionOnly>(input.0.clone())?.format_version {
|
||||
1 => {
|
||||
let v1: ComputeSpecV1 = serde_json::from_value(input.0)?;
|
||||
|
||||
ComputeSpecV2::upgrade_from_v1(v1)
|
||||
}
|
||||
2 => {
|
||||
let v2: ComputeSpecV2 = serde_json::from_value(input.0)?;
|
||||
Ok(v2)
|
||||
}
|
||||
other => Err(anyhow::anyhow!(
|
||||
"unexpected format version {other} in spec file"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ComputeSpecV2 {
|
||||
pub fn parse_and_upgrade(input: &str) -> anyhow::Result<ComputeSpecV2> {
|
||||
ComputeSpecV2::try_from(ComputeSpecAnyVersion(serde_json::from_str::<
|
||||
serde_json::Value,
|
||||
>(input)?))
|
||||
}
|
||||
|
||||
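/// Upgrade a V1 spec to V2 by hoisting the tenant/timeline IDs, the pageserver
/// connection string and the safekeeper list out of the old `cluster.settings`
/// GUC list into explicit top-level fields.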
pub fn upgrade_from_v1(spec_v1: ComputeSpecV1) -> anyhow::Result<ComputeSpecV2> {
|
||||
let mut tenant_id = None;
|
||||
let mut timeline_id = None;
|
||||
let mut pageserver_connstring = None;
|
||||
let mut safekeeper_connstrings: Vec<String> = Vec::new();
|
||||
|
||||
let mut extensions: Vec<String> = Vec::new();
|
||||
|
||||
let mut settings: Vec<GenericOption> = Vec::new();
|
||||
for setting in &spec_v1.cluster.settings {
|
||||
if let Some(value) = &setting.value {
|
||||
match setting.name.as_str() {
|
||||
"neon.tenant_id" => {
|
||||
tenant_id = Some(TenantId::from_str(value)?);
|
||||
}
|
||||
"neon.timeline_id" => {
|
||||
timeline_id = Some(TimelineId::from_str(value)?);
|
||||
}
|
||||
"neon.pageserver_connstring" => {
|
||||
pageserver_connstring = Some(value.clone());
|
||||
}
|
||||
"neon.safekeepers" => {
|
||||
// neon.safekeepers is a comma-separated list of postgres connection URLs
|
||||
safekeeper_connstrings =
|
||||
value.split(',').map(|s| s.trim().to_string()).collect();
|
||||
}
|
||||
"shared_preload_libraries" => {
|
||||
if value.contains("pg_stat_statements") {
|
||||
extensions.push("pg_stat_statements".to_string());
|
||||
}
|
||||
settings.push(setting.clone())
|
||||
}
|
||||
_ => settings.push(setting.clone()),
|
||||
}
|
||||
} else {
|
||||
settings.push(setting.clone())
|
||||
}
|
||||
}
|
||||
let tenant_id =
|
||||
tenant_id.ok_or_else(|| anyhow!("neon.tenant_id missing from spec file"))?;
|
||||
let timeline_id =
|
||||
timeline_id.ok_or_else(|| anyhow!("neon.timeline_id missing from spec file"))?;
|
||||
let pageserver_connstring = pageserver_connstring
|
||||
.ok_or_else(|| anyhow!("neon.pageserver_connstring missing from spec file"))?;
|
||||
|
||||
Ok(ComputeSpecV2 {
|
||||
format_version: 2,
|
||||
|
||||
project_id: Some(spec_v1.cluster.cluster_id),
|
||||
endpoint_id: Some(spec_v1.cluster.name),
|
||||
operation_uuid: spec_v1.operation_uuid,
|
||||
|
||||
startup_tracing_context: spec_v1.startup_tracing_context,
|
||||
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
lsn: None, // Not supported in V1
|
||||
pageserver_connstring,
|
||||
safekeeper_connstrings,
|
||||
|
||||
storage_auth_token: spec_v1.storage_auth_token,
|
||||
|
||||
postgresql_conf: None,
|
||||
settings: Some(settings),
|
||||
|
||||
roles: spec_v1.cluster.roles,
|
||||
databases: spec_v1.cluster.databases,
|
||||
extensions,
|
||||
delta_operations: spec_v1.delta_operations,
|
||||
})
|
||||
}
|
||||
}
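// Example usage (a sketch, not part of this file): a caller such as compute_ctl
// can accept a spec file in either format and always end up with a V2 struct.
// `spec_path` is just an illustrative variable name here.
//
//     let json = std::fs::read_to_string(spec_path)?;
//     let spec = ComputeSpecV2::parse_and_upgrade(&json)?;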
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ComputeSpecAnyVersion(pub serde_json::Value);
|
||||
|
||||
// Old format that didn't have explicit 'tenant_id', 'timeline_id', 'pageserver_connstring'
// and 'safekeeper_connstrings' fields. They were stored as GUCs in the 'cluster.settings'
// list.
|
||||
#[serde_as]
|
||||
#[derive(Clone, Deserialize, Serialize)]
|
||||
pub struct ComputeSpecV1 {
|
||||
pub format_version: u64,
|
||||
|
||||
// The control plane also includes a 'timestamp' field in the JSON document,
// but we don't use it for anything. Serde ignores unknown fields like that
// by default when deserializing.
|
||||
pub operation_uuid: Option<String>,
|
||||
pub cluster: ClusterV1,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
pub storage_auth_token: Option<String>,
|
||||
|
||||
pub startup_tracing_context: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize)]
|
||||
pub struct Cluster {
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct ClusterV1 {
|
||||
pub cluster_id: String,
|
||||
pub name: String,
|
||||
pub state: Option<String>,
|
||||
pub roles: Vec<Role>,
|
||||
pub databases: Vec<Database>,
|
||||
pub settings: GenericOptions,
|
||||
pub settings: Vec<GenericOption>,
|
||||
}
|
||||
|
||||
/// Single cluster state changing operation that could not be represented as
|
||||
@@ -42,7 +210,7 @@ pub struct Cluster {
|
||||
/// - DROP ROLE
|
||||
/// - ALTER ROLE name RENAME TO new_name
|
||||
/// - ALTER DATABASE name RENAME TO new_name
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct DeltaOp {
|
||||
pub action: String,
|
||||
pub name: PgIdent,
|
||||
@@ -51,7 +219,7 @@ pub struct DeltaOp {
|
||||
|
||||
/// Rust representation of Postgres role info with only those fields
|
||||
/// that matter for us.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct Role {
|
||||
pub name: PgIdent,
|
||||
pub encrypted_password: Option<String>,
|
||||
@@ -60,7 +228,7 @@ pub struct Role {
|
||||
|
||||
/// Rust representation of Postgres database info with only those fields
|
||||
/// that matter for us.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct Database {
|
||||
pub name: PgIdent,
|
||||
pub owner: PgIdent,
|
||||
@@ -70,7 +238,7 @@ pub struct Database {
|
||||
/// Common type representing both SQL statement params with or without value,
|
||||
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
|
||||
/// options like `wal_level = logical`.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||
pub struct GenericOption {
|
||||
pub name: String,
|
||||
pub value: Option<String>,
|
||||
@@ -84,11 +252,70 @@ pub type GenericOptions = Option<Vec<GenericOption>>;
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs::File;
|
||||
|
||||
#[test]
|
||||
fn parse_spec_file() {
|
||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||
fn test_upgrade_v1_to_v2() -> anyhow::Result<()> {
|
||||
let spec_v1_str = std::fs::read_to_string("tests/spec-v1.json").unwrap();
|
||||
let spec_v2 = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;
|
||||
|
||||
// The original V1 file also contains neon.tenant_id, neon.timeline_id,
// neon.pageserver_connstring and neon.safekeepers. They are promoted to
// explicit fields at the top level in V2.
|
||||
assert_eq!(
|
||||
spec_v2.tenant_id,
|
||||
TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?
|
||||
);
|
||||
assert_eq!(
|
||||
spec_v2.timeline_id,
|
||||
TimelineId::from_str("7f2aff2a1042b93a2617f44851638422")?
|
||||
);
|
||||
assert_eq!(spec_v2.pageserver_connstring, "host=172.30.42.12 port=6400");
|
||||
assert_eq!(
|
||||
spec_v2.safekeeper_connstrings,
|
||||
vec![
|
||||
"172.30.42.23:6500",
|
||||
"172.30.42.22:6500",
|
||||
"172.30.42.21:6500"
|
||||
]
|
||||
);
|
||||
|
||||
fn opt(name: &str, value: &str, vartype: &str) -> GenericOption {
|
||||
GenericOption {
|
||||
name: name.to_string(),
|
||||
value: Some(value.to_string()),
|
||||
vartype: vartype.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(spec_v2.postgresql_conf, None);
|
||||
assert_eq!(
|
||||
spec_v2.settings.as_ref().unwrap(),
|
||||
&vec![
|
||||
opt("max_replication_write_lag", "500", "integer"),
|
||||
opt("restart_after_crash", "off", "bool"),
|
||||
opt("password_encryption", "md5", "enum"),
|
||||
opt(
|
||||
"shared_preload_libraries",
|
||||
"neon, pg_stat_statements",
|
||||
"string"
|
||||
),
|
||||
opt("synchronous_standby_names", "walproposer", "string"),
|
||||
opt("wal_level", "replica", "enum"),
|
||||
opt("listen_addresses", "0.0.0.0", "string"),
|
||||
opt("neon.max_cluster_size", "10240", "integer"),
|
||||
opt("shared_buffers", "65536", "integer"),
|
||||
opt(
|
||||
"test.escaping",
|
||||
r#"here's a backslash \ and a quote ' and a double-quote " hooray"#,
|
||||
"string"
|
||||
),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(spec_v2.extensions, vec!["pg_stat_statements"]);
|
||||
|
||||
eprintln!("SPEC: {}", serde_json::to_string_pretty(&spec_v2)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,209 +0,0 @@
|
||||
{
|
||||
"format_version": 1.0,
|
||||
|
||||
"timestamp": "2021-05-23T18:25:43.511Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "test-cluster-42",
|
||||
"name": "Zenith Test",
|
||||
"state": "restarted",
|
||||
"roles": [
|
||||
{
|
||||
"name": "postgres",
|
||||
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "alexk",
|
||||
"encrypted_password": null,
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zenith \"new\"",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "\"name\";\\n select 1;",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "MyRole",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
{
|
||||
"name": "DB2",
|
||||
"owner": "alexk",
|
||||
"options": [
|
||||
{
|
||||
"name": "LC_COLLATE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "LC_CTYPE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "TEMPLATE",
|
||||
"value": "template0",
|
||||
"vartype": "enum"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "zenith",
|
||||
"owner": "MyRole"
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"owner": "zen"
|
||||
}
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"name": "fsync",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "replica",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "hot_standby",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_log_hints",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "log_connections",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "32768",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "port",
|
||||
"value": "55432",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_connections",
|
||||
"value": "100",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_wal_senders",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_sender_timeout",
|
||||
"value": "0",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "maintenance_work_mem",
|
||||
"value": "65536",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_parallel_workers",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_worker_processes",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "b0554b632bd4d547a63b86c3630317e8",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_slots",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "2414a61ffc94e428f14b5758fe308e13",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=127.0.0.1 port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "test.escaping",
|
||||
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"delta_operations": [
|
||||
{
|
||||
"action": "delete_db",
|
||||
"name": "zenith_test"
|
||||
},
|
||||
{
|
||||
"action": "rename_db",
|
||||
"name": "DB",
|
||||
"new_name": "DB2"
|
||||
},
|
||||
{
|
||||
"action": "delete_role",
|
||||
"name": "zenith2"
|
||||
},
|
||||
{
|
||||
"action": "rename_role",
|
||||
"name": "zenith new",
|
||||
"new_name": "zenith \"new\""
|
||||
}
|
||||
]
|
||||
}
|
||||
libs/compute_api/tests/spec-v1.json (new file, 175 lines)
@@ -0,0 +1,175 @@
|
||||
{
|
||||
"cluster": {
|
||||
"cluster_id": "young-snowflake-871338",
|
||||
"name": "young-snowflake-871338",
|
||||
"settings": [
|
||||
{
|
||||
"name": "max_replication_write_lag",
|
||||
"value": "500",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=172.30.42.12 port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "restart_after_crash",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon, pg_stat_statements",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "3d1f7595b468230304e0b73cecbcb081",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "7f2aff2a1042b93a2617f44851638422",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "replica",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "172.30.42.23:6500,172.30.42.22:6500,172.30.42.21:6500",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.max_cluster_size",
|
||||
"value": "10240",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "65536",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "test.escaping",
|
||||
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
|
||||
"vartype": "string"
|
||||
}
|
||||
],
|
||||
"roles": [
|
||||
{
|
||||
"name": "postgres",
|
||||
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "testuser",
|
||||
"encrypted_password": "SCRAM-SHA-256$4096:R4V8wIc+aH8T7vy3weC5qg==$aXXM6IQKnEWsRgeyjbxydif6f29LZOGvAWe/oOnuXSM=:5IE7U/woZLZbYSYOJ3v4x3qlLOXS6xcsdJYnMdVkzQY=",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "alexk",
|
||||
"encrypted_password": null,
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "neon \"new\"",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "bar",
|
||||
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "\"name\";\\n select 1;",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "MyRole",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
{
|
||||
"name": "DB2",
|
||||
"owner": "alexk",
|
||||
"options": [
|
||||
{
|
||||
"name": "LC_COLLATE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "LC_CTYPE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "TEMPLATE",
|
||||
"value": "template0",
|
||||
"vartype": "enum"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "neondb",
|
||||
"owner": "testuser",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "mydb",
|
||||
"owner": "MyRole"
|
||||
},
|
||||
{
|
||||
"name": "foo",
|
||||
"owner": "bar"
|
||||
}
|
||||
]
|
||||
},
|
||||
"delta_operations": [
|
||||
{
|
||||
"action": "delete_db",
|
||||
"name": "neon_test"
|
||||
},
|
||||
{
|
||||
"action": "rename_db",
|
||||
"name": "DB",
|
||||
"new_name": "DB2"
|
||||
},
|
||||
{
|
||||
"action": "delete_role",
|
||||
"name": "neon2"
|
||||
},
|
||||
{
|
||||
"action": "rename_role",
|
||||
"name": "neon new",
|
||||
"new_name": "neon \"new\""
|
||||
}
|
||||
],
|
||||
"format_version": 1,
|
||||
"operation_uuid": "73c843c3-46dd-496f-b819-e6c5a190f584",
|
||||
"timestamp": "2023-03-25T21:36:16.729366596Z",
|
||||
"storage_auth_token": "dummy",
|
||||
"startup_tracing_context": {
|
||||
"traceparent": "00-1b79dca0e798ee42961cd13990326551-5e0222e8d7314785-01"
|
||||
}
|
||||
}
|
||||
@@ -114,7 +114,7 @@ class NeonCompare(PgCompare):
self.timeline = self.env.neon_cli.create_timeline(branch_name, tenant_id=self.tenant)

# Start pg
self._pg = self.env.postgres.create_start(branch_name, "main", self.tenant)
self._pg = self.env.endpoints.create_start(branch_name, "main", self.tenant)

@property
def pg(self) -> PgProtocol:

@@ -830,7 +830,7 @@ class NeonEnvBuilder:
# Stop all the nodes.
if self.env:
log.info("Cleaning up all storage and compute nodes")
self.env.postgres.stop_all()
self.env.endpoints.stop_all()
for sk in self.env.safekeepers:
sk.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
@@ -894,7 +894,7 @@ class NeonEnv:
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.neon_cli = NeonCli(env=self)
self.postgres = PostgresFactory(self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.broker = config.broker
self.remote_storage = config.remote_storage
@@ -902,6 +902,7 @@ class NeonEnv:
self.pg_version = config.pg_version
self.neon_binpath = config.neon_binpath
self.pg_distrib_dir = config.pg_distrib_dir
self.endpoint_counter = 0

# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
@@ -1015,6 +1016,13 @@ class NeonEnv:
priv = (Path(self.repo_dir) / "auth_private_key.pem").read_text()
return AuthKeys(pub=pub, priv=priv)

def generate_endpoint_id(self) -> str:
"""
Generate a unique endpoint ID
"""
self.endpoint_counter += 1
return "ep-" + str(self.endpoint_counter)


@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
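The `generate_endpoint_id()` helper added above is nothing more than a per-environment counter. As a rough illustration (a standalone stub, not repository code), IDs come out as `ep-1`, `ep-2`, and so on unless a test passes an explicit `endpoint_id`:

```python
# Standalone stub that mirrors the counter logic added above (illustrative only).
class EnvStub:
    def __init__(self) -> None:
        self.endpoint_counter = 0

    def generate_endpoint_id(self) -> str:
        self.endpoint_counter += 1
        return "ep-" + str(self.endpoint_counter)


env = EnvStub()
assert env.generate_endpoint_id() == "ep-1"
assert env.generate_endpoint_id() == "ep-2"
```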
@@ -1073,7 +1081,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
yield _shared_simple_env

_shared_simple_env.postgres.stop_all()
_shared_simple_env.endpoints.stop_all()


@pytest.fixture(scope="function")
@@ -1097,7 +1105,7 @@ def neon_env_builder(
neon_env_builder.init_start().

After the initialization, you can launch compute nodes by calling
the functions in the 'env.postgres' factory object, stop/start the
the functions in the 'env.endpoints' factory object, stop/start the
nodes, etc.
"""

@@ -1438,16 +1446,17 @@ class NeonCli(AbstractNeonCli):
args.extend(["-m", "immediate"])
return self.raw_cli(args)

def pg_create(
def endpoint_create(
self,
branch_name: str,
node_name: Optional[str] = None,
pg_port: int,
http_port: int,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
port: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"create",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
@@ -1458,24 +1467,28 @@ class NeonCli(AbstractNeonCli):
]
if lsn is not None:
args.extend(["--lsn", str(lsn)])
if port is not None:
args.extend(["--port", str(port)])
if node_name is not None:
args.append(node_name)
if pg_port is not None:
args.extend(["--pg-port", str(pg_port)])
if http_port is not None:
args.extend(["--http-port", str(http_port)])
if endpoint_id is not None:
args.append(endpoint_id)

res = self.raw_cli(args)
res.check_returncode()
return res

def pg_start(
def endpoint_start(
self,
node_name: str,
endpoint_id: str,
pg_port: int,
http_port: int,
safekeepers: Optional[List[int]] = None,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
port: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"start",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
@@ -1484,32 +1497,34 @@ class NeonCli(AbstractNeonCli):
]
if lsn is not None:
args.append(f"--lsn={lsn}")
if port is not None:
args.append(f"--port={port}")
if node_name is not None:
args.append(node_name)
args.extend(["--pg-port", str(pg_port)])
args.extend(["--http-port", str(http_port)])
if safekeepers is not None:
args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
if endpoint_id is not None:
args.append(endpoint_id)

res = self.raw_cli(args)
res.check_returncode()
return res

def pg_stop(
def endpoint_stop(
self,
node_name: str,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
destroy=False,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"stop",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
if destroy:
args.append("--destroy")
if node_name is not None:
args.append(node_name)
if endpoint_id is not None:
args.append(endpoint_id)

return self.raw_cli(args, check_return_code=check_return_code)

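Taken together, the three wrappers above drive `neon_local endpoint create/start/stop` from the tests. A rough usage sketch follows; the endpoint ID and ports are made up, and `env` is assumed to be a `NeonEnv`:

```python
# Hypothetical usage of the NeonCli wrappers above; the ports are examples.
env.neon_cli.endpoint_create("main", pg_port=55432, http_port=55433, endpoint_id="ep-1")
env.neon_cli.endpoint_start("ep-1", pg_port=55432, http_port=55433)
# ... run SQL against the endpoint listening on port 55432 ...
env.neon_cli.endpoint_stop("ep-1", destroy=True)
```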
@@ -2167,42 +2182,57 @@ def static_proxy(
yield proxy


class Postgres(PgProtocol):
"""An object representing a running postgres daemon."""
class Endpoint(PgProtocol):
"""An object representing a Postgres compute endpoint managed by the control plane."""

def __init__(
self, env: NeonEnv, tenant_id: TenantId, port: int, check_stop_result: bool = True
self,
env: NeonEnv,
tenant_id: TenantId,
pg_port: int,
http_port: int,
check_stop_result: bool = True,
):
super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres")
super().__init__(host="localhost", port=pg_port, user="cloud_admin", dbname="postgres")
self.env = env
self.running = False
self.node_name: Optional[str] = None # dubious, see asserts below
self.endpoint_id: Optional[str] = None # dubious, see asserts below
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
self.tenant_id = tenant_id
self.port = port
self.pg_port = pg_port
self.http_port = http_port
self.check_stop_result = check_stop_result
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
self.active_safekeepers: List[int] = map(lambda sk: sk.id, env.safekeepers)
# path to conf is <repo_dir>/endpoints/<endpoint_id>/pgdata/postgresql.conf

def create(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
endpoint_id: Optional[str] = None,
|
||||
lsn: Optional[Lsn] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> "Postgres":
|
||||
) -> "Endpoint":
|
||||
"""
|
||||
Create the pg data directory.
|
||||
Create a new Postgres endpoint.
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
if not config_lines:
|
||||
config_lines = []
|
||||
|
||||
self.node_name = node_name or f"{branch_name}_pg_node"
|
||||
self.env.neon_cli.pg_create(
|
||||
branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port
|
||||
if endpoint_id is None:
|
||||
endpoint_id = self.env.generate_endpoint_id()
|
||||
self.endpoint_id = endpoint_id
|
||||
|
||||
self.env.neon_cli.endpoint_create(
|
||||
branch_name,
|
||||
endpoint_id=self.endpoint_id,
|
||||
tenant_id=self.tenant_id,
|
||||
lsn=lsn,
|
||||
pg_port=self.pg_port,
|
||||
http_port=self.http_port,
|
||||
)
|
||||
path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name
|
||||
path = Path("endpoints") / self.endpoint_id / "pgdata"
|
||||
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
|
||||
|
||||
if config_lines is None:
|
||||
@@ -2215,26 +2245,36 @@ class Postgres(PgProtocol):
|
||||
|
||||
return self
|
||||
|
||||
def start(self) -> "Postgres":
|
||||
def start(self) -> "Endpoint":
|
||||
"""
|
||||
Start the Postgres instance.
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
assert self.endpoint_id is not None
|
||||
|
||||
log.info(f"Starting postgres node {self.node_name}")
|
||||
log.info(f"Starting postgres endpoint {self.endpoint_id}")
|
||||
|
||||
self.env.neon_cli.pg_start(self.node_name, tenant_id=self.tenant_id, port=self.port)
|
||||
self.env.neon_cli.endpoint_start(
|
||||
self.endpoint_id,
|
||||
pg_port=self.pg_port,
|
||||
http_port=self.http_port,
|
||||
tenant_id=self.tenant_id,
|
||||
safekeepers=self.active_safekeepers,
|
||||
)
|
||||
self.running = True
|
||||
|
||||
return self
|
||||
|
||||
def endpoint_path(self) -> Path:
|
||||
"""Path to endpoint directory"""
|
||||
assert self.endpoint_id
|
||||
path = Path("endpoints") / self.endpoint_id
|
||||
return self.env.repo_dir / path
|
||||
|
||||
def pg_data_dir_path(self) -> str:
|
||||
"""Path to data directory"""
|
||||
assert self.node_name
|
||||
path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name
|
||||
return os.path.join(self.env.repo_dir, path)
|
||||
"""Path to Postgres data directory"""
|
||||
return os.path.join(self.endpoint_path(), "pgdata")
|
||||
|
||||
def pg_xact_dir_path(self) -> str:
|
||||
"""Path to pg_xact dir"""
|
||||
@@ -2246,33 +2286,10 @@ class Postgres(PgProtocol):
|
||||
|
||||
def config_file_path(self) -> str:
|
||||
"""Path to postgresql.conf"""
|
||||
return os.path.join(self.pg_data_dir_path(), "postgresql.conf")
|
||||
path = Path("endpoints") / self.endpoint_id / "postgresql.conf"
|
||||
return os.path.join(self.env.repo_dir, path)
|
||||
|
||||
def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres":
|
||||
"""
|
||||
Adjust instance config for working with wal acceptors instead of
|
||||
pageserver (pre-configured by CLI) directly.
|
||||
"""
|
||||
|
||||
# TODO: reuse config()
|
||||
with open(self.config_file_path(), "r") as f:
|
||||
cfg_lines = f.readlines()
|
||||
with open(self.config_file_path(), "w") as f:
|
||||
for cfg_line in cfg_lines:
|
||||
# walproposer uses different application_name
|
||||
if (
|
||||
"synchronous_standby_names" in cfg_line
|
||||
or
|
||||
# don't repeat safekeepers/wal_acceptors multiple times
|
||||
"neon.safekeepers" in cfg_line
|
||||
):
|
||||
continue
|
||||
f.write(cfg_line)
|
||||
f.write("synchronous_standby_names = 'walproposer'\n")
|
||||
f.write("neon.safekeepers = '{}'\n".format(safekeepers))
|
||||
return self
|
||||
|
||||
def config(self, lines: List[str]) -> "Postgres":
|
||||
def config(self, lines: List[str]) -> "Endpoint":
|
||||
"""
|
||||
Add lines to postgresql.conf.
|
||||
Lines should be an array of valid postgresql.conf rows.
|
||||
@@ -2286,32 +2303,32 @@ class Postgres(PgProtocol):
|
||||
|
||||
return self
|
||||
|
||||
def stop(self) -> "Postgres":
|
||||
def stop(self) -> "Endpoint":
|
||||
"""
|
||||
Stop the Postgres instance if it's running.
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
if self.running:
|
||||
assert self.node_name is not None
|
||||
self.env.neon_cli.pg_stop(
|
||||
self.node_name, self.tenant_id, check_return_code=self.check_stop_result
|
||||
assert self.endpoint_id is not None
|
||||
self.env.neon_cli.endpoint_stop(
|
||||
self.endpoint_id, self.tenant_id, check_return_code=self.check_stop_result
|
||||
)
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
|
||||
def stop_and_destroy(self) -> "Postgres":
|
||||
def stop_and_destroy(self) -> "Endpoint":
|
||||
"""
|
||||
Stop the Postgres instance, then destroy it.
|
||||
Stop the Postgres instance, then destroy the endpoint.
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
self.env.neon_cli.pg_stop(
|
||||
self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result
|
||||
assert self.endpoint_id is not None
|
||||
self.env.neon_cli.endpoint_stop(
|
||||
self.endpoint_id, self.tenant_id, True, check_return_code=self.check_stop_result
|
||||
)
|
||||
self.node_name = None
|
||||
self.endpoint_id = None
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
@@ -2319,13 +2336,12 @@ class Postgres(PgProtocol):
|
||||
def create_start(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
endpoint_id: Optional[str] = None,
|
||||
lsn: Optional[Lsn] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> "Postgres":
|
||||
) -> "Endpoint":
|
||||
"""
|
||||
Create a Postgres instance, apply config
|
||||
and then start it.
|
||||
Create an endpoint, apply config, and start Postgres.
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
@@ -2333,7 +2349,7 @@ class Postgres(PgProtocol):
|
||||
|
||||
self.create(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
endpoint_id=endpoint_id,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
).start()
|
||||
@@ -2342,7 +2358,7 @@ class Postgres(PgProtocol):
|
||||
|
||||
return self
|
||||
|
||||
def __enter__(self) -> "Postgres":
|
||||
def __enter__(self) -> "Endpoint":
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
@@ -2354,33 +2370,34 @@ class Postgres(PgProtocol):
|
||||
self.stop()
|
||||
|
||||
|
||||
class PostgresFactory:
|
||||
"""An object representing multiple running postgres daemons."""
|
||||
class EndpointFactory:
|
||||
"""An object representing multiple compute endpoints."""
|
||||
|
||||
def __init__(self, env: NeonEnv):
|
||||
self.env = env
|
||||
self.num_instances: int = 0
|
||||
self.instances: List[Postgres] = []
|
||||
self.endpoints: List[Endpoint] = []
|
||||
|
||||
def create_start(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
endpoint_id: Optional[str] = None,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
lsn: Optional[Lsn] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> Postgres:
|
||||
pg = Postgres(
|
||||
) -> Endpoint:
|
||||
ep = Endpoint(
|
||||
self.env,
|
||||
tenant_id=tenant_id or self.env.initial_tenant,
|
||||
port=self.env.port_distributor.get_port(),
|
||||
pg_port=self.env.port_distributor.get_port(),
|
||||
http_port=self.env.port_distributor.get_port(),
|
||||
)
|
||||
self.num_instances += 1
|
||||
self.instances.append(pg)
|
||||
self.endpoints.append(ep)
|
||||
|
||||
return pg.create_start(
|
||||
return ep.create_start(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
endpoint_id=endpoint_id,
|
||||
config_lines=config_lines,
|
||||
lsn=lsn,
|
||||
)
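Because `Endpoint` implements `__enter__`/`__exit__` (shown above), tests can also reach the factory through a `with` block so the compute is stopped even when an assertion fails. A minimal sketch, assuming a running `NeonEnv` named `env`:

```python
# Minimal sketch: the endpoint is stopped automatically when the block exits.
with env.endpoints.create_start("main") as endpoint:
    endpoint.safe_psql("CREATE TABLE t(x int)")
    endpoint.safe_psql("INSERT INTO t SELECT generate_series(1, 100)")
# Endpoint.__exit__ has called endpoint.stop() here.
```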
|
||||
@@ -2388,30 +2405,34 @@ class PostgresFactory:
|
||||
def create(
|
||||
self,
|
||||
branch_name: str,
|
||||
node_name: Optional[str] = None,
|
||||
endpoint_id: Optional[str] = None,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
lsn: Optional[Lsn] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> Postgres:
|
||||
pg = Postgres(
|
||||
) -> Endpoint:
|
||||
ep = Endpoint(
|
||||
self.env,
|
||||
tenant_id=tenant_id or self.env.initial_tenant,
|
||||
port=self.env.port_distributor.get_port(),
|
||||
pg_port=self.env.port_distributor.get_port(),
|
||||
http_port=self.env.port_distributor.get_port(),
|
||||
)
|
||||
|
||||
self.num_instances += 1
|
||||
self.instances.append(pg)
|
||||
if endpoint_id is None:
|
||||
endpoint_id = self.env.generate_endpoint_id()
|
||||
|
||||
return pg.create(
|
||||
self.num_instances += 1
|
||||
self.endpoints.append(ep)
|
||||
|
||||
return ep.create(
|
||||
branch_name=branch_name,
|
||||
node_name=node_name,
|
||||
endpoint_id=endpoint_id,
|
||||
lsn=lsn,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
|
||||
def stop_all(self) -> "PostgresFactory":
|
||||
for pg in self.instances:
|
||||
pg.stop()
|
||||
def stop_all(self) -> "EndpointFactory":
|
||||
for ep in self.endpoints:
|
||||
ep.stop()
|
||||
|
||||
return self
|
||||
|
||||
@@ -2786,16 +2807,16 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]:
|
||||
def check_restored_datadir_content(
|
||||
test_output_dir: Path,
|
||||
env: NeonEnv,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
):
|
||||
# Get the timeline ID. We need it for the 'basebackup' command
|
||||
timeline = TimelineId(pg.safe_psql("SHOW neon.timeline_id")[0][0])
|
||||
timeline = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0])
|
||||
|
||||
# stop postgres to ensure that files won't change
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
|
||||
# Take a basebackup from pageserver
|
||||
restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir"
|
||||
restored_dir_path = env.repo_dir / f"{endpoint.endpoint_id}_restored_datadir"
|
||||
restored_dir_path.mkdir(exist_ok=True)
|
||||
|
||||
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
|
||||
@@ -2805,7 +2826,7 @@ def check_restored_datadir_content(
|
||||
{psql_path} \
|
||||
--no-psqlrc \
|
||||
postgres://localhost:{env.pageserver.service_port.pg} \
|
||||
-c 'basebackup {pg.tenant_id} {timeline}' \
|
||||
-c 'basebackup {endpoint.tenant_id} {timeline}' \
|
||||
| tar -x -C {restored_dir_path}
|
||||
"""
|
||||
|
||||
@@ -2822,8 +2843,8 @@ def check_restored_datadir_content(
|
||||
assert result.returncode == 0
|
||||
|
||||
# list files we're going to compare
|
||||
assert pg.pgdata_dir
|
||||
pgdata_files = list_files_to_compare(Path(pg.pgdata_dir))
|
||||
assert endpoint.pgdata_dir
|
||||
pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir))
|
||||
restored_files = list_files_to_compare(restored_dir_path)
|
||||
|
||||
# check that file sets are equal
|
||||
@@ -2834,12 +2855,12 @@ def check_restored_datadir_content(
|
||||
# We've already filtered all mismatching files in list_files_to_compare(),
|
||||
# so here expect that the content is identical
|
||||
(match, mismatch, error) = filecmp.cmpfiles(
|
||||
pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False
|
||||
endpoint.pgdata_dir, restored_dir_path, pgdata_files, shallow=False
|
||||
)
|
||||
log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}")
|
||||
|
||||
for f in mismatch:
|
||||
f1 = os.path.join(pg.pgdata_dir, f)
|
||||
f1 = os.path.join(endpoint.pgdata_dir, f)
|
||||
f2 = os.path.join(restored_dir_path, f)
|
||||
stdout_filename = "{}.filediff".format(f2)
|
||||
|
||||
@@ -2854,24 +2875,24 @@ def check_restored_datadir_content(
|
||||
|
||||
|
||||
def wait_for_last_flush_lsn(
|
||||
env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
|
||||
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
|
||||
) -> Lsn:
|
||||
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
|
||||
last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
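`wait_for_last_flush_lsn` above is how tests make sure the pageserver has ingested all WAL from an endpoint before checkpointing or comparing data. A rough usage sketch, with tenant and timeline IDs coming from the surrounding test:

```python
# Rough sketch: flush pending WAL to the pageserver, then checkpoint the timeline.
endpoint.safe_psql("INSERT INTO t SELECT generate_series(1, 1000)")
lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)  # last observed flush LSN
env.pageserver.http_client().timeline_checkpoint(tenant_id, timeline_id)
```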
|
||||
|
||||
|
||||
def wait_for_wal_insert_lsn(
|
||||
env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
|
||||
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
|
||||
) -> Lsn:
|
||||
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
|
||||
last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
|
||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
|
||||
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
|
||||
|
||||
|
||||
def fork_at_current_lsn(
|
||||
env: NeonEnv,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
new_branch_name: str,
|
||||
ancestor_branch_name: str,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
@@ -2881,7 +2902,7 @@ def fork_at_current_lsn(
|
||||
The "last LSN" is taken from the given Postgres instance. The pageserver will wait for all the
|
||||
the WAL up to that LSN to arrive in the pageserver before creating the branch.
|
||||
"""
|
||||
current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
|
||||
current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
|
||||
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
|
||||
|
||||
|
||||
|
||||
@@ -52,13 +52,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
|
||||
def run_pgbench(branch: str):
|
||||
log.info(f"Start a pgbench workload on branch {branch}")
|
||||
|
||||
pg = env.postgres.create_start(branch, tenant_id=tenant)
|
||||
connstr = pg.connstr()
|
||||
endpoint = env.endpoints.create_start(branch, tenant_id=tenant)
|
||||
connstr = endpoint.connstr()
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr])
|
||||
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
|
||||
env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
|
||||
@@ -96,8 +96,8 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int):
|
||||
|
||||
env.neon_cli.create_branch("b0")
|
||||
|
||||
pg = env.postgres.create_start("b0")
|
||||
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()])
|
||||
endpoint = env.endpoints.create_start("b0")
|
||||
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
|
||||
|
||||
branch_creation_durations = []
|
||||
|
||||
@@ -124,15 +124,15 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
|
||||
|
||||
timeline_id = env.neon_cli.create_branch("root")
|
||||
|
||||
pg = env.postgres.create_start("root")
|
||||
with closing(pg.connect()) as conn:
|
||||
endpoint = env.endpoints.create_start("root")
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
for i in range(10000):
|
||||
cur.execute(f"CREATE TABLE t{i} as SELECT g FROM generate_series(1, 1000) g")
|
||||
|
||||
# Wait for the pageserver to finish processing all the pending WALs,
|
||||
# as we don't want the LSN wait time to be included during the branch creation
|
||||
flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
wait_for_last_record_lsn(
|
||||
env.pageserver.http_client(), env.initial_tenant, timeline_id, flush_lsn
|
||||
)
|
||||
@@ -142,7 +142,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
|
||||
|
||||
# run a concurrent insertion to make the ancestor "busy" during the branch creation
|
||||
thread = threading.Thread(
|
||||
target=pg.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",)
|
||||
target=endpoint.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",)
|
||||
)
|
||||
thread.start()
|
||||
|
||||
|
||||
@@ -42,41 +42,41 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
|
||||
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
|
||||
|
||||
env.neon_cli.create_branch("root")
|
||||
pg_root = env.postgres.create_start("root")
|
||||
pg_bin.run_capture(["pgbench", "-i", pg_root.connstr(), "-s10"])
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint_root.connstr(), "-s10"])
|
||||
|
||||
fork_at_current_lsn(env, pg_root, "child", "root")
|
||||
fork_at_current_lsn(env, endpoint_root, "child", "root")
|
||||
|
||||
pg_child = env.postgres.create_start("child")
|
||||
endpoint_child = env.endpoints.create_start("child")
|
||||
|
||||
run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", pg_root.connstr()])
|
||||
run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", pg_child.connstr()])
|
||||
run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", endpoint_root.connstr()])
|
||||
run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", endpoint_child.connstr()])
|
||||
|
||||
|
||||
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
env.neon_cli.create_branch("root")
|
||||
pg_root = env.postgres.create_start("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
|
||||
pg_root.safe_psql(
|
||||
endpoint_root.safe_psql(
|
||||
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
|
||||
)
|
||||
|
||||
env.neon_cli.create_branch("child", "root")
|
||||
pg_child = env.postgres.create_start("child")
|
||||
endpoint_child = env.endpoints.create_start("child")
|
||||
|
||||
with neon_compare.record_duration("root_run_duration"):
|
||||
pg_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
|
||||
endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
|
||||
with neon_compare.record_duration("child_run_duration"):
|
||||
pg_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
|
||||
endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
|
||||
|
||||
|
||||
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
|
||||
env = neon_compare.env
|
||||
env.neon_cli.create_branch("root")
|
||||
pg_root = env.postgres.create_start("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
|
||||
pg_root.safe_psql_many(
|
||||
endpoint_root.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
|
||||
"INSERT INTO foo SELECT FROM generate_series(1,1000000)",
|
||||
@@ -84,12 +84,12 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
|
||||
)
|
||||
|
||||
env.neon_cli.create_branch("child", "root")
|
||||
pg_child = env.postgres.create_start("child")
|
||||
endpoint_child = env.endpoints.create_start("child")
|
||||
|
||||
with neon_compare.record_duration("root_run_duration"):
|
||||
pg_root.safe_psql("SELECT count(*) from foo")
|
||||
endpoint_root.safe_psql("SELECT count(*) from foo")
|
||||
with neon_compare.record_duration("child_run_duration"):
|
||||
pg_child.safe_psql("SELECT count(*) from foo")
|
||||
endpoint_child.safe_psql("SELECT count(*) from foo")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
@@ -35,14 +35,14 @@ def test_bulk_tenant_create(
|
||||
# if use_safekeepers == 'with_sa':
|
||||
# wa_factory.start_n_new(3)
|
||||
|
||||
pg_tenant = env.postgres.create_start(
|
||||
endpoint_tenant = env.endpoints.create_start(
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
|
||||
)
|
||||
|
||||
end = timeit.default_timer()
|
||||
time_slices.append(end - start)
|
||||
|
||||
pg_tenant.stop()
|
||||
endpoint_tenant.stop()
|
||||
|
||||
zenbenchmark.record(
|
||||
"tenant_creation_time",
|
||||
|
||||
@@ -18,8 +18,8 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
|
||||
|
||||
timeline_id = env.neon_cli.create_branch("test_bulk_update")
|
||||
tenant_id = env.initial_tenant
|
||||
pg = env.postgres.create_start("test_bulk_update")
|
||||
cur = pg.connect().cursor()
|
||||
endpoint = env.endpoints.create_start("test_bulk_update")
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute("set statement_timeout=0")
|
||||
|
||||
cur.execute(f"create table t(x integer) WITH (fillfactor={fillfactor})")
|
||||
@@ -28,13 +28,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
|
||||
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
|
||||
|
||||
cur.execute("vacuum t")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
with zenbenchmark.record_duration("update-no-prefetch"):
|
||||
cur.execute("update t set x=x+1")
|
||||
|
||||
cur.execute("vacuum t")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
with zenbenchmark.record_duration("delete-no-prefetch"):
|
||||
cur.execute("delete from t")
|
||||
@@ -50,13 +50,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
|
||||
cur.execute(f"insert into t2 values (generate_series(1,{n_records}))")
|
||||
|
||||
cur.execute("vacuum t2")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
with zenbenchmark.record_duration("update-with-prefetch"):
|
||||
cur.execute("update t2 set x=x+1")
|
||||
|
||||
cur.execute("vacuum t2")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
with zenbenchmark.record_duration("delete-with-prefetch"):
|
||||
cur.execute("delete from t2")
|
||||
|
||||
@@ -33,11 +33,11 @@ def test_compaction(neon_compare: NeonCompare):
|
||||
|
||||
# Create some tables, and run a bunch of INSERTs and UPDATes on them,
|
||||
# to generate WAL and layers
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
|
||||
)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
for i in range(100):
|
||||
cur.execute(f"create table tbl{i} (i int, j int);")
|
||||
@@ -45,7 +45,7 @@ def test_compaction(neon_compare: NeonCompare):
|
||||
for j in range(100):
|
||||
cur.execute(f"update tbl{i} set j = {j};")
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
# First compaction generates L1 layers
|
||||
with neon_compare.zenbenchmark.record_duration("compaction"):
|
||||
|
||||
@@ -2,13 +2,13 @@ import threading
|
||||
|
||||
import pytest
|
||||
from fixtures.compare_fixtures import PgCompare
|
||||
from fixtures.neon_fixtures import Postgres
|
||||
from fixtures.neon_fixtures import PgProtocol
|
||||
|
||||
from performance.test_perf_pgbench import get_scales_matrix
|
||||
from performance.test_wal_backpressure import record_read_latency
|
||||
|
||||
|
||||
def start_write_workload(pg: Postgres, scale: int = 10):
|
||||
def start_write_workload(pg: PgProtocol, scale: int = 10):
|
||||
with pg.connect().cursor() as cur:
|
||||
cur.execute(f"create table big as select generate_series(1,{scale*100_000})")
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
|
||||
)
|
||||
|
||||
env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant)
|
||||
pg = env.postgres.create_start("test_layer_map", tenant_id=tenant)
|
||||
cur = pg.connect().cursor()
|
||||
endpoint = env.endpoints.create_start("test_layer_map", tenant_id=tenant)
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute("create table t(x integer)")
|
||||
for i in range(n_iters):
|
||||
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
|
||||
|
||||
@@ -14,19 +14,19 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
|
||||
# Start
|
||||
env.neon_cli.create_branch("test_startup")
|
||||
with zenbenchmark.record_duration("startup_time"):
|
||||
pg = env.postgres.create_start("test_startup")
|
||||
pg.safe_psql("select 1;")
|
||||
endpoint = env.endpoints.create_start("test_startup")
|
||||
endpoint.safe_psql("select 1;")
|
||||
|
||||
# Restart
|
||||
pg.stop_and_destroy()
|
||||
endpoint.stop_and_destroy()
|
||||
with zenbenchmark.record_duration("restart_time"):
|
||||
pg.create_start("test_startup")
|
||||
pg.safe_psql("select 1;")
|
||||
endpoint.create_start("test_startup")
|
||||
endpoint.safe_psql("select 1;")
|
||||
|
||||
# Fill up
|
||||
num_rows = 1000000 # 30 MB
|
||||
num_tables = 100
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
for i in range(num_tables):
|
||||
cur.execute(f"create table t_{i} (i integer);")
|
||||
@@ -34,18 +34,18 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
|
||||
|
||||
# Read
|
||||
with zenbenchmark.record_duration("read_time"):
|
||||
pg.safe_psql("select * from t_0;")
|
||||
endpoint.safe_psql("select * from t_0;")
|
||||
|
||||
# Read again
|
||||
with zenbenchmark.record_duration("second_read_time"):
|
||||
pg.safe_psql("select * from t_0;")
|
||||
endpoint.safe_psql("select * from t_0;")
|
||||
|
||||
# Restart
|
||||
pg.stop_and_destroy()
|
||||
endpoint.stop_and_destroy()
|
||||
with zenbenchmark.record_duration("restart_with_data"):
|
||||
pg.create_start("test_startup")
|
||||
pg.safe_psql("select 1;")
|
||||
endpoint.create_start("test_startup")
|
||||
endpoint.safe_psql("select 1;")
|
||||
|
||||
# Read
|
||||
with zenbenchmark.record_duration("read_after_restart"):
|
||||
pg.safe_psql("select * from t_0;")
|
||||
endpoint.safe_psql("select * from t_0;")
|
||||
|
||||
@@ -22,8 +22,8 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
pageserver_http.configure_failpoints(("flush-frozen-before-sync", "sleep(10000)"))
|
||||
|
||||
pg_branch0 = env.postgres.create_start("main", tenant_id=tenant)
|
||||
branch0_cur = pg_branch0.connect().cursor()
|
||||
endpoint_branch0 = env.endpoints.create_start("main", tenant_id=tenant)
|
||||
branch0_cur = endpoint_branch0.connect().cursor()
|
||||
branch0_timeline = TimelineId(query_scalar(branch0_cur, "SHOW neon.timeline_id"))
|
||||
log.info(f"b0 timeline {branch0_timeline}")
|
||||
|
||||
@@ -44,10 +44,10 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Create branch1.
|
||||
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
|
||||
pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant)
|
||||
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
|
||||
log.info("postgres is running on 'branch1' branch")
|
||||
|
||||
branch1_cur = pg_branch1.connect().cursor()
|
||||
branch1_cur = endpoint_branch1.connect().cursor()
|
||||
branch1_timeline = TimelineId(query_scalar(branch1_cur, "SHOW neon.timeline_id"))
|
||||
log.info(f"b1 timeline {branch1_timeline}")
|
||||
|
||||
@@ -67,9 +67,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Create branch2.
|
||||
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
|
||||
pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant)
|
||||
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
|
||||
log.info("postgres is running on 'branch2' branch")
|
||||
branch2_cur = pg_branch2.connect().cursor()
|
||||
branch2_cur = endpoint_branch2.connect().cursor()
|
||||
|
||||
branch2_timeline = TimelineId(query_scalar(branch2_cur, "SHOW neon.timeline_id"))
|
||||
log.info(f"b2 timeline {branch2_timeline}")
|
||||
|
||||
@@ -64,9 +64,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
branch = "test_compute_auth_to_pageserver"
|
||||
env.neon_cli.create_branch(branch)
|
||||
pg = env.postgres.create_start(branch)
|
||||
endpoint = env.endpoints.create_start(branch)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -83,7 +83,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
|
||||
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
|
||||
timeline_id = env.neon_cli.create_branch(branch)
|
||||
env.postgres.create_start(branch)
|
||||
env.endpoints.create_start(branch)
|
||||
|
||||
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
|
||||
invalid_tenant_token = env.auth_keys.generate_tenant_token(TenantId.generate())
|
||||
|
||||
@@ -5,7 +5,7 @@ from contextlib import closing, contextmanager
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, Postgres
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnvBuilder
|
||||
|
||||
pytest_plugins = "fixtures.neon_fixtures"
|
||||
|
||||
@@ -20,10 +20,10 @@ def pg_cur(pg):
|
||||
# Periodically check that all backpressure lags are below the configured threshold,
|
||||
# assert if they are not.
|
||||
# If the check query fails, stop the thread. Main thread should notice that and stop the test.
|
||||
def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5):
|
||||
def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_interval=5):
|
||||
log.info("checks started")
|
||||
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
|
||||
|
||||
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
|
||||
@@ -41,7 +41,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
|
||||
max_replication_apply_lag_bytes = res[0]
|
||||
log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes")
|
||||
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
while not stop_event.is_set():
|
||||
try:
|
||||
cur.execute(
|
||||
@@ -102,14 +102,14 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
|
||||
# Create a branch for us
|
||||
env.neon_cli.create_branch("test_backpressure")
|
||||
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]
|
||||
)
|
||||
log.info("postgres is running on 'test_backpressure' branch")
|
||||
|
||||
# setup check thread
|
||||
check_stop_event = threading.Event()
|
||||
check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
|
||||
check_thread = threading.Thread(target=check_backpressure, args=(endpoint, check_stop_event))
|
||||
check_thread.start()
|
||||
|
||||
# Configure failpoint to slow down walreceiver ingest
|
||||
@@ -125,7 +125,7 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
|
||||
# because of the lag and waiting for lsn to replay to arrive.
|
||||
time.sleep(2)
|
||||
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
# Create and initialize test table
|
||||
cur.execute("CREATE TABLE foo(x bigint)")
|
||||
|
||||
|
||||
@@ -15,4 +15,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv):
|
||||
pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))
|
||||
|
||||
with pytest.raises(Exception, match="basebackup-before-control-file"):
|
||||
env.postgres.create_start("test_basebackup_error")
|
||||
env.endpoints.create_start("test_basebackup_error")
|
||||
|
||||
@@ -67,9 +67,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
|
||||
)
|
||||
|
||||
timeline_main = env.neon_cli.create_timeline("test_main", tenant_id=tenant)
|
||||
pg_main = env.postgres.create_start("test_main", tenant_id=tenant)
|
||||
endpoint_main = env.endpoints.create_start("test_main", tenant_id=tenant)
|
||||
|
||||
main_cur = pg_main.connect().cursor()
|
||||
main_cur = endpoint_main.connect().cursor()
|
||||
|
||||
main_cur.execute(
|
||||
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
|
||||
@@ -90,9 +90,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1
|
||||
)
|
||||
pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant)
|
||||
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
|
||||
|
||||
branch_cur = pg_branch.connect().cursor()
|
||||
branch_cur = endpoint_branch.connect().cursor()
|
||||
branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)")
|
||||
|
||||
assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000
|
||||
@@ -142,8 +142,8 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
)
|
||||
|
||||
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
pg0 = env.postgres.create_start("b0", tenant_id=tenant)
|
||||
res = pg0.safe_psql_many(
|
||||
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
|
||||
res = endpoint0.safe_psql_many(
|
||||
queries=[
|
||||
"CREATE TABLE t(key serial primary key)",
|
||||
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
|
||||
|
||||
@@ -18,10 +18,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.neon_cli.create_branch("test_branch_behind")
|
||||
pgmain = env.postgres.create_start("test_branch_behind")
|
||||
endpoint_main = env.endpoints.create_start("test_branch_behind")
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
main_cur = pgmain.connect().cursor()
|
||||
main_cur = endpoint_main.connect().cursor()
|
||||
|
||||
timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id"))
|
||||
|
||||
@@ -74,15 +74,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
|
||||
)
|
||||
|
||||
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
|
||||
pg_more = env.postgres.create_start("test_branch_behind_more")
|
||||
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
|
||||
endpoint_more = env.endpoints.create_start("test_branch_behind_more")
|
||||
|
||||
# On the 'hundred' branch, we should see only 100 rows
|
||||
hundred_cur = pg_hundred.connect().cursor()
|
||||
hundred_cur = endpoint_hundred.connect().cursor()
|
||||
assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100
|
||||
|
||||
# On the 'more' branch, we should see 100200 rows
|
||||
more_cur = pg_more.connect().cursor()
|
||||
more_cur = endpoint_more.connect().cursor()
|
||||
assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100
|
||||
|
||||
# All the rows are visible on the main branch
|
||||
@@ -94,8 +94,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
|
||||
)
|
||||
pg = env.postgres.create_start("test_branch_segment_boundary")
|
||||
assert pg.safe_psql("SELECT 1")[0][0] == 1
|
||||
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
|
||||
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import List
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv, PgBin
|
||||
from fixtures.types import Lsn
|
||||
from fixtures.utils import query_scalar
|
||||
from performance.test_perf_pgbench import get_scales_matrix
|
||||
@@ -40,20 +40,20 @@ def test_branching_with_pgbench(
|
||||
}
|
||||
)
|
||||
|
||||
def run_pgbench(pg: Postgres):
|
||||
connstr = pg.connstr()
|
||||
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-T15", connstr])
|
||||
|
||||
env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
pgs: List[Postgres] = []
|
||||
pgs.append(env.postgres.create_start("b0", tenant_id=tenant))
|
||||
endpoints: List[Endpoint] = []
|
||||
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
|
||||
|
||||
threads: List[threading.Thread] = []
|
||||
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True))
|
||||
threads.append(
|
||||
threading.Thread(target=run_pgbench, args=(endpoints[0].connstr(),), daemon=True)
|
||||
)
|
||||
threads[-1].start()
|
||||
|
||||
thread_limit = 4
|
||||
@@ -79,16 +79,18 @@ def test_branching_with_pgbench(
|
||||
else:
|
||||
env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant)
|
||||
|
||||
pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant))
|
||||
endpoints.append(env.endpoints.create_start("b{}".format(i + 1), tenant_id=tenant))
|
||||
|
||||
threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True))
|
||||
threads.append(
|
||||
threading.Thread(target=run_pgbench, args=(endpoints[-1].connstr(),), daemon=True)
|
||||
)
|
||||
threads[-1].start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
for pg in pgs:
|
||||
res = pg.safe_psql("SELECT count(*) from pgbench_accounts")
|
||||
for ep in endpoints:
|
||||
res = ep.safe_psql("SELECT count(*) from pgbench_accounts")
|
||||
assert res[0] == (100000 * scale,)
|
||||
|
||||
|
||||
@@ -110,11 +112,11 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
|
||||
env = neon_simple_env
|
||||
|
||||
env.neon_cli.create_branch("b0")
|
||||
pg0 = env.postgres.create_start("b0")
|
||||
endpoint0 = env.endpoints.create_start("b0")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", pg0.connstr()])
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
|
||||
|
||||
with pg0.cursor() as cur:
|
||||
with endpoint0.cursor() as cur:
|
||||
curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
# Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ`
|
||||
@@ -123,6 +125,6 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
|
||||
|
||||
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
|
||||
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
|
||||
pg1 = env.postgres.create_start("b1")
|
||||
endpoint1 = env.endpoints.create_start("b1")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", pg1.connstr()])
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import List, Tuple
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
|
||||
|
||||
@@ -24,17 +24,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
]
|
||||
)
|
||||
|
||||
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
|
||||
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
|
||||
|
||||
for n in range(4):
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
with pg.cursor() as cur:
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")
|
||||
pg.stop()
|
||||
tenant_timelines.append((tenant_id, timeline_id, pg))
|
||||
endpoint.stop()
|
||||
tenant_timelines.append((tenant_id, timeline_id, endpoint))
|
||||
|
||||
# Stop the pageserver
|
||||
env.pageserver.stop()
|
||||
|
||||
@@ -24,14 +24,14 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
"autovacuum_freeze_max_age=100000",
|
||||
]
|
||||
|
||||
pg = env.postgres.create_start("test_clog_truncate", config_lines=config)
|
||||
endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
|
||||
log.info("postgres is running on test_clog_truncate branch")
|
||||
|
||||
# Install extension containing function needed for test
|
||||
pg.safe_psql("CREATE EXTENSION neon_test_utils")
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
|
||||
|
||||
# Consume many xids to advance clog
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("select test_consume_xids(1000*1000*10);")
|
||||
log.info("xids consumed")
|
||||
|
||||
@@ -44,7 +44,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
|
||||
# wait for autovacuum to truncate the pg_xact
|
||||
# XXX Is it worth to add a timeout here?
|
||||
pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000")
|
||||
pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000")
|
||||
log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
|
||||
|
||||
while os.path.isfile(pg_xact_0000_path):
|
||||
@@ -52,7 +52,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
time.sleep(5)
|
||||
|
||||
# checkpoint to advance latest lsn
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CHECKPOINT;")
|
||||
lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()")
|
||||
|
||||
@@ -61,10 +61,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
env.neon_cli.create_branch(
|
||||
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
|
||||
)
|
||||
pg2 = env.postgres.create_start("test_clog_truncate_new")
|
||||
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")
|
||||
log.info("postgres is running on test_clog_truncate_new branch")
|
||||
|
||||
# check that new node doesn't contain truncated segment
|
||||
pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000")
|
||||
pg_xact_0000_path_new = os.path.join(endpoint2.pg_xact_dir_path(), "0000")
|
||||
log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
|
||||
assert os.path.isfile(pg_xact_0000_path_new) is False
|
||||
|
||||
@@ -24,8 +24,8 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
|
||||
|
||||
def start_workload():
|
||||
env.neon_cli.create_branch("test_lsof_pageserver_pid")
|
||||
pg = env.postgres.create_start("test_lsof_pageserver_pid")
|
||||
with closing(pg.connect()) as conn:
|
||||
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x")
|
||||
cur.execute("update foo set x=x+1")
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import copy
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -55,29 +56,31 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
|
||||
neon_env_builder.preserve_database_files = True
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
# FIXME: Is this expected?
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
|
||||
pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
|
||||
pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
|
||||
pg_bin.run(["pgbench", "--initialize", "--scale=10", endpoint.connstr()])
|
||||
pg_bin.run(["pgbench", "--time=60", "--progress=2", endpoint.connstr()])
|
||||
pg_bin.run(
|
||||
["pg_dumpall", f"--dbname={endpoint.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]
|
||||
)
|
||||
|
||||
snapshot_config = toml.load(test_output_dir / "repo" / "config")
|
||||
tenant_id = snapshot_config["default_tenant_id"]
|
||||
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
|
||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn)
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn)
|
||||
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
for sk in env.safekeepers:
|
||||
sk.stop()
|
||||
env.pageserver.stop()
|
||||
@@ -98,6 +101,9 @@ def test_backward_compatibility(
|
||||
pg_version: str,
|
||||
request: FixtureRequest,
|
||||
):
|
||||
"""
|
||||
Test that the new binaries can read old data
|
||||
"""
|
||||
compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
|
||||
assert (
|
||||
compatibility_snapshot_dir_env is not None
|
||||
@@ -120,6 +126,7 @@ def test_backward_compatibility(
|
||||
check_neon_works(
|
||||
test_output_dir / "compatibility_snapshot" / "repo",
|
||||
neon_binpath,
|
||||
neon_binpath,
|
||||
pg_distrib_dir,
|
||||
pg_version,
|
||||
port_distributor,
|
||||
@@ -148,7 +155,11 @@ def test_forward_compatibility(
|
||||
    port_distributor: PortDistributor,
    pg_version: str,
    request: FixtureRequest,
    neon_binpath: Path,
):
    """
    Test that the old binaries can read new data
    """
    compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN")
    assert compatibility_neon_bin_env is not None, (
        "COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries "
@@ -183,6 +194,7 @@ def test_forward_compatibility(
    check_neon_works(
        test_output_dir / "compatibility_snapshot" / "repo",
        compatibility_neon_bin,
        neon_binpath,
        compatibility_postgres_distrib_dir,
        pg_version,
        port_distributor,
@@ -223,9 +235,13 @@ def prepare_snapshot(
    for logfile in repo_dir.glob("**/*.log"):
        logfile.unlink()

    # Remove tenants data for compute
    for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"):
        shutil.rmtree(tenant)
    # Remove old computes in 'endpoints'. Old versions of the control plane used a directory
    # called "pgdatadirs". Delete it, too.
    if (repo_dir / "endpoints").exists():
        shutil.rmtree(repo_dir / "endpoints")
    if (repo_dir / "pgdatadirs").exists():
        shutil.rmtree(repo_dir / "pgdatadirs")
    os.mkdir(repo_dir / "endpoints")

    # Remove wal-redo temp directory if it exists. Newer pageserver versions don't create
    # them anymore, but old versions did.
@@ -326,7 +342,8 @@ def get_neon_version(neon_binpath: Path):

def check_neon_works(
    repo_dir: Path,
    neon_binpath: Path,
    neon_target_binpath: Path,
    neon_current_binpath: Path,
    pg_distrib_dir: Path,
    pg_version: str,
    port_distributor: PortDistributor,
@@ -336,7 +353,7 @@ def check_neon_works(
):
    snapshot_config_toml = repo_dir / "config"
    snapshot_config = toml.load(snapshot_config_toml)
    snapshot_config["neon_distrib_dir"] = str(neon_binpath)
    snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
    snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
    with (snapshot_config_toml).open("w") as f:
        toml.dump(snapshot_config, f)
@@ -347,17 +364,26 @@ def check_neon_works(
    config.repo_dir = repo_dir
    config.pg_version = pg_version
    config.initial_tenant = snapshot_config["default_tenant_id"]
    config.neon_binpath = neon_binpath
    config.pg_distrib_dir = pg_distrib_dir
    config.preserve_database_files = True

    cli = NeonCli(config)
    cli.raw_cli(["start"])
    request.addfinalizer(lambda: cli.raw_cli(["stop"]))
    # Use the "target" binaries to launch the storage nodes
    config_target = config
    config_target.neon_binpath = neon_target_binpath
    cli_target = NeonCli(config_target)

    # And the current binaries to launch computes
    config_current = copy.copy(config)
    config_current.neon_binpath = neon_current_binpath
    cli_current = NeonCli(config_current)

    cli_target.raw_cli(["start"])
    request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))

    pg_port = port_distributor.get_port()
    cli.pg_start("main", port=pg_port)
    request.addfinalizer(lambda: cli.pg_stop("main"))
    http_port = port_distributor.get_port()
    cli_current.endpoint_start("main", pg_port=pg_port, http_port=http_port)
    request.addfinalizer(lambda: cli_current.endpoint_stop("main"))

    connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
    pg_bin.run(["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"])
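The hunks above are the heart of the cross-version check: check_neon_works now takes two binary paths and drives the storage services (pageserver, safekeepers) with the "target" build, while the compute endpoint is launched with the "current" build. A condensed sketch of that wiring, assuming the NeonCli fixture API used in the diff; the helper name and the import location are illustrative, not part of the test suite:

    import copy

    from fixtures.neon_fixtures import NeonCli  # assumed import location

    def make_compat_clis(config, neon_target_binpath, neon_current_binpath):
        # "target" binaries run the storage services, "current" binaries run computes.
        config_target = copy.copy(config)
        config_target.neon_binpath = neon_target_binpath
        config_current = copy.copy(config)
        config_current.neon_binpath = neon_current_binpath
        return NeonCli(config_target), NeonCli(config_current)

    # Mirrors the diff: storage from the target build, compute from the current one.
    # cli_target, cli_current = make_compat_clis(config, neon_target_binpath, neon_current_binpath)
    # cli_target.raw_cli(["start"])
    # cli_current.endpoint_start("main", pg_port=pg_port, http_port=http_port)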
@@ -1,255 +0,0 @@
import os
from pathlib import Path
from subprocess import TimeoutExpired

from fixtures.log_helper import log
from fixtures.neon_fixtures import ComputeCtl, NeonEnvBuilder, PgBin


# Test that compute_ctl works and prints "--sync-safekeepers" logs.
def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()
    ctl = ComputeCtl(env)

    env.neon_cli.create_branch("test_compute_ctl", "main")
    pg = env.postgres.create_start("test_compute_ctl")
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    with open(pg.config_file_path(), "r") as f:
        cfg_lines = f.readlines()
    cfg_map = {}
    for line in cfg_lines:
        if "=" in line:
            k, v = line.split("=")
            cfg_map[k] = v.strip("\n '\"")
    log.info(f"postgres config: {cfg_map}")
    pgdata = pg.pg_data_dir_path()
    pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres")

    pg.stop_and_destroy()

    spec = (
        """
{
    "format_version": 1.0,

    "timestamp": "2021-05-23T18:25:43.511Z",
    "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",

    "cluster": {
        "cluster_id": "test-cluster-42",
        "name": "Neon Test",
        "state": "restarted",
        "roles": [
        ],
        "databases": [
        ],
        "settings": [
            {
                "name": "fsync",
                "value": "off",
                "vartype": "bool"
            },
            {
                "name": "wal_level",
                "value": "replica",
                "vartype": "enum"
            },
            {
                "name": "hot_standby",
                "value": "on",
                "vartype": "bool"
            },
            {
                "name": "neon.safekeepers",
                "value": """
        + f'"{cfg_map["neon.safekeepers"]}"'
        + """,
                "vartype": "string"
            },
            {
                "name": "wal_log_hints",
                "value": "on",
                "vartype": "bool"
            },
            {
                "name": "log_connections",
                "value": "on",
                "vartype": "bool"
            },
            {
                "name": "shared_buffers",
                "value": "32768",
                "vartype": "integer"
            },
            {
                "name": "port",
                "value": """
        + f'"{cfg_map["port"]}"'
        + """,
                "vartype": "integer"
            },
            {
                "name": "max_connections",
                "value": "100",
                "vartype": "integer"
            },
            {
                "name": "max_wal_senders",
                "value": "10",
                "vartype": "integer"
            },
            {
                "name": "listen_addresses",
                "value": "0.0.0.0",
                "vartype": "string"
            },
            {
                "name": "wal_sender_timeout",
                "value": "0",
                "vartype": "integer"
            },
            {
                "name": "password_encryption",
                "value": "md5",
                "vartype": "enum"
            },
            {
                "name": "maintenance_work_mem",
                "value": "65536",
                "vartype": "integer"
            },
            {
                "name": "max_parallel_workers",
                "value": "8",
                "vartype": "integer"
            },
            {
                "name": "max_worker_processes",
                "value": "8",
                "vartype": "integer"
            },
            {
                "name": "neon.tenant_id",
                "value": """
        + f'"{cfg_map["neon.tenant_id"]}"'
        + """,
                "vartype": "string"
            },
            {
                "name": "max_replication_slots",
                "value": "10",
                "vartype": "integer"
            },
            {
                "name": "neon.timeline_id",
                "value": """
        + f'"{cfg_map["neon.timeline_id"]}"'
        + """,
                "vartype": "string"
            },
            {
                "name": "shared_preload_libraries",
                "value": "neon",
                "vartype": "string"
            },
            {
                "name": "synchronous_standby_names",
                "value": "walproposer",
                "vartype": "string"
            },
            {
                "name": "neon.pageserver_connstring",
                "value": """
        + f'"{cfg_map["neon.pageserver_connstring"]}"'
        + """,
                "vartype": "string"
            }
        ]
    },
    "delta_operations": [
    ]
}
"""
    )

    ps_connstr = cfg_map["neon.pageserver_connstring"]
    log.info(f"ps_connstr: {ps_connstr}, pgdata: {pgdata}")

    # run compute_ctl and wait for 10s
    try:
        ctl.raw_cli(
            [
                "--connstr",
                "postgres://invalid/",
                "--pgdata",
                pgdata,
                "--spec",
                spec,
                "--pgbin",
                pg_bin_path,
            ],
            timeout=10,
        )
    except TimeoutExpired as exc:
        ctl_logs = (exc.stderr or b"").decode("utf-8")
        log.info(f"compute_ctl stderr:\n{ctl_logs}")

    with ExternalProcessManager(Path(pgdata) / "postmaster.pid"):
        start = "starting safekeepers syncing"
        end = "safekeepers synced at LSN"
        start_pos = ctl_logs.index(start)
        assert start_pos != -1
        end_pos = ctl_logs.index(end, start_pos)
        assert end_pos != -1
        sync_safekeepers_logs = ctl_logs[start_pos : end_pos + len(end)]
        log.info("sync_safekeepers_logs:\n" + sync_safekeepers_logs)

        # assert that --sync-safekeepers logs are present in the output
        assert "connecting with node" in sync_safekeepers_logs
        assert "connected with node" in sync_safekeepers_logs
        assert "proposer connected to quorum (2)" in sync_safekeepers_logs
        assert "got votes from majority (2)" in sync_safekeepers_logs
        assert "sending elected msg to node" in sync_safekeepers_logs


class ExternalProcessManager:
    """
    Context manager that kills a process with a pid file on exit.
    """

    def __init__(self, pid_file: Path):
        self.path = pid_file
        self.pid_file = open(pid_file, "r")
        self.pid = int(self.pid_file.readline().strip())

    def __enter__(self):
        return self

    def leave_alive(self):
        self.pid_file.close()

    def __exit__(self, _type, _value, _traceback):
        import signal
        import time

        if self.pid_file.closed:
            return

        with self.pid_file:
            try:
                os.kill(self.pid, signal.SIGTERM)
            except OSError as e:
                if not self.path.is_file():
                    return
                log.info(f"Failed to kill {self.pid}, but the pidfile remains: {e}")
                return

            for _ in range(20):
                if not self.path.is_file():
                    return
                time.sleep(0.2)

            log.info("Process failed to stop after SIGTERM: {self.pid}")
            os.kill(self.pid, signal.SIGKILL)
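The deleted test above assembled its compute spec by splicing f-strings into a JSON template, which is easy to break when the spec format changes. For comparison, the same kind of document can be built as a plain dict and serialized with json.dumps; the sketch below covers only a subset of the settings shown above and is not the project's actual spec builder:

    import json

    def make_spec(cfg_map: dict) -> str:
        # Field layout follows the spec JSON in the deleted test; illustrative only.
        def setting(name, value, vartype):
            return {"name": name, "value": value, "vartype": vartype}

        settings = [
            setting("fsync", "off", "bool"),
            setting("wal_level", "replica", "enum"),
            setting("hot_standby", "on", "bool"),
            setting("neon.safekeepers", cfg_map["neon.safekeepers"], "string"),
            setting("shared_buffers", "32768", "integer"),
            setting("port", cfg_map["port"], "integer"),
            setting("neon.tenant_id", cfg_map["neon.tenant_id"], "string"),
            setting("neon.timeline_id", cfg_map["neon.timeline_id"], "string"),
            setting("shared_preload_libraries", "neon", "string"),
            setting("synchronous_standby_names", "walproposer", "string"),
            setting("neon.pageserver_connstring", cfg_map["neon.pageserver_connstring"], "string"),
        ]
        spec = {
            "format_version": 1.0,
            "timestamp": "2021-05-23T18:25:43.511Z",
            "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
            "cluster": {
                "cluster_id": "test-cluster-42",
                "name": "Neon Test",
                "state": "restarted",
                "roles": [],
                "databases": [],
                "settings": settings,
            },
            "delta_operations": [],
        }
        return json.dumps(spec, indent=2)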
@@ -12,10 +12,10 @@ def test_config(neon_simple_env: NeonEnv):
    env.neon_cli.create_branch("test_config", "empty")

    # change config
    pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"])
    endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
    log.info("postgres is running on test_config branch")

    with closing(pg.connect()) as conn:
    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """

@@ -21,11 +21,11 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
    env = neon_env_builder.init_start()
    env.neon_cli.create_branch("test_crafted_wal_end")

    pg = env.postgres.create("test_crafted_wal_end")
    endpoint = env.endpoints.create("test_crafted_wal_end")
    wal_craft = WalCraft(env)
    pg.config(wal_craft.postgres_config())
    pg.start()
    res = pg.safe_psql_many(
    endpoint.config(wal_craft.postgres_config())
    endpoint.start()
    res = endpoint.safe_psql_many(
        queries=[
            "CREATE TABLE keys(key int primary key)",
            "INSERT INTO keys SELECT generate_series(1, 100)",
@@ -34,7 +34,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
    )
    assert res[-1][0] == (5050,)

    wal_craft.in_existing(wal_type, pg.connstr())
    wal_craft.in_existing(wal_type, endpoint.connstr())

    log.info("Restarting all safekeepers and pageservers")
    env.pageserver.stop()
@@ -43,7 +43,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
    env.pageserver.start()

    log.info("Trying more queries")
    res = pg.safe_psql_many(
    res = endpoint.safe_psql_many(
        queries=[
            "SELECT SUM(key) FROM keys",
            "INSERT INTO keys SELECT generate_series(101, 200)",
@@ -60,7 +60,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
    env.pageserver.start()

    log.info("Trying more queries (again)")
    res = pg.safe_psql_many(
    res = endpoint.safe_psql_many(
        queries=[
            "SELECT SUM(key) FROM keys",
            "INSERT INTO keys SELECT generate_series(201, 300)",

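Most of the remaining hunks in this compare are the same mechanical rename: the Postgres fixture and the env.postgres factory become Endpoint and env.endpoints, and node_name becomes endpoint_id. A minimal sketch of the renamed API as these hunks use it; the branch name and queries are invented for illustration, the method names come from the diff, and the argument form of endpoint.config() is assumed:

    from fixtures.neon_fixtures import NeonEnv

    def test_endpoint_api_sketch(neon_simple_env: NeonEnv):
        env = neon_simple_env
        env.neon_cli.create_branch("sketch", "empty")

        # formerly: pg = env.postgres.create("sketch")
        endpoint = env.endpoints.create("sketch")
        endpoint.config(["log_min_messages=debug1"])  # extra postgresql.conf lines (argument form assumed)
        endpoint.start()

        res = endpoint.safe_psql_many(
            queries=[
                "CREATE TABLE keys(key int primary key)",
                "INSERT INTO keys SELECT generate_series(1, 100)",
                "SELECT sum(key) FROM keys",
            ]
        )
        assert res[-1][0] == (5050,)
        endpoint.stop()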
@@ -13,10 +13,10 @@ def test_createdb(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_createdb", "empty")
|
||||
|
||||
pg = env.postgres.create_start("test_createdb")
|
||||
endpoint = env.endpoints.create_start("test_createdb")
|
||||
log.info("postgres is running on 'test_createdb' branch")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
# Cause a 'relmapper' change in the original branch
|
||||
cur.execute("VACUUM FULL pg_class")
|
||||
|
||||
@@ -26,10 +26,10 @@ def test_createdb(neon_simple_env: NeonEnv):
|
||||
|
||||
# Create a branch
|
||||
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
|
||||
pg2 = env.postgres.create_start("test_createdb2")
|
||||
endpoint2 = env.endpoints.create_start("test_createdb2")
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
for db in (pg, pg2):
|
||||
for db in (endpoint, endpoint2):
|
||||
with db.cursor(dbname="foodb") as cur:
|
||||
# Check database size in both branches
|
||||
cur.execute(
|
||||
@@ -55,17 +55,17 @@ def test_createdb(neon_simple_env: NeonEnv):
|
||||
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_dropdb", "empty")
|
||||
pg = env.postgres.create_start("test_dropdb")
|
||||
endpoint = env.endpoints.create_start("test_dropdb")
|
||||
log.info("postgres is running on 'test_dropdb' branch")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE DATABASE foodb")
|
||||
|
||||
lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("DROP DATABASE foodb")
|
||||
|
||||
cur.execute("CHECKPOINT")
|
||||
@@ -76,29 +76,29 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env.neon_cli.create_branch(
|
||||
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
|
||||
)
|
||||
pg_before = env.postgres.create_start("test_before_dropdb")
|
||||
endpoint_before = env.endpoints.create_start("test_before_dropdb")
|
||||
|
||||
env.neon_cli.create_branch(
|
||||
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
|
||||
)
|
||||
pg_after = env.postgres.create_start("test_after_dropdb")
|
||||
endpoint_after = env.endpoints.create_start("test_after_dropdb")
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
pg_before.connect(dbname="foodb").close()
|
||||
endpoint_before.connect(dbname="foodb").close()
|
||||
|
||||
# Test that database subdir exists on the branch before drop
|
||||
assert pg_before.pgdata_dir
|
||||
dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid)
|
||||
assert endpoint_before.pgdata_dir
|
||||
dbpath = pathlib.Path(endpoint_before.pgdata_dir) / "base" / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) is True
|
||||
|
||||
# Test that database subdir doesn't exist on the branch after drop
|
||||
assert pg_after.pgdata_dir
|
||||
dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid)
|
||||
assert endpoint_after.pgdata_dir
|
||||
dbpath = pathlib.Path(endpoint_after.pgdata_dir) / "base" / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) is False
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -9,10 +9,10 @@ from fixtures.utils import query_scalar
|
||||
def test_createuser(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_createuser", "empty")
|
||||
pg = env.postgres.create_start("test_createuser")
|
||||
endpoint = env.endpoints.create_start("test_createuser")
|
||||
log.info("postgres is running on 'test_createuser' branch")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
# Cause a 'relmapper' change in the original branch
|
||||
cur.execute("CREATE USER testuser with password %s", ("testpwd",))
|
||||
|
||||
@@ -22,7 +22,7 @@ def test_createuser(neon_simple_env: NeonEnv):
|
||||
|
||||
# Create a branch
|
||||
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
|
||||
pg2 = env.postgres.create_start("test_createuser2")
|
||||
endpoint2 = env.endpoints.create_start("test_createuser2")
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)]
|
||||
assert endpoint2.safe_psql("select current_user", user="testuser") == [("testuser",)]
|
||||
|
||||
@@ -91,8 +91,8 @@ class EvictionEnv:
|
||||
This assumes that the tenant is still at the state after pbench -i.
|
||||
"""
|
||||
lsn = self.pgbench_init_lsns[tenant_id]
|
||||
with self.neon_env.postgres.create_start("main", tenant_id=tenant_id, lsn=lsn) as pg:
|
||||
self.pg_bin.run(["pgbench", "-S", pg.connstr()])
|
||||
with self.neon_env.endpoints.create_start("main", tenant_id=tenant_id, lsn=lsn) as endpoint:
|
||||
self.pg_bin.run(["pgbench", "-S", endpoint.connstr()])
|
||||
|
||||
def pageserver_start_with_disk_usage_eviction(
|
||||
self, period, max_usage_pct, min_avail_bytes, mock_behavior
|
||||
@@ -168,9 +168,9 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
|
||||
}
|
||||
)
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
pg_bin.run(["pgbench", "-i", f"-s{scale}", pg.connstr()])
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
pg_bin.run(["pgbench", "-i", f"-s{scale}", endpoint.connstr()])
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
timelines.append((tenant_id, timeline_id))
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_fsm_truncate")
|
||||
pg = env.postgres.create_start("test_fsm_truncate")
|
||||
pg.safe_psql(
|
||||
endpoint = env.endpoints.create_start("test_fsm_truncate")
|
||||
endpoint.safe_psql(
|
||||
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
|
||||
)
|
||||
|
||||
@@ -24,10 +24,10 @@ def test_fullbackup(
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_fullbackup")
|
||||
pgmain = env.postgres.create_start("test_fullbackup")
|
||||
endpoint_main = env.endpoints.create_start("test_fullbackup")
|
||||
log.info("postgres is running on 'test_fullbackup' branch")
|
||||
|
||||
with pgmain.cursor() as cur:
|
||||
with endpoint_main.cursor() as cur:
|
||||
timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
|
||||
|
||||
# data loading may take a while, so increase statement timeout
|
||||
|
||||
@@ -5,9 +5,9 @@ import random
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
Postgres,
|
||||
RemoteStorageKind,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
@@ -26,9 +26,9 @@ updates_performed = 0
|
||||
|
||||
|
||||
# Run random UPDATEs on test table
|
||||
async def update_table(pg: Postgres):
|
||||
async def update_table(endpoint: Endpoint):
|
||||
global updates_performed
|
||||
pg_conn = await pg.connect_async()
|
||||
pg_conn = await endpoint.connect_async()
|
||||
|
||||
while updates_performed < updates_to_perform:
|
||||
updates_performed += 1
|
||||
@@ -52,10 +52,10 @@ async def gc(env: NeonEnv, timeline: TimelineId):
|
||||
|
||||
|
||||
# At the same time, run UPDATEs and GC
|
||||
async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: TimelineId):
|
||||
async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
|
||||
workers = []
|
||||
for worker_id in range(num_connections):
|
||||
workers.append(asyncio.create_task(update_table(pg)))
|
||||
workers.append(asyncio.create_task(update_table(endpoint)))
|
||||
workers.append(asyncio.create_task(gc(env, timeline)))
|
||||
|
||||
# await all workers
|
||||
@@ -72,10 +72,10 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_gc_aggressive", "main")
|
||||
pg = env.postgres.create_start("test_gc_aggressive")
|
||||
endpoint = env.endpoints.create_start("test_gc_aggressive")
|
||||
log.info("postgres is running on test_gc_aggressive branch")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
@@ -89,7 +89,7 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
cur.execute("CREATE INDEX ON foo(id)")
|
||||
|
||||
asyncio.run(update_and_gc(env, pg, timeline))
|
||||
asyncio.run(update_and_gc(env, endpoint, timeline))
|
||||
|
||||
cur.execute("SELECT COUNT(*), SUM(counter) FROM foo")
|
||||
r = cur.fetchone()
|
||||
@@ -110,11 +110,11 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_gc_index_upload", "main")
|
||||
pg = env.postgres.create_start("test_gc_index_upload")
|
||||
endpoint = env.endpoints.create_start("test_gc_index_upload")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id"))
|
||||
@@ -146,7 +146,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
|
||||
return int(total)
|
||||
|
||||
# Sanity check that the metric works
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
pageserver_http.timeline_gc(tenant_id, timeline_id, 10000)
|
||||
before = get_num_remote_ops("index", "upload")
|
||||
|
||||
@@ -31,8 +31,8 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
"image_creation_threshold": "2",
|
||||
}
|
||||
)
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
connstr = pg.connstr(options="-csynchronous_commit=off")
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
connstr = endpoint.connstr(options="-csynchronous_commit=off")
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
|
||||
|
||||
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
|
||||
|
||||
@@ -9,10 +9,10 @@ from pathlib import Path
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgBin,
|
||||
Postgres,
|
||||
)
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.types import Lsn, TenantId, TimelineId
|
||||
@@ -72,7 +72,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
|
||||
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
|
||||
|
||||
node_name = "import_from_vanilla"
|
||||
endpoint_id = "ep-import_from_vanilla"
|
||||
tenant = TenantId.generate()
|
||||
timeline = TimelineId.generate()
|
||||
|
||||
@@ -113,7 +113,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
"--timeline-id",
|
||||
str(timeline),
|
||||
"--node-name",
|
||||
node_name,
|
||||
endpoint_id,
|
||||
"--base-lsn",
|
||||
start_lsn,
|
||||
"--base-tarfile",
|
||||
@@ -153,8 +153,8 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
||||
wait_for_upload(client, tenant, timeline, Lsn(end_lsn))
|
||||
|
||||
# Check it worked
|
||||
pg = env.postgres.create_start(node_name, tenant_id=tenant)
|
||||
assert pg.safe_psql("select count(*) from t") == [(300000,)]
|
||||
endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant)
|
||||
assert endpoint.safe_psql("select count(*) from t") == [(300000,)]
|
||||
|
||||
|
||||
@pytest.mark.timeout(600)
|
||||
@@ -168,10 +168,10 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
|
||||
)
|
||||
|
||||
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
|
||||
pg = env.postgres.create_start("test_import_from_pageserver_small")
|
||||
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
|
||||
|
||||
num_rows = 3000
|
||||
lsn = _generate_data(num_rows, pg)
|
||||
lsn = _generate_data(num_rows, endpoint)
|
||||
_import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir)
|
||||
|
||||
|
||||
@@ -185,14 +185,14 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
|
||||
pg = env.postgres.create_start("test_import_from_pageserver_multisegment")
|
||||
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
|
||||
|
||||
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
|
||||
# is large enough to create multi-segment files. Typically, a segment file's size is
|
||||
# at most 1GB. A large number of inserted rows (`30000000`) is used to increase the
|
||||
# DB size to above 1GB. Related: https://github.com/neondatabase/neon/issues/2097.
|
||||
num_rows = 30000000
|
||||
lsn = _generate_data(num_rows, pg)
|
||||
lsn = _generate_data(num_rows, endpoint)
|
||||
|
||||
logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[
|
||||
"current_logical_size"
|
||||
@@ -213,12 +213,12 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
|
||||
assert cnt_seg_files > 0
|
||||
|
||||
|
||||
def _generate_data(num_rows: int, pg: Postgres) -> Lsn:
|
||||
def _generate_data(num_rows: int, endpoint: Endpoint) -> Lsn:
|
||||
"""Generate a table with `num_rows` rows.
|
||||
|
||||
Returns:
|
||||
the latest insert WAL's LSN"""
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# data loading may take a while, so increase statement timeout
|
||||
cur.execute("SET statement_timeout='300s'")
|
||||
@@ -263,7 +263,7 @@ def _import(
|
||||
tar_output_file = result_basepath + ".stdout"
|
||||
|
||||
# Stop the first pageserver instance, erase all its data
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
env.pageserver.stop()
|
||||
|
||||
dir_to_clear = Path(env.repo_dir) / "tenants"
|
||||
@@ -278,7 +278,7 @@ def _import(
|
||||
tenant = TenantId.generate()
|
||||
|
||||
# Import to pageserver
|
||||
node_name = "import_from_pageserver"
|
||||
endpoint_id = "ep-import_from_pageserver"
|
||||
client = env.pageserver.http_client()
|
||||
client.tenant_create(tenant)
|
||||
env.neon_cli.raw_cli(
|
||||
@@ -290,7 +290,7 @@ def _import(
|
||||
"--timeline-id",
|
||||
str(timeline),
|
||||
"--node-name",
|
||||
node_name,
|
||||
endpoint_id,
|
||||
"--base-lsn",
|
||||
str(lsn),
|
||||
"--base-tarfile",
|
||||
@@ -305,8 +305,8 @@ def _import(
|
||||
wait_for_upload(client, tenant, timeline, lsn)
|
||||
|
||||
# Check it worked
|
||||
pg = env.postgres.create_start(node_name, tenant_id=tenant)
|
||||
assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
|
||||
endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant)
|
||||
assert endpoint.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
|
||||
|
||||
# Take another fullbackup
|
||||
query = f"fullbackup { tenant} {timeline} {lsn}"
|
||||
|
||||
@@ -15,9 +15,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
def test_large_schema(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
tables = 2 # 10 is too much for debug build
|
||||
@@ -27,18 +27,18 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Restart compute. Restart is actually not strictly needed.
|
||||
# It is done mostly because this test originally tries to model the problem reported by Ketteq.
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
# Kill and restart the pageserver.
|
||||
# env.pageserver.stop(immediate=True)
|
||||
# env.pageserver.start()
|
||||
pg.start()
|
||||
endpoint.start()
|
||||
|
||||
retry_sleep = 0.5
|
||||
max_retries = 200
|
||||
retries = 0
|
||||
while True:
|
||||
try:
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)")
|
||||
for j in range(1, partitions + 1):
|
||||
@@ -63,7 +63,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
|
||||
raise
|
||||
break
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
for i in range(1, tables + 1):
|
||||
@@ -74,8 +74,8 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid")
|
||||
|
||||
# Check layer file sizes
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id)
|
||||
for filename in os.listdir(timeline_path):
|
||||
if filename.startswith("00000"):
|
||||
|
||||
@@ -27,13 +27,13 @@ def test_basic_eviction(
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
client = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
# Create a number of layers in the tenant
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
cur.execute(
|
||||
"""
|
||||
@@ -172,15 +172,15 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
|
||||
env.initial_tenant = tenant_id # update_and_gc relies on this
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
log.info("fill with data, creating delta & image layers, some of which are GC'able after")
|
||||
# no particular reason to create the layers like this, but we are sure
|
||||
# not to hit the image_creation_threshold here.
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("create table a (id bigserial primary key, some_value bigint not null)")
|
||||
cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
# Create delta layers, then turn them into image layers.
|
||||
@@ -191,19 +191,19 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
|
||||
for i in range(0, 2):
|
||||
for j in range(0, 3):
|
||||
# create a minimal amount of "delta difficulty" for this table
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("update a set some_value = -some_value + %s", (j,))
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
# vacuuming should aid to reuse keys, though it's not really important
|
||||
# with image_creation_threshold=1 which we will use on the last compaction
|
||||
cur.execute("vacuum")
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
if i == 1 and j == 2 and k == 1:
|
||||
# last iteration; stop before checkpoint to avoid leaving an inmemory layer
|
||||
pg.stop_and_destroy()
|
||||
endpoint.stop_and_destroy()
|
||||
|
||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
|
||||
}
|
||||
)
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
pg = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
pg.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
|
||||
@@ -64,8 +64,8 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
|
||||
}
|
||||
)
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
pg.safe_psql_many(
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
|
||||
"""INSERT INTO foo
|
||||
|
||||
@@ -12,10 +12,10 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
|
||||
pgmain = env.postgres.create_start("test_lsn_mapping")
|
||||
endpoint_main = env.endpoints.create_start("test_lsn_mapping")
|
||||
log.info("postgres is running on 'test_lsn_mapping' branch")
|
||||
|
||||
cur = pgmain.connect().cursor()
|
||||
cur = endpoint_main.connect().cursor()
|
||||
# Create table, and insert rows, each in a separate transaction
|
||||
# Disable synchronous_commit to make this initialization go faster.
|
||||
#
|
||||
@@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("INSERT INTO foo VALUES (-1)")
|
||||
|
||||
# Wait until WAL is received by pageserver
|
||||
wait_for_last_flush_lsn(env, pgmain, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
|
||||
|
||||
with env.pageserver.http_client() as client:
|
||||
# Check edge cases: timestamp in the future
|
||||
@@ -61,9 +61,9 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
# Call get_lsn_by_timestamp to get the LSN
|
||||
# Launch a new read-only node at that LSN, and check that only the rows
|
||||
# that were supposed to be committed at that point in time are visible.
|
||||
pg_here = env.postgres.create_start(
|
||||
branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn
|
||||
endpoint_here = env.endpoints.create_start(
|
||||
branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
|
||||
)
|
||||
assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
|
||||
assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
|
||||
|
||||
pg_here.stop_and_destroy()
|
||||
endpoint_here.stop_and_destroy()
|
||||
|
||||
@@ -123,9 +123,9 @@ def test_metric_collection(
|
||||
# before pageserver, pageserver log might contain such errors in the end.
|
||||
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
|
||||
env.neon_cli.create_branch("test_metric_collection")
|
||||
pg = env.postgres.create_start("test_metric_collection")
|
||||
endpoint = env.endpoints.create_start("test_metric_collection")
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id"))
|
||||
@@ -158,7 +158,7 @@ def test_metric_collection(
|
||||
|
||||
# upload some data to remote storage
|
||||
if remote_storage_kind == RemoteStorageKind.LOCAL_FS:
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
pageserver_http.timeline_gc(tenant_id, timeline_id, 10000)
|
||||
|
||||
@@ -12,10 +12,10 @@ from fixtures.utils import query_scalar
|
||||
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_multixact", "empty")
|
||||
pg = env.postgres.create_start("test_multixact")
|
||||
endpoint = env.endpoints.create_start("test_multixact")
|
||||
|
||||
log.info("postgres is running on 'test_multixact' branch")
|
||||
cur = pg.connect().cursor()
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
CREATE TABLE t1(i int primary key);
|
||||
@@ -32,7 +32,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
connections = []
|
||||
for i in range(nclients):
|
||||
# Do not turn on autocommit. We want to hold the key-share locks.
|
||||
conn = pg.connect(autocommit=False)
|
||||
conn = endpoint.connect(autocommit=False)
|
||||
connections.append(conn)
|
||||
|
||||
# On each iteration, we commit the previous transaction on a connection,
|
||||
@@ -65,10 +65,10 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
|
||||
# Branch at this point
|
||||
env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
|
||||
pg_new = env.postgres.create_start("test_multixact_new")
|
||||
endpoint_new = env.endpoints.create_start("test_multixact_new")
|
||||
|
||||
log.info("postgres is running on 'test_multixact_new' branch")
|
||||
next_multixact_id_new = pg_new.safe_psql(
|
||||
next_multixact_id_new = endpoint_new.safe_psql(
|
||||
"SELECT next_multixact_id FROM pg_control_checkpoint()"
|
||||
)[0][0]
|
||||
|
||||
@@ -76,4 +76,4 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
assert next_multixact_id_new == next_multixact_id
|
||||
|
||||
# Check that we can restore the content of the datadir correctly
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -9,9 +9,18 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
    try:
        env.neon_cli.start()
        env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)
        env.neon_cli.pg_start(node_name="main", port=port_distributor.get_port())

        pg_port = port_distributor.get_port()
        http_port = port_distributor.get_port()
        env.neon_cli.endpoint_start(
            endpoint_id="ep-basic-main", pg_port=pg_port, http_port=http_port
        )

        env.neon_cli.create_branch(new_branch_name="migration_check")
        env.neon_cli.pg_start(node_name="migration_check", port=port_distributor.get_port())
        pg_port = port_distributor.get_port()
        http_port = port_distributor.get_port()
        env.neon_cli.endpoint_start(
            endpoint_id="ep-migration_check", pg_port=pg_port, http_port=http_port
        )
    finally:
        env.neon_cli.stop()

@@ -8,9 +8,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
def test_next_xid(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("CREATE TABLE t(x integer)")
|
||||
|
||||
@@ -19,17 +19,17 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
|
||||
print(f"iteration {i} / {iterations}")
|
||||
|
||||
# Kill and restart the pageserver.
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
env.pageserver.stop(immediate=True)
|
||||
env.pageserver.start()
|
||||
pg.start()
|
||||
endpoint.start()
|
||||
|
||||
retry_sleep = 0.5
|
||||
max_retries = 200
|
||||
retries = 0
|
||||
while True:
|
||||
try:
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute(f"INSERT INTO t values({i})")
|
||||
conn.close()
|
||||
@@ -48,7 +48,7 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
|
||||
raise
|
||||
break
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("SELECT count(*) FROM t")
|
||||
assert cur.fetchone() == (iterations,)
|
||||
|
||||
@@ -6,9 +6,9 @@ from fixtures.pageserver.http import PageserverHttpClient
|
||||
|
||||
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
res_1 = pg.safe_psql_many(
|
||||
res_1 = endpoint.safe_psql_many(
|
||||
queries=[
|
||||
"CREATE TABLE t(key int primary key, value text)",
|
||||
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
|
||||
@@ -19,14 +19,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
|
||||
assert res_1[-1][0] == (5000050000,)
|
||||
# TODO check detach on live instance
|
||||
log.info("stopping compute")
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
log.info("compute stopped")
|
||||
|
||||
pg.start()
|
||||
res_2 = pg.safe_psql("SELECT sum(key) FROM t")
|
||||
endpoint.start()
|
||||
res_2 = endpoint.safe_psql("SELECT sum(key) FROM t")
|
||||
assert res_2[0] == (5000050000,)
|
||||
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
|
||||
|
||||
|
||||
@@ -19,10 +19,10 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_old_request_lsn", "main")
|
||||
pg = env.postgres.create_start("test_old_request_lsn")
|
||||
endpoint = env.endpoints.create_start("test_old_request_lsn")
|
||||
log.info("postgres is running on test_old_request_lsn branch")
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
|
||||
@@ -73,17 +73,17 @@ def test_ondemand_download_large_rel(
|
||||
)
|
||||
env.initial_tenant = tenant
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
# We want to make sure that the data is large enough that the keyspace is partitioned.
|
||||
num_rows = 1000000
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
# data loading may take a while, so increase statement timeout
|
||||
cur.execute("SET statement_timeout='300s'")
|
||||
cur.execute(
|
||||
@@ -106,7 +106,7 @@ def test_ondemand_download_large_rel(
|
||||
log.info("uploads have finished")
|
||||
|
||||
##### Stop the first pageserver instance, erase all its data
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
env.pageserver.stop()
|
||||
|
||||
# remove all the layer files
|
||||
@@ -117,7 +117,7 @@ def test_ondemand_download_large_rel(
|
||||
##### Second start, restore the data and ensure it's the same
|
||||
env.pageserver.start()
|
||||
|
||||
pg.start()
|
||||
endpoint.start()
|
||||
before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
|
||||
|
||||
# Probe in the middle of the table. There's a high chance that the beginning
|
||||
@@ -125,7 +125,7 @@ def test_ondemand_download_large_rel(
|
||||
# from other tables, and with the entry that stores the size of the
|
||||
# relation, so they are likely already downloaded. But the middle of the
|
||||
# table should not have been needed by anything yet.
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
assert query_scalar(cur, "select count(*) from tbl where id = 500000") == 1
|
||||
|
||||
after_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
|
||||
@@ -167,17 +167,17 @@ def test_ondemand_download_timetravel(
|
||||
)
|
||||
env.initial_tenant = tenant
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
lsns = []
|
||||
|
||||
table_len = 10000
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""
|
||||
CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text);
|
||||
@@ -192,7 +192,7 @@ def test_ondemand_download_timetravel(
|
||||
lsns.append((0, current_lsn))
|
||||
|
||||
for checkpoint_number in range(1, 20):
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(f"UPDATE testtab SET checkpoint_number = {checkpoint_number}")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
lsns.append((checkpoint_number, current_lsn))
|
||||
@@ -204,7 +204,7 @@ def test_ondemand_download_timetravel(
|
||||
client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
##### Stop the first pageserver instance, erase all its data
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
|
||||
# wait until pageserver has successfully uploaded all the data to remote storage
|
||||
wait_for_sk_commit_lsn_to_reach_remote_storage(
|
||||
@@ -251,10 +251,10 @@ def test_ondemand_download_timetravel(
|
||||
num_layers_downloaded = [0]
|
||||
resident_size = [get_resident_physical_size()]
|
||||
for checkpoint_number, lsn in lsns:
|
||||
pg_old = env.postgres.create_start(
|
||||
branch_name="main", node_name=f"test_old_lsn_{checkpoint_number}", lsn=lsn
|
||||
endpoint_old = env.endpoints.create_start(
|
||||
branch_name="main", endpoint_id=f"ep-old_lsn_{checkpoint_number}", lsn=lsn
|
||||
)
|
||||
with pg_old.cursor() as cur:
|
||||
with endpoint_old.cursor() as cur:
|
||||
# assert query_scalar(cur, f"select count(*) from testtab where checkpoint_number={checkpoint_number}") == 100000
|
||||
assert (
|
||||
query_scalar(
|
||||
@@ -331,15 +331,15 @@ def test_download_remote_layers_api(
|
||||
)
|
||||
env.initial_tenant = tenant
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
table_len = 10000
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""
|
||||
CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text);
|
||||
@@ -347,7 +347,7 @@ def test_download_remote_layers_api(
|
||||
"""
|
||||
)
|
||||
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
|
||||
wait_for_sk_commit_lsn_to_reach_remote_storage(
|
||||
tenant_id, timeline_id, env.safekeepers, env.pageserver
|
||||
@@ -463,8 +463,8 @@ def test_download_remote_layers_api(
|
||||
sk.start()
|
||||
|
||||
# ensure that all the data is back
|
||||
pg_old = env.postgres.create_start(branch_name="main")
|
||||
with pg_old.cursor() as cur:
|
||||
endpoint_old = env.endpoints.create_start(branch_name="main")
|
||||
with endpoint_old.cursor() as cur:
|
||||
assert query_scalar(cur, "select count(*) from testtab") == table_len
|
||||
|
||||
|
||||
@@ -513,17 +513,17 @@ def test_compaction_downloads_on_demand_without_image_creation(
|
||||
env.initial_tenant = tenant_id
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
with env.postgres.create_start("main") as pg:
|
||||
with env.endpoints.create_start("main") as endpoint:
|
||||
# no particular reason to create the layers like this, but we are sure
|
||||
# not to hit the image_creation_threshold here.
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("create table a as select id::bigint from generate_series(1, 204800) s(id)")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("update a set id = -id")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
|
||||
@@ -589,32 +589,32 @@ def test_compaction_downloads_on_demand_with_image_creation(
|
||||
env.initial_tenant = tenant_id
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
# no particular reason to create the layers like this, but we are sure
|
||||
# not to hit the image_creation_threshold here.
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("create table a (id bigserial primary key, some_value bigint not null)")
|
||||
cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
for i in range(0, 2):
|
||||
for j in range(0, 3):
|
||||
# create a minimal amount of "delta difficulty" for this table
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("update a set some_value = -some_value + %s", (j,))
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
# vacuuming should aid to reuse keys, though it's not really important
|
||||
# with image_creation_threshold=1 which we will use on the last compaction
|
||||
cur.execute("vacuum")
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
if i == 1 and j == 2:
|
||||
# last iteration; stop before checkpoint to avoid leaving an inmemory layer
|
||||
pg.stop_and_destroy()
|
||||
endpoint.stop_and_destroy()
|
||||
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
|
||||
@@ -150,7 +150,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
with env.pageserver.http_client() as client:
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
|
||||
|
||||
# Wait to make sure that we get a latest WAL receiver data.
|
||||
# We need to wait here because it's possible that we don't have access to
|
||||
@@ -163,7 +163,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
|
||||
)
|
||||
|
||||
# Make a DB modification then expect getting a new WAL receiver's data.
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
wait_until(
|
||||
number_of_iterations=5,
|
||||
interval=1,
|
||||
|
||||
@@ -11,11 +11,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
|
||||
|
||||
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
|
||||
# Make shared_buffers large to ensure we won't query pageserver while it is down.
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]
|
||||
)
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
# Create table, and insert some rows.
|
||||
@@ -59,10 +59,10 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
|
||||
env.safekeepers[2].start()
|
||||
|
||||
# restart compute node
|
||||
pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down")
|
||||
endpoint.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down")
|
||||
|
||||
# Ensure that basebackup went correct and pageserver returned all data
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
cur.execute("SELECT count(*) FROM foo")
|
||||
|
||||
@@ -11,9 +11,9 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_pageserver_restart")
|
||||
pg = env.postgres.create_start("test_pageserver_restart")
|
||||
endpoint = env.endpoints.create_start("test_pageserver_restart")
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
# Create table, and insert some rows. Make it big enough that it doesn't fit in
|
||||
@@ -84,13 +84,13 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
|
||||
}
|
||||
)
|
||||
env.neon_cli.create_timeline("test_pageserver_chaos", tenant_id=tenant)
|
||||
pg = env.postgres.create_start("test_pageserver_chaos", tenant_id=tenant)
|
||||
endpoint = env.endpoints.create_start("test_pageserver_chaos", tenant_id=tenant)
|
||||
|
||||
# Create table, and insert some rows. Make it big enough that it doesn't fit in
|
||||
# shared_buffers, otherwise the SELECT after restart will just return answer
|
||||
# from shared_buffers without hitting the page server, which defeats the point
|
||||
# of this test.
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE foo (id int, t text, updates int)")
|
||||
cur.execute("CREATE INDEX ON foo (id)")
|
||||
@@ -116,12 +116,12 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Update the whole table, then immediately kill and restart the pageserver
|
||||
for i in range(1, 15):
|
||||
pg.safe_psql("UPDATE foo set updates = updates + 1")
|
||||
endpoint.safe_psql("UPDATE foo set updates = updates + 1")
|
||||
|
||||
# This kills the pageserver immediately, to simulate a crash
|
||||
env.pageserver.stop(immediate=True)
|
||||
env.pageserver.start()
|
||||
|
||||
# Check that all the updates are visible
|
||||
num_updates = pg.safe_psql("SELECT sum(updates) FROM foo")[0][0]
|
||||
num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0]
|
||||
assert num_updates == i * 100000
|
||||
|
||||
@@ -5,7 +5,7 @@ import threading
|
||||
import time
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
|
||||
from fixtures.neon_fixtures import NeonEnv, PgBin
|
||||
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
@@ -13,7 +13,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
|
||||
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_pageserver_restarts")
|
||||
pg = env.postgres.create_start("test_pageserver_restarts")
|
||||
endpoint = env.endpoints.create_start("test_pageserver_restarts")
|
||||
n_restarts = 10
|
||||
scale = 10
|
||||
|
||||
@@ -23,13 +23,12 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
|
||||
r".*Gc failed, retrying in \S+: Cannot run GC iteration on inactive tenant"
|
||||
)
|
||||
|
||||
def run_pgbench(pg: Postgres):
|
||||
connstr = pg.connstr()
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr])
|
||||
|
||||
thread = threading.Thread(target=run_pgbench, args=(pg,), daemon=True)
|
||||
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
|
||||
thread.start()
|
||||
|
||||
for i in range(n_restarts):
|
||||
|
||||
@@ -2,7 +2,7 @@ import asyncio
|
||||
from io import BytesIO
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, Postgres
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv
|
||||
|
||||
|
||||
async def repeat_bytes(buf, repetitions: int):
|
||||
@@ -10,7 +10,7 @@ async def repeat_bytes(buf, repetitions: int):
|
||||
yield buf
|
||||
|
||||
|
||||
async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str):
|
||||
async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
|
||||
buf = BytesIO()
|
||||
for i in range(1000):
|
||||
buf.write(
|
||||
@@ -20,7 +20,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
|
||||
|
||||
copy_input = repeat_bytes(buf.read(), 5000)
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
pg_conn = await endpoint.connect_async()
|
||||
|
||||
# PgProtocol.connect_async sets statement_timeout to 2 minutes.
|
||||
# That's not enough for this test, on a slow system in debug mode.
|
||||
@@ -29,10 +29,10 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
|
||||
await pg_conn.copy_to_table(table_name, source=copy_input)
|
||||
|
||||
|
||||
async def parallel_load_same_table(pg: Postgres, n_parallel: int):
|
||||
async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
|
||||
workers = []
|
||||
for worker_id in range(n_parallel):
|
||||
worker = copy_test_data_to_table(pg, worker_id, "copytest")
|
||||
worker = copy_test_data_to_table(endpoint, worker_id, "copytest")
|
||||
workers.append(asyncio.create_task(worker))
|
||||
|
||||
# await all workers
|
||||
@@ -43,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
|
||||
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_parallel_copy", "empty")
|
||||
pg = env.postgres.create_start("test_parallel_copy")
|
||||
endpoint = env.endpoints.create_start("test_parallel_copy")
|
||||
log.info("postgres is running on 'test_parallel_copy' branch")
|
||||
|
||||
# Create test table
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("CREATE TABLE copytest (i int, t text)")
|
||||
|
||||
# Run COPY TO to load the table with parallel connections.
|
||||
asyncio.run(parallel_load_same_table(pg, n_parallel))
|
||||
asyncio.run(parallel_load_same_table(endpoint, n_parallel))
|
||||
|
||||
@@ -24,8 +24,8 @@ def test_pg_regress(
|
||||
|
||||
env.neon_cli.create_branch("test_pg_regress", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = env.postgres.create_start("test_pg_regress")
|
||||
pg.safe_psql("CREATE DATABASE regression")
|
||||
endpoint = env.endpoints.create_start("test_pg_regress")
|
||||
endpoint.safe_psql("CREATE DATABASE regression")
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
runpath = test_output_dir / "regress"
|
||||
@@ -49,9 +49,9 @@ def test_pg_regress(
|
||||
]
|
||||
|
||||
env_vars = {
|
||||
"PGPORT": str(pg.default_options["port"]),
|
||||
"PGUSER": pg.default_options["user"],
|
||||
"PGHOST": pg.default_options["host"],
|
||||
"PGPORT": str(endpoint.default_options["port"]),
|
||||
"PGUSER": endpoint.default_options["user"],
|
||||
"PGHOST": endpoint.default_options["host"],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
@@ -61,10 +61,10 @@ def test_pg_regress(
|
||||
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
|
||||
|
||||
# checkpoint one more time to ensure that the lsn we get is the latest one
|
||||
pg.safe_psql("CHECKPOINT")
|
||||
endpoint.safe_psql("CHECKPOINT")
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
|
||||
# Run the PostgreSQL "isolation" tests, in src/test/isolation.
|
||||
@@ -85,8 +85,10 @@ def test_isolation(
|
||||
env.neon_cli.create_branch("test_isolation", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
# isolation tests use prepared transactions, so enable them
|
||||
pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"])
|
||||
pg.safe_psql("CREATE DATABASE isolation_regression")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_isolation", config_lines=["max_prepared_transactions=100"]
|
||||
)
|
||||
endpoint.safe_psql("CREATE DATABASE isolation_regression")
|
||||
|
||||
# Create some local directories for pg_isolation_regress to run in.
|
||||
runpath = test_output_dir / "regress"
|
||||
@@ -109,9 +111,9 @@ def test_isolation(
|
||||
]
|
||||
|
||||
env_vars = {
|
||||
"PGPORT": str(pg.default_options["port"]),
|
||||
"PGUSER": pg.default_options["user"],
|
||||
"PGHOST": pg.default_options["host"],
|
||||
"PGPORT": str(endpoint.default_options["port"]),
|
||||
"PGUSER": endpoint.default_options["user"],
|
||||
"PGHOST": endpoint.default_options["host"],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
@@ -135,8 +137,8 @@ def test_sql_regress(
|
||||
|
||||
env.neon_cli.create_branch("test_sql_regress", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = env.postgres.create_start("test_sql_regress")
|
||||
pg.safe_psql("CREATE DATABASE regression")
|
||||
endpoint = env.endpoints.create_start("test_sql_regress")
|
||||
endpoint.safe_psql("CREATE DATABASE regression")
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
runpath = test_output_dir / "regress"
|
||||
@@ -160,9 +162,9 @@ def test_sql_regress(
|
||||
]
|
||||
|
||||
env_vars = {
|
||||
"PGPORT": str(pg.default_options["port"]),
|
||||
"PGUSER": pg.default_options["user"],
|
||||
"PGHOST": pg.default_options["host"],
|
||||
"PGPORT": str(endpoint.default_options["port"]),
|
||||
"PGUSER": endpoint.default_options["user"],
|
||||
"PGHOST": endpoint.default_options["host"],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
@@ -172,8 +174,8 @@ def test_sql_regress(
|
||||
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
|
||||
|
||||
# checkpoint one more time to ensure that the lsn we get is the latest one
|
||||
pg.safe_psql("CHECKPOINT")
|
||||
pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0]
|
||||
endpoint.safe_psql("CHECKPOINT")
|
||||
endpoint.safe_psql("select pg_current_wal_insert_lsn()")[0][0]
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -15,10 +15,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pgmain = env.postgres.create_start("main")
|
||||
endpoint_main = env.endpoints.create_start("main")
|
||||
log.info("postgres is running on 'main' branch")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_pg_conn = endpoint_main.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id"))
|
||||
|
||||
@@ -62,10 +62,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
|
||||
# It must have been preserved by PITR setting
|
||||
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
|
||||
|
||||
pg_hundred = env.postgres.create_start("test_pitr_gc_hundred")
|
||||
endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")
|
||||
|
||||
# On the 'hundred' branch, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
hundred_pg_conn = endpoint_hundred.connect()
|
||||
hundred_cur = hundred_pg_conn.cursor()
|
||||
hundred_cur.execute("SELECT count(*) FROM foo")
|
||||
assert hundred_cur.fetchone() == (100,)
|
||||
|
||||
@@ -21,22 +21,22 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
|
||||
timeline = env.neon_cli.create_timeline("test_trace_replay", tenant_id=tenant)
|
||||
pg = env.postgres.create_start("test_trace_replay", "main", tenant)
|
||||
endpoint = env.endpoints.create_start("test_trace_replay", "main", tenant)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("create table t (i integer);")
|
||||
cur.execute(f"insert into t values (generate_series(1,{10000}));")
|
||||
cur.execute("select count(*) from t;")
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
# wait until pageserver receives that data
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
# Stop pg so we drop the connection and flush the traces
|
||||
pg.stop()
|
||||
# Stop postgres so we drop the connection and flush the traces
|
||||
endpoint.stop()
|
||||
|
||||
trace_path = env.repo_dir / "traces" / str(tenant) / str(timeline)
|
||||
assert trace_path.exists()
|
||||
|
||||
@@ -17,10 +17,10 @@ def test_read_validation(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation", "empty")
|
||||
|
||||
pg = env.postgres.create_start("test_read_validation")
|
||||
endpoint = env.endpoints.create_start("test_read_validation")
|
||||
log.info("postgres is running on 'test_read_validation' branch")
|
||||
|
||||
with closing(pg.connect()) as con:
|
||||
with closing(endpoint.connect()) as con:
|
||||
with con.cursor() as c:
|
||||
for e in extensions:
|
||||
c.execute("create extension if not exists {};".format(e))
|
||||
@@ -144,10 +144,10 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
|
||||
|
||||
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
|
||||
|
||||
pg = env.postgres.create_start("test_read_validation_neg")
|
||||
endpoint = env.endpoints.create_start("test_read_validation_neg")
|
||||
log.info("postgres is running on 'test_read_validation_neg' branch")
|
||||
|
||||
with closing(pg.connect()) as con:
|
||||
with closing(endpoint.connect()) as con:
|
||||
with con.cursor() as c:
|
||||
for e in extensions:
|
||||
c.execute("create extension if not exists {};".format(e))
|
||||
|
||||
@@ -15,12 +15,12 @@ from fixtures.utils import query_scalar
|
||||
def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_readonly_node", "empty")
|
||||
pgmain = env.postgres.create_start("test_readonly_node")
|
||||
endpoint_main = env.endpoints.create_start("test_readonly_node")
|
||||
log.info("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_pg_conn = endpoint_main.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
@@ -61,23 +61,23 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
log.info("LSN after 400100 rows: " + lsn_c)
|
||||
|
||||
# Create first read-only node at the point where only 100 rows were inserted
|
||||
pg_hundred = env.postgres.create_start(
|
||||
branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a
|
||||
endpoint_hundred = env.endpoints.create_start(
|
||||
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
|
||||
)
|
||||
|
||||
# And another at the point where 200100 rows were inserted
|
||||
pg_more = env.postgres.create_start(
|
||||
branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b
|
||||
endpoint_more = env.endpoints.create_start(
|
||||
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b
|
||||
)
|
||||
|
||||
# On the 'hundred' node, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
hundred_pg_conn = endpoint_hundred.connect()
|
||||
hundred_cur = hundred_pg_conn.cursor()
|
||||
hundred_cur.execute("SELECT count(*) FROM foo")
|
||||
assert hundred_cur.fetchone() == (100,)
|
||||
|
||||
# On the 'more' node, we should see 100200 rows
|
||||
more_pg_conn = pg_more.connect()
|
||||
more_pg_conn = endpoint_more.connect()
|
||||
more_cur = more_pg_conn.cursor()
|
||||
more_cur.execute("SELECT count(*) FROM foo")
|
||||
assert more_cur.fetchone() == (200100,)
|
||||
@@ -87,21 +87,21 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
assert main_cur.fetchone() == (400100,)
|
||||
|
||||
# Check creating a node at segment boundary
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="test_readonly_node",
|
||||
node_name="test_branch_segment_boundary",
|
||||
endpoint_id="ep-branch_segment_boundary",
|
||||
lsn=Lsn("0/3000000"),
|
||||
)
|
||||
cur = pg.connect().cursor()
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute("SELECT 1")
|
||||
assert cur.fetchone() == (1,)
|
||||
|
||||
# Create node at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid basebackup lsn"):
|
||||
# compute node startup with invalid LSN should fail
|
||||
env.postgres.create_start(
|
||||
env.endpoints.create_start(
|
||||
branch_name="test_readonly_node",
|
||||
node_name="test_readonly_node_preinitdb",
|
||||
endpoint_id="ep-readonly_node_preinitdb",
|
||||
lsn=Lsn("0/42"),
|
||||
)
|
||||
|
||||
@@ -111,16 +111,16 @@ def test_timetravel(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
env.neon_cli.create_branch("test_timetravel", "empty")
|
||||
pg = env.postgres.create_start("test_timetravel")
|
||||
endpoint = env.endpoints.create_start("test_timetravel")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
lsns = []
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
CREATE TABLE testtab(id serial primary key, iteration int, data text);
|
||||
@@ -131,7 +131,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
|
||||
lsns.append((0, current_lsn))
|
||||
|
||||
for i in range(1, 5):
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(f"UPDATE testtab SET iteration = {i}")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
lsns.append((i, current_lsn))
|
||||
@@ -143,14 +143,14 @@ def test_timetravel(neon_simple_env: NeonEnv):
|
||||
pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
##### Restart pageserver
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
for i, lsn in lsns:
|
||||
pg_old = env.postgres.create_start(
|
||||
branch_name="test_timetravel", node_name=f"test_old_lsn_{i}", lsn=lsn
|
||||
endpoint_old = env.endpoints.create_start(
|
||||
branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
|
||||
)
|
||||
with pg_old.cursor() as cur:
|
||||
with endpoint_old.cursor() as cur:
|
||||
assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000
|
||||
assert query_scalar(cur, f"select count(*) from testtab where iteration<>{i}") == 0
|
||||
|
||||
@@ -22,10 +22,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
# Create a branch for us
|
||||
env.neon_cli.create_branch("test_pageserver_recovery", "main")
|
||||
|
||||
pg = env.postgres.create_start("test_pageserver_recovery")
|
||||
endpoint = env.endpoints.create_start("test_pageserver_recovery")
|
||||
log.info("postgres is running on 'test_pageserver_recovery' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
with env.pageserver.http_client() as pageserver_http:
|
||||
# Create and initialize test table
|
||||
@@ -54,7 +54,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("select count(*) from foo")
|
||||
assert cur.fetchone() == (100000,)
|
||||
|
||||
@@ -87,17 +87,17 @@ def test_remote_storage_backup_and_restore(
|
||||
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
checkpoint_numbers = range(1, 3)
|
||||
|
||||
for checkpoint_number in checkpoint_numbers:
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""
|
||||
CREATE TABLE t{checkpoint_number}(id int primary key, data text);
|
||||
@@ -126,7 +126,7 @@ def test_remote_storage_backup_and_restore(
|
||||
)
|
||||
|
||||
##### Stop the first pageserver instance, erase all its data
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
env.pageserver.stop()
|
||||
|
||||
dir_to_clear = Path(env.repo_dir) / "tenants"
|
||||
@@ -187,8 +187,8 @@ def test_remote_storage_backup_and_restore(
|
||||
), "current db Lsn should should not be less than the one stored on remote storage"
|
||||
|
||||
log.info("select some data, this will cause layers to be downloaded")
|
||||
pg = env.postgres.create_start("main")
|
||||
with pg.cursor() as cur:
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
with endpoint.cursor() as cur:
|
||||
for checkpoint_number in checkpoint_numbers:
|
||||
assert (
|
||||
query_scalar(cur, f"SELECT data FROM t{checkpoint_number} WHERE id = {data_id};")
|
||||
@@ -238,9 +238,9 @@ def test_remote_storage_upload_queue_retries(
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
|
||||
pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
|
||||
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
|
||||
|
||||
def configure_storage_sync_failpoints(action):
|
||||
client.configure_failpoints(
|
||||
@@ -253,7 +253,7 @@ def test_remote_storage_upload_queue_retries(
|
||||
|
||||
def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data):
|
||||
# create initial set of layers & upload them with failpoints configured
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
f"""
|
||||
INSERT INTO foo (id, val)
|
||||
@@ -266,7 +266,7 @@ def test_remote_storage_upload_queue_retries(
|
||||
"VACUUM foo",
|
||||
]
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
def get_queued_count(file_kind, op_kind):
|
||||
val = client.get_remote_timeline_client_metric(
|
||||
@@ -343,7 +343,7 @@ def test_remote_storage_upload_queue_retries(
|
||||
# but how do we validate the result after restore?
|
||||
|
||||
env.pageserver.stop(immediate=True)
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
|
||||
dir_to_clear = Path(env.repo_dir) / "tenants"
|
||||
shutil.rmtree(dir_to_clear)
|
||||
@@ -357,8 +357,8 @@ def test_remote_storage_upload_queue_retries(
|
||||
wait_until_tenant_active(client, tenant_id)
|
||||
|
||||
log.info("restarting postgres to validate")
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
with pg.cursor() as cur:
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with endpoint.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000
|
||||
|
||||
|
||||
@@ -394,13 +394,13 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
|
||||
pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
|
||||
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
|
||||
|
||||
def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data):
|
||||
# create initial set of layers & upload them with failpoints configured
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
f"""
|
||||
INSERT INTO foo (id, val)
|
||||
@@ -413,7 +413,7 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
"VACUUM foo",
|
||||
]
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
calls_started: Dict[Tuple[str, str], List[int]] = {
|
||||
("layer", "upload"): [0],
|
||||
@@ -478,7 +478,7 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
)
|
||||
|
||||
env.pageserver.stop(immediate=True)
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
|
||||
dir_to_clear = Path(env.repo_dir) / "tenants"
|
||||
shutil.rmtree(dir_to_clear)
|
||||
@@ -492,8 +492,8 @@ def test_remote_timeline_client_calls_started_metric(
|
||||
wait_until_tenant_active(client, tenant_id)
|
||||
|
||||
log.info("restarting postgres to validate")
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
with pg.cursor() as cur:
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with endpoint.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000
|
||||
|
||||
# ensure that we updated the calls_started download metric
|
||||
@@ -543,17 +543,17 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
|
||||
)
|
||||
return int(val) if val is not None else val
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
|
||||
client.configure_failpoints(("before-upload-layer", "return"))
|
||||
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (x INTEGER)",
|
||||
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
|
||||
]
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
# Kick off a checkpoint operation.
|
||||
# It will get stuck in remote_client.wait_completion(), since the select query will have
|
||||
@@ -627,8 +627,8 @@ def test_empty_branch_remote_storage_upload(
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
|
||||
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
|
||||
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
timelines_before_detach = set(
|
||||
@@ -676,8 +676,8 @@ def test_empty_branch_remote_storage_upload_on_restart(
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
|
||||
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
|
||||
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
env.pageserver.stop()
|
||||
|
||||
@@ -11,10 +11,10 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_subxacts", "empty")
|
||||
pg = env.postgres.create_start("test_subxacts")
|
||||
endpoint = env.endpoints.create_start("test_subxacts")
|
||||
|
||||
log.info("postgres is running on 'test_subxacts' branch")
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
cur.execute(
|
||||
@@ -37,4 +37,4 @@ def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
|
||||
cur.execute("checkpoint")
|
||||
|
||||
# Check that we can restore the content of the datadir correctly
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -43,11 +43,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}"""
|
||||
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)
|
||||
|
||||
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
|
||||
env.postgres.create_start(
|
||||
"test_tenant_conf",
|
||||
"main",
|
||||
tenant,
|
||||
)
|
||||
env.endpoints.create_start("test_tenant_conf", "main", tenant)
|
||||
|
||||
# check the configuration of the default tenant
|
||||
# it should match global configuration
|
||||
|
||||
@@ -7,9 +7,9 @@ import asyncpg
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
Postgres,
|
||||
RemoteStorageKind,
|
||||
available_remote_storages,
|
||||
)
|
||||
@@ -59,8 +59,8 @@ def test_tenant_reattach(
|
||||
# create new nenant
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
@@ -99,8 +99,8 @@ def test_tenant_reattach(
|
||||
|
||||
assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
|
||||
|
||||
# Check that we had to retry the downloads
|
||||
@@ -157,11 +157,11 @@ async def sleep_and_reattach(pageserver_http: PageserverHttpClient, tenant_id: T
|
||||
|
||||
# async guts of test_tenant_reattach_while_bysy test
|
||||
async def reattach_while_busy(
|
||||
env: NeonEnv, pg: Postgres, pageserver_http: PageserverHttpClient, tenant_id: TenantId
|
||||
env: NeonEnv, endpoint: Endpoint, pageserver_http: PageserverHttpClient, tenant_id: TenantId
|
||||
):
|
||||
workers = []
|
||||
for worker_id in range(num_connections):
|
||||
pg_conn = await pg.connect_async()
|
||||
pg_conn = await endpoint.connect_async()
|
||||
workers.append(asyncio.create_task(update_table(pg_conn)))
|
||||
|
||||
workers.append(asyncio.create_task(sleep_and_reattach(pageserver_http, tenant_id)))
|
||||
@@ -238,15 +238,15 @@ def test_tenant_reattach_while_busy(
|
||||
conf={"checkpoint_distance": "100000"}
|
||||
)
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
|
||||
cur = pg.connect().cursor()
|
||||
cur = endpoint.connect().cursor()
|
||||
|
||||
cur.execute("CREATE TABLE t(id int primary key, counter int)")
|
||||
cur.execute(f"INSERT INTO t SELECT generate_series(1,{num_rows}), 0")
|
||||
|
||||
# Run the test
|
||||
asyncio.run(reattach_while_busy(env, pg, pageserver_http, tenant_id))
|
||||
asyncio.run(reattach_while_busy(env, endpoint, pageserver_http, tenant_id))
|
||||
|
||||
# Verify table contents
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == num_rows
|
||||
@@ -278,9 +278,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
# assert tenant exists on disk
|
||||
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
queries=[
|
||||
"CREATE TABLE t(key int primary key, value text)",
|
||||
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
|
||||
@@ -339,9 +339,9 @@ def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv):
|
||||
# assert tenant exists on disk
|
||||
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
queries=[
|
||||
"CREATE TABLE t(key int primary key, value text)",
|
||||
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
|
||||
@@ -388,9 +388,9 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv):
|
||||
# assert tenant exists on disk
|
||||
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
queries=[
|
||||
"CREATE TABLE t(key int primary key, value text)",
|
||||
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
|
||||
@@ -425,18 +425,18 @@ def test_detach_while_attaching(
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
# Create table, and insert some rows. Make it big enough that it doesn't fit in
|
||||
# shared_buffers, otherwise the SELECT after restart will just return answer
|
||||
# from shared_buffers without hitting the page server, which defeats the point
|
||||
# of this test.
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
cur.execute(
|
||||
"""
|
||||
@@ -477,7 +477,7 @@ def test_detach_while_attaching(
|
||||
# cycle are still running, things could get really confusing..
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("SELECT COUNT(*) FROM foo")
|
||||
|
||||
|
||||
@@ -572,14 +572,14 @@ def test_ignored_tenant_download_missing_layers(
|
||||
)
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
data_id = 1
|
||||
data_secret = "very secret secret"
|
||||
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)
|
||||
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint)
|
||||
|
||||
tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
|
||||
tenants_before_ignore.sort()
|
||||
@@ -611,9 +611,9 @@ def test_ignored_tenant_download_missing_layers(
|
||||
]
|
||||
assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back"
|
||||
|
||||
pg.stop()
|
||||
pg.start()
|
||||
ensure_test_data(data_id, data_secret, pg)
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
ensure_test_data(data_id, data_secret, endpoint)
|
||||
|
||||
|
||||
# Tests that it's possible to `load` broken tenants:
|
||||
@@ -631,10 +631,10 @@ def test_ignored_tenant_stays_broken_without_metadata(
|
||||
)
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
# ignore the tenant and remove its metadata
|
||||
pageserver_http.tenant_ignore(tenant_id)
|
||||
@@ -666,9 +666,9 @@ def test_load_attach_negatives(
|
||||
)
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
|
||||
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
|
||||
with pytest.raises(
|
||||
@@ -707,16 +707,16 @@ def test_ignore_while_attaching(
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
data_id = 1
|
||||
data_secret = "very secret secret"
|
||||
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)
|
||||
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint)
|
||||
|
||||
tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
|
||||
|
||||
@@ -754,9 +754,9 @@ def test_ignore_while_attaching(
|
||||
|
||||
wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
|
||||
|
||||
pg.stop()
|
||||
pg.start()
|
||||
ensure_test_data(data_id, data_secret, pg)
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
ensure_test_data(data_id, data_secret, endpoint)
|
||||
|
||||
|
||||
def insert_test_data(
|
||||
@@ -765,9 +765,9 @@ def insert_test_data(
|
||||
timeline_id: TimelineId,
|
||||
data_id: int,
|
||||
data: str,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
):
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""
|
||||
CREATE TABLE test(id int primary key, secret text);
|
||||
@@ -787,8 +787,8 @@ def insert_test_data(
|
||||
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
|
||||
def ensure_test_data(data_id: int, data: str, pg: Postgres):
|
||||
with pg.cursor() as cur:
|
||||
def ensure_test_data(data_id: int, data: str, endpoint: Endpoint):
|
||||
with endpoint.cursor() as cur:
|
||||
assert (
|
||||
query_scalar(cur, f"SELECT secret FROM test WHERE id = {data_id};") == data
|
||||
), "Should have timeline data back"
|
||||
|
||||
@@ -7,11 +7,11 @@ from typing import Any, Dict, Optional, Tuple
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonBroker,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PortDistributor,
|
||||
Postgres,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import (
|
||||
@@ -87,20 +87,20 @@ def new_pageserver_service(
|
||||
|
||||
|
||||
@contextmanager
|
||||
def pg_cur(pg):
|
||||
with closing(pg.connect()) as conn:
|
||||
def pg_cur(endpoint):
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
yield cur
|
||||
|
||||
|
||||
def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event):
|
||||
def load(endpoint: Endpoint, stop_event: threading.Event, load_ok_event: threading.Event):
|
||||
log.info("load started")
|
||||
|
||||
inserted_ctr = 0
|
||||
failed = False
|
||||
while not stop_event.is_set():
|
||||
try:
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
cur.execute("INSERT INTO load VALUES ('some payload')")
|
||||
inserted_ctr += 1
|
||||
except: # noqa: E722
|
||||
@@ -110,7 +110,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
|
||||
load_ok_event.clear()
|
||||
else:
|
||||
if failed:
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
# if we recovered after failure verify that we have correct number of rows
|
||||
log.info("recovering at %s", inserted_ctr)
|
||||
cur.execute("SELECT count(*) FROM load")
|
||||
@@ -124,14 +124,14 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
|
||||
|
||||
|
||||
def populate_branch(
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
tenant_id: TenantId,
|
||||
ps_http: PageserverHttpClient,
|
||||
create_table: bool,
|
||||
expected_sum: Optional[int],
|
||||
) -> Tuple[TimelineId, Lsn]:
|
||||
# insert some data
|
||||
with pg_cur(pg) as cur:
|
||||
with pg_cur(endpoint) as cur:
|
||||
cur.execute("SHOW neon.timeline_id")
|
||||
timeline_id = TimelineId(cur.fetchone()[0])
|
||||
log.info("timeline to relocate %s", timeline_id)
|
||||
@@ -196,19 +196,19 @@ def check_timeline_attached(
|
||||
|
||||
def switch_pg_to_new_pageserver(
|
||||
env: NeonEnv,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
new_pageserver_port: int,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Path:
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
|
||||
pg_config_file_path = Path(pg.config_file_path())
|
||||
pg_config_file_path = Path(endpoint.config_file_path())
|
||||
pg_config_file_path.open("a").write(
|
||||
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'"
|
||||
)
|
||||
|
||||
pg.start()
|
||||
endpoint.start()
|
||||
|
||||
timeline_to_detach_local_path = (
|
||||
env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
|
||||
@@ -226,8 +226,8 @@ def switch_pg_to_new_pageserver(
|
||||
return timeline_to_detach_local_path
|
||||
|
||||
|
||||
def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: Path):
|
||||
with pg_cur(pg) as cur:
|
||||
def post_migration_check(endpoint: Endpoint, sum_before_migration: int, old_local_path: Path):
|
||||
with pg_cur(endpoint) as cur:
|
||||
# check that data is still there
|
||||
cur.execute("SELECT sum(key) FROM t")
|
||||
assert cur.fetchone() == (sum_before_migration,)
|
||||
@@ -288,12 +288,12 @@ def test_tenant_relocation(
|
||||
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
|
||||
|
||||
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
|
||||
pg_main = env.postgres.create_start(
|
||||
ep_main = env.endpoints.create_start(
|
||||
branch_name="test_tenant_relocation_main", tenant_id=tenant_id
|
||||
)
|
||||
|
||||
timeline_id_main, current_lsn_main = populate_branch(
|
||||
pg_main,
|
||||
ep_main,
|
||||
tenant_id=tenant_id,
|
||||
ps_http=pageserver_http,
|
||||
create_table=True,
|
||||
@@ -306,12 +306,12 @@ def test_tenant_relocation(
|
||||
ancestor_start_lsn=current_lsn_main,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
pg_second = env.postgres.create_start(
|
||||
ep_second = env.endpoints.create_start(
|
||||
branch_name="test_tenant_relocation_second", tenant_id=tenant_id
|
||||
)
|
||||
|
||||
timeline_id_second, current_lsn_second = populate_branch(
|
||||
pg_second,
|
||||
ep_second,
|
||||
tenant_id=tenant_id,
|
||||
ps_http=pageserver_http,
|
||||
create_table=False,
|
||||
@@ -327,14 +327,14 @@ def test_tenant_relocation(
|
||||
|
||||
if with_load == "with_load":
|
||||
# create load table
|
||||
with pg_cur(pg_main) as cur:
|
||||
with pg_cur(ep_main) as cur:
|
||||
cur.execute("CREATE TABLE load(value text)")
|
||||
|
||||
load_stop_event = threading.Event()
|
||||
load_ok_event = threading.Event()
|
||||
load_thread = threading.Thread(
|
||||
target=load,
|
||||
args=(pg_main, load_stop_event, load_ok_event),
|
||||
args=(ep_main, load_stop_event, load_ok_event),
|
||||
daemon=True, # To make sure the child dies when the parent errors
|
||||
)
|
||||
load_thread.start()
|
||||
@@ -450,7 +450,7 @@ def test_tenant_relocation(
|
||||
|
||||
old_local_path_main = switch_pg_to_new_pageserver(
|
||||
env,
|
||||
pg_main,
|
||||
ep_main,
|
||||
new_pageserver_pg_port,
|
||||
tenant_id,
|
||||
timeline_id_main,
|
||||
@@ -458,7 +458,7 @@ def test_tenant_relocation(
|
||||
|
||||
old_local_path_second = switch_pg_to_new_pageserver(
|
||||
env,
|
||||
pg_second,
|
||||
ep_second,
|
||||
new_pageserver_pg_port,
|
||||
tenant_id,
|
||||
timeline_id_second,
|
||||
@@ -475,11 +475,11 @@ def test_tenant_relocation(
|
||||
interval=1,
|
||||
func=lambda: tenant_exists(pageserver_http, tenant_id),
|
||||
)
|
||||
post_migration_check(pg_main, 500500, old_local_path_main)
|
||||
post_migration_check(pg_second, 1001000, old_local_path_second)
|
||||
post_migration_check(ep_main, 500500, old_local_path_main)
|
||||
post_migration_check(ep_second, 1001000, old_local_path_second)
|
||||
|
||||
# ensure that we can successfully read all relations on the new pageserver
|
||||
with pg_cur(pg_second) as cur:
|
||||
with pg_cur(ep_second) as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
DO $$
|
||||
|
||||
@@ -4,9 +4,9 @@ from typing import List, Tuple
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
Postgres,
|
||||
wait_for_last_flush_lsn,
|
||||
wait_for_wal_insert_lsn,
|
||||
)
|
||||
@@ -28,12 +28,12 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
|
||||
branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
|
||||
assert branch_name == main_branch_name
|
||||
|
||||
with env.postgres.create_start(
|
||||
with env.endpoints.create_start(
|
||||
main_branch_name,
|
||||
tenant_id=tenant_id,
|
||||
config_lines=["autovacuum=off", "checkpoint_timeout=10min"],
|
||||
) as pg:
|
||||
with pg.cursor() as cur:
|
||||
) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("SELECT 1")
|
||||
row = cur.fetchone()
|
||||
assert row is not None
|
||||
@@ -105,12 +105,12 @@ def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir:
|
||||
|
||||
first_branch_timeline_id = env.neon_cli.create_branch("first-branch", tenant_id=tenant_id)
|
||||
|
||||
with env.postgres.create_start("first-branch", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("first-branch", tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)"
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, first_branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, first_branch_timeline_id)
|
||||
|
||||
size_after_branching = http_client.tenant_size(tenant_id)
|
||||
log.info(f"size_after_branching: {size_after_branching}")
|
||||
@@ -164,12 +164,12 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou
|
||||
|
||||
assert last_branch is not None
|
||||
|
||||
with env.postgres.create_start(last_branch_name, tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start(last_branch_name, tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)"
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, last_branch)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, last_branch)
|
||||
|
||||
size_after_writes = http_client.tenant_size(tenant_id)
|
||||
assert size_after_writes > initial_size
|
||||
@@ -194,11 +194,11 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
|
||||
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
|
||||
http_client = env.pageserver.http_client()
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
|
||||
flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
|
||||
flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
|
||||
|
||||
size_before_branching = http_client.tenant_size(tenant_id)
|
||||
|
||||
@@ -208,10 +208,10 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
|
||||
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
|
||||
)
|
||||
|
||||
with env.postgres.create_start("branch", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, branch_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id)
|
||||
|
||||
size_after = http_client.tenant_size(tenant_id)
|
||||
|
||||
@@ -237,17 +237,17 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
|
||||
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
|
||||
http_client = env.pageserver.http_client()
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
|
||||
|
||||
flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
|
||||
flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t00 AS SELECT i::bigint n FROM generate_series(0, 2000) s(i)")
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
|
||||
|
||||
size_before_branching = http_client.tenant_size(tenant_id)
|
||||
|
||||
@@ -257,10 +257,10 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
|
||||
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
|
||||
)
|
||||
|
||||
with env.postgres.create_start("branch", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 10000) s(i)")
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, branch_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id)
|
||||
|
||||
size_after = http_client.tenant_size(tenant_id)
|
||||
|
||||
@@ -297,12 +297,12 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
|
||||
# gc is not expected to change the results
|
||||
|
||||
for branch_name, amount in [("main", 2000), ("first", 15000), ("second", 3000)]:
|
||||
with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
with env.endpoints.create_start(branch_name, tenant_id=tenant_id) as endpoint:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {amount}) s(i)"
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, ids[branch_name])
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, ids[branch_name])
|
||||
size_now = http_client.tenant_size(tenant_id)
|
||||
if latest_size is not None:
|
||||
assert size_now > latest_size
|
||||
@@ -359,7 +359,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
|
||||
def get_current_consistent_size(
|
||||
env: NeonEnv,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
size_debug_file, # apparently there is no public signature for open()...
|
||||
http_client: PageserverHttpClient,
|
||||
tenant_id: TenantId,
|
||||
@@ -368,7 +368,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
consistent = False
|
||||
size_debug = None
|
||||
|
||||
current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
|
||||
current_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
# We want to make sure we have a self-consistent set of values.
|
||||
# Size changes with WAL, so only if both before and after getting
|
||||
# the size of the tenant reports the same WAL insert LSN, we're OK
|
||||
@@ -382,35 +382,35 @@ def test_single_branch_get_tenant_size_grows(
|
||||
size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
|
||||
size_debug = http_client.tenant_size_debug(tenant_id)
|
||||
|
||||
after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
|
||||
after_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
consistent = current_lsn == after_lsn
|
||||
current_lsn = after_lsn
|
||||
size_debug_file.write(size_debug)
|
||||
return (current_lsn, size)
|
||||
|
||||
with env.postgres.create_start(
|
||||
with env.endpoints.create_start(
|
||||
branch_name,
|
||||
tenant_id=tenant_id,
|
||||
### autovacuum is disabled to limit WAL logging.
|
||||
config_lines=["autovacuum=off"],
|
||||
) as pg:
|
||||
) as endpoint:
|
||||
(initdb_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
collected_responses.append(("INITDB", initdb_lsn, size))
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)")
|
||||
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
collected_responses.append(("CREATE", current_lsn, size))
|
||||
|
||||
batch_size = 100
|
||||
|
||||
for i in range(3):
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)",
|
||||
(i, i),
|
||||
@@ -419,7 +419,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
i += 1
|
||||
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
@@ -438,7 +438,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
collected_responses.append(("INSERT", current_lsn, size))
|
||||
|
||||
while True:
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"UPDATE t0 SET i = -i WHERE i IN (SELECT i FROM t0 WHERE i > 0 LIMIT {batch_size})"
|
||||
)
|
||||
@@ -448,7 +448,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
break
|
||||
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
@@ -458,7 +458,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
collected_responses.append(("UPDATE", current_lsn, size))
|
||||
|
||||
while True:
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(f"DELETE FROM t0 WHERE i IN (SELECT i FROM t0 LIMIT {batch_size})")
|
||||
deleted = cur.rowcount
|
||||
|
||||
@@ -466,7 +466,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
break
|
||||
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
@@ -475,14 +475,14 @@ def test_single_branch_get_tenant_size_grows(
|
||||
|
||||
collected_responses.append(("DELETE", current_lsn, size))
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("DROP TABLE t0")
|
||||
|
||||
# The size of the tenant should still be as large as before we dropped
|
||||
# the table, because the drop operation can still be undone in the PITR
|
||||
# defined by gc_horizon.
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, pg, size_debug_file, http_client, tenant_id, timeline_id
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
@@ -532,16 +532,16 @@ def test_get_tenant_size_with_multiple_branches(
|
||||
|
||||
http_client = env.pageserver.http_client()
|
||||
|
||||
main_pg = env.postgres.create_start(main_branch_name, tenant_id=tenant_id)
|
||||
main_endpoint = env.endpoints.create_start(main_branch_name, tenant_id=tenant_id)
|
||||
|
||||
batch_size = 10000
|
||||
|
||||
with main_pg.cursor() as cur:
|
||||
with main_endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)"
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id)
|
||||
wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id)
|
||||
size_at_branch = http_client.tenant_size(tenant_id)
|
||||
assert size_at_branch > 0
|
||||
|
||||
@@ -552,23 +552,23 @@ def test_get_tenant_size_with_multiple_branches(
|
||||
size_after_first_branch = http_client.tenant_size(tenant_id)
|
||||
assert size_after_first_branch == size_at_branch
|
||||
|
||||
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
|
||||
first_branch_endpoint = env.endpoints.create_start("first-branch", tenant_id=tenant_id)
|
||||
|
||||
with first_branch_pg.cursor() as cur:
|
||||
with first_branch_endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)"
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, first_branch_pg, tenant_id, first_branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, first_branch_endpoint, tenant_id, first_branch_timeline_id)
|
||||
size_after_growing_first_branch = http_client.tenant_size(tenant_id)
|
||||
assert size_after_growing_first_branch > size_after_first_branch
|
||||
|
||||
with main_pg.cursor() as cur:
|
||||
with main_endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 2*{batch_size}) s(i)"
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id)
|
||||
wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id)
|
||||
size_after_continuing_on_main = http_client.tenant_size(tenant_id)
|
||||
assert size_after_continuing_on_main > size_after_growing_first_branch
|
||||
|
||||
@@ -578,31 +578,31 @@ def test_get_tenant_size_with_multiple_branches(
|
||||
size_after_second_branch = http_client.tenant_size(tenant_id)
|
||||
assert size_after_second_branch == size_after_continuing_on_main
|
||||
|
||||
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
|
||||
second_branch_endpoint = env.endpoints.create_start("second-branch", tenant_id=tenant_id)
|
||||
|
||||
with second_branch_pg.cursor() as cur:
|
||||
with second_branch_endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 3*{batch_size}) s(i)"
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id)
|
||||
size_after_growing_second_branch = http_client.tenant_size(tenant_id)
|
||||
assert size_after_growing_second_branch > size_after_second_branch
|
||||
|
||||
with second_branch_pg.cursor() as cur:
|
||||
with second_branch_endpoint.cursor() as cur:
|
||||
cur.execute("DROP TABLE t0")
|
||||
cur.execute("DROP TABLE t1")
|
||||
cur.execute("VACUUM FULL")
|
||||
|
||||
wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id)
|
||||
size_after_thinning_branch = http_client.tenant_size(tenant_id)
|
||||
assert (
|
||||
size_after_thinning_branch > size_after_growing_second_branch
|
||||
), "tenant_size should grow with dropped tables and full vacuum"
|
||||
|
||||
first_branch_pg.stop_and_destroy()
|
||||
second_branch_pg.stop_and_destroy()
|
||||
main_pg.stop()
|
||||
first_branch_endpoint.stop_and_destroy()
|
||||
second_branch_endpoint.stop_and_destroy()
|
||||
main_endpoint.stop()
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
|
||||
@@ -31,13 +31,13 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
|
||||
# Create tenant, start compute
|
||||
tenant, _ = env.neon_cli.create_tenant()
|
||||
env.neon_cli.create_timeline(name, tenant_id=tenant)
|
||||
pg = env.postgres.create_start(name, tenant_id=tenant)
|
||||
endpoint = env.endpoints.create_start(name, tenant_id=tenant)
|
||||
assert (
|
||||
get_state(tenant) == "Active"
|
||||
), "Pageserver should activate a tenant and start background jobs if timelines are loaded"
|
||||
|
||||
# Stop compute
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
|
||||
# Delete all timelines on all tenants.
|
||||
#
|
||||
|
||||
@@ -66,17 +66,17 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_1)
|
||||
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start(
|
||||
endpoint_tenant1 = env.endpoints.create_start(
|
||||
"test_tenants_normal_work",
|
||||
tenant_id=tenant_1,
|
||||
)
|
||||
pg_tenant2 = env.postgres.create_start(
|
||||
endpoint_tenant2 = env.endpoints.create_start(
|
||||
"test_tenants_normal_work",
|
||||
tenant_id=tenant_2,
|
||||
)
|
||||
|
||||
for pg in [pg_tenant1, pg_tenant2]:
|
||||
with closing(pg.connect()) as conn:
|
||||
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -97,11 +97,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1)
|
||||
timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1)
|
||||
pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2)
|
||||
endpoint_tenant1 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_1)
|
||||
endpoint_tenant2 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_2)
|
||||
|
||||
for pg in [pg_tenant1, pg_tenant2]:
|
||||
with closing(pg.connect()) as conn:
|
||||
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
@@ -242,11 +242,15 @@ def test_pageserver_metrics_removed_after_detach(
|
||||
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1)
|
||||
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_1)
|
||||
pg_tenant2 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_2)
|
||||
endpoint_tenant1 = env.endpoints.create_start(
|
||||
"test_metrics_removed_after_detach", tenant_id=tenant_1
|
||||
)
|
||||
endpoint_tenant2 = env.endpoints.create_start(
|
||||
"test_metrics_removed_after_detach", tenant_id=tenant_2
|
||||
)
|
||||
|
||||
for pg in [pg_tenant1, pg_tenant2]:
|
||||
with closing(pg.connect()) as conn:
|
||||
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
@@ -317,7 +321,7 @@ def test_pageserver_with_empty_tenants(
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory"
|
||||
|
||||
# Trigger timeline re-initialization after pageserver restart
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
env.pageserver.stop()
|
||||
|
||||
tenant_without_timelines_dir = env.initial_tenant
|
||||
|
||||
@@ -15,10 +15,10 @@ from typing import List, Tuple
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
LocalFsStorage,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
Postgres,
|
||||
RemoteStorageKind,
|
||||
available_remote_storages,
|
||||
wait_for_sk_commit_lsn_to_reach_remote_storage,
|
||||
@@ -32,10 +32,10 @@ from fixtures.types import Lsn, TenantId, TimelineId
|
||||
from fixtures.utils import query_scalar, wait_until
|
||||
|
||||
|
||||
async def tenant_workload(env: NeonEnv, pg: Postgres):
|
||||
async def tenant_workload(env: NeonEnv, endpoint: Endpoint):
|
||||
await env.pageserver.connect_async()
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
pg_conn = await endpoint.connect_async()
|
||||
|
||||
await pg_conn.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
for i in range(1, 100):
|
||||
@@ -49,10 +49,10 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
|
||||
assert res == i * 1000
|
||||
|
||||
|
||||
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
|
||||
async def all_tenants_workload(env: NeonEnv, tenants_endpoints):
|
||||
workers = []
|
||||
for _, pg in tenants_pgs:
|
||||
worker = tenant_workload(env, pg)
|
||||
for _, endpoint in tenants_endpoints:
|
||||
worker = tenant_workload(env, endpoint)
|
||||
workers.append(asyncio.create_task(worker))
|
||||
|
||||
# await all workers
|
||||
@@ -73,7 +73,7 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
tenants_pgs: List[Tuple[TenantId, Postgres]] = []
|
||||
tenants_endpoints: List[Tuple[TenantId, Endpoint]] = []
|
||||
|
||||
for _ in range(1, 5):
|
||||
# Use a tiny checkpoint distance, to create a lot of layers quickly
|
||||
@@ -84,18 +84,18 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
|
||||
)
|
||||
env.neon_cli.create_timeline("test_tenants_many", tenant_id=tenant)
|
||||
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_tenants_many",
|
||||
tenant_id=tenant,
|
||||
)
|
||||
tenants_pgs.append((tenant, pg))
|
||||
tenants_endpoints.append((tenant, endpoint))
|
||||
|
||||
asyncio.run(all_tenants_workload(env, tenants_pgs))
|
||||
asyncio.run(all_tenants_workload(env, tenants_endpoints))
|
||||
|
||||
# Wait for the remote storage uploads to finish
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
for tenant, pg in tenants_pgs:
|
||||
res = pg.safe_psql_many(
|
||||
for tenant, endpoint in tenants_endpoints:
|
||||
res = endpoint.safe_psql_many(
|
||||
["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"]
|
||||
)
|
||||
tenant_id = TenantId(res[0][0][0])
|
||||
@@ -137,15 +137,15 @@ def test_tenants_attached_after_download(
|
||||
)
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
for checkpoint_number in range(1, 3):
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""
|
||||
CREATE TABLE t{checkpoint_number}(id int primary key, secret text);
|
||||
@@ -174,7 +174,7 @@ def test_tenants_attached_after_download(
|
||||
)
|
||||
|
||||
##### Stop the pageserver, erase its layer file to force it being downloaded from S3
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
|
||||
wait_for_sk_commit_lsn_to_reach_remote_storage(
|
||||
tenant_id, timeline_id, env.safekeepers, env.pageserver
|
||||
@@ -244,12 +244,12 @@ def test_tenant_redownloads_truncated_file_on_startup(
|
||||
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t1 AS VALUES (123, 'foobar');")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
@@ -257,7 +257,7 @@ def test_tenant_redownloads_truncated_file_on_startup(
|
||||
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
env.postgres.stop_all()
|
||||
env.endpoints.stop_all()
|
||||
env.pageserver.stop()
|
||||
|
||||
timeline_dir = Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
|
||||
@@ -313,9 +313,9 @@ def test_tenant_redownloads_truncated_file_on_startup(
|
||||
os.stat(remote_layer_path).st_size == expected_size
|
||||
), "truncated file should not had been uploaded around re-download"
|
||||
|
||||
pg = env.postgres.create_start("main")
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
with pg.cursor() as cur:
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("INSERT INTO t1 VALUES (234, 'test data');")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
|
||||
@@ -12,11 +12,11 @@ import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgBin,
|
||||
PortDistributor,
|
||||
Postgres,
|
||||
RemoteStorageKind,
|
||||
VanillaPostgres,
|
||||
wait_for_last_flush_lsn,
|
||||
@@ -38,10 +38,10 @@ def test_timeline_size(neon_simple_env: NeonEnv):
|
||||
client = env.pageserver.http_client()
|
||||
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
|
||||
|
||||
pgmain = env.postgres.create_start("test_timeline_size")
|
||||
endpoint_main = env.endpoints.create_start("test_timeline_size")
|
||||
log.info("postgres is running on 'test_timeline_size' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
with closing(endpoint_main.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
cur.execute(
|
||||
@@ -74,10 +74,10 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
||||
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
|
||||
)
|
||||
|
||||
pgmain = env.postgres.create_start("test_timeline_size_createdropdb")
|
||||
endpoint_main = env.endpoints.create_start("test_timeline_size_createdropdb")
|
||||
log.info("postgres is running on 'test_timeline_size_createdropdb' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
with closing(endpoint_main.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
res = client.timeline_detail(
|
||||
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
|
||||
@@ -89,7 +89,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
||||
), "no writes should not change the incremental logical size"
|
||||
|
||||
cur.execute("CREATE DATABASE foodb")
|
||||
with closing(pgmain.connect(dbname="foodb")) as conn:
|
||||
with closing(endpoint_main.connect(dbname="foodb")) as conn:
|
||||
with conn.cursor() as cur2:
|
||||
cur2.execute("CREATE TABLE foo (t text)")
|
||||
cur2.execute(
|
||||
@@ -118,7 +118,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
||||
|
||||
|
||||
# wait until received_lsn_lag is 0
|
||||
def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60):
|
||||
def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, timeout=60):
|
||||
started_at = time.time()
|
||||
|
||||
received_lsn_lag = 1
|
||||
@@ -129,7 +129,7 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
|
||||
"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()"
|
||||
)
|
||||
|
||||
res = pgmain.safe_psql(
|
||||
res = endpoint_main.safe_psql(
|
||||
"""
|
||||
SELECT
|
||||
pg_size_pretty(pg_cluster_size()),
|
||||
@@ -150,20 +150,20 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
|
||||
|
||||
pgmain = env.postgres.create_start(
|
||||
endpoint_main = env.endpoints.create_start(
|
||||
"test_timeline_size_quota",
|
||||
# Set small limit for the test
|
||||
config_lines=["neon.max_cluster_size=30MB"],
|
||||
)
|
||||
log.info("postgres is running on 'test_timeline_size_quota' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
with closing(endpoint_main.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
|
||||
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
|
||||
wait_for_pageserver_catchup(pgmain)
|
||||
wait_for_pageserver_catchup(endpoint_main)
|
||||
|
||||
# Insert many rows. This query must fail because of space limit
|
||||
try:
|
||||
@@ -175,7 +175,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
)
|
||||
|
||||
wait_for_pageserver_catchup(pgmain)
|
||||
wait_for_pageserver_catchup(endpoint_main)
|
||||
|
||||
cur.execute(
|
||||
"""
|
||||
@@ -195,7 +195,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
# drop table to free space
|
||||
cur.execute("DROP TABLE foo")
|
||||
|
||||
wait_for_pageserver_catchup(pgmain)
|
||||
wait_for_pageserver_catchup(endpoint_main)
|
||||
|
||||
# create it again and insert some rows. This query must succeed
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
@@ -207,7 +207,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
)
|
||||
|
||||
wait_for_pageserver_catchup(pgmain)
|
||||
wait_for_pageserver_catchup(endpoint_main)
|
||||
|
||||
cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
|
||||
pg_cluster_size = cur.fetchone()
|
||||
@@ -231,15 +231,15 @@ def test_timeline_initial_logical_size_calculation_cancellation(
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
# load in some data
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
pg.safe_psql_many(
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (x INTEGER)",
|
||||
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
|
||||
]
|
||||
)
|
||||
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
|
||||
pg.stop()
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
endpoint.stop()
|
||||
|
||||
# restart with failpoint inside initial size calculation task
|
||||
env.pageserver.stop()
|
||||
@@ -311,9 +311,9 @@ def test_timeline_physical_size_init(
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init")
|
||||
pg = env.postgres.create_start("test_timeline_physical_size_init")
|
||||
endpoint = env.endpoints.create_start("test_timeline_physical_size_init")
|
||||
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text)",
|
||||
"""INSERT INTO foo
|
||||
@@ -322,7 +322,7 @@ def test_timeline_physical_size_init(
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
|
||||
# restart the pageserer to force calculating timeline's initial physical size
|
||||
env.pageserver.stop()
|
||||
@@ -355,9 +355,9 @@ def test_timeline_physical_size_post_checkpoint(
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint")
|
||||
pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint")
|
||||
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint")
|
||||
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text)",
|
||||
"""INSERT INTO foo
|
||||
@@ -366,7 +366,7 @@ def test_timeline_physical_size_post_checkpoint(
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
|
||||
|
||||
assert_physical_size_invariants(
|
||||
@@ -394,7 +394,7 @@ def test_timeline_physical_size_post_compaction(
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction")
|
||||
pg = env.postgres.create_start("test_timeline_physical_size_post_compaction")
|
||||
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction")
|
||||
|
||||
# We don't want autovacuum to run on the table, while we are calculating the
|
||||
# physical size, because that could cause a new layer to be created and a
|
||||
@@ -402,7 +402,7 @@ def test_timeline_physical_size_post_compaction(
|
||||
# happens, because of some other background activity or autovacuum on other
|
||||
# tables, we could simply retry the size calculations. It's unlikely that
|
||||
# that would happen more than once.)
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
|
||||
"""INSERT INTO foo
|
||||
@@ -411,7 +411,7 @@ def test_timeline_physical_size_post_compaction(
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
|
||||
# shutdown safekeepers to prevent new data from coming in
|
||||
for sk in env.safekeepers:
|
||||
@@ -446,10 +446,10 @@ def test_timeline_physical_size_post_gc(
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc")
|
||||
pg = env.postgres.create_start("test_timeline_physical_size_post_gc")
|
||||
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc")
|
||||
|
||||
# Like in test_timeline_physical_size_post_compaction, disable autovacuum
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
|
||||
"""INSERT INTO foo
|
||||
@@ -458,10 +458,10 @@ def test_timeline_physical_size_post_gc(
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
|
||||
|
||||
pg.safe_psql(
|
||||
endpoint.safe_psql(
|
||||
"""
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
@@ -469,7 +469,7 @@ def test_timeline_physical_size_post_gc(
|
||||
"""
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
|
||||
pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None)
|
||||
|
||||
@@ -495,9 +495,9 @@ def test_timeline_size_metrics(
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics")
|
||||
pg = env.postgres.create_start("test_timeline_size_metrics")
|
||||
endpoint = env.endpoints.create_start("test_timeline_size_metrics")
|
||||
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text)",
|
||||
"""INSERT INTO foo
|
||||
@@ -506,7 +506,7 @@ def test_timeline_size_metrics(
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
|
||||
|
||||
# get the metrics and parse the metric for the current timeline's physical size
|
||||
@@ -558,7 +558,7 @@ def test_timeline_size_metrics(
|
||||
# The sum of the sizes of all databases, as seen by pg_database_size(), should also
|
||||
# be close. Again allow some slack, the logical size metric includes some things like
|
||||
# the SLRUs that are not included in pg_database_size().
|
||||
dbsize_sum = pg.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0]
|
||||
dbsize_sum = endpoint.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0]
|
||||
assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024)
|
||||
|
||||
|
||||
@@ -592,16 +592,16 @@ def test_tenant_physical_size(
|
||||
n_rows = random.randint(100, 1000)
|
||||
|
||||
timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
|
||||
pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
|
||||
endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
|
||||
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo (t text)",
|
||||
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
|
||||
]
|
||||
)
|
||||
|
||||
wait_for_last_flush_lsn(env, pg, tenant, timeline)
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant, timeline)
|
||||
pageserver_http.timeline_checkpoint(tenant, timeline)
|
||||
|
||||
if remote_storage_kind is not None:
|
||||
@@ -609,7 +609,7 @@ def test_tenant_physical_size(
|
||||
|
||||
timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline)
|
||||
|
||||
pg.stop()
|
||||
endpoint.stop()
|
||||
|
||||
# ensure that tenant_status current_physical size reports sum of timeline current_physical_size
|
||||
tenant_current_physical_size = int(
|
||||
|
||||
@@ -27,8 +27,8 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark):
|
||||
)
|
||||
|
||||
env.neon_cli.create_timeline("test_truncate", tenant_id=tenant)
|
||||
pg = env.postgres.create_start("test_truncate", tenant_id=tenant)
|
||||
cur = pg.connect().cursor()
|
||||
endpoint = env.endpoints.create_start("test_truncate", tenant_id=tenant)
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute("create table t1(x integer)")
|
||||
cur.execute(f"insert into t1 values (generate_series(1,{n_records}))")
|
||||
cur.execute("vacuum t1")
|
||||
|
||||
@@ -10,10 +10,12 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
|
||||
def test_twophase(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_twophase", "empty")
|
||||
pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"])
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_twophase", config_lines=["max_prepared_transactions=5"]
|
||||
)
|
||||
log.info("postgres is running on 'test_twophase' branch")
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
@@ -42,7 +44,7 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
# pg_twophase directory and fsynced
|
||||
cur.execute("CHECKPOINT")
|
||||
|
||||
twophase_files = os.listdir(pg.pg_twophase_dir_path())
|
||||
twophase_files = os.listdir(endpoint.pg_twophase_dir_path())
|
||||
log.info(twophase_files)
|
||||
assert len(twophase_files) == 4
|
||||
|
||||
@@ -50,25 +52,25 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
cur.execute("ROLLBACK PREPARED 'insert_four'")
|
||||
cur.execute("CHECKPOINT")
|
||||
|
||||
twophase_files = os.listdir(pg.pg_twophase_dir_path())
|
||||
twophase_files = os.listdir(endpoint.pg_twophase_dir_path())
|
||||
log.info(twophase_files)
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
fork_at_current_lsn(env, pg, "test_twophase_prepared", "test_twophase")
|
||||
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
|
||||
|
||||
# Start compute on the new branch
|
||||
pg2 = env.postgres.create_start(
|
||||
endpoint2 = env.endpoints.create_start(
|
||||
"test_twophase_prepared",
|
||||
config_lines=["max_prepared_transactions=5"],
|
||||
)
|
||||
|
||||
# Check that we restored only needed twophase files
|
||||
twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
|
||||
twophase_files2 = os.listdir(endpoint2.pg_twophase_dir_path())
|
||||
log.info(twophase_files2)
|
||||
assert twophase_files2.sort() == twophase_files.sort()
|
||||
|
||||
conn2 = pg2.connect()
|
||||
conn2 = endpoint2.connect()
|
||||
cur2 = conn2.cursor()
|
||||
|
||||
# On the new branch, commit one of the prepared transactions,
|
||||
|
||||
@@ -9,9 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
|
||||
def test_unlogged(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_unlogged", "empty")
|
||||
pg = env.postgres.create_start("test_unlogged")
|
||||
endpoint = env.endpoints.create_start("test_unlogged")
|
||||
|
||||
conn = pg.connect()
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute("CREATE UNLOGGED TABLE iut (id int);")
|
||||
@@ -20,12 +20,10 @@ def test_unlogged(neon_simple_env: NeonEnv):
|
||||
cur.execute("INSERT INTO iut values (42);")
|
||||
|
||||
# create another compute to fetch inital empty contents from pageserver
|
||||
fork_at_current_lsn(env, pg, "test_unlogged_basebackup", "test_unlogged")
|
||||
pg2 = env.postgres.create_start(
|
||||
"test_unlogged_basebackup",
|
||||
)
|
||||
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged")
|
||||
endpoint2 = env.endpoints.create_start("test_unlogged_basebackup")
|
||||
|
||||
conn2 = pg2.connect()
|
||||
conn2 = endpoint2.connect()
|
||||
cur2 = conn2.cursor()
|
||||
# after restart table should be empty but valid
|
||||
cur2.execute("PREPARE iut_plan (int) AS INSERT INTO iut VALUES ($1)")
|
||||
|
||||
@@ -10,10 +10,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
|
||||
pg = env.postgres.create_start("test_vm_bit_clear")
|
||||
endpoint = env.endpoints.create_start("test_vm_bit_clear")
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear' branch")
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
# Install extension containing function needed for test
|
||||
@@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1")
|
||||
|
||||
# Branch at this point, to test that later
|
||||
fork_at_current_lsn(env, pg, "test_vm_bit_clear_new", "test_vm_bit_clear")
|
||||
fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear")
|
||||
|
||||
# Clear the buffer cache, to force the VM page to be re-fetched from
|
||||
# the page server
|
||||
@@ -63,10 +63,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
# a dirty VM page is evicted. If the VM bit was not correctly cleared by the
|
||||
# earlier WAL record, the full-page image hides the problem. Starting a new
|
||||
# server at the right point-in-time avoids that full-page image.
|
||||
pg_new = env.postgres.create_start("test_vm_bit_clear_new")
|
||||
endpoint_new = env.endpoints.create_start("test_vm_bit_clear_new")
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear_new' branch")
|
||||
pg_new_conn = pg_new.connect()
|
||||
pg_new_conn = endpoint_new.connect()
|
||||
cur_new = pg_new_conn.cursor()
|
||||
|
||||
cur_new.execute(
|
||||
|
||||
@@ -16,14 +16,13 @@ from typing import Any, List, Optional
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonBroker,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
NeonPageserver,
|
||||
PgBin,
|
||||
PgProtocol,
|
||||
PortDistributor,
|
||||
Postgres,
|
||||
RemoteStorageKind,
|
||||
RemoteStorageUsers,
|
||||
Safekeeper,
|
||||
@@ -39,11 +38,11 @@ from fixtures.utils import get_dir_size, query_scalar, start_in_background
|
||||
def wait_lsn_force_checkpoint(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
ps: NeonPageserver,
|
||||
pageserver_conn_options={},
|
||||
):
|
||||
lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver")
|
||||
|
||||
auth_token = None
|
||||
@@ -97,10 +96,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
branch_names_to_timeline_ids = {}
|
||||
|
||||
# start postgres on each timeline
|
||||
pgs = []
|
||||
endpoints = []
|
||||
for branch_name in branch_names:
|
||||
new_timeline_id = env.neon_cli.create_branch(branch_name)
|
||||
pgs.append(env.postgres.create_start(branch_name))
|
||||
endpoints.append(env.endpoints.create_start(branch_name))
|
||||
branch_names_to_timeline_ids[branch_name] = new_timeline_id
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
@@ -160,8 +159,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
# Do everything in different loops to have actions on different timelines
|
||||
# interleaved.
|
||||
# create schema
|
||||
for pg in pgs:
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
for endpoint in endpoints:
|
||||
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
init_m = collect_metrics("after CREATE TABLE")
|
||||
|
||||
# Populate data for 2/3 timelines
|
||||
@@ -197,16 +196,16 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
metrics_checker = MetricsChecker()
|
||||
metrics_checker.start()
|
||||
|
||||
for pg in pgs[:-1]:
|
||||
pg.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
for endpoint in endpoints[:-1]:
|
||||
endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
|
||||
metrics_checker.stop()
|
||||
|
||||
collect_metrics("after INSERT INTO")
|
||||
|
||||
# Check data for 2/3 timelines
|
||||
for pg in pgs[:-1]:
|
||||
res = pg.safe_psql("SELECT sum(key) FROM t")
|
||||
for endpoint in endpoints[:-1]:
|
||||
res = endpoint.safe_psql("SELECT sum(key) FROM t")
|
||||
assert res[0] == (5000050000,)
|
||||
|
||||
final_m = collect_metrics("after SELECT")
|
||||
@@ -233,11 +232,11 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_safekeepers_restarts")
|
||||
pg = env.postgres.create_start("test_safekeepers_restarts")
|
||||
endpoint = env.endpoints.create_start("test_safekeepers_restarts")
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
failed_node = None
|
||||
@@ -268,22 +267,22 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
|
||||
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
|
||||
)
|
||||
|
||||
pg = env.postgres.create_start("test_broker")
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
endpoint = env.endpoints.create_start("test_broker")
|
||||
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
|
||||
# learn neon timeline from compute
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
# wait until remote_consistent_lsn gets advanced on all safekeepers
|
||||
clients = [sk.http_client() for sk in env.safekeepers]
|
||||
stat_before = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
|
||||
log.info(f"statuses is {stat_before}")
|
||||
|
||||
pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
|
||||
endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
|
||||
|
||||
# force checkpoint in pageserver to advance remote_consistent_lsn
|
||||
wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver)
|
||||
wait_lsn_force_checkpoint(tenant_id, timeline_id, endpoint, env.pageserver)
|
||||
|
||||
# and wait till remote_consistent_lsn propagates to all safekeepers
|
||||
started_at = time.time()
|
||||
@@ -317,26 +316,28 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
)
|
||||
|
||||
env.neon_cli.create_branch("test_safekeepers_wal_removal")
|
||||
pg = env.postgres.create_start("test_safekeepers_wal_removal")
|
||||
endpoint = env.endpoints.create_start("test_safekeepers_wal_removal")
|
||||
|
||||
# Note: it is important to insert at least two segments, as currently
|
||||
# control file is synced roughly once in segment range and WAL is not
|
||||
# removed until all horizons are persisted.
|
||||
pg.safe_psql_many(
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE t(key int primary key, value text)",
|
||||
"INSERT INTO t SELECT generate_series(1,200000), 'payload'",
|
||||
]
|
||||
)
|
||||
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
# force checkpoint to advance remote_consistent_lsn
|
||||
pageserver_conn_options = {}
|
||||
if auth_enabled:
|
||||
pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id)
|
||||
wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options)
|
||||
wait_lsn_force_checkpoint(
|
||||
tenant_id, timeline_id, endpoint, env.pageserver, pageserver_conn_options
|
||||
)
|
||||
|
||||
# We will wait for first segment removal. Make sure they exist for starter.
|
||||
first_segments = [
|
||||
@@ -436,13 +437,13 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_safekeepers_wal_backup")
|
||||
pg = env.postgres.create_start("test_safekeepers_wal_backup")
|
||||
endpoint = env.endpoints.create_start("test_safekeepers_wal_backup")
|
||||
|
||||
# learn neon timeline from compute
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
cur.execute("create table t(key int, value text)")
|
||||
|
||||
@@ -465,9 +466,9 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot
|
||||
# put one of safekeepers down again
|
||||
env.safekeepers[0].stop()
|
||||
# restart postgres
|
||||
pg.stop_and_destroy().create_start("test_safekeepers_wal_backup")
|
||||
endpoint.stop_and_destroy().create_start("test_safekeepers_wal_backup")
|
||||
# and ensure offloading still works
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("insert into t select generate_series(1,250000), 'payload'")
|
||||
seg_end = Lsn("0/5000000")
|
||||
@@ -491,15 +492,15 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_s3_wal_replay")
|
||||
|
||||
pg = env.postgres.create_start("test_s3_wal_replay")
|
||||
endpoint = env.endpoints.create_start("test_s3_wal_replay")
|
||||
|
||||
# learn neon timeline from compute
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
expected_sum = 0
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("create table t(key int, value text)")
|
||||
cur.execute("insert into t values (1, 'payload')")
|
||||
@@ -547,7 +548,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
|
||||
f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb"
|
||||
)
|
||||
|
||||
pg.stop_and_destroy()
|
||||
endpoint.stop_and_destroy()
|
||||
ps_cli.timeline_delete(tenant_id, timeline_id)
|
||||
|
||||
# Also delete and manually create timeline on safekeepers -- this tests
|
||||
@@ -609,9 +610,9 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
|
||||
log.info(f"WAL redo took {elapsed} s")
|
||||
|
||||
# verify data
|
||||
pg.create_start("test_s3_wal_replay")
|
||||
endpoint.create_start("test_s3_wal_replay")
|
||||
|
||||
assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum
|
||||
assert endpoint.safe_psql("select sum(key) from t")[0][0] == expected_sum
|
||||
|
||||
|
||||
class ProposerPostgres(PgProtocol):
|
||||
@@ -762,13 +763,13 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_timeline_status")
|
||||
pg = env.postgres.create_start("test_timeline_status")
|
||||
endpoint = env.endpoints.create_start("test_timeline_status")
|
||||
|
||||
wa = env.safekeepers[0]
|
||||
|
||||
# learn neon timeline from compute
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
if not auth_enabled:
|
||||
wa_http_cli = wa.http_client()
|
||||
@@ -806,11 +807,11 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
assert debug_dump_0["timelines_count"] == 1
|
||||
assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id)
|
||||
|
||||
pg.safe_psql("create table t(i int)")
|
||||
endpoint.safe_psql("create table t(i int)")
|
||||
|
||||
# ensure epoch goes up after reboot
|
||||
pg.stop().start()
|
||||
pg.safe_psql("insert into t values(10)")
|
||||
endpoint.stop().start()
|
||||
endpoint.safe_psql("insert into t values(10)")
|
||||
|
||||
tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id)
|
||||
epoch_after_reboot = tli_status.acceptor_epoch
|
||||
@@ -989,11 +990,8 @@ def test_safekeeper_without_pageserver(
|
||||
|
||||
|
||||
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
|
||||
return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names])
|
||||
|
||||
def execute_payload(pg: Postgres):
|
||||
with closing(pg.connect()) as conn:
|
||||
def execute_payload(endpoint: Endpoint):
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -1020,27 +1018,26 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
log.info("Use only first 3 safekeepers")
|
||||
env.safekeepers[3].stop()
|
||||
active_safekeepers = [1, 2, 3]
|
||||
pg = env.postgres.create("test_replace_safekeeper")
|
||||
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
|
||||
pg.start()
|
||||
endpoint = env.endpoints.create("test_replace_safekeeper")
|
||||
endpoint.active_safekeepers = [1, 2, 3]
|
||||
endpoint.start()
|
||||
|
||||
# learn neon timeline from compute
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Restart all safekeepers to flush everything")
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
env.safekeepers[0].start()
|
||||
env.safekeepers[1].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
env.safekeepers[1].start()
|
||||
env.safekeepers[2].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
env.safekeepers[2].start()
|
||||
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
@@ -1050,27 +1047,26 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
env.safekeepers[1].start()
|
||||
env.safekeepers[2].start()
|
||||
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3")
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Recreate postgres to replace failed sk1 with new sk4")
|
||||
pg.stop_and_destroy().create("test_replace_safekeeper")
|
||||
active_safekeepers = [2, 3, 4]
|
||||
endpoint.stop_and_destroy().create("test_replace_safekeeper")
|
||||
env.safekeepers[3].start()
|
||||
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
|
||||
pg.start()
|
||||
endpoint.active_safekeepers = [2, 3, 4]
|
||||
endpoint.start()
|
||||
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work")
|
||||
env.safekeepers[1].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
execute_payload(endpoint)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
|
||||
@@ -1082,13 +1078,13 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
|
||||
last_lsn = Lsn(0)
|
||||
|
||||
# returns pg_wal size in MB
|
||||
def collect_stats(pg: Postgres, cur, enable_logs=True):
|
||||
def collect_stats(endpoint: Endpoint, cur, enable_logs=True):
|
||||
nonlocal last_lsn
|
||||
assert pg.pgdata_dir is not None
|
||||
assert endpoint.pgdata_dir is not None
|
||||
|
||||
log.info("executing INSERT to generate WAL")
|
||||
current_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()"))
|
||||
pg_wal_size_mb = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024
|
||||
pg_wal_size_mb = get_dir_size(os.path.join(endpoint.pgdata_dir, "pg_wal")) / 1024 / 1024
|
||||
if enable_logs:
|
||||
lsn_delta_mb = (current_lsn - last_lsn) / 1024 / 1024
|
||||
log.info(f"LSN delta: {lsn_delta_mb} MB, current WAL size: {pg_wal_size_mb} MB")
|
||||
@@ -1104,25 +1100,25 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
env.neon_cli.create_branch("test_wal_deleted_after_broadcast")
|
||||
# Adjust checkpoint config to prevent keeping old WAL segments
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_wal_deleted_after_broadcast",
|
||||
config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"],
|
||||
)
|
||||
|
||||
pg_conn = pg.connect()
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
cur.execute("CREATE TABLE t(key int, value text)")
|
||||
|
||||
collect_stats(pg, cur)
|
||||
collect_stats(endpoint, cur)
|
||||
|
||||
# generate WAL to simulate normal workload
|
||||
for i in range(5):
|
||||
generate_wal(cur)
|
||||
collect_stats(pg, cur)
|
||||
collect_stats(endpoint, cur)
|
||||
|
||||
log.info("executing checkpoint")
|
||||
cur.execute("CHECKPOINT")
|
||||
wal_size_after_checkpoint = collect_stats(pg, cur)
|
||||
wal_size_after_checkpoint = collect_stats(endpoint, cur)
|
||||
|
||||
# there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
|
||||
assert wal_size_after_checkpoint < 16 * 2.5
|
||||
@@ -1151,13 +1147,13 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
tenant_id_other, timeline_id_other = env.neon_cli.create_tenant()
|
||||
|
||||
# Populate branches
|
||||
pg_1 = env.postgres.create_start("br1")
|
||||
pg_2 = env.postgres.create_start("br2")
|
||||
pg_3 = env.postgres.create_start("br3")
|
||||
pg_4 = env.postgres.create_start("br4")
|
||||
pg_other = env.postgres.create_start("main", tenant_id=tenant_id_other)
|
||||
for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]:
|
||||
with closing(pg.connect()) as conn:
|
||||
endpoint_1 = env.endpoints.create_start("br1")
|
||||
endpoint_2 = env.endpoints.create_start("br2")
|
||||
endpoint_3 = env.endpoints.create_start("br3")
|
||||
endpoint_4 = env.endpoints.create_start("br4")
|
||||
endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other)
|
||||
for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key)")
|
||||
sk = env.safekeepers[0]
|
||||
@@ -1178,14 +1174,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir()
|
||||
|
||||
# Stop branches which should be inactive and restart Safekeeper to drop its in-memory state.
|
||||
pg_2.stop_and_destroy()
|
||||
pg_4.stop_and_destroy()
|
||||
endpoint_2.stop_and_destroy()
|
||||
endpoint_4.stop_and_destroy()
|
||||
sk.stop()
|
||||
sk.start()
|
||||
|
||||
# Ensure connections to Safekeeper are established
|
||||
for pg in [pg_1, pg_3, pg_other]:
|
||||
with closing(pg.connect()) as conn:
|
||||
for endpoint in [endpoint_1, endpoint_3, endpoint_other]:
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("INSERT INTO t (key) VALUES (1)")
|
||||
|
||||
@@ -1244,6 +1240,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
|
||||
# Ensure the other tenant still works
|
||||
sk_http_other.timeline_status(tenant_id_other, timeline_id_other)
|
||||
with closing(pg_other.connect()) as conn:
|
||||
with closing(endpoint_other.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("INSERT INTO t (key) VALUES (123)")
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import List, Optional
|
||||
|
||||
import asyncpg
|
||||
from fixtures.log_helper import getLogger
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper
|
||||
from fixtures.types import Lsn, TenantId, TimelineId
|
||||
|
||||
log = getLogger("root.safekeeper_async")
|
||||
@@ -82,8 +82,10 @@ class WorkerStats(object):
|
||||
log.info("All workers made {} transactions".format(progress))
|
||||
|
||||
|
||||
async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
|
||||
pg_conn = await pg.connect_async()
|
||||
async def run_random_worker(
|
||||
stats: WorkerStats, endpoint: Endpoint, worker_id, n_accounts, max_transfer
|
||||
):
|
||||
pg_conn = await endpoint.connect_async()
|
||||
log.debug("Started worker {}".format(worker_id))
|
||||
|
||||
while stats.running:
|
||||
@@ -141,7 +143,7 @@ async def wait_for_lsn(
|
||||
# consistent.
|
||||
async def run_restarts_under_load(
|
||||
env: NeonEnv,
|
||||
pg: Postgres,
|
||||
endpoint: Endpoint,
|
||||
acceptors: List[Safekeeper],
|
||||
n_workers=10,
|
||||
n_accounts=100,
|
||||
@@ -154,7 +156,7 @@ async def run_restarts_under_load(
|
||||
# taking into account that this timeout is checked only at the beginning of every iteration.
|
||||
test_timeout_at = time.monotonic() + 5 * 60
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
pg_conn = await endpoint.connect_async()
|
||||
tenant_id = TenantId(await pg_conn.fetchval("show neon.tenant_id"))
|
||||
timeline_id = TimelineId(await pg_conn.fetchval("show neon.timeline_id"))
|
||||
|
||||
@@ -165,7 +167,7 @@ async def run_restarts_under_load(
|
||||
stats = WorkerStats(n_workers)
|
||||
workers = []
|
||||
for worker_id in range(n_workers):
|
||||
worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
|
||||
worker = run_random_worker(stats, endpoint, worker_id, bank.n_accounts, max_transfer)
|
||||
workers.append(asyncio.create_task(worker))
|
||||
|
||||
for it in range(iterations):
|
||||
@@ -212,11 +214,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
env.neon_cli.create_branch("test_safekeepers_restarts_under_load")
|
||||
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"]
|
||||
)
|
||||
|
||||
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))
|
||||
asyncio.run(run_restarts_under_load(env, endpoint, env.safekeepers))
|
||||
|
||||
|
||||
# Restart acceptors one by one and test that everything is working as expected
|
||||
@@ -228,7 +230,7 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
env.neon_cli.create_branch("test_restarts_frequent_checkpoints")
|
||||
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
|
||||
pg = env.postgres.create_start(
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_restarts_frequent_checkpoints",
|
||||
config_lines=[
|
||||
"max_replication_write_lag=1MB",
|
||||
@@ -240,11 +242,13 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments
|
||||
# are not removed before broadcasted to all safekeepers, with the help of replication slot
|
||||
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))
|
||||
asyncio.run(
|
||||
run_restarts_under_load(env, endpoint, env.safekeepers, period_time=15, iterations=5)
|
||||
)
|
||||
|
||||
|
||||
def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
|
||||
pg = Postgres(
|
||||
def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
|
||||
endpoint = Endpoint(
|
||||
env,
|
||||
tenant_id=env.initial_tenant,
|
||||
port=env.port_distributor.get_port(),
|
||||
@@ -253,19 +257,19 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
         check_stop_result=False,
     )

-    # embed current time in node name
-    node_name = pgdir_name or f"pg_node_{time.time()}"
-    return pg.create_start(
-        branch_name=branch, node_name=node_name, config_lines=["log_statement=all"]
+    # embed current time in endpoint ID
+    endpoint_id = pgdir_name or f"ep-{time.time()}"
+    return endpoint.create_start(
+        branch_name=branch, endpoint_id=endpoint_id, config_lines=["log_statement=all"]
     )


 async def exec_compute_query(
     env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None
 ):
-    with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg:
+    with endpoint_create_start(env, branch=branch, pgdir_name=pgdir_name) as endpoint:
         before_conn = time.time()
-        conn = await pg.connect_async()
+        conn = await endpoint.connect_async()
         res = await conn.fetch(query)
         await conn.close()
         after_conn = time.time()
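
For reference, this is roughly how the renamed helper reads once the two hunks above are applied. It is assembled only from the lines visible in the diff; the `Endpoint` constructor arguments hidden between `port=...` and `check_stop_result=False` are elided rather than guessed:

```python
def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
    endpoint = Endpoint(
        env,
        tenant_id=env.initial_tenant,
        port=env.port_distributor.get_port(),
        # ... constructor arguments not shown in the diff ...
        check_stop_result=False,
    )

    # embed current time in endpoint ID
    endpoint_id = pgdir_name or f"ep-{time.time()}"
    return endpoint.create_start(
        branch_name=branch, endpoint_id=endpoint_id, config_lines=["log_statement=all"]
    )
```
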
@@ -436,8 +440,8 @@ async def check_unavailability(
     assert bg_query.done()


-async def run_unavailability(env: NeonEnv, pg: Postgres):
-    conn = await pg.connect_async()
+async def run_unavailability(env: NeonEnv, endpoint: Endpoint):
+    conn = await endpoint.connect_async()

     # check basic work with table
     await conn.execute("CREATE TABLE t(key int primary key, value text)")
@@ -462,9 +466,9 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()

     env.neon_cli.create_branch("test_safekeepers_unavailability")
-    pg = env.postgres.create_start("test_safekeepers_unavailability")
+    endpoint = env.endpoints.create_start("test_safekeepers_unavailability")

-    asyncio.run(run_unavailability(env, pg))
+    asyncio.run(run_unavailability(env, endpoint))


 @dataclass
@@ -493,8 +497,8 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest):
         await asyncio.sleep(1)


-async def run_race_conditions(env: NeonEnv, pg: Postgres):
-    conn = await pg.connect_async()
+async def run_race_conditions(env: NeonEnv, endpoint: Endpoint):
+    conn = await endpoint.connect_async()
     await conn.execute("CREATE TABLE t(key int primary key, value text)")

     data = RaceConditionTest(0, False)
@@ -525,14 +529,14 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()

     env.neon_cli.create_branch("test_safekeepers_race_conditions")
-    pg = env.postgres.create_start("test_safekeepers_race_conditions")
+    endpoint = env.endpoints.create_start("test_safekeepers_race_conditions")

-    asyncio.run(run_race_conditions(env, pg))
+    asyncio.run(run_race_conditions(env, endpoint))


 # Check that pageserver can select safekeeper with largest commit_lsn
 # and switch if LSN is not updated for some time (NoWalTimeout).
-async def run_wal_lagging(env: NeonEnv, pg: Postgres):
+async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint):
     def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str:
         # use ports 10, 11 and 12 to simulate unavailable safekeepers
         return ",".join(
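
The body of `safekeepers_guc` is only partially visible in the hunks. As a rough standalone sketch of the same idea, under the assumption (from the comment above) that inactive entries are pointed at the dead ports 10, 11, 12 so the compute cannot reach them; the function name, ports, and hosts here are illustrative:

```python
from typing import List

def safekeepers_guc_sketch(ports: List[int], active: List[bool]) -> str:
    # Inactive entries get a port nobody listens on, simulating unavailable safekeepers.
    return ",".join(
        f"localhost:{port if is_active else 10 + i}"
        for i, (port, is_active) in enumerate(zip(ports, active))
    )

# safekeepers_guc_sketch([5454, 5455, 5456], [True, False, True])
# -> "localhost:5454,localhost:11,localhost:5456"
```
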
@@ -542,10 +546,10 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
             ]
         )

-    conn = await pg.connect_async()
+    conn = await endpoint.connect_async()
     await conn.execute("CREATE TABLE t(key int primary key, value text)")
     await conn.close()
-    pg.stop()
+    endpoint.stop()

     n_iterations = 20
     n_txes = 10000
@@ -561,11 +565,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
             it -= 1
             continue

-        pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
+        endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
         log.info(f"Iteration {it}: {active_sk}")

-        pg.start()
-        conn = await pg.connect_async()
+        endpoint.start()
+        conn = await endpoint.connect_async()

         for _ in range(n_txes):
             await conn.execute(f"INSERT INTO t values ({i}, 'payload')")
@@ -573,11 +577,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
             i += 1

         await conn.close()
-        pg.stop()
+        endpoint.stop()

-    pg.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers)))
-    pg.start()
-    conn = await pg.connect_async()
+    endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers)))
+    endpoint.start()
+    conn = await endpoint.connect_async()

     log.info(f"Executed {i-1} queries")

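
Putting the `run_wal_lagging` hunks together, the per-iteration pattern is: stop the endpoint, rewrite its safekeeper list, start it again, and write a batch of rows. A compressed sketch of that loop, using only the methods visible in the diff (`adjust_for_safekeepers`, `start`, `stop`, `connect_async`); the random choice of unavailable safekeepers and the edge-case handling are replaced by a simple deterministic rotation, and the function name is made up for this sketch:

```python
import asyncio
from typing import Callable, List

async def write_with_flaky_safekeepers(
    endpoint,                                   # the Endpoint under test
    env,                                        # NeonEnv, used only for env.safekeepers
    guc: Callable[[object, List[bool]], str],   # e.g. the nested safekeepers_guc helper
    n_iterations: int = 20,
    n_txes: int = 10000,
) -> None:
    i = 1
    for it in range(n_iterations):
        # Deterministic stand-in for the test's random unavailability mask:
        # disable one safekeeper per iteration, rotating through them.
        n_sk = len(env.safekeepers)
        active_sk = [idx != it % n_sk for idx in range(n_sk)]
        endpoint.adjust_for_safekeepers(guc(env, active_sk))

        endpoint.start()
        conn = await endpoint.connect_async()
        for _ in range(n_txes):
            await conn.execute(f"INSERT INTO t values ({i}, 'payload')")
            i += 1
        await conn.close()
        endpoint.stop()

    # Re-enable every safekeeper before the final connection.
    endpoint.adjust_for_safekeepers(guc(env, [True] * len(env.safekeepers)))
    endpoint.start()
```
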
@@ -591,6 +595,6 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()

     env.neon_cli.create_branch("test_wal_lagging")
-    pg = env.postgres.create_start("test_wal_lagging")
+    endpoint = env.endpoints.create_start("test_wal_lagging")

-    asyncio.run(run_wal_lagging(env, pg))
+    asyncio.run(run_wal_lagging(env, endpoint))

@@ -19,9 +19,9 @@ def test_wal_restore(
 ):
     env = neon_env_builder.init_start()
     env.neon_cli.create_branch("test_wal_restore")
-    pg = env.postgres.create_start("test_wal_restore")
-    pg.safe_psql("create table t as select generate_series(1,300000)")
-    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
+    endpoint = env.endpoints.create_start("test_wal_restore")
+    endpoint.safe_psql("create table t as select generate_series(1,300000)")
+    tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
     env.neon_cli.pageserver_stop()
     port = port_distributor.get_port()
     data_dir = test_output_dir / "pgsql.restored"

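
The `[0][0]` indexing in the hunk above only works if `safe_psql` hands back the query result as a list of row tuples; the hunk itself does not show that return shape, so the following is an inferred illustration rather than the helper's documented contract:

```python
# Illustrative only: the shape the [0][0] indexing above relies on.
rows = endpoint.safe_psql("show neon.tenant_id")
# rows is assumed to be a list of row tuples, e.g. [("<tenant id hex string>",)]
tenant_id = TenantId(rows[0][0])  # first row, first column
```
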
@@ -45,9 +45,9 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
     # assert tenant exists on disk
     assert (env.repo_dir / "tenants" / str(tenant_id)).exists()

-    pg = env.postgres.create_start("main", tenant_id=tenant_id)
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)

-    pg_conn = pg.connect()
+    pg_conn = endpoint.connect()
     cur = pg_conn.cursor()

     # Create table, and insert some rows. Make it big enough that it doesn't fit in

@@ -24,7 +24,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin):
     env = neon_simple_env

     env.neon_cli.create_branch("test_broken", "empty")
-    env.postgres.create_start("test_broken")
+    env.endpoints.create_start("test_broken")
     log.info("postgres is running")

     log.info("THIS NEXT COMMAND WILL FAIL:")