Rename "Postgres nodes" in control_plane to endpoints.

We now use the term "endpoint" for compute Postgres nodes in the web UI
and user-facing documentation. Adjust the nomenclature in the code to match.

This renames the "neon_local pg" command to "neon_local endpoint". Names
of classes, variables, etc. in the Python tests are adjusted accordingly.

This also changes the directory structure so that endpoints are now
stored in:

    .neon/endpoints/<endpoint id>

instead of:

    .neon/pgdatadirs/tenants/<tenant_id>/<endpoint (node) name>

The tenant ID is no longer part of the path. That means that you
cannot have two endpoints with the same name/ID in two different
tenants anymore. That's consistent with how we treat endpoints in the
real control plane and proxy: the endpoint ID must be globally unique.
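
To illustrate (a sketch, not part of the patch; the tenant and endpoint
IDs below are just examples), the old and new locations in Python terms:

    # Sketch: old vs. new location of an endpoint's PGDATA.
    from pathlib import Path

    repo = Path(".neon")
    tenant_id = "9ef87a5bf0d92544f6fafeeb3239695c"  # example tenant ID
    endpoint_id = "main"                            # example endpoint ID

    old_pgdata = repo / "pgdatadirs" / "tenants" / tenant_id / endpoint_id
    new_pgdata = repo / "endpoints" / endpoint_id / "pgdata"
    # No tenant ID in the new path, hence the global-uniqueness requirement.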
Author:    Heikki Linnakangas, 2023-04-13 13:45:43 +03:00
Committer: Heikki Linnakangas
Commit:    53f438a8a8 (parent 356439aa33)
78 changed files with 1061 additions and 991 deletions


@@ -147,15 +147,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
> ./target/debug/neon_local endpoint start main
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
```
2. Now, it is possible to connect to postgres and run some queries:
@@ -184,14 +184,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances
> ./target/debug/neon_local pg list
NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running


@@ -7,7 +7,7 @@
//!
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use control_plane::compute::ComputeControlPlane;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::LocalEnv;
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
@@ -106,8 +106,9 @@ fn main() -> Result<()> {
"start" => handle_start_all(sub_args, &env),
"stop" => handle_stop_all(sub_args, &env),
"pageserver" => handle_pageserver(sub_args, &env),
"pg" => handle_pg(sub_args, &env),
"safekeeper" => handle_safekeeper(sub_args, &env),
"endpoint" => handle_endpoint(sub_args, &env),
"pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
_ => bail!("unexpected subcommand {sub_name}"),
};
@@ -470,10 +471,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ...");
pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
println!("Creating node for imported timeline ...");
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?;
println!("Creating endpoint for imported timeline ...");
cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?;
println!("Done");
}
Some(("branch", branch_match)) => {
@@ -521,10 +522,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
Ok(())
}
fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match pg_match.subcommand() {
Some(pg_subcommand_data) => pg_subcommand_data,
None => bail!("no pg subcommand provided"),
fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match ep_match.subcommand() {
Some(ep_subcommand_data) => ep_subcommand_data,
None => bail!("no endpoint subcommand provided"),
};
let mut cplane = ComputeControlPlane::load(env.clone())?;
@@ -546,7 +547,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
table.load_preset(comfy_table::presets::NOTHING);
table.set_header([
"NODE",
"ENDPOINT",
"ADDRESS",
"TIMELINE",
"BRANCH NAME",
@@ -554,39 +555,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
"STATUS",
]);
for ((_, node_name), node) in cplane
.nodes
for (endpoint_id, endpoint) in cplane
.endpoints
.iter()
.filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
.filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
{
let lsn_str = match node.lsn {
let lsn_str = match endpoint.lsn {
None => {
// -> primary node
// -> primary endpoint
// Use the LSN at the end of the timeline.
timeline_infos
.get(&node.timeline_id)
.get(&endpoint.timeline_id)
.map(|bi| bi.last_record_lsn.to_string())
.unwrap_or_else(|| "?".to_string())
}
Some(lsn) => {
// -> read-only node
// Use the node's LSN.
// -> read-only endpoint
// Use the endpoint's LSN.
lsn.to_string()
}
};
let branch_name = timeline_name_mappings
.get(&TenantTimelineId::new(tenant_id, node.timeline_id))
.get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id))
.map(|name| name.as_str())
.unwrap_or("?");
table.add_row([
node_name.as_str(),
&node.address.to_string(),
&node.timeline_id.to_string(),
endpoint_id.as_str(),
&endpoint.address.to_string(),
&endpoint.timeline_id.to_string(),
branch_name,
lsn_str.as_str(),
node.status(),
endpoint.status(),
]);
}
@@ -597,10 +598,10 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
.get_one::<String>("branch-name")
.map(|s| s.as_str())
.unwrap_or(DEFAULT_BRANCH_NAME);
let node_name = sub_args
.get_one::<String>("node")
.map(|node_name| node_name.to_string())
.unwrap_or_else(|| format!("{branch_name}_node"));
let endpoint_id = sub_args
.get_one::<String>("endpoint_id")
.map(String::to_string)
.unwrap_or_else(|| format!("ep-{branch_name}"));
let lsn = sub_args
.get_one::<String>("lsn")
@@ -618,15 +619,15 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
.copied()
.context("Failed to parse postgres version from the argument string")?;
cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?;
}
"start" => {
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let node_name = sub_args
.get_one::<String>("node")
.ok_or_else(|| anyhow!("No node name was provided to start"))?;
let endpoint_id = sub_args
.get_one::<String>("endpoint_id")
.ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
let endpoint = cplane.endpoints.get(endpoint_id.as_str());
let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
@@ -636,9 +637,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
None
};
if let Some(node) = node {
println!("Starting existing postgres {node_name}...");
node.start(&auth_token)?;
if let Some(endpoint) = endpoint {
println!("Starting existing endpoint {endpoint_id}...");
endpoint.start(&auth_token)?;
} else {
let branch_name = sub_args
.get_one::<String>("branch-name")
@@ -663,27 +664,33 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
// start --port X
// stop
// start <-- will also use port X even without explicit port argument
println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");
let node =
cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
node.start(&auth_token)?;
let ep = cplane.new_endpoint(
tenant_id,
endpoint_id,
timeline_id,
lsn,
port,
pg_version,
)?;
ep.start(&auth_token)?;
}
}
"stop" => {
let node_name = sub_args
.get_one::<String>("node")
.ok_or_else(|| anyhow!("No node name was provided to stop"))?;
let endpoint_id = sub_args
.get_one::<String>("endpoint_id")
.ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?;
let destroy = sub_args.get_flag("destroy");
let node = cplane
.nodes
.get(&(tenant_id, node_name.to_string()))
.with_context(|| format!("postgres {node_name} is not found"))?;
node.stop(destroy)?;
let endpoint = cplane
.endpoints
.get(endpoint_id.as_str())
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
endpoint.stop(destroy)?;
}
_ => bail!("Unexpected pg subcommand '{sub_name}'"),
_ => bail!("Unexpected endpoint subcommand '{sub_name}'"),
}
Ok(())
@@ -802,7 +809,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
// Postgres nodes are not started automatically
// Endpoints are not started automatically
broker::start_broker_process(env)?;
@@ -836,10 +843,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
let pageserver = PageServerNode::from_env(env);
// Stop all compute nodes
// Stop all endpoints
match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => {
for (_k, node) in cplane.nodes {
for (_k, node) in cplane.endpoints {
if let Err(e) = node.stop(false) {
eprintln!("postgres stop failed: {e:#}");
}
@@ -872,7 +879,9 @@ fn cli() -> Command {
.help("Name of the branch to be created or used as an alias for other services")
.required(false);
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
let endpoint_id_arg = Arg::new("endpoint_id")
.help("Postgres endpoint id")
.required(false);
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
@@ -1026,27 +1035,27 @@ fn cli() -> Command {
)
)
.subcommand(
Command::new("pg")
Command::new("endpoint")
.arg_required_else_help(true)
.about("Manage postgres instances")
.subcommand(Command::new("list").arg(tenant_id_arg.clone()))
.subcommand(Command::new("create")
.about("Create a postgres compute node")
.arg(pg_node_arg.clone())
.about("Create a compute endpoint")
.arg(endpoint_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(tenant_id_arg.clone())
.arg(lsn_arg.clone())
.arg(port_arg.clone())
.arg(
Arg::new("config-only")
.help("Don't do basebackup, create compute node with only config files")
.help("Don't do basebackup, create endpoint directory with only config files")
.long("config-only")
.required(false))
.arg(pg_version_arg.clone())
)
.subcommand(Command::new("start")
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
.arg(pg_node_arg.clone())
.about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
.arg(endpoint_id_arg.clone())
.arg(tenant_id_arg.clone())
.arg(branch_name_arg)
.arg(timeline_id_arg)
@@ -1056,7 +1065,7 @@ fn cli() -> Command {
)
.subcommand(
Command::new("stop")
.arg(pg_node_arg)
.arg(endpoint_id_arg)
.arg(tenant_id_arg)
.arg(
Arg::new("destroy")
@@ -1068,6 +1077,13 @@ fn cli() -> Command {
)
)
// Obsolete old name for 'endpoint'. We now just print an error if it's used.
.subcommand(
Command::new("pg")
.hide(true)
.arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false))
.trailing_var_arg(true)
)
.subcommand(
Command::new("start")
.about("Start page server and safekeepers")


@@ -25,54 +25,45 @@ use crate::postgresql_conf::PostgresConf;
//
pub struct ComputeControlPlane {
base_port: u16,
pageserver: Arc<PageServerNode>,
pub nodes: BTreeMap<(TenantId, String), Arc<PostgresNode>>,
// endpoint ID is the key
pub endpoints: BTreeMap<String, Arc<Endpoint>>,
env: LocalEnv,
pageserver: Arc<PageServerNode>,
}
impl ComputeControlPlane {
// Load current nodes with ports from data directories on disk
// Directory structure has the following layout:
// pgdatadirs
// |- tenants
// | |- <tenant_id>
// | | |- <node name>
// Load current endpoints from the endpoints/ subdirectories
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
let pageserver = Arc::new(PageServerNode::from_env(&env));
let mut nodes = BTreeMap::default();
let pgdatadirspath = &env.pg_data_dirs_path();
for tenant_dir in fs::read_dir(pgdatadirspath)
.with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
let mut endpoints = BTreeMap::default();
for endpoint_dir in fs::read_dir(env.endpoints_path())
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
{
let tenant_dir = tenant_dir?;
for timeline_dir in fs::read_dir(tenant_dir.path())
.with_context(|| format!("failed to list {}", tenant_dir.path().display()))?
{
let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?;
nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node));
}
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
endpoints.insert(ep.name.clone(), Arc::new(ep));
}
Ok(ComputeControlPlane {
base_port: 55431,
pageserver,
nodes,
endpoints,
env,
pageserver,
})
}
fn get_port(&mut self) -> u16 {
1 + self
.nodes
.endpoints
.values()
.map(|node| node.address.port())
.map(|ep| ep.address.port())
.max()
.unwrap_or(self.base_port)
}
pub fn new_node(
pub fn new_endpoint(
&mut self,
tenant_id: TenantId,
name: &str,
@@ -80,9 +71,9 @@ impl ComputeControlPlane {
lsn: Option<Lsn>,
port: Option<u16>,
pg_version: u32,
) -> Result<Arc<PostgresNode>> {
) -> Result<Arc<Endpoint>> {
let port = port.unwrap_or_else(|| self.get_port());
let node = Arc::new(PostgresNode {
let ep = Arc::new(Endpoint {
name: name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
env: self.env.clone(),
@@ -93,39 +84,45 @@ impl ComputeControlPlane {
pg_version,
});
node.create_pgdata()?;
node.setup_pg_conf()?;
ep.create_pgdata()?;
ep.setup_pg_conf()?;
self.nodes
.insert((tenant_id, node.name.clone()), Arc::clone(&node));
self.endpoints.insert(ep.name.clone(), Arc::clone(&ep));
Ok(node)
Ok(ep)
}
}
///////////////////////////////////////////////////////////////////////////////
#[derive(Debug)]
pub struct PostgresNode {
pub address: SocketAddr,
pub struct Endpoint {
/// used as the directory name
name: String,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
// Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
pub lsn: Option<Lsn>,
// port and address of the Postgres server
pub address: SocketAddr,
pg_version: u32,
// These are not part of the endpoint as such, but the environment
// the endpoint runs in.
pub env: LocalEnv,
pageserver: Arc<PageServerNode>,
pub timeline_id: TimelineId,
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
pub tenant_id: TenantId,
pg_version: u32,
}
impl PostgresNode {
impl Endpoint {
fn from_dir_entry(
entry: std::fs::DirEntry,
env: &LocalEnv,
pageserver: &Arc<PageServerNode>,
) -> Result<PostgresNode> {
) -> Result<Endpoint> {
if !entry.file_type()?.is_dir() {
anyhow::bail!(
"PostgresNode::from_dir_entry failed: '{}' is not a directory",
"Endpoint::from_dir_entry failed: '{}' is not a directory",
entry.path().display()
);
}
@@ -135,7 +132,7 @@ impl PostgresNode {
let name = fname.to_str().unwrap().to_string();
// Read config file into memory
let cfg_path = entry.path().join("postgresql.conf");
let cfg_path = entry.path().join("pgdata").join("postgresql.conf");
let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
@@ -161,7 +158,7 @@ impl PostgresNode {
conf.parse_field_optional("recovery_target_lsn", &context)?;
// ok now
Ok(PostgresNode {
Ok(Endpoint {
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
name,
env: env.clone(),
@@ -269,7 +266,7 @@ impl PostgresNode {
}
// Write postgresql.conf with default configuration
// and PG_VERSION file to the data directory of a new node.
// and PG_VERSION file to the data directory of a new endpoint.
fn setup_pg_conf(&self) -> Result<()> {
let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10");
@@ -289,7 +286,7 @@ impl PostgresNode {
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off");
// Configure the node to fetch pages from pageserver
// Configure the Neon Postgres extension to fetch pages from pageserver
let pageserver_connstr = {
let config = &self.pageserver.pg_connection_config;
let (host, port) = (config.host(), config.port());
@@ -325,7 +322,7 @@ impl PostgresNode {
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {
// Configure the node to connect to the safekeepers
// Configure Postgres to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
let safekeepers = self
@@ -380,8 +377,12 @@ impl PostgresNode {
Ok(())
}
pub fn endpoint_path(&self) -> PathBuf {
self.env.endpoints_path().join(&self.name)
}
pub fn pgdata(&self) -> PathBuf {
self.env.pg_data_dir(&self.tenant_id, &self.name)
self.endpoint_path().join("pgdata")
}
pub fn status(&self) -> &str {
@@ -443,12 +444,11 @@ impl PostgresNode {
}
pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
// Bail if the node already running.
if self.status() == "running" {
anyhow::bail!("The node is already running");
anyhow::bail!("The endpoint is already running");
}
// 1. We always start compute node from scratch, so
// 1. We always start Postgres from scratch, so
// if old dir exists, preserve 'postgresql.conf' and drop the directory
let postgresql_conf_path = self.pgdata().join("postgresql.conf");
let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
@@ -470,8 +470,8 @@ impl PostgresNode {
File::create(self.pgdata().join("standby.signal"))?;
}
// 4. Finally start the compute node postgres
println!("Starting postgres node at '{}'", self.connstr());
// 4. Finally start postgres
println!("Starting postgres at '{}'", self.connstr());
self.pg_ctl(&["start"], auth_token)
}
@@ -480,7 +480,7 @@ impl PostgresNode {
// use immediate shutdown mode, otherwise,
// shutdown gracefully to leave the data directory sane.
//
// Compute node always starts from scratch, so stop
// Postgres is always started from scratch, so stop
// without destroy only used for testing and debugging.
//
if destroy {
@@ -489,7 +489,7 @@ impl PostgresNode {
"Destroying postgres data directory '{}'",
self.pgdata().to_str().unwrap()
);
fs::remove_dir_all(self.pgdata())?;
fs::remove_dir_all(self.endpoint_path())?;
} else {
self.pg_ctl(&["stop"], &None)?;
}


@@ -9,7 +9,7 @@
mod background_process;
pub mod broker;
pub mod compute;
pub mod endpoint;
pub mod local_env;
pub mod pageserver;
pub mod postgresql_conf;


@@ -200,14 +200,8 @@ impl LocalEnv {
self.neon_distrib_dir.join("storage_broker")
}
pub fn pg_data_dirs_path(&self) -> PathBuf {
self.base_data_dir.join("pgdatadirs").join("tenants")
}
pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf {
self.pg_data_dirs_path()
.join(tenant_id.to_string())
.join(branch_name)
pub fn endpoints_path(&self) -> PathBuf {
self.base_data_dir.join("endpoints")
}
// TODO: move pageserver files into ./pageserver
@@ -427,7 +421,7 @@ impl LocalEnv {
}
}
fs::create_dir_all(self.pg_data_dirs_path())?;
fs::create_dir_all(self.endpoints_path())?;
for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;


@@ -114,7 +114,7 @@ class NeonCompare(PgCompare):
self.timeline = self.env.neon_cli.create_timeline(branch_name, tenant_id=self.tenant)
# Start pg
self._pg = self.env.postgres.create_start(branch_name, "main", self.tenant)
self._pg = self.env.endpoints.create_start(branch_name, "main", self.tenant)
@property
def pg(self) -> PgProtocol:


@@ -830,7 +830,7 @@ class NeonEnvBuilder:
# Stop all the nodes.
if self.env:
log.info("Cleaning up all storage and compute nodes")
self.env.postgres.stop_all()
self.env.endpoints.stop_all()
for sk in self.env.safekeepers:
sk.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
@@ -894,7 +894,7 @@ class NeonEnv:
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.neon_cli = NeonCli(env=self)
self.postgres = PostgresFactory(self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.broker = config.broker
self.remote_storage = config.remote_storage
@@ -902,6 +902,7 @@ class NeonEnv:
self.pg_version = config.pg_version
self.neon_binpath = config.neon_binpath
self.pg_distrib_dir = config.pg_distrib_dir
self.endpoint_counter = 0
# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
@@ -1015,6 +1016,13 @@ class NeonEnv:
priv = (Path(self.repo_dir) / "auth_private_key.pem").read_text()
return AuthKeys(pub=pub, priv=priv)
def generate_endpoint_id(self) -> str:
"""
Generate a unique endpoint ID
"""
self.endpoint_counter += 1
return "ep-" + str(self.endpoint_counter)
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
@@ -1073,7 +1081,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
yield _shared_simple_env
_shared_simple_env.postgres.stop_all()
_shared_simple_env.endpoints.stop_all()
@pytest.fixture(scope="function")
@@ -1097,7 +1105,7 @@ def neon_env_builder(
neon_env_builder.init_start().
After the initialization, you can launch compute nodes by calling
the functions in the 'env.postgres' factory object, stop/start the
the functions in the 'env.endpoints' factory object, stop/start the
nodes, etc.
"""
@@ -1438,16 +1446,16 @@ class NeonCli(AbstractNeonCli):
args.extend(["-m", "immediate"])
return self.raw_cli(args)
def pg_create(
def endpoint_create(
self,
branch_name: str,
node_name: Optional[str] = None,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
port: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"create",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
@@ -1460,22 +1468,22 @@ class NeonCli(AbstractNeonCli):
args.extend(["--lsn", str(lsn)])
if port is not None:
args.extend(["--port", str(port)])
if node_name is not None:
args.append(node_name)
if endpoint_id is not None:
args.append(endpoint_id)
res = self.raw_cli(args)
res.check_returncode()
return res
def pg_start(
def endpoint_start(
self,
node_name: str,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
port: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"start",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
@@ -1486,30 +1494,30 @@ class NeonCli(AbstractNeonCli):
args.append(f"--lsn={lsn}")
if port is not None:
args.append(f"--port={port}")
if node_name is not None:
args.append(node_name)
if endpoint_id is not None:
args.append(endpoint_id)
res = self.raw_cli(args)
res.check_returncode()
return res
def pg_stop(
def endpoint_stop(
self,
node_name: str,
endpoint_id: str,
tenant_id: Optional[TenantId] = None,
destroy=False,
check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
args = [
"pg",
"endpoint",
"stop",
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
if destroy:
args.append("--destroy")
if node_name is not None:
args.append(node_name)
if endpoint_id is not None:
args.append(endpoint_id)
return self.raw_cli(args, check_return_code=check_return_code)
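# Illustrative usage sketch (not part of the diff): the old pg_create /
# pg_start / pg_stop calls map onto the renamed wrappers above. Assumes a
# NeonCli `cli` and an existing branch "main"; the endpoint ID is made up.
cli.endpoint_create("main", endpoint_id="ep-main")
cli.endpoint_start("ep-main")
cli.endpoint_stop("ep-main", destroy=True)  # --destroy also removes .neon/endpoints/ep-main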
@@ -2167,8 +2175,8 @@ def static_proxy(
yield proxy
class Postgres(PgProtocol):
"""An object representing a running postgres daemon."""
class Endpoint(PgProtocol):
"""An object representing a Postgres compute endpoint managed by the control plane."""
def __init__(
self, env: NeonEnv, tenant_id: TenantId, port: int, check_stop_result: bool = True
@@ -2176,33 +2184,40 @@ class Postgres(PgProtocol):
super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres")
self.env = env
self.running = False
self.node_name: Optional[str] = None # dubious, see asserts below
self.endpoint_id: Optional[str] = None # dubious, see asserts below
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
self.tenant_id = tenant_id
self.port = port
self.check_stop_result = check_stop_result
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
# path to conf is <repo_dir>/endpoints/<endpoint_id>/pgdata/postgresql.conf
def create(
self,
branch_name: str,
node_name: Optional[str] = None,
endpoint_id: Optional[str] = None,
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
) -> "Postgres":
) -> "Endpoint":
"""
Create the pg data directory.
Create a new Postgres endpoint.
Returns self.
"""
if not config_lines:
config_lines = []
self.node_name = node_name or f"{branch_name}_pg_node"
self.env.neon_cli.pg_create(
branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port
if endpoint_id is None:
endpoint_id = self.env.generate_endpoint_id()
self.endpoint_id = endpoint_id
self.env.neon_cli.endpoint_create(
branch_name,
endpoint_id=self.endpoint_id,
tenant_id=self.tenant_id,
lsn=lsn,
port=self.port,
)
path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name
path = Path("endpoints") / self.endpoint_id / "pgdata"
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
if config_lines is None:
@@ -2215,26 +2230,30 @@ class Postgres(PgProtocol):
return self
def start(self) -> "Postgres":
def start(self) -> "Endpoint":
"""
Start the Postgres instance.
Returns self.
"""
assert self.node_name is not None
assert self.endpoint_id is not None
log.info(f"Starting postgres node {self.node_name}")
log.info(f"Starting postgres endpoint {self.endpoint_id}")
self.env.neon_cli.pg_start(self.node_name, tenant_id=self.tenant_id, port=self.port)
self.env.neon_cli.endpoint_start(self.endpoint_id, tenant_id=self.tenant_id, port=self.port)
self.running = True
return self
def endpoint_path(self) -> Path:
"""Path to endpoint directory"""
assert self.endpoint_id
path = Path("endpoints") / self.endpoint_id
return self.env.repo_dir / path
def pg_data_dir_path(self) -> str:
"""Path to data directory"""
assert self.node_name
path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name
return os.path.join(self.env.repo_dir, path)
"""Path to Postgres data directory"""
return os.path.join(self.endpoint_path(), "pgdata")
def pg_xact_dir_path(self) -> str:
"""Path to pg_xact dir"""
@@ -2248,7 +2267,7 @@ class Postgres(PgProtocol):
"""Path to postgresql.conf"""
return os.path.join(self.pg_data_dir_path(), "postgresql.conf")
def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres":
def adjust_for_safekeepers(self, safekeepers: str) -> "Endpoint":
"""
Adjust instance config for working with wal acceptors instead of
pageserver (pre-configured by CLI) directly.
@@ -2272,7 +2291,7 @@ class Postgres(PgProtocol):
f.write("neon.safekeepers = '{}'\n".format(safekeepers))
return self
def config(self, lines: List[str]) -> "Postgres":
def config(self, lines: List[str]) -> "Endpoint":
"""
Add lines to postgresql.conf.
Lines should be an array of valid postgresql.conf rows.
@@ -2286,32 +2305,32 @@ class Postgres(PgProtocol):
return self
def stop(self) -> "Postgres":
def stop(self) -> "Endpoint":
"""
Stop the Postgres instance if it's running.
Returns self.
"""
if self.running:
assert self.node_name is not None
self.env.neon_cli.pg_stop(
self.node_name, self.tenant_id, check_return_code=self.check_stop_result
assert self.endpoint_id is not None
self.env.neon_cli.endpoint_stop(
self.endpoint_id, self.tenant_id, check_return_code=self.check_stop_result
)
self.running = False
return self
def stop_and_destroy(self) -> "Postgres":
def stop_and_destroy(self) -> "Endpoint":
"""
Stop the Postgres instance, then destroy it.
Stop the Postgres instance, then destroy the endpoint.
Returns self.
"""
assert self.node_name is not None
self.env.neon_cli.pg_stop(
self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result
assert self.endpoint_id is not None
self.env.neon_cli.endpoint_stop(
self.endpoint_id, self.tenant_id, True, check_return_code=self.check_stop_result
)
self.node_name = None
self.endpoint_id = None
self.running = False
return self
@@ -2319,13 +2338,12 @@ class Postgres(PgProtocol):
def create_start(
self,
branch_name: str,
node_name: Optional[str] = None,
endpoint_id: Optional[str] = None,
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
) -> "Postgres":
) -> "Endpoint":
"""
Create a Postgres instance, apply config
and then start it.
Create an endpoint, apply config, and start Postgres.
Returns self.
"""
@@ -2333,7 +2351,7 @@ class Postgres(PgProtocol):
self.create(
branch_name=branch_name,
node_name=node_name,
endpoint_id=endpoint_id,
config_lines=config_lines,
lsn=lsn,
).start()
@@ -2342,7 +2360,7 @@ class Postgres(PgProtocol):
return self
def __enter__(self) -> "Postgres":
def __enter__(self) -> "Endpoint":
return self
def __exit__(
@@ -2354,33 +2372,33 @@ class Postgres(PgProtocol):
self.stop()
class PostgresFactory:
"""An object representing multiple running postgres daemons."""
class EndpointFactory:
"""An object representing multiple compute endpoints."""
def __init__(self, env: NeonEnv):
self.env = env
self.num_instances: int = 0
self.instances: List[Postgres] = []
self.endpoints: List[Endpoint] = []
def create_start(
self,
branch_name: str,
node_name: Optional[str] = None,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
) -> Postgres:
pg = Postgres(
) -> Endpoint:
ep = Endpoint(
self.env,
tenant_id=tenant_id or self.env.initial_tenant,
port=self.env.port_distributor.get_port(),
)
self.num_instances += 1
self.instances.append(pg)
self.endpoints.append(ep)
return pg.create_start(
return ep.create_start(
branch_name=branch_name,
node_name=node_name,
endpoint_id=endpoint_id,
config_lines=config_lines,
lsn=lsn,
)
@@ -2388,30 +2406,33 @@ class PostgresFactory:
def create(
self,
branch_name: str,
node_name: Optional[str] = None,
endpoint_id: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
lsn: Optional[Lsn] = None,
config_lines: Optional[List[str]] = None,
) -> Postgres:
pg = Postgres(
) -> Endpoint:
ep = Endpoint(
self.env,
tenant_id=tenant_id or self.env.initial_tenant,
port=self.env.port_distributor.get_port(),
)
self.num_instances += 1
self.instances.append(pg)
if endpoint_id is None:
endpoint_id = self.env.generate_endpoint_id()
return pg.create(
self.num_instances += 1
self.endpoints.append(ep)
return ep.create(
branch_name=branch_name,
node_name=node_name,
endpoint_id=endpoint_id,
lsn=lsn,
config_lines=config_lines,
)
def stop_all(self) -> "PostgresFactory":
for pg in self.instances:
pg.stop()
def stop_all(self) -> "EndpointFactory":
for ep in self.endpoints:
ep.stop()
return self
@@ -2786,16 +2807,16 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]:
def check_restored_datadir_content(
test_output_dir: Path,
env: NeonEnv,
pg: Postgres,
endpoint: Endpoint,
):
# Get the timeline ID. We need it for the 'basebackup' command
timeline = TimelineId(pg.safe_psql("SHOW neon.timeline_id")[0][0])
timeline = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0])
# stop postgres to ensure that files won't change
pg.stop()
endpoint.stop()
# Take a basebackup from pageserver
restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir"
restored_dir_path = env.repo_dir / f"{endpoint.endpoint_id}_restored_datadir"
restored_dir_path.mkdir(exist_ok=True)
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
@@ -2805,7 +2826,7 @@ def check_restored_datadir_content(
{psql_path} \
--no-psqlrc \
postgres://localhost:{env.pageserver.service_port.pg} \
-c 'basebackup {pg.tenant_id} {timeline}' \
-c 'basebackup {endpoint.tenant_id} {timeline}' \
| tar -x -C {restored_dir_path}
"""
@@ -2822,8 +2843,8 @@ def check_restored_datadir_content(
assert result.returncode == 0
# list files we're going to compare
assert pg.pgdata_dir
pgdata_files = list_files_to_compare(Path(pg.pgdata_dir))
assert endpoint.pgdata_dir
pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir))
restored_files = list_files_to_compare(restored_dir_path)
# check that file sets are equal
@@ -2834,12 +2855,12 @@ def check_restored_datadir_content(
# We've already filtered all mismatching files in list_files_to_compare(),
# so here expect that the content is identical
(match, mismatch, error) = filecmp.cmpfiles(
pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False
endpoint.pgdata_dir, restored_dir_path, pgdata_files, shallow=False
)
log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}")
for f in mismatch:
f1 = os.path.join(pg.pgdata_dir, f)
f1 = os.path.join(endpoint.pgdata_dir, f)
f2 = os.path.join(restored_dir_path, f)
stdout_filename = "{}.filediff".format(f2)
@@ -2854,24 +2875,24 @@ def check_restored_datadir_content(
def wait_for_last_flush_lsn(
env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
) -> Lsn:
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
def wait_for_wal_insert_lsn(
env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId
) -> Lsn:
"""Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
def fork_at_current_lsn(
env: NeonEnv,
pg: Postgres,
endpoint: Endpoint,
new_branch_name: str,
ancestor_branch_name: str,
tenant_id: Optional[TenantId] = None,
@@ -2881,7 +2902,7 @@ def fork_at_current_lsn(
The "last LSN" is taken from the given Postgres instance. The pageserver will wait for all the
the WAL up to that LSN to arrive in the pageserver before creating the branch.
"""
current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
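# Illustrative end-to-end sketch (not part of the diff): tests now drive
# computes through env.endpoints instead of env.postgres. Assumes a NeonEnv
# `env`; the branch name is made up.
env.neon_cli.create_branch("example_branch")
endpoint = env.endpoints.create_start("example_branch")
endpoint.safe_psql("SELECT 1")
endpoint.stop_and_destroy()  # removes .neon/endpoints/<endpoint id>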


@@ -52,13 +52,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
def run_pgbench(branch: str):
log.info(f"Start a pgbench workload on branch {branch}")
pg = env.postgres.create_start(branch, tenant_id=tenant)
connstr = pg.connstr()
endpoint = env.endpoints.create_start(branch, tenant_id=tenant)
connstr = endpoint.connstr()
pg_bin.run_capture(["pgbench", "-i", connstr])
pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr])
pg.stop()
endpoint.stop()
env.neon_cli.create_branch("b0", tenant_id=tenant)
@@ -96,8 +96,8 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int):
env.neon_cli.create_branch("b0")
pg = env.postgres.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()])
endpoint = env.endpoints.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
branch_creation_durations = []
@@ -124,15 +124,15 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
timeline_id = env.neon_cli.create_branch("root")
pg = env.postgres.create_start("root")
with closing(pg.connect()) as conn:
endpoint = env.endpoints.create_start("root")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
for i in range(10000):
cur.execute(f"CREATE TABLE t{i} as SELECT g FROM generate_series(1, 1000) g")
# Wait for the pageserver to finish processing all the pending WALs,
# as we don't want the LSN wait time to be included during the branch creation
flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
wait_for_last_record_lsn(
env.pageserver.http_client(), env.initial_tenant, timeline_id, flush_lsn
)
@@ -142,7 +142,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
# run a concurrent insertion to make the ancestor "busy" during the branch creation
thread = threading.Thread(
target=pg.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",)
target=endpoint.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",)
)
thread.start()


@@ -42,41 +42,41 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
env.neon_cli.create_branch("root")
pg_root = env.postgres.create_start("root")
pg_bin.run_capture(["pgbench", "-i", pg_root.connstr(), "-s10"])
endpoint_root = env.endpoints.create_start("root")
pg_bin.run_capture(["pgbench", "-i", endpoint_root.connstr(), "-s10"])
fork_at_current_lsn(env, pg_root, "child", "root")
fork_at_current_lsn(env, endpoint_root, "child", "root")
pg_child = env.postgres.create_start("child")
endpoint_child = env.endpoints.create_start("child")
run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", pg_root.connstr()])
run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", pg_child.connstr()])
run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", endpoint_root.connstr()])
run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", endpoint_child.connstr()])
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
pg_root = env.postgres.create_start("root")
endpoint_root = env.endpoints.create_start("root")
pg_root.safe_psql(
endpoint_root.safe_psql(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
)
env.neon_cli.create_branch("child", "root")
pg_child = env.postgres.create_start("child")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):
pg_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
with neon_compare.record_duration("child_run_duration"):
pg_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)")
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
pg_root = env.postgres.create_start("root")
endpoint_root = env.endpoints.create_start("root")
pg_root.safe_psql_many(
endpoint_root.safe_psql_many(
[
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
"INSERT INTO foo SELECT FROM generate_series(1,1000000)",
@@ -84,12 +84,12 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
)
env.neon_cli.create_branch("child", "root")
pg_child = env.postgres.create_start("child")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):
pg_root.safe_psql("SELECT count(*) from foo")
endpoint_root.safe_psql("SELECT count(*) from foo")
with neon_compare.record_duration("child_run_duration"):
pg_child.safe_psql("SELECT count(*) from foo")
endpoint_child.safe_psql("SELECT count(*) from foo")
# -----------------------------------------------------------------------


@@ -35,14 +35,14 @@ def test_bulk_tenant_create(
# if use_safekeepers == 'with_sa':
# wa_factory.start_n_new(3)
pg_tenant = env.postgres.create_start(
endpoint_tenant = env.endpoints.create_start(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
end = timeit.default_timer()
time_slices.append(end - start)
pg_tenant.stop()
endpoint_tenant.stop()
zenbenchmark.record(
"tenant_creation_time",


@@ -18,8 +18,8 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
timeline_id = env.neon_cli.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
pg = env.postgres.create_start("test_bulk_update")
cur = pg.connect().cursor()
endpoint = env.endpoints.create_start("test_bulk_update")
cur = endpoint.connect().cursor()
cur.execute("set statement_timeout=0")
cur.execute(f"create table t(x integer) WITH (fillfactor={fillfactor})")
@@ -28,13 +28,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
cur.execute(f"insert into t values (generate_series(1,{n_records}))")
cur.execute("vacuum t")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
with zenbenchmark.record_duration("update-no-prefetch"):
cur.execute("update t set x=x+1")
cur.execute("vacuum t")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
with zenbenchmark.record_duration("delete-no-prefetch"):
cur.execute("delete from t")
@@ -50,13 +50,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
cur.execute(f"insert into t2 values (generate_series(1,{n_records}))")
cur.execute("vacuum t2")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
with zenbenchmark.record_duration("update-with-prefetch"):
cur.execute("update t2 set x=x+1")
cur.execute("vacuum t2")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
with zenbenchmark.record_duration("delete-with-prefetch"):
cur.execute("delete from t2")


@@ -33,11 +33,11 @@ def test_compaction(neon_compare: NeonCompare):
# Create some tables, and run a bunch of INSERTs and UPDATes on them,
# to generate WAL and layers
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"]
)
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
for i in range(100):
cur.execute(f"create table tbl{i} (i int, j int);")
@@ -45,7 +45,7 @@ def test_compaction(neon_compare: NeonCompare):
for j in range(100):
cur.execute(f"update tbl{i} set j = {j};")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
# First compaction generates L1 layers
with neon_compare.zenbenchmark.record_duration("compaction"):


@@ -2,13 +2,13 @@ import threading
import pytest
from fixtures.compare_fixtures import PgCompare
from fixtures.neon_fixtures import Postgres
from fixtures.neon_fixtures import PgProtocol
from performance.test_perf_pgbench import get_scales_matrix
from performance.test_wal_backpressure import record_read_latency
def start_write_workload(pg: Postgres, scale: int = 10):
def start_write_workload(pg: PgProtocol, scale: int = 10):
with pg.connect().cursor() as cur:
cur.execute(f"create table big as select generate_series(1,{scale*100_000})")


@@ -25,8 +25,8 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
)
env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant)
pg = env.postgres.create_start("test_layer_map", tenant_id=tenant)
cur = pg.connect().cursor()
endpoint = env.endpoints.create_start("test_layer_map", tenant_id=tenant)
cur = endpoint.connect().cursor()
cur.execute("create table t(x integer)")
for i in range(n_iters):
cur.execute(f"insert into t values (generate_series(1,{n_records}))")


@@ -14,19 +14,19 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
# Start
env.neon_cli.create_branch("test_startup")
with zenbenchmark.record_duration("startup_time"):
pg = env.postgres.create_start("test_startup")
pg.safe_psql("select 1;")
endpoint = env.endpoints.create_start("test_startup")
endpoint.safe_psql("select 1;")
# Restart
pg.stop_and_destroy()
endpoint.stop_and_destroy()
with zenbenchmark.record_duration("restart_time"):
pg.create_start("test_startup")
pg.safe_psql("select 1;")
endpoint.create_start("test_startup")
endpoint.safe_psql("select 1;")
# Fill up
num_rows = 1000000 # 30 MB
num_tables = 100
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
for i in range(num_tables):
cur.execute(f"create table t_{i} (i integer);")
@@ -34,18 +34,18 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
# Read
with zenbenchmark.record_duration("read_time"):
pg.safe_psql("select * from t_0;")
endpoint.safe_psql("select * from t_0;")
# Read again
with zenbenchmark.record_duration("second_read_time"):
pg.safe_psql("select * from t_0;")
endpoint.safe_psql("select * from t_0;")
# Restart
pg.stop_and_destroy()
endpoint.stop_and_destroy()
with zenbenchmark.record_duration("restart_with_data"):
pg.create_start("test_startup")
pg.safe_psql("select 1;")
endpoint.create_start("test_startup")
endpoint.safe_psql("select 1;")
# Read
with zenbenchmark.record_duration("read_after_restart"):
pg.safe_psql("select * from t_0;")
endpoint.safe_psql("select * from t_0;")


@@ -22,8 +22,8 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
pageserver_http.configure_failpoints(("flush-frozen-before-sync", "sleep(10000)"))
pg_branch0 = env.postgres.create_start("main", tenant_id=tenant)
branch0_cur = pg_branch0.connect().cursor()
endpoint_branch0 = env.endpoints.create_start("main", tenant_id=tenant)
branch0_cur = endpoint_branch0.connect().cursor()
branch0_timeline = TimelineId(query_scalar(branch0_cur, "SHOW neon.timeline_id"))
log.info(f"b0 timeline {branch0_timeline}")
@@ -44,10 +44,10 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
# Create branch1.
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant)
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
log.info("postgres is running on 'branch1' branch")
branch1_cur = pg_branch1.connect().cursor()
branch1_cur = endpoint_branch1.connect().cursor()
branch1_timeline = TimelineId(query_scalar(branch1_cur, "SHOW neon.timeline_id"))
log.info(f"b1 timeline {branch1_timeline}")
@@ -67,9 +67,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
# Create branch2.
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant)
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
log.info("postgres is running on 'branch2' branch")
branch2_cur = pg_branch2.connect().cursor()
branch2_cur = endpoint_branch2.connect().cursor()
branch2_timeline = TimelineId(query_scalar(branch2_cur, "SHOW neon.timeline_id"))
log.info(f"b2 timeline {branch2_timeline}")


@@ -64,9 +64,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
branch = "test_compute_auth_to_pageserver"
env.neon_cli.create_branch(branch)
pg = env.postgres.create_start(branch)
endpoint = env.endpoints.create_start(branch)
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -83,7 +83,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
timeline_id = env.neon_cli.create_branch(branch)
env.postgres.create_start(branch)
env.endpoints.create_start(branch)
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
invalid_tenant_token = env.auth_keys.generate_tenant_token(TenantId.generate())


@@ -5,7 +5,7 @@ from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, Postgres
from fixtures.neon_fixtures import Endpoint, NeonEnvBuilder
pytest_plugins = "fixtures.neon_fixtures"
@@ -20,10 +20,10 @@ def pg_cur(pg):
# Periodically check that all backpressure lags are below the configured threshold,
# assert if they are not.
# If the check query fails, stop the thread. Main thread should notice that and stop the test.
def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5):
def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_interval=5):
log.info("checks started")
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
@@ -41,7 +41,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
max_replication_apply_lag_bytes = res[0]
log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes")
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
while not stop_event.is_set():
try:
cur.execute(
@@ -102,14 +102,14 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
# Create a branch for us
env.neon_cli.create_branch("test_backpressure")
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]
)
log.info("postgres is running on 'test_backpressure' branch")
# setup check thread
check_stop_event = threading.Event()
check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event))
check_thread = threading.Thread(target=check_backpressure, args=(endpoint, check_stop_event))
check_thread.start()
# Configure failpoint to slow down walreceiver ingest
@@ -125,7 +125,7 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
# because of the lag and waiting for lsn to replay to arrive.
time.sleep(2)
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
# Create and initialize test table
cur.execute("CREATE TABLE foo(x bigint)")


@@ -15,4 +15,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv):
pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))
with pytest.raises(Exception, match="basebackup-before-control-file"):
env.postgres.create_start("test_basebackup_error")
env.endpoints.create_start("test_basebackup_error")


@@ -67,9 +67,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
)
timeline_main = env.neon_cli.create_timeline("test_main", tenant_id=tenant)
pg_main = env.postgres.create_start("test_main", tenant_id=tenant)
endpoint_main = env.endpoints.create_start("test_main", tenant_id=tenant)
main_cur = pg_main.connect().cursor()
main_cur = endpoint_main.connect().cursor()
main_cur.execute(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
@@ -90,9 +90,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
env.neon_cli.create_branch(
"test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1
)
pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant)
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
branch_cur = pg_branch.connect().cursor()
branch_cur = endpoint_branch.connect().cursor()
branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)")
assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000
@@ -142,8 +142,8 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
)
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
pg0 = env.postgres.create_start("b0", tenant_id=tenant)
res = pg0.safe_psql_many(
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
res = endpoint0.safe_psql_many(
queries=[
"CREATE TABLE t(key serial primary key)",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",


@@ -18,10 +18,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch("test_branch_behind")
pgmain = env.postgres.create_start("test_branch_behind")
endpoint_main = env.endpoints.create_start("test_branch_behind")
log.info("postgres is running on 'test_branch_behind' branch")
main_cur = pgmain.connect().cursor()
main_cur = endpoint_main.connect().cursor()
timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id"))
@@ -74,15 +74,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
)
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
pg_more = env.postgres.create_start("test_branch_behind_more")
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
endpoint_more = env.endpoints.create_start("test_branch_behind_more")
# On the 'hundred' branch, we should see only 100 rows
hundred_cur = pg_hundred.connect().cursor()
hundred_cur = endpoint_hundred.connect().cursor()
assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100
# On the 'more' branch, we should see 100200 rows
more_cur = pg_more.connect().cursor()
more_cur = endpoint_more.connect().cursor()
assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100
# All the rows are visible on the main branch
@@ -94,8 +94,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_branch(
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
)
pg = env.postgres.create_start("test_branch_segment_boundary")
assert pg.safe_psql("SELECT 1")[0][0] == 1
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
# branch at pre-initdb lsn
with pytest.raises(Exception, match="invalid branch start lsn: .*"):


@@ -5,7 +5,7 @@ from typing import List
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
from fixtures.neon_fixtures import Endpoint, NeonEnv, PgBin
from fixtures.types import Lsn
from fixtures.utils import query_scalar
from performance.test_perf_pgbench import get_scales_matrix
@@ -40,20 +40,20 @@ def test_branching_with_pgbench(
}
)
def run_pgbench(pg: Postgres):
connstr = pg.connstr()
def run_pgbench(connstr: str):
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", "-T15", connstr])
env.neon_cli.create_branch("b0", tenant_id=tenant)
pgs: List[Postgres] = []
pgs.append(env.postgres.create_start("b0", tenant_id=tenant))
endpoints: List[Endpoint] = []
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True))
threads.append(
threading.Thread(target=run_pgbench, args=(endpoints[0].connstr(),), daemon=True)
)
threads[-1].start()
thread_limit = 4
@@ -79,16 +79,18 @@ def test_branching_with_pgbench(
else:
env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant)
pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant))
endpoints.append(env.endpoints.create_start("b{}".format(i + 1), tenant_id=tenant))
threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True))
threads.append(
threading.Thread(target=run_pgbench, args=(endpoints[-1].connstr(),), daemon=True)
)
threads[-1].start()
for thread in threads:
thread.join()
for pg in pgs:
res = pg.safe_psql("SELECT count(*) from pgbench_accounts")
for ep in endpoints:
res = ep.safe_psql("SELECT count(*) from pgbench_accounts")
assert res[0] == (100000 * scale,)
@@ -110,11 +112,11 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
env = neon_simple_env
env.neon_cli.create_branch("b0")
pg0 = env.postgres.create_start("b0")
endpoint0 = env.endpoints.create_start("b0")
pg_bin.run_capture(["pgbench", "-i", pg0.connstr()])
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
with pg0.cursor() as cur:
with endpoint0.cursor() as cur:
curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ`
@@ -123,6 +125,6 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
pg1 = env.postgres.create_start("b1")
endpoint1 = env.endpoints.create_start("b1")
pg_bin.run_capture(["pgbench", "-i", pg1.connstr()])
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])


@@ -4,7 +4,7 @@ from typing import List, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder
from fixtures.types import TenantId, TimelineId
@@ -24,17 +24,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
]
)
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
for n in range(4):
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
with pg.cursor() as cur:
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")
pg.stop()
tenant_timelines.append((tenant_id, timeline_id, pg))
endpoint.stop()
tenant_timelines.append((tenant_id, timeline_id, endpoint))
# Stop the pageserver
env.pageserver.stop()


@@ -24,14 +24,14 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
"autovacuum_freeze_max_age=100000",
]
pg = env.postgres.create_start("test_clog_truncate", config_lines=config)
endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
log.info("postgres is running on test_clog_truncate branch")
# Install extension containing function needed for test
pg.safe_psql("CREATE EXTENSION neon_test_utils")
endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
# Consume many xids to advance clog
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("select test_consume_xids(1000*1000*10);")
log.info("xids consumed")
@@ -44,7 +44,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# wait for autovacuum to truncate the pg_xact
# XXX Is it worth to add a timeout here?
pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000")
pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000")
log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
while os.path.isfile(pg_xact_0000_path):
@@ -52,7 +52,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
time.sleep(5)
# checkpoint to advance latest lsn
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CHECKPOINT;")
lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()")
@@ -61,10 +61,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
env.neon_cli.create_branch(
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
)
pg2 = env.postgres.create_start("test_clog_truncate_new")
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")
log.info("postgres is running on test_clog_truncate_new branch")
# check that new node doesn't contain truncated segment
pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000")
pg_xact_0000_path_new = os.path.join(endpoint2.pg_xact_dir_path(), "0000")
log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
assert os.path.isfile(pg_xact_0000_path_new) is False


@@ -24,8 +24,8 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
def start_workload():
env.neon_cli.create_branch("test_lsof_pageserver_pid")
pg = env.postgres.create_start("test_lsof_pageserver_pid")
with closing(pg.connect()) as conn:
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x")
cur.execute("update foo set x=x+1")


@@ -1,3 +1,4 @@
import copy
import os
import shutil
import subprocess
@@ -55,29 +56,31 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
neon_env_builder.preserve_database_files = True
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
pg_bin.run(["pgbench", "--initialize", "--scale=10", endpoint.connstr()])
pg_bin.run(["pgbench", "--time=60", "--progress=2", endpoint.connstr()])
pg_bin.run(
["pg_dumpall", f"--dbname={endpoint.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]
)
snapshot_config = toml.load(test_output_dir / "repo" / "config")
tenant_id = snapshot_config["default_tenant_id"]
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
pageserver_http = env.pageserver.http_client()
lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn)
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn)
env.postgres.stop_all()
env.endpoints.stop_all()
for sk in env.safekeepers:
sk.stop()
env.pageserver.stop()
@@ -98,6 +101,9 @@ def test_backward_compatibility(
pg_version: str,
request: FixtureRequest,
):
"""
Test that the new binaries can read old data
"""
compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
assert (
compatibility_snapshot_dir_env is not None
@@ -120,6 +126,7 @@ def test_backward_compatibility(
check_neon_works(
test_output_dir / "compatibility_snapshot" / "repo",
neon_binpath,
neon_binpath,
pg_distrib_dir,
pg_version,
port_distributor,
@@ -148,7 +155,11 @@ def test_forward_compatibility(
port_distributor: PortDistributor,
pg_version: str,
request: FixtureRequest,
neon_binpath: Path,
):
"""
Test that the old binaries can read new data
"""
compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN")
assert compatibility_neon_bin_env is not None, (
"COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries "
@@ -183,6 +194,7 @@ def test_forward_compatibility(
check_neon_works(
test_output_dir / "compatibility_snapshot" / "repo",
compatibility_neon_bin,
neon_binpath,
compatibility_postgres_distrib_dir,
pg_version,
port_distributor,
@@ -223,9 +235,13 @@ def prepare_snapshot(
for logfile in repo_dir.glob("**/*.log"):
logfile.unlink()
# Remove tenants data for compute
for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"):
shutil.rmtree(tenant)
# Remove old computes in 'endpoints'. Old versions of the control plane used a directory
# called "pgdatadirs". Delete it, too.
if (repo_dir / "endpoints").exists():
shutil.rmtree(repo_dir / "endpoints")
if (repo_dir / "pgdatadirs").exists():
shutil.rmtree(repo_dir / "pgdatadirs")
os.mkdir(repo_dir / "endpoints")
# Remove wal-redo temp directory if it exists. Newer pageserver versions don't create
# them anymore, but old versions did.
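
On the layout this cleanup targets: endpoint data directories now sit directly under `<repo>/endpoints/<endpoint_id>`, with no tenant component in the path. A minimal sketch of enumerating them under that assumption; `list_endpoints` is a hypothetical helper, not part of the fixtures:

```
from pathlib import Path
from typing import List

def list_endpoints(repo_dir: Path) -> List[str]:
    # One subdirectory per endpoint under <repo>/endpoints; the old scheme
    # nested data dirs under pgdatadirs/tenants/<tenant_id>/ instead.
    endpoints_dir = repo_dir / "endpoints"
    if not endpoints_dir.is_dir():
        return []
    return sorted(p.name for p in endpoints_dir.iterdir() if p.is_dir())
```
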
@@ -326,7 +342,8 @@ def get_neon_version(neon_binpath: Path):
def check_neon_works(
repo_dir: Path,
neon_binpath: Path,
neon_target_binpath: Path,
neon_current_binpath: Path,
pg_distrib_dir: Path,
pg_version: str,
port_distributor: PortDistributor,
@@ -336,7 +353,7 @@ def check_neon_works(
):
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
snapshot_config["neon_distrib_dir"] = str(neon_binpath)
snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
@@ -347,17 +364,25 @@ def check_neon_works(
config.repo_dir = repo_dir
config.pg_version = pg_version
config.initial_tenant = snapshot_config["default_tenant_id"]
config.neon_binpath = neon_binpath
config.pg_distrib_dir = pg_distrib_dir
config.preserve_database_files = True
cli = NeonCli(config)
cli.raw_cli(["start"])
request.addfinalizer(lambda: cli.raw_cli(["stop"]))
# Use the "target" binaries to launch the storage nodes
config_target = config
config_target.neon_binpath = neon_target_binpath
cli_target = NeonCli(config_target)
# And the current binaries to launch computes
config_current = copy.copy(config)
config_current.neon_binpath = neon_current_binpath
cli_current = NeonCli(config_current)
cli_target.raw_cli(["start"])
request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
pg_port = port_distributor.get_port()
cli.pg_start("main", port=pg_port)
request.addfinalizer(lambda: cli.pg_stop("main"))
cli_current.endpoint_start("main", port=pg_port)
request.addfinalizer(lambda: cli_current.endpoint_stop("main"))
connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
pg_bin.run(["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"])


@@ -13,10 +13,10 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
ctl = ComputeCtl(env)
env.neon_cli.create_branch("test_compute_ctl", "main")
pg = env.postgres.create_start("test_compute_ctl")
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
endpoint = env.endpoints.create_start("test_compute_ctl")
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
with open(pg.config_file_path(), "r") as f:
with open(endpoint.config_file_path(), "r") as f:
cfg_lines = f.readlines()
cfg_map = {}
for line in cfg_lines:
@@ -24,10 +24,13 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
k, v = line.split("=")
cfg_map[k] = v.strip("\n '\"")
log.info(f"postgres config: {cfg_map}")
pgdata = pg.pg_data_dir_path()
pgdata = endpoint.pg_data_dir_path()
pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres")
pg.stop_and_destroy()
endpoint.stop_and_destroy()
# stop_and_destroy removes the whole endpoint directory. Recreate it.
Path(pgdata).mkdir(parents=True)
spec = (
"""


@@ -12,10 +12,10 @@ def test_config(neon_simple_env: NeonEnv):
env.neon_cli.create_branch("test_config", "empty")
# change config
pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"])
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
log.info("postgres is running on test_config branch")
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute(
"""


@@ -21,11 +21,11 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_crafted_wal_end")
pg = env.postgres.create("test_crafted_wal_end")
endpoint = env.endpoints.create("test_crafted_wal_end")
wal_craft = WalCraft(env)
pg.config(wal_craft.postgres_config())
pg.start()
res = pg.safe_psql_many(
endpoint.config(wal_craft.postgres_config())
endpoint.start()
res = endpoint.safe_psql_many(
queries=[
"CREATE TABLE keys(key int primary key)",
"INSERT INTO keys SELECT generate_series(1, 100)",
@@ -34,7 +34,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
)
assert res[-1][0] == (5050,)
wal_craft.in_existing(wal_type, pg.connstr())
wal_craft.in_existing(wal_type, endpoint.connstr())
log.info("Restarting all safekeepers and pageservers")
env.pageserver.stop()
@@ -43,7 +43,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env.pageserver.start()
log.info("Trying more queries")
res = pg.safe_psql_many(
res = endpoint.safe_psql_many(
queries=[
"SELECT SUM(key) FROM keys",
"INSERT INTO keys SELECT generate_series(101, 200)",
@@ -60,7 +60,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env.pageserver.start()
log.info("Trying more queries (again)")
res = pg.safe_psql_many(
res = endpoint.safe_psql_many(
queries=[
"SELECT SUM(key) FROM keys",
"INSERT INTO keys SELECT generate_series(201, 300)",


@@ -13,10 +13,10 @@ def test_createdb(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_createdb", "empty")
pg = env.postgres.create_start("test_createdb")
endpoint = env.endpoints.create_start("test_createdb")
log.info("postgres is running on 'test_createdb' branch")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
cur.execute("VACUUM FULL pg_class")
@@ -26,10 +26,10 @@ def test_createdb(neon_simple_env: NeonEnv):
# Create a branch
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start("test_createdb2")
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
for db in (pg, pg2):
for db in (endpoint, endpoint2):
with db.cursor(dbname="foodb") as cur:
# Check database size in both branches
cur.execute(
@@ -55,17 +55,17 @@ def test_createdb(neon_simple_env: NeonEnv):
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_dropdb", "empty")
pg = env.postgres.create_start("test_dropdb")
endpoint = env.endpoints.create_start("test_dropdb")
log.info("postgres is running on 'test_dropdb' branch")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE DATABASE foodb")
lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("DROP DATABASE foodb")
cur.execute("CHECKPOINT")
@@ -76,29 +76,29 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env.neon_cli.create_branch(
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
)
pg_before = env.postgres.create_start("test_before_dropdb")
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.neon_cli.create_branch(
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
)
pg_after = env.postgres.create_start("test_after_dropdb")
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop
pg_before.connect(dbname="foodb").close()
endpoint_before.connect(dbname="foodb").close()
# Test that database subdir exists on the branch before drop
assert pg_before.pgdata_dir
dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid)
assert endpoint_before.pgdata_dir
dbpath = pathlib.Path(endpoint_before.pgdata_dir) / "base" / str(dboid)
log.info(dbpath)
assert os.path.isdir(dbpath) is True
# Test that database subdir doesn't exist on the branch after drop
assert pg_after.pgdata_dir
dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid)
assert endpoint_after.pgdata_dir
dbpath = pathlib.Path(endpoint_after.pgdata_dir) / "base" / str(dboid)
log.info(dbpath)
assert os.path.isdir(dbpath) is False
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)
check_restored_datadir_content(test_output_dir, env, endpoint)


@@ -9,10 +9,10 @@ from fixtures.utils import query_scalar
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_createuser", "empty")
pg = env.postgres.create_start("test_createuser")
endpoint = env.endpoints.create_start("test_createuser")
log.info("postgres is running on 'test_createuser' branch")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
cur.execute("CREATE USER testuser with password %s", ("testpwd",))
@@ -22,7 +22,7 @@ def test_createuser(neon_simple_env: NeonEnv):
# Create a branch
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start("test_createuser2")
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to new branch as a new user
assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)]
assert endpoint2.safe_psql("select current_user", user="testuser") == [("testuser",)]


@@ -91,8 +91,8 @@ class EvictionEnv:
This assumes that the tenant is still at the state after pgbench -i.
"""
lsn = self.pgbench_init_lsns[tenant_id]
with self.neon_env.postgres.create_start("main", tenant_id=tenant_id, lsn=lsn) as pg:
self.pg_bin.run(["pgbench", "-S", pg.connstr()])
with self.neon_env.endpoints.create_start("main", tenant_id=tenant_id, lsn=lsn) as endpoint:
self.pg_bin.run(["pgbench", "-S", endpoint.connstr()])
def pageserver_start_with_disk_usage_eviction(
self, period, max_usage_pct, min_avail_bytes, mock_behavior
@@ -168,9 +168,9 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
}
)
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
pg_bin.run(["pgbench", "-i", f"-s{scale}", pg.connstr()])
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
pg_bin.run(["pgbench", "-i", f"-s{scale}", endpoint.connstr()])
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
timelines.append((tenant_id, timeline_id))
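
The test uses the endpoint as a context manager. Assuming that leaving the block stops the compute, the with-form above is equivalent to this explicit sketch (names as in the hunk; the stop call is the assumed cleanup):

```
def pgbench_select_only(env, pg_bin, tenant_id, lsn):
    # Explicit form of: with env.endpoints.create_start(...) as endpoint: ...
    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, lsn=lsn)
    try:
        pg_bin.run(["pgbench", "-S", endpoint.connstr()])
    finally:
        endpoint.stop()  # assumed equivalent of leaving the with-block
```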


@@ -4,7 +4,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fsm_truncate")
pg = env.postgres.create_start("test_fsm_truncate")
pg.safe_psql(
endpoint = env.endpoints.create_start("test_fsm_truncate")
endpoint.safe_psql(
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
)


@@ -24,10 +24,10 @@ def test_fullbackup(
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fullbackup")
pgmain = env.postgres.create_start("test_fullbackup")
endpoint_main = env.endpoints.create_start("test_fullbackup")
log.info("postgres is running on 'test_fullbackup' branch")
with pgmain.cursor() as cur:
with endpoint_main.cursor() as cur:
timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
# data loading may take a while, so increase statement timeout


@@ -5,9 +5,9 @@ import random
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
Postgres,
RemoteStorageKind,
wait_for_last_flush_lsn,
)
@@ -26,9 +26,9 @@ updates_performed = 0
# Run random UPDATEs on test table
async def update_table(pg: Postgres):
async def update_table(endpoint: Endpoint):
global updates_performed
pg_conn = await pg.connect_async()
pg_conn = await endpoint.connect_async()
while updates_performed < updates_to_perform:
updates_performed += 1
@@ -52,10 +52,10 @@ async def gc(env: NeonEnv, timeline: TimelineId):
# At the same time, run UPDATEs and GC
async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: TimelineId):
async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
workers = []
for worker_id in range(num_connections):
workers.append(asyncio.create_task(update_table(pg)))
workers.append(asyncio.create_task(update_table(endpoint)))
workers.append(asyncio.create_task(gc(env, timeline)))
# await all workers
@@ -72,10 +72,10 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_gc_aggressive", "main")
pg = env.postgres.create_start("test_gc_aggressive")
endpoint = env.endpoints.create_start("test_gc_aggressive")
log.info("postgres is running on test_gc_aggressive branch")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
# Create table, and insert the first 100 rows
@@ -89,7 +89,7 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
)
cur.execute("CREATE INDEX ON foo(id)")
asyncio.run(update_and_gc(env, pg, timeline))
asyncio.run(update_and_gc(env, endpoint, timeline))
cur.execute("SELECT COUNT(*), SUM(counter) FROM foo")
r = cur.fetchone()
@@ -110,11 +110,11 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_gc_index_upload", "main")
pg = env.postgres.create_start("test_gc_index_upload")
endpoint = env.endpoints.create_start("test_gc_index_upload")
pageserver_http = env.pageserver.http_client()
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id"))
@@ -146,7 +146,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
return int(total)
# Sanity check that the metric works
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
pageserver_http.timeline_gc(tenant_id, timeline_id, 10000)
before = get_num_remote_ops("index", "upload")


@@ -31,8 +31,8 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
"image_creation_threshold": "2",
}
)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
connstr = pg.connstr(options="-csynchronous_commit=off")
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
connstr = endpoint.connstr(options="-csynchronous_commit=off")
pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))


@@ -9,10 +9,10 @@ from pathlib import Path
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
PgBin,
Postgres,
)
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.types import Lsn, TenantId, TimelineId
@@ -72,7 +72,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
node_name = "import_from_vanilla"
endpoint_id = "ep-import_from_vanilla"
tenant = TenantId.generate()
timeline = TimelineId.generate()
@@ -113,7 +113,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
"--timeline-id",
str(timeline),
"--node-name",
node_name,
endpoint_id,
"--base-lsn",
start_lsn,
"--base-tarfile",
@@ -153,8 +153,8 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
wait_for_upload(client, tenant, timeline, Lsn(end_lsn))
# Check it worked
pg = env.postgres.create_start(node_name, tenant_id=tenant)
assert pg.safe_psql("select count(*) from t") == [(300000,)]
endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant)
assert endpoint.safe_psql("select count(*) from t") == [(300000,)]
@pytest.mark.timeout(600)
@@ -168,10 +168,10 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
)
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
pg = env.postgres.create_start("test_import_from_pageserver_small")
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
num_rows = 3000
lsn = _generate_data(num_rows, pg)
lsn = _generate_data(num_rows, endpoint)
_import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir)
@@ -185,14 +185,14 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
pg = env.postgres.create_start("test_import_from_pageserver_multisegment")
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
# is large enough to create multi-segment files. Typically, a segment file's size is
# at most 1GB. A large number of inserted rows (`30000000`) is used to increase the
# DB size to above 1GB. Related: https://github.com/neondatabase/neon/issues/2097.
num_rows = 30000000
lsn = _generate_data(num_rows, pg)
lsn = _generate_data(num_rows, endpoint)
logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[
"current_logical_size"
@@ -213,12 +213,12 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
assert cnt_seg_files > 0
def _generate_data(num_rows: int, pg: Postgres) -> Lsn:
def _generate_data(num_rows: int, endpoint: Endpoint) -> Lsn:
"""Generate a table with `num_rows` rows.
Returns:
the latest insert WAL's LSN"""
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# data loading may take a while, so increase statement timeout
cur.execute("SET statement_timeout='300s'")
@@ -263,7 +263,7 @@ def _import(
tar_output_file = result_basepath + ".stdout"
# Stop the first pageserver instance, erase all its data
env.postgres.stop_all()
env.endpoints.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.repo_dir) / "tenants"
@@ -278,7 +278,7 @@ def _import(
tenant = TenantId.generate()
# Import to pageserver
node_name = "import_from_pageserver"
endpoint_id = "ep-import_from_pageserver"
client = env.pageserver.http_client()
client.tenant_create(tenant)
env.neon_cli.raw_cli(
@@ -290,7 +290,7 @@ def _import(
"--timeline-id",
str(timeline),
"--node-name",
node_name,
endpoint_id,
"--base-lsn",
str(lsn),
"--base-tarfile",
@@ -305,8 +305,8 @@ def _import(
wait_for_upload(client, tenant, timeline, lsn)
# Check it worked
pg = env.postgres.create_start(node_name, tenant_id=tenant)
assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant)
assert endpoint.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
# Take another fullbackup
query = f"fullbackup { tenant} {timeline} {lsn}"


@@ -15,9 +15,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_large_schema(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
tables = 2 # 10 is too much for debug build
@@ -27,18 +27,18 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
# Restart compute. Restart is actually not strictly needed.
# It is done mostly because this test originally tries to model the problem reported by Ketteq.
pg.stop()
endpoint.stop()
# Kill and restart the pageserver.
# env.pageserver.stop(immediate=True)
# env.pageserver.start()
pg.start()
endpoint.start()
retry_sleep = 0.5
max_retries = 200
retries = 0
while True:
try:
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)")
for j in range(1, partitions + 1):
@@ -63,7 +63,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
raise
break
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
for i in range(1, tables + 1):
@@ -74,8 +74,8 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid")
# Check layer file sizes
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id)
for filename in os.listdir(timeline_path):
if filename.startswith("00000"):


@@ -27,13 +27,13 @@ def test_basic_eviction(
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
# Create a number of layers in the tenant
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
@@ -172,15 +172,15 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
env.initial_tenant = tenant_id # update_and_gc relies on this
ps_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
log.info("fill with data, creating delta & image layers, some of which are GC'able after")
# no particular reason to create the layers like this, but we are sure
# not to hit the image_creation_threshold here.
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("create table a (id bigserial primary key, some_value bigint not null)")
cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
ps_http.timeline_checkpoint(tenant_id, timeline_id)
# Create delta layers, then turn them into image layers.
@@ -191,19 +191,19 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
for i in range(0, 2):
for j in range(0, 3):
# create a minimal amount of "delta difficulty" for this table
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("update a set some_value = -some_value + %s", (j,))
with pg.cursor() as cur:
with endpoint.cursor() as cur:
# vacuuming should aid to reuse keys, though it's not really important
# with image_creation_threshold=1 which we will use on the last compaction
cur.execute("vacuum")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
if i == 1 and j == 2 and k == 1:
# last iteration; stop before checkpoint to avoid leaving an inmemory layer
pg.stop_and_destroy()
endpoint.stop_and_destroy()
ps_http.timeline_checkpoint(tenant_id, timeline_id)


@@ -20,7 +20,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
}
)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
pg = env.endpoints.create_start("main", tenant_id=tenant_id)
pg.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
@@ -64,8 +64,8 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
}
)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
pg.safe_psql_many(
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
"""INSERT INTO foo


@@ -12,10 +12,10 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
pgmain = env.postgres.create_start("test_lsn_mapping")
endpoint_main = env.endpoints.create_start("test_lsn_mapping")
log.info("postgres is running on 'test_lsn_mapping' branch")
cur = pgmain.connect().cursor()
cur = endpoint_main.connect().cursor()
# Create table, and insert rows, each in a separate transaction
# Disable synchronous_commit to make this initialization go faster.
#
@@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
cur.execute("INSERT INTO foo VALUES (-1)")
# Wait until WAL is received by pageserver
wait_for_last_flush_lsn(env, pgmain, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
with env.pageserver.http_client() as client:
# Check edge cases: timestamp in the future
@@ -61,9 +61,9 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
# Call get_lsn_by_timestamp to get the LSN
# Launch a new read-only node at that LSN, and check that only the rows
# that were supposed to be committed at that point in time are visible.
pg_here = env.postgres.create_start(
branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn
endpoint_here = env.endpoints.create_start(
branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
)
assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
pg_here.stop_and_destroy()
endpoint_here.stop_and_destroy()
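
The pattern here, starting a disposable read-only compute pinned to an LSN, running one query, and tearing it down, fits a small helper; `read_at_lsn` is hypothetical, while the fixture calls are the ones used above:

```
def read_at_lsn(env, lsn, query: str):
    # Hypothetical helper: start a compute at `lsn`, run one query, tear down.
    endpoint = env.endpoints.create_start(
        branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
    )
    try:
        return endpoint.safe_psql(query)[0][0]
    finally:
        endpoint.stop_and_destroy()
```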


@@ -123,9 +123,9 @@ def test_metric_collection(
# before pageserver, pageserver log might contain such errors in the end.
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
env.neon_cli.create_branch("test_metric_collection")
pg = env.postgres.create_start("test_metric_collection")
endpoint = env.endpoints.create_start("test_metric_collection")
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id"))
@@ -158,7 +158,7 @@ def test_metric_collection(
# upload some data to remote storage
if remote_storage_kind == RemoteStorageKind.LOCAL_FS:
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
pageserver_http = env.pageserver.http_client()
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
pageserver_http.timeline_gc(tenant_id, timeline_id, 10000)


@@ -12,10 +12,10 @@ from fixtures.utils import query_scalar
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_multixact", "empty")
pg = env.postgres.create_start("test_multixact")
endpoint = env.endpoints.create_start("test_multixact")
log.info("postgres is running on 'test_multixact' branch")
cur = pg.connect().cursor()
cur = endpoint.connect().cursor()
cur.execute(
"""
CREATE TABLE t1(i int primary key);
@@ -32,7 +32,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
connections = []
for i in range(nclients):
# Do not turn on autocommit. We want to hold the key-share locks.
conn = pg.connect(autocommit=False)
conn = endpoint.connect(autocommit=False)
connections.append(conn)
# On each iteration, we commit the previous transaction on a connection,
@@ -65,10 +65,10 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
# Branch at this point
env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
pg_new = env.postgres.create_start("test_multixact_new")
endpoint_new = env.endpoints.create_start("test_multixact_new")
log.info("postgres is running on 'test_multixact_new' branch")
next_multixact_id_new = pg_new.safe_psql(
next_multixact_id_new = endpoint_new.safe_psql(
"SELECT next_multixact_id FROM pg_control_checkpoint()"
)[0][0]
@@ -76,4 +76,4 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert next_multixact_id_new == next_multixact_id
# Check that we can restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)
check_restored_datadir_content(test_output_dir, env, endpoint)


@@ -9,9 +9,11 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
try:
env.neon_cli.start()
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)
env.neon_cli.pg_start(node_name="main", port=port_distributor.get_port())
env.neon_cli.endpoint_start(endpoint_id="ep-main", port=port_distributor.get_port())
env.neon_cli.create_branch(new_branch_name="migration_check")
env.neon_cli.pg_start(node_name="migration_check", port=port_distributor.get_port())
env.neon_cli.endpoint_start(
endpoint_id="ep-migration_check", port=port_distributor.get_port()
)
finally:
env.neon_cli.stop()
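
The renamed wrappers mirror the `neon_local endpoint` subcommand. A minimal start/stop round trip using them; the try/finally and the placeholder body are illustrative:

```
port = port_distributor.get_port()
env.neon_cli.endpoint_start(endpoint_id="ep-main", port=port)
try:
    pass  # exercise the endpoint here
finally:
    env.neon_cli.endpoint_stop("ep-main")
```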


@@ -8,9 +8,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_next_xid(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("CREATE TABLE t(x integer)")
@@ -19,17 +19,17 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
print(f"iteration {i} / {iterations}")
# Kill and restart the pageserver.
pg.stop()
endpoint.stop()
env.pageserver.stop(immediate=True)
env.pageserver.start()
pg.start()
endpoint.start()
retry_sleep = 0.5
max_retries = 200
retries = 0
while True:
try:
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute(f"INSERT INTO t values({i})")
conn.close()
@@ -48,7 +48,7 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
raise
break
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) FROM t")
assert cur.fetchone() == (iterations,)


@@ -6,9 +6,9 @@ from fixtures.pageserver.http import PageserverHttpClient
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = pg.safe_psql_many(
res_1 = endpoint.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
@@ -19,14 +19,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
assert res_1[-1][0] == (5000050000,)
# TODO check detach on live instance
log.info("stopping compute")
pg.stop()
endpoint.stop()
log.info("compute stopped")
pg.start()
res_2 = pg.safe_psql("SELECT sum(key) FROM t")
endpoint.start()
res_2 = endpoint.safe_psql("SELECT sum(key) FROM t")
assert res_2[0] == (5000050000,)
pg.stop()
endpoint.stop()
pageserver_http.tenant_detach(tenant_id)


@@ -19,10 +19,10 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_old_request_lsn", "main")
pg = env.postgres.create_start("test_old_request_lsn")
endpoint = env.endpoints.create_start("test_old_request_lsn")
log.info("postgres is running on test_old_request_lsn branch")
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
# Get the timeline ID of our branch. We need it for the 'do_gc' command


@@ -73,17 +73,17 @@ def test_ondemand_download_large_rel(
)
env.initial_tenant = tenant
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
# We want to make sure that the data is large enough that the keyspace is partitioned.
num_rows = 1000000
with pg.cursor() as cur:
with endpoint.cursor() as cur:
# data loading may take a while, so increase statement timeout
cur.execute("SET statement_timeout='300s'")
cur.execute(
@@ -106,7 +106,7 @@ def test_ondemand_download_large_rel(
log.info("uploads have finished")
##### Stop the first pageserver instance, erase all its data
pg.stop()
endpoint.stop()
env.pageserver.stop()
# remove all the layer files
@@ -117,7 +117,7 @@ def test_ondemand_download_large_rel(
##### Second start, restore the data and ensure it's the same
env.pageserver.start()
pg.start()
endpoint.start()
before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
# Probe in the middle of the table. There's a high chance that the beginning
@@ -125,7 +125,7 @@ def test_ondemand_download_large_rel(
# from other tables, and with the entry that stores the size of the
# relation, so they are likely already downloaded. But the middle of the
# table should not have been needed by anything yet.
with pg.cursor() as cur:
with endpoint.cursor() as cur:
assert query_scalar(cur, "select count(*) from tbl where id = 500000") == 1
after_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
@@ -167,17 +167,17 @@ def test_ondemand_download_timetravel(
)
env.initial_tenant = tenant
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
lsns = []
table_len = 10000
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"""
CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text);
@@ -192,7 +192,7 @@ def test_ondemand_download_timetravel(
lsns.append((0, current_lsn))
for checkpoint_number in range(1, 20):
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(f"UPDATE testtab SET checkpoint_number = {checkpoint_number}")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
lsns.append((checkpoint_number, current_lsn))
@@ -204,7 +204,7 @@ def test_ondemand_download_timetravel(
client.timeline_checkpoint(tenant_id, timeline_id)
##### Stop the first pageserver instance, erase all its data
env.postgres.stop_all()
env.endpoints.stop_all()
# wait until pageserver has successfully uploaded all the data to remote storage
wait_for_sk_commit_lsn_to_reach_remote_storage(
@@ -251,10 +251,10 @@ def test_ondemand_download_timetravel(
num_layers_downloaded = [0]
resident_size = [get_resident_physical_size()]
for checkpoint_number, lsn in lsns:
pg_old = env.postgres.create_start(
branch_name="main", node_name=f"test_old_lsn_{checkpoint_number}", lsn=lsn
endpoint_old = env.endpoints.create_start(
branch_name="main", endpoint_id=f"ep-old_lsn_{checkpoint_number}", lsn=lsn
)
with pg_old.cursor() as cur:
with endpoint_old.cursor() as cur:
# assert query_scalar(cur, f"select count(*) from testtab where checkpoint_number={checkpoint_number}") == 100000
assert (
query_scalar(
@@ -331,15 +331,15 @@ def test_download_remote_layers_api(
)
env.initial_tenant = tenant
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
table_len = 10000
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"""
CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text);
@@ -347,7 +347,7 @@ def test_download_remote_layers_api(
"""
)
env.postgres.stop_all()
env.endpoints.stop_all()
wait_for_sk_commit_lsn_to_reach_remote_storage(
tenant_id, timeline_id, env.safekeepers, env.pageserver
@@ -463,8 +463,8 @@ def test_download_remote_layers_api(
sk.start()
# ensure that all the data is back
pg_old = env.postgres.create_start(branch_name="main")
with pg_old.cursor() as cur:
endpoint_old = env.endpoints.create_start(branch_name="main")
with endpoint_old.cursor() as cur:
assert query_scalar(cur, "select count(*) from testtab") == table_len
@@ -513,17 +513,17 @@ def test_compaction_downloads_on_demand_without_image_creation(
env.initial_tenant = tenant_id
pageserver_http = env.pageserver.http_client()
with env.postgres.create_start("main") as pg:
with env.endpoints.create_start("main") as endpoint:
# no particular reason to create the layers like this, but we are sure
# not to hit the image_creation_threshold here.
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("create table a as select id::bigint from generate_series(1, 204800) s(id)")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("update a set id = -id")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
@@ -589,32 +589,32 @@ def test_compaction_downloads_on_demand_with_image_creation(
env.initial_tenant = tenant_id
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
# no particular reason to create the layers like this, but we are sure
# not to hit the image_creation_threshold here.
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("create table a (id bigserial primary key, some_value bigint not null)")
cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
for i in range(0, 2):
for j in range(0, 3):
# create a minimal amount of "delta difficulty" for this table
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("update a set some_value = -some_value + %s", (j,))
with pg.cursor() as cur:
with endpoint.cursor() as cur:
# vacuuming should aid to reuse keys, though it's not really important
# with image_creation_threshold=1 which we will use on the last compaction
cur.execute("vacuum")
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
if i == 1 and j == 2:
# last iteration; stop before checkpoint to avoid leaving an inmemory layer
pg.stop_and_destroy()
endpoint.stop_and_destroy()
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)


@@ -150,7 +150,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# Wait to make sure that we get the latest WAL receiver data.
# We need to wait here because it's possible that we don't have access to
@@ -163,7 +163,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
)
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(
number_of_iterations=5,
interval=1,


@@ -11,11 +11,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]
)
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
# Create table, and insert some rows.
@@ -59,10 +59,10 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
env.safekeepers[2].start()
# restart compute node
pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down")
endpoint.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down")
# Ensure that basebackup went correctly and pageserver returned all data
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
cur.execute("SELECT count(*) FROM foo")


@@ -11,9 +11,9 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_pageserver_restart")
pg = env.postgres.create_start("test_pageserver_restart")
endpoint = env.endpoints.create_start("test_pageserver_restart")
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
# Create table, and insert some rows. Make it big enough that it doesn't fit in
@@ -84,13 +84,13 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
}
)
env.neon_cli.create_timeline("test_pageserver_chaos", tenant_id=tenant)
pg = env.postgres.create_start("test_pageserver_chaos", tenant_id=tenant)
endpoint = env.endpoints.create_start("test_pageserver_chaos", tenant_id=tenant)
# Create table, and insert some rows. Make it big enough that it doesn't fit in
# shared_buffers, otherwise the SELECT after restart will just return answer
# from shared_buffers without hitting the page server, which defeats the point
# of this test.
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (id int, t text, updates int)")
cur.execute("CREATE INDEX ON foo (id)")
@@ -116,12 +116,12 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
# Update the whole table, then immediately kill and restart the pageserver
for i in range(1, 15):
pg.safe_psql("UPDATE foo set updates = updates + 1")
endpoint.safe_psql("UPDATE foo set updates = updates + 1")
# This kills the pageserver immediately, to simulate a crash
env.pageserver.stop(immediate=True)
env.pageserver.start()
# Check that all the updates are visible
num_updates = pg.safe_psql("SELECT sum(updates) FROM foo")[0][0]
num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0]
assert num_updates == i * 100000


@@ -5,7 +5,7 @@ import threading
import time
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
from fixtures.neon_fixtures import NeonEnv, PgBin
# Test restarting page server, while safekeeper and compute node keep
@@ -13,7 +13,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
pg = env.postgres.create_start("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_restarts = 10
scale = 10
@@ -23,13 +23,12 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
r".*Gc failed, retrying in \S+: Cannot run GC iteration on inactive tenant"
)
def run_pgbench(pg: Postgres):
connstr = pg.connstr()
def run_pgbench(connstr: str):
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr])
thread = threading.Thread(target=run_pgbench, args=(pg,), daemon=True)
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
thread.start()
for i in range(n_restarts):


@@ -2,7 +2,7 @@ import asyncio
from io import BytesIO
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, Postgres
from fixtures.neon_fixtures import Endpoint, NeonEnv
async def repeat_bytes(buf, repetitions: int):
@@ -10,7 +10,7 @@ async def repeat_bytes(buf, repetitions: int):
yield buf
async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str):
async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str):
buf = BytesIO()
for i in range(1000):
buf.write(
@@ -20,7 +20,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
copy_input = repeat_bytes(buf.read(), 5000)
pg_conn = await pg.connect_async()
pg_conn = await endpoint.connect_async()
# PgProtocol.connect_async sets statement_timeout to 2 minutes.
# That's not enough for this test, on a slow system in debug mode.
@@ -29,10 +29,10 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
await pg_conn.copy_to_table(table_name, source=copy_input)
async def parallel_load_same_table(pg: Postgres, n_parallel: int):
async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, "copytest")
worker = copy_test_data_to_table(endpoint, worker_id, "copytest")
workers.append(asyncio.create_task(worker))
# await all workers
@@ -43,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
env.neon_cli.create_branch("test_parallel_copy", "empty")
pg = env.postgres.create_start("test_parallel_copy")
endpoint = env.endpoints.create_start("test_parallel_copy")
log.info("postgres is running on 'test_parallel_copy' branch")
# Create test table
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("CREATE TABLE copytest (i int, t text)")
# Run COPY TO to load the table with parallel connections.
asyncio.run(parallel_load_same_table(pg, n_parallel))
asyncio.run(parallel_load_same_table(endpoint, n_parallel))
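
The loader fans out one COPY per connection as asyncio tasks and awaits them together. A stripped-down sketch of the fan-out; the worker body is a stand-in for `copy_to_table`, using the connection returned by `connect_async` as above:

```
import asyncio

async def copy_worker(endpoint, worker_id: int):
    conn = await endpoint.connect_async()
    # Stand-in for conn.copy_to_table("copytest", source=...): each worker
    # writes into the same table over its own connection.
    await conn.execute("INSERT INTO copytest VALUES ($1, 'sketch')", worker_id)

async def parallel_load(endpoint, n_parallel: int):
    workers = [asyncio.create_task(copy_worker(endpoint, i)) for i in range(n_parallel)]
    await asyncio.gather(*workers)
```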


@@ -24,8 +24,8 @@ def test_pg_regress(
env.neon_cli.create_branch("test_pg_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start("test_pg_regress")
pg.safe_psql("CREATE DATABASE regression")
endpoint = env.endpoints.create_start("test_pg_regress")
endpoint.safe_psql("CREATE DATABASE regression")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / "regress"
@@ -49,9 +49,9 @@ def test_pg_regress(
]
env_vars = {
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
@@ -61,10 +61,10 @@ def test_pg_regress(
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql("CHECKPOINT")
endpoint.safe_psql("CHECKPOINT")
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)
check_restored_datadir_content(test_output_dir, env, endpoint)
# Run the PostgreSQL "isolation" tests, in src/test/isolation.
@@ -85,8 +85,10 @@ def test_isolation(
env.neon_cli.create_branch("test_isolation", "empty")
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"])
pg.safe_psql("CREATE DATABASE isolation_regression")
endpoint = env.endpoints.create_start(
"test_isolation", config_lines=["max_prepared_transactions=100"]
)
endpoint.safe_psql("CREATE DATABASE isolation_regression")
# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / "regress"
@@ -109,9 +111,9 @@ def test_isolation(
]
env_vars = {
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
@@ -135,8 +137,8 @@ def test_sql_regress(
env.neon_cli.create_branch("test_sql_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start("test_sql_regress")
pg.safe_psql("CREATE DATABASE regression")
endpoint = env.endpoints.create_start("test_sql_regress")
endpoint.safe_psql("CREATE DATABASE regression")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / "regress"
@@ -160,9 +162,9 @@ def test_sql_regress(
]
env_vars = {
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
@@ -172,8 +174,8 @@ def test_sql_regress(
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql("CHECKPOINT")
pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0]
endpoint.safe_psql("CHECKPOINT")
endpoint.safe_psql("select pg_current_wal_insert_lsn()")[0][0]
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)
check_restored_datadir_content(test_output_dir, env, endpoint)
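
The wiring pattern these regress tests share, distilled into a sketch (the helper name is illustrative): `pg_regress` is an ordinary libpq client, so pointing the standard `PG*` variables at the endpoint is all it needs.

```python
from fixtures.neon_fixtures import Endpoint


def regress_env_vars(endpoint: Endpoint) -> dict:
    # pg_regress connects like any libpq client; hand it the endpoint's
    # connection parameters through the standard environment variables.
    return {
        "PGPORT": str(endpoint.default_options["port"]),
        "PGUSER": endpoint.default_options["user"],
        "PGHOST": endpoint.default_options["host"],
    }
```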


@@ -15,10 +15,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
)
env = neon_env_builder.init_start()
pgmain = env.postgres.create_start("main")
endpoint_main = env.endpoints.create_start("main")
log.info("postgres is running on 'main' branch")
main_pg_conn = pgmain.connect()
main_pg_conn = endpoint_main.connect()
main_cur = main_pg_conn.cursor()
timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id"))
@@ -62,10 +62,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
# It must have been preserved by PITR setting
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
pg_hundred = env.postgres.create_start("test_pitr_gc_hundred")
endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")
# On the 'hundred' branch, we should see only 100 rows
hundred_pg_conn = pg_hundred.connect()
hundred_pg_conn = endpoint_hundred.connect()
hundred_cur = hundred_pg_conn.cursor()
hundred_cur.execute("SELECT count(*) FROM foo")
assert hundred_cur.fetchone() == (100,)
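
A condensed sketch of the branch-at-LSN pattern used here (branch and table names are illustrative; `main_cur` is the cursor from the test above): capture an LSN, keep writing on `main`, then branch at the captured point and start an endpoint on the branch.

```python
from fixtures.utils import query_scalar

# capture the LSN after the first 100 rows
lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_flush_lsn()")
# ... insert more rows on 'main' ...

# branch at the captured LSN; the branch sees only the first 100 rows
env.neon_cli.create_branch("hundred", "main", ancestor_start_lsn=lsn_a)
endpoint = env.endpoints.create_start("hundred")
assert endpoint.safe_psql("SELECT count(*) FROM foo")[0][0] == 100
```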


@@ -21,22 +21,22 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
)
timeline = env.neon_cli.create_timeline("test_trace_replay", tenant_id=tenant)
pg = env.postgres.create_start("test_trace_replay", "main", tenant)
endpoint = env.endpoints.create_start("test_trace_replay", "main", tenant)
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("create table t (i integer);")
cur.execute(f"insert into t values (generate_series(1,{10000}));")
cur.execute("select count(*) from t;")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# wait until pageserver receives that data
pageserver_http = env.pageserver.http_client()
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
# Stop pg so we drop the connection and flush the traces
pg.stop()
# Stop postgres so we drop the connection and flush the traces
endpoint.stop()
trace_path = env.repo_dir / "traces" / str(tenant) / str(timeline)
assert trace_path.exists()


@@ -17,10 +17,10 @@ def test_read_validation(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation", "empty")
pg = env.postgres.create_start("test_read_validation")
endpoint = env.endpoints.create_start("test_read_validation")
log.info("postgres is running on 'test_read_validation' branch")
with closing(pg.connect()) as con:
with closing(endpoint.connect()) as con:
with con.cursor() as c:
for e in extensions:
c.execute("create extension if not exists {};".format(e))
@@ -144,10 +144,10 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
pg = env.postgres.create_start("test_read_validation_neg")
endpoint = env.endpoints.create_start("test_read_validation_neg")
log.info("postgres is running on 'test_read_validation_neg' branch")
with closing(pg.connect()) as con:
with closing(endpoint.connect()) as con:
with con.cursor() as c:
for e in extensions:
c.execute("create extension if not exists {};".format(e))


@@ -15,12 +15,12 @@ from fixtures.utils import query_scalar
def test_readonly_node(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_readonly_node", "empty")
pgmain = env.postgres.create_start("test_readonly_node")
endpoint_main = env.endpoints.create_start("test_readonly_node")
log.info("postgres is running on 'test_readonly_node' branch")
env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
main_pg_conn = pgmain.connect()
main_pg_conn = endpoint_main.connect()
main_cur = main_pg_conn.cursor()
# Create table, and insert the first 100 rows
@@ -61,23 +61,23 @@ def test_readonly_node(neon_simple_env: NeonEnv):
log.info("LSN after 400100 rows: " + lsn_c)
# Create first read-only node at the point where only 100 rows were inserted
pg_hundred = env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a
endpoint_hundred = env.endpoints.create_start(
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
)
# And another at the point where 200100 rows were inserted
pg_more = env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b
endpoint_more = env.endpoints.create_start(
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b
)
# On the 'hundred' node, we should see only 100 rows
hundred_pg_conn = pg_hundred.connect()
hundred_pg_conn = endpoint_hundred.connect()
hundred_cur = hundred_pg_conn.cursor()
hundred_cur.execute("SELECT count(*) FROM foo")
assert hundred_cur.fetchone() == (100,)
# On the 'more' node, we should see 200100 rows
more_pg_conn = pg_more.connect()
more_pg_conn = endpoint_more.connect()
more_cur = more_pg_conn.cursor()
more_cur.execute("SELECT count(*) FROM foo")
assert more_cur.fetchone() == (200100,)
@@ -87,21 +87,21 @@ def test_readonly_node(neon_simple_env: NeonEnv):
assert main_cur.fetchone() == (400100,)
# Check creating a node at segment boundary
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
branch_name="test_readonly_node",
node_name="test_branch_segment_boundary",
endpoint_id="ep-branch_segment_boundary",
lsn=Lsn("0/3000000"),
)
cur = pg.connect().cursor()
cur = endpoint.connect().cursor()
cur.execute("SELECT 1")
assert cur.fetchone() == (1,)
# Create node at pre-initdb lsn
with pytest.raises(Exception, match="invalid basebackup lsn"):
# compute node startup with invalid LSN should fail
env.postgres.create_start(
env.endpoints.create_start(
branch_name="test_readonly_node",
node_name="test_readonly_node_preinitdb",
endpoint_id="ep-readonly_node_preinitdb",
lsn=Lsn("0/42"),
)
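
The same call in isolation, as a sketch (the `endpoint_id` value is illustrative; `lsn_a` comes from the test above): a read-only endpoint is just an endpoint pinned to an LSN on an existing branch, and after this commit its ID must be globally unique rather than unique per tenant.

```python
ep_static = env.endpoints.create_start(
    branch_name="test_readonly_node",
    endpoint_id="ep-static-example",  # illustrative; must be globally unique
    lsn=lsn_a,
)
with ep_static.cursor() as cur:
    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (100,)
```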
@@ -111,16 +111,16 @@ def test_timetravel(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
env.neon_cli.create_branch("test_timetravel", "empty")
pg = env.postgres.create_start("test_timetravel")
endpoint = env.endpoints.create_start("test_timetravel")
client = env.pageserver.http_client()
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
lsns = []
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
"""
CREATE TABLE testtab(id serial primary key, iteration int, data text);
@@ -131,7 +131,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
lsns.append((0, current_lsn))
for i in range(1, 5):
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(f"UPDATE testtab SET iteration = {i}")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
lsns.append((i, current_lsn))
@@ -143,14 +143,14 @@ def test_timetravel(neon_simple_env: NeonEnv):
pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id)
##### Restart pageserver
env.postgres.stop_all()
env.endpoints.stop_all()
env.pageserver.stop()
env.pageserver.start()
for i, lsn in lsns:
pg_old = env.postgres.create_start(
branch_name="test_timetravel", node_name=f"test_old_lsn_{i}", lsn=lsn
endpoint_old = env.endpoints.create_start(
branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
)
with pg_old.cursor() as cur:
with endpoint_old.cursor() as cur:
assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000
assert query_scalar(cur, f"select count(*) from testtab where iteration<>{i}") == 0


@@ -22,10 +22,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")
pg = env.postgres.create_start("test_pageserver_recovery")
endpoint = env.endpoints.create_start("test_pageserver_recovery")
log.info("postgres is running on 'test_pageserver_recovery' branch")
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
with env.pageserver.http_client() as pageserver_http:
# Create and initialize test table
@@ -54,7 +54,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
env.pageserver.stop()
env.pageserver.start()
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("select count(*) from foo")
assert cur.fetchone() == (100000,)
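
The restart-and-verify pattern above, as a standalone sketch (the helper name and row count are illustrative): on startup the pageserver recovers from its WAL, so a fresh query through the same endpoint must see every acknowledged commit.

```python
from contextlib import closing


def restart_pageserver_and_verify(env, endpoint, expected_rows: int):
    # Kill and restart the pageserver, then re-read through the endpoint;
    # recovery must reproduce all acknowledged writes.
    env.pageserver.stop()
    env.pageserver.start()
    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("select count(*) from foo")
            assert cur.fetchone() == (expected_rows,)
```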


@@ -87,17 +87,17 @@ def test_remote_storage_backup_and_restore(
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
checkpoint_numbers = range(1, 3)
for checkpoint_number in checkpoint_numbers:
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"""
CREATE TABLE t{checkpoint_number}(id int primary key, data text);
@@ -126,7 +126,7 @@ def test_remote_storage_backup_and_restore(
)
##### Stop the first pageserver instance, erase all its data
env.postgres.stop_all()
env.endpoints.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.repo_dir) / "tenants"
@@ -187,8 +187,8 @@ def test_remote_storage_backup_and_restore(
), "current db Lsn should should not be less than the one stored on remote storage"
log.info("select some data, this will cause layers to be downloaded")
pg = env.postgres.create_start("main")
with pg.cursor() as cur:
endpoint = env.endpoints.create_start("main")
with endpoint.cursor() as cur:
for checkpoint_number in checkpoint_numbers:
assert (
query_scalar(cur, f"SELECT data FROM t{checkpoint_number} WHERE id = {data_id};")
@@ -238,9 +238,9 @@ def test_remote_storage_upload_queue_retries(
client = env.pageserver.http_client()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
def configure_storage_sync_failpoints(action):
client.configure_failpoints(
@@ -253,7 +253,7 @@ def test_remote_storage_upload_queue_retries(
def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data):
# create initial set of layers & upload them with failpoints configured
pg.safe_psql_many(
endpoint.safe_psql_many(
[
f"""
INSERT INTO foo (id, val)
@@ -266,7 +266,7 @@ def test_remote_storage_upload_queue_retries(
"VACUUM foo",
]
)
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
def get_queued_count(file_kind, op_kind):
val = client.get_remote_timeline_client_metric(
@@ -343,7 +343,7 @@ def test_remote_storage_upload_queue_retries(
# but how do we validate the result after restore?
env.pageserver.stop(immediate=True)
env.postgres.stop_all()
env.endpoints.stop_all()
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
@@ -357,8 +357,8 @@ def test_remote_storage_upload_queue_retries(
wait_until_tenant_active(client, tenant_id)
log.info("restarting postgres to validate")
pg = env.postgres.create_start("main", tenant_id=tenant_id)
with pg.cursor() as cur:
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000
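
A sketch of the drain-wait that these upload-queue tests build on top of `get_queued_count` (the polling helper itself is illustrative): once the failpoint is cleared, retries should empty the queue, so the metric is polled until nothing is left in flight.

```python
import time


def wait_upload_queue_drained(get_queued_count, timeout_s: float = 60.0):
    # Poll the remote_timeline_client metric until no layer uploads
    # remain queued; retries drain the queue once the failpoint is off.
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if get_queued_count(file_kind="layer", op_kind="upload") == 0:
            return
        time.sleep(0.5)
    raise TimeoutError("upload queue did not drain in time")
```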
@@ -394,13 +394,13 @@ def test_remote_timeline_client_calls_started_metric(
client = env.pageserver.http_client()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data):
# create initial set of layers & upload them with failpoints configured
pg.safe_psql_many(
endpoint.safe_psql_many(
[
f"""
INSERT INTO foo (id, val)
@@ -413,7 +413,7 @@ def test_remote_timeline_client_calls_started_metric(
"VACUUM foo",
]
)
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
calls_started: Dict[Tuple[str, str], List[int]] = {
("layer", "upload"): [0],
@@ -478,7 +478,7 @@ def test_remote_timeline_client_calls_started_metric(
)
env.pageserver.stop(immediate=True)
env.postgres.stop_all()
env.endpoints.stop_all()
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
@@ -492,8 +492,8 @@ def test_remote_timeline_client_calls_started_metric(
wait_until_tenant_active(client, tenant_id)
log.info("restarting postgres to validate")
pg = env.postgres.create_start("main", tenant_id=tenant_id)
with pg.cursor() as cur:
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000
# ensure that we updated the calls_started download metric
@@ -543,17 +543,17 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
)
return int(val) if val is not None else val
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
client.configure_failpoints(("before-upload-layer", "return"))
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (x INTEGER)",
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
]
)
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
# Kick off a checkpoint operation.
# It will get stuck in remote_client.wait_completion(), since the select query will have
@@ -627,8 +627,8 @@ def test_empty_branch_remote_storage_upload(
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
timelines_before_detach = set(
@@ -676,8 +676,8 @@ def test_empty_branch_remote_storage_upload_on_restart(
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg:
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id)
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
env.pageserver.stop()


@@ -11,10 +11,10 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_subxacts", "empty")
pg = env.postgres.create_start("test_subxacts")
endpoint = env.endpoints.create_start("test_subxacts")
log.info("postgres is running on 'test_subxacts' branch")
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
cur.execute(
@@ -37,4 +37,4 @@ def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
cur.execute("checkpoint")
# Check that we can restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)
check_restored_datadir_content(test_output_dir, env, endpoint)

View File

@@ -43,11 +43,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}"""
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
env.postgres.create_start(
"test_tenant_conf",
"main",
tenant,
)
env.endpoints.create_start("test_tenant_conf", "main", tenant)
# check the configuration of the default tenant
# it should match global configuration


@@ -7,9 +7,9 @@ import asyncpg
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
Postgres,
RemoteStorageKind,
available_remote_storages,
)
@@ -59,8 +59,8 @@ def test_tenant_reattach(
# create new tenant
tenant_id, timeline_id = env.neon_cli.create_tenant()
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
@@ -99,8 +99,8 @@ def test_tenant_reattach(
assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
# Check that we had to retry the downloads
@@ -157,11 +157,11 @@ async def sleep_and_reattach(pageserver_http: PageserverHttpClient, tenant_id: T
# async guts of test_tenant_reattach_while_busy test
async def reattach_while_busy(
env: NeonEnv, pg: Postgres, pageserver_http: PageserverHttpClient, tenant_id: TenantId
env: NeonEnv, endpoint: Endpoint, pageserver_http: PageserverHttpClient, tenant_id: TenantId
):
workers = []
for worker_id in range(num_connections):
pg_conn = await pg.connect_async()
pg_conn = await endpoint.connect_async()
workers.append(asyncio.create_task(update_table(pg_conn)))
workers.append(asyncio.create_task(sleep_and_reattach(pageserver_http, tenant_id)))
@@ -238,15 +238,15 @@ def test_tenant_reattach_while_busy(
conf={"checkpoint_distance": "100000"}
)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
cur = pg.connect().cursor()
cur = endpoint.connect().cursor()
cur.execute("CREATE TABLE t(id int primary key, counter int)")
cur.execute(f"INSERT INTO t SELECT generate_series(1,{num_rows}), 0")
# Run the test
asyncio.run(reattach_while_busy(env, pg, pageserver_http, tenant_id))
asyncio.run(reattach_while_busy(env, endpoint, pageserver_http, tenant_id))
# Verify table contents
assert query_scalar(cur, "SELECT count(*) FROM t") == num_rows
@@ -278,9 +278,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
pg.safe_psql_many(
endpoint.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
@@ -339,9 +339,9 @@ def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv):
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
pg.safe_psql_many(
endpoint.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
@@ -388,9 +388,9 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv):
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
pg.safe_psql_many(
endpoint.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
@@ -425,18 +425,18 @@ def test_detach_while_attaching(
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
# Create table, and insert some rows. Make it big enough that it doesn't fit in
# shared_buffers, otherwise the SELECT after restart will just return the answer
# from shared_buffers without hitting the page server, which defeats the point
# of this test.
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
@@ -477,7 +477,7 @@ def test_detach_while_attaching(
# cycle are still running, things could get really confusing..
pageserver_http.tenant_attach(tenant_id)
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("SELECT COUNT(*) FROM foo")
@@ -572,14 +572,14 @@ def test_ignored_tenant_download_missing_layers(
)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
data_id = 1
data_secret = "very secret secret"
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint)
tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
tenants_before_ignore.sort()
@@ -611,9 +611,9 @@ def test_ignored_tenant_download_missing_layers(
]
assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back"
pg.stop()
pg.start()
ensure_test_data(data_id, data_secret, pg)
endpoint.stop()
endpoint.start()
ensure_test_data(data_id, data_secret, endpoint)
# Tests that it's possible to `load` broken tenants:
@@ -631,10 +631,10 @@ def test_ignored_tenant_stays_broken_without_metadata(
)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
# ignore the tenant and remove its metadata
pageserver_http.tenant_ignore(tenant_id)
@@ -666,9 +666,9 @@ def test_load_attach_negatives(
)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
with pytest.raises(
@@ -707,16 +707,16 @@ def test_ignore_while_attaching(
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
pageserver_http = env.pageserver.http_client()
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
data_id = 1
data_secret = "very secret secret"
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint)
tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
@@ -754,9 +754,9 @@ def test_ignore_while_attaching(
wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
pg.stop()
pg.start()
ensure_test_data(data_id, data_secret, pg)
endpoint.stop()
endpoint.start()
ensure_test_data(data_id, data_secret, endpoint)
def insert_test_data(
@@ -765,9 +765,9 @@ def insert_test_data(
timeline_id: TimelineId,
data_id: int,
data: str,
pg: Postgres,
endpoint: Endpoint,
):
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"""
CREATE TABLE test(id int primary key, secret text);
@@ -787,8 +787,8 @@ def insert_test_data(
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
def ensure_test_data(data_id: int, data: str, pg: Postgres):
with pg.cursor() as cur:
def ensure_test_data(data_id: int, data: str, endpoint: Endpoint):
with endpoint.cursor() as cur:
assert (
query_scalar(cur, f"SELECT secret FROM test WHERE id = {data_id};") == data
), "Should have timeline data back"


@@ -7,11 +7,11 @@ from typing import Any, Dict, Optional, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonBroker,
NeonEnv,
NeonEnvBuilder,
PortDistributor,
Postgres,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
@@ -87,20 +87,20 @@ def new_pageserver_service(
@contextmanager
def pg_cur(pg):
with closing(pg.connect()) as conn:
def pg_cur(endpoint):
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
yield cur
def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event):
def load(endpoint: Endpoint, stop_event: threading.Event, load_ok_event: threading.Event):
log.info("load started")
inserted_ctr = 0
failed = False
while not stop_event.is_set():
try:
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
cur.execute("INSERT INTO load VALUES ('some payload')")
inserted_ctr += 1
except: # noqa: E722
@@ -110,7 +110,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
load_ok_event.clear()
else:
if failed:
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
# if we recovered after a failure, verify that we have the correct number of rows
log.info("recovering at %s", inserted_ctr)
cur.execute("SELECT count(*) FROM load")
@@ -124,14 +124,14 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
def populate_branch(
pg: Postgres,
endpoint: Endpoint,
tenant_id: TenantId,
ps_http: PageserverHttpClient,
create_table: bool,
expected_sum: Optional[int],
) -> Tuple[TimelineId, Lsn]:
# insert some data
with pg_cur(pg) as cur:
with pg_cur(endpoint) as cur:
cur.execute("SHOW neon.timeline_id")
timeline_id = TimelineId(cur.fetchone()[0])
log.info("timeline to relocate %s", timeline_id)
@@ -196,19 +196,19 @@ def check_timeline_attached(
def switch_pg_to_new_pageserver(
env: NeonEnv,
pg: Postgres,
endpoint: Endpoint,
new_pageserver_port: int,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Path:
pg.stop()
endpoint.stop()
pg_config_file_path = Path(pg.config_file_path())
pg_config_file_path = Path(endpoint.config_file_path())
pg_config_file_path.open("a").write(
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'"
)
pg.start()
endpoint.start()
timeline_to_detach_local_path = (
env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
@@ -226,8 +226,8 @@ def switch_pg_to_new_pageserver(
return timeline_to_detach_local_path
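
The reconfiguration step in isolation, as a sketch built from the calls above: the endpoint only learns its pageserver from its config file, so a stop / append / start cycle is enough to repoint it (the connection-string shape is taken from the test).

```python
from pathlib import Path

from fixtures.neon_fixtures import Endpoint


def repoint_endpoint(endpoint: Endpoint, new_pageserver_pg_port: int):
    # Append a new pageserver connstring to the endpoint's config
    # while it is stopped; it picks the setting up on the next start.
    endpoint.stop()
    config_path = Path(endpoint.config_file_path())
    with config_path.open("a") as f:
        f.write(
            f"\nneon.pageserver_connstring = "
            f"'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
        )
    endpoint.start()
```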
def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: Path):
with pg_cur(pg) as cur:
def post_migration_check(endpoint: Endpoint, sum_before_migration: int, old_local_path: Path):
with pg_cur(endpoint) as cur:
# check that data is still there
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (sum_before_migration,)
@@ -288,12 +288,12 @@ def test_tenant_relocation(
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
pg_main = env.postgres.create_start(
ep_main = env.endpoints.create_start(
branch_name="test_tenant_relocation_main", tenant_id=tenant_id
)
timeline_id_main, current_lsn_main = populate_branch(
pg_main,
ep_main,
tenant_id=tenant_id,
ps_http=pageserver_http,
create_table=True,
@@ -306,12 +306,12 @@ def test_tenant_relocation(
ancestor_start_lsn=current_lsn_main,
tenant_id=tenant_id,
)
pg_second = env.postgres.create_start(
ep_second = env.endpoints.create_start(
branch_name="test_tenant_relocation_second", tenant_id=tenant_id
)
timeline_id_second, current_lsn_second = populate_branch(
pg_second,
ep_second,
tenant_id=tenant_id,
ps_http=pageserver_http,
create_table=False,
@@ -327,14 +327,14 @@ def test_tenant_relocation(
if with_load == "with_load":
# create load table
with pg_cur(pg_main) as cur:
with pg_cur(ep_main) as cur:
cur.execute("CREATE TABLE load(value text)")
load_stop_event = threading.Event()
load_ok_event = threading.Event()
load_thread = threading.Thread(
target=load,
args=(pg_main, load_stop_event, load_ok_event),
args=(ep_main, load_stop_event, load_ok_event),
daemon=True, # To make sure the child dies when the parent errors
)
load_thread.start()
@@ -450,7 +450,7 @@ def test_tenant_relocation(
old_local_path_main = switch_pg_to_new_pageserver(
env,
pg_main,
ep_main,
new_pageserver_pg_port,
tenant_id,
timeline_id_main,
@@ -458,7 +458,7 @@ def test_tenant_relocation(
old_local_path_second = switch_pg_to_new_pageserver(
env,
pg_second,
ep_second,
new_pageserver_pg_port,
tenant_id,
timeline_id_second,
@@ -475,11 +475,11 @@ def test_tenant_relocation(
interval=1,
func=lambda: tenant_exists(pageserver_http, tenant_id),
)
post_migration_check(pg_main, 500500, old_local_path_main)
post_migration_check(pg_second, 1001000, old_local_path_second)
post_migration_check(ep_main, 500500, old_local_path_main)
post_migration_check(ep_second, 1001000, old_local_path_second)
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(pg_second) as cur:
with pg_cur(ep_second) as cur:
cur.execute(
"""
DO $$


@@ -4,9 +4,9 @@ from typing import List, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
Postgres,
wait_for_last_flush_lsn,
wait_for_wal_insert_lsn,
)
@@ -28,12 +28,12 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
assert branch_name == main_branch_name
with env.postgres.create_start(
with env.endpoints.create_start(
main_branch_name,
tenant_id=tenant_id,
config_lines=["autovacuum=off", "checkpoint_timeout=10min"],
) as pg:
with pg.cursor() as cur:
) as endpoint:
with endpoint.cursor() as cur:
cur.execute("SELECT 1")
row = cur.fetchone()
assert row is not None
@@ -105,12 +105,12 @@ def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir:
first_branch_timeline_id = env.neon_cli.create_branch("first-branch", tenant_id=tenant_id)
with env.postgres.create_start("first-branch", tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start("first-branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute(
"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)"
)
wait_for_last_flush_lsn(env, pg, tenant_id, first_branch_timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, first_branch_timeline_id)
size_after_branching = http_client.tenant_size(tenant_id)
log.info(f"size_after_branching: {size_after_branching}")
@@ -164,12 +164,12 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou
assert last_branch is not None
with env.postgres.create_start(last_branch_name, tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start(last_branch_name, tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute(
"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)"
)
wait_for_last_flush_lsn(env, pg, tenant_id, last_branch)
wait_for_last_flush_lsn(env, endpoint, tenant_id, last_branch)
size_after_writes = http_client.tenant_size(tenant_id)
assert size_after_writes > initial_size
@@ -194,11 +194,11 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
http_client = env.pageserver.http_client()
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
with pg.cursor() as cur:
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
size_before_branching = http_client.tenant_size(tenant_id)
@@ -208,10 +208,10 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
)
with env.postgres.create_start("branch", tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
wait_for_last_flush_lsn(env, pg, tenant_id, branch_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id)
size_after = http_client.tenant_size(tenant_id)
@@ -237,17 +237,17 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
http_client = env.pageserver.http_client()
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
with pg.cursor() as cur:
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)")
flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t00 AS SELECT i::bigint n FROM generate_series(0, 2000) s(i)")
wait_for_last_flush_lsn(env, pg, tenant_id, main_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id)
size_before_branching = http_client.tenant_size(tenant_id)
@@ -257,10 +257,10 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
)
with env.postgres.create_start("branch", tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 10000) s(i)")
wait_for_last_flush_lsn(env, pg, tenant_id, branch_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id)
size_after = http_client.tenant_size(tenant_id)
@@ -297,12 +297,12 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
# gc is not expected to change the results
for branch_name, amount in [("main", 2000), ("first", 15000), ("second", 3000)]:
with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg:
with pg.cursor() as cur:
with env.endpoints.create_start(branch_name, tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute(
f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {amount}) s(i)"
)
wait_for_last_flush_lsn(env, pg, tenant_id, ids[branch_name])
wait_for_last_flush_lsn(env, endpoint, tenant_id, ids[branch_name])
size_now = http_client.tenant_size(tenant_id)
if latest_size is not None:
assert size_now > latest_size
@@ -359,7 +359,7 @@ def test_single_branch_get_tenant_size_grows(
def get_current_consistent_size(
env: NeonEnv,
pg: Postgres,
endpoint: Endpoint,
size_debug_file, # apparently there is no public signature for open()...
http_client: PageserverHttpClient,
tenant_id: TenantId,
@@ -368,7 +368,7 @@ def test_single_branch_get_tenant_size_grows(
consistent = False
size_debug = None
current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
current_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
# We want to make sure we have a self-consistent set of values.
# Size changes with WAL, so we are only OK if the WAL insert LSN
# is the same before and after fetching the size of the tenant
@@ -382,35 +382,35 @@ def test_single_branch_get_tenant_size_grows(
size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
size_debug = http_client.tenant_size_debug(tenant_id)
after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
after_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
consistent = current_lsn == after_lsn
current_lsn = after_lsn
size_debug_file.write(size_debug)
return (current_lsn, size)
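
The consistency loop distilled into a sketch (assuming the fixtures used above; the helper name is illustrative): keep re-reading the size until the WAL insert LSN is identical before and after the size query, so both values describe the same state.

```python
from fixtures.neon_fixtures import wait_for_wal_insert_lsn


def consistent_tenant_size(env, endpoint, http_client, tenant_id, timeline_id):
    # Retry until no WAL was inserted while the size was being computed.
    lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
    while True:
        size = http_client.tenant_size(tenant_id)
        after = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
        if after == lsn:
            return (lsn, size)
        lsn = after
```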
with env.postgres.create_start(
with env.endpoints.create_start(
branch_name,
tenant_id=tenant_id,
### autovacuum is disabled to limit WAL logging.
config_lines=["autovacuum=off"],
) as pg:
) as endpoint:
(initdb_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
collected_responses.append(("INITDB", initdb_lsn, size))
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)")
(current_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
collected_responses.append(("CREATE", current_lsn, size))
batch_size = 100
for i in range(3):
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)",
(i, i),
@@ -419,7 +419,7 @@ def test_single_branch_get_tenant_size_grows(
i += 1
(current_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
prev_size = collected_responses[-1][2]
@@ -438,7 +438,7 @@ def test_single_branch_get_tenant_size_grows(
collected_responses.append(("INSERT", current_lsn, size))
while True:
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"UPDATE t0 SET i = -i WHERE i IN (SELECT i FROM t0 WHERE i > 0 LIMIT {batch_size})"
)
@@ -448,7 +448,7 @@ def test_single_branch_get_tenant_size_grows(
break
(current_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
prev_size = collected_responses[-1][2]
@@ -458,7 +458,7 @@ def test_single_branch_get_tenant_size_grows(
collected_responses.append(("UPDATE", current_lsn, size))
while True:
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(f"DELETE FROM t0 WHERE i IN (SELECT i FROM t0 LIMIT {batch_size})")
deleted = cur.rowcount
@@ -466,7 +466,7 @@ def test_single_branch_get_tenant_size_grows(
break
(current_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
prev_size = collected_responses[-1][2]
@@ -475,14 +475,14 @@ def test_single_branch_get_tenant_size_grows(
collected_responses.append(("DELETE", current_lsn, size))
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("DROP TABLE t0")
# The size of the tenant should still be as large as before we dropped
# the table, because the drop operation can still be undone in the PITR
# defined by gc_horizon.
(current_lsn, size) = get_current_consistent_size(
env, pg, size_debug_file, http_client, tenant_id, timeline_id
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
prev_size = collected_responses[-1][2]
@@ -532,16 +532,16 @@ def test_get_tenant_size_with_multiple_branches(
http_client = env.pageserver.http_client()
main_pg = env.postgres.create_start(main_branch_name, tenant_id=tenant_id)
main_endpoint = env.endpoints.create_start(main_branch_name, tenant_id=tenant_id)
batch_size = 10000
with main_pg.cursor() as cur:
with main_endpoint.cursor() as cur:
cur.execute(
f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)"
)
wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id)
wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id)
size_at_branch = http_client.tenant_size(tenant_id)
assert size_at_branch > 0
@@ -552,23 +552,23 @@ def test_get_tenant_size_with_multiple_branches(
size_after_first_branch = http_client.tenant_size(tenant_id)
assert size_after_first_branch == size_at_branch
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
first_branch_endpoint = env.endpoints.create_start("first-branch", tenant_id=tenant_id)
with first_branch_pg.cursor() as cur:
with first_branch_endpoint.cursor() as cur:
cur.execute(
f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)"
)
wait_for_last_flush_lsn(env, first_branch_pg, tenant_id, first_branch_timeline_id)
wait_for_last_flush_lsn(env, first_branch_endpoint, tenant_id, first_branch_timeline_id)
size_after_growing_first_branch = http_client.tenant_size(tenant_id)
assert size_after_growing_first_branch > size_after_first_branch
with main_pg.cursor() as cur:
with main_endpoint.cursor() as cur:
cur.execute(
f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 2*{batch_size}) s(i)"
)
wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id)
wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id)
size_after_continuing_on_main = http_client.tenant_size(tenant_id)
assert size_after_continuing_on_main > size_after_growing_first_branch
@@ -578,31 +578,31 @@ def test_get_tenant_size_with_multiple_branches(
size_after_second_branch = http_client.tenant_size(tenant_id)
assert size_after_second_branch == size_after_continuing_on_main
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
second_branch_endpoint = env.endpoints.create_start("second-branch", tenant_id=tenant_id)
with second_branch_pg.cursor() as cur:
with second_branch_endpoint.cursor() as cur:
cur.execute(
f"CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 3*{batch_size}) s(i)"
)
wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id)
wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id)
size_after_growing_second_branch = http_client.tenant_size(tenant_id)
assert size_after_growing_second_branch > size_after_second_branch
with second_branch_pg.cursor() as cur:
with second_branch_endpoint.cursor() as cur:
cur.execute("DROP TABLE t0")
cur.execute("DROP TABLE t1")
cur.execute("VACUUM FULL")
wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id)
wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id)
size_after_thinning_branch = http_client.tenant_size(tenant_id)
assert (
size_after_thinning_branch > size_after_growing_second_branch
), "tenant_size should grow with dropped tables and full vacuum"
first_branch_pg.stop_and_destroy()
second_branch_pg.stop_and_destroy()
main_pg.stop()
first_branch_endpoint.stop_and_destroy()
second_branch_endpoint.stop_and_destroy()
main_endpoint.stop()
env.pageserver.stop()
env.pageserver.start()


@@ -29,7 +29,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
# Create tenant, start compute
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
endpoint = env.endpoints.create_start(name, tenant_id=tenant)
assert_tenant_state(
client,
tenant,
@@ -38,7 +38,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
)
# Stop compute
pg.stop()
endpoint.stop()
# Delete all timelines on all tenants.
#


@@ -66,17 +66,17 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_1)
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start(
endpoint_tenant1 = env.endpoints.create_start(
"test_tenants_normal_work",
tenant_id=tenant_1,
)
pg_tenant2 = env.postgres.create_start(
endpoint_tenant2 = env.endpoints.create_start(
"test_tenants_normal_work",
tenant_id=tenant_2,
)
for pg in [pg_tenant1, pg_tenant2]:
with closing(pg.connect()) as conn:
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -97,11 +97,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2)
endpoint_tenant1 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_1)
endpoint_tenant2 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_2)
for pg in [pg_tenant1, pg_tenant2]:
with closing(pg.connect()) as conn:
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
@@ -242,11 +242,15 @@ def test_pageserver_metrics_removed_after_detach(
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1)
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_2)
endpoint_tenant1 = env.endpoints.create_start(
"test_metrics_removed_after_detach", tenant_id=tenant_1
)
endpoint_tenant2 = env.endpoints.create_start(
"test_metrics_removed_after_detach", tenant_id=tenant_2
)
for pg in [pg_tenant1, pg_tenant2]:
with closing(pg.connect()) as conn:
for endpoint in [endpoint_tenant1, endpoint_tenant2]:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
@@ -317,7 +321,7 @@ def test_pageserver_with_empty_tenants(
), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory"
# Trigger timeline re-initialization after pageserver restart
env.postgres.stop_all()
env.endpoints.stop_all()
env.pageserver.stop()
tenant_without_timelines_dir = env.initial_tenant


@@ -15,10 +15,10 @@ from typing import List, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
LocalFsStorage,
NeonEnv,
NeonEnvBuilder,
Postgres,
RemoteStorageKind,
available_remote_storages,
wait_for_sk_commit_lsn_to_reach_remote_storage,
@@ -32,10 +32,10 @@ from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, wait_until
async def tenant_workload(env: NeonEnv, pg: Postgres):
async def tenant_workload(env: NeonEnv, endpoint: Endpoint):
await env.pageserver.connect_async()
pg_conn = await pg.connect_async()
pg_conn = await endpoint.connect_async()
await pg_conn.execute("CREATE TABLE t(key int primary key, value text)")
for i in range(1, 100):
@@ -49,10 +49,10 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
assert res == i * 1000
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
async def all_tenants_workload(env: NeonEnv, tenants_endpoints):
workers = []
for _, pg in tenants_pgs:
worker = tenant_workload(env, pg)
for _, endpoint in tenants_endpoints:
worker = tenant_workload(env, endpoint)
workers.append(asyncio.create_task(worker))
# await all workers
@@ -73,7 +73,7 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
tenants_pgs: List[Tuple[TenantId, Postgres]] = []
tenants_endpoints: List[Tuple[TenantId, Endpoint]] = []
for _ in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
@@ -84,18 +84,18 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
)
env.neon_cli.create_timeline("test_tenants_many", tenant_id=tenant)
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_tenants_many",
tenant_id=tenant,
)
tenants_pgs.append((tenant, pg))
tenants_endpoints.append((tenant, endpoint))
asyncio.run(all_tenants_workload(env, tenants_pgs))
asyncio.run(all_tenants_workload(env, tenants_endpoints))
# Wait for the remote storage uploads to finish
pageserver_http = env.pageserver.http_client()
for tenant, pg in tenants_pgs:
res = pg.safe_psql_many(
for tenant, endpoint in tenants_endpoints:
res = endpoint.safe_psql_many(
["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"]
)
tenant_id = TenantId(res[0][0][0])
@@ -137,15 +137,15 @@ def test_tenants_attached_after_download(
)
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
client = env.pageserver.http_client()
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
for checkpoint_number in range(1, 3):
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute(
f"""
CREATE TABLE t{checkpoint_number}(id int primary key, secret text);
@@ -174,7 +174,7 @@ def test_tenants_attached_after_download(
)
##### Stop the pageserver, erase its layer file to force it being downloaded from S3
env.postgres.stop_all()
env.endpoints.stop_all()
wait_for_sk_commit_lsn_to_reach_remote_storage(
tenant_id, timeline_id, env.safekeepers, env.pageserver
@@ -244,12 +244,12 @@ def test_tenant_redownloads_truncated_file_on_startup(
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t1 AS VALUES (123, 'foobar');")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
@@ -257,7 +257,7 @@ def test_tenant_redownloads_truncated_file_on_startup(
pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
env.postgres.stop_all()
env.endpoints.stop_all()
env.pageserver.stop()
timeline_dir = Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
@@ -313,9 +313,9 @@ def test_tenant_redownloads_truncated_file_on_startup(
os.stat(remote_layer_path).st_size == expected_size
), "truncated file should not had been uploaded around re-download"
pg = env.postgres.create_start("main")
endpoint = env.endpoints.create_start("main")
with pg.cursor() as cur:
with endpoint.cursor() as cur:
cur.execute("INSERT INTO t1 VALUES (234, 'test data');")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))


@@ -12,11 +12,11 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
PgBin,
PortDistributor,
Postgres,
RemoteStorageKind,
VanillaPostgres,
wait_for_last_flush_lsn,
@@ -38,10 +38,10 @@ def test_timeline_size(neon_simple_env: NeonEnv):
client = env.pageserver.http_client()
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
pgmain = env.postgres.create_start("test_timeline_size")
endpoint_main = env.endpoints.create_start("test_timeline_size")
log.info("postgres is running on 'test_timeline_size' branch")
with closing(pgmain.connect()) as conn:
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
@@ -74,10 +74,10 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
pgmain = env.postgres.create_start("test_timeline_size_createdropdb")
endpoint_main = env.endpoints.create_start("test_timeline_size_createdropdb")
log.info("postgres is running on 'test_timeline_size_createdropdb' branch")
with closing(pgmain.connect()) as conn:
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
@@ -89,7 +89,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
), "no writes should not change the incremental logical size"
cur.execute("CREATE DATABASE foodb")
with closing(pgmain.connect(dbname="foodb")) as conn:
with closing(endpoint_main.connect(dbname="foodb")) as conn:
with conn.cursor() as cur2:
cur2.execute("CREATE TABLE foo (t text)")
cur2.execute(
@@ -118,7 +118,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
# wait until received_lsn_lag is 0
def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60):
def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, timeout=60):
started_at = time.time()
received_lsn_lag = 1
@@ -129,7 +129,7 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()"
)
res = pgmain.safe_psql(
res = endpoint_main.safe_psql(
"""
SELECT
pg_size_pretty(pg_cluster_size()),
@@ -150,20 +150,20 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
pgmain = env.postgres.create_start(
endpoint_main = env.endpoints.create_start(
"test_timeline_size_quota",
# Set small limit for the test
config_lines=["neon.max_cluster_size=30MB"],
)
log.info("postgres is running on 'test_timeline_size_quota' branch")
with closing(pgmain.connect()) as conn:
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("CREATE TABLE foo (t text)")
wait_for_pageserver_catchup(pgmain)
wait_for_pageserver_catchup(endpoint_main)
# Insert many rows. This query must fail because of space limit
try:
@@ -175,7 +175,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
"""
)
wait_for_pageserver_catchup(pgmain)
wait_for_pageserver_catchup(endpoint_main)
cur.execute(
"""
@@ -195,7 +195,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
# drop table to free space
cur.execute("DROP TABLE foo")
wait_for_pageserver_catchup(pgmain)
wait_for_pageserver_catchup(endpoint_main)
# create it again and insert some rows. This query must succeed
cur.execute("CREATE TABLE foo (t text)")
@@ -207,7 +207,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
"""
)
wait_for_pageserver_catchup(pgmain)
wait_for_pageserver_catchup(endpoint_main)
cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
pg_cluster_size = cur.fetchone()
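Condensed, the quota mechanism exercised here has two moving parts: the `neon.max_cluster_size` GUC set at endpoint creation, and the `pg_cluster_size()` function from the `neon` extension used to observe usage. A sketch of the pattern, assuming a `NeonEnv` named `env` and psycopg2 as the driver (the generic `psycopg2.Error` stands in for whatever specific error the overflow raises):

```python
import psycopg2

endpoint = env.endpoints.create_start(
    "quota_demo",                                 # hypothetical branch name
    config_lines=["neon.max_cluster_size=30MB"],  # small limit, as in the test
)
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("CREATE EXTENSION neon")              # provides pg_cluster_size()
cur.execute("CREATE TABLE foo (t text)")
try:
    cur.execute("INSERT INTO foo SELECT 'x' FROM generate_series(1, 10000000)")
except psycopg2.Error:
    conn.rollback()                               # the oversized insert is expected to fail
cur.execute("SELECT pg_size_pretty(pg_cluster_size())")
print(cur.fetchone())
```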
@@ -231,15 +231,15 @@ def test_timeline_initial_logical_size_calculation_cancellation(
tenant_id, timeline_id = env.neon_cli.create_tenant()
# load in some data
pg = env.postgres.create_start("main", tenant_id=tenant_id)
pg.safe_psql_many(
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (x INTEGER)",
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
]
)
wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
pg.stop()
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
endpoint.stop()
# restart with failpoint inside initial size calculation task
env.pageserver.stop()
@@ -311,9 +311,9 @@ def test_timeline_physical_size_init(
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init")
pg = env.postgres.create_start("test_timeline_physical_size_init")
endpoint = env.endpoints.create_start("test_timeline_physical_size_init")
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
@@ -322,7 +322,7 @@ def test_timeline_physical_size_init(
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
# restart the pageserver to force calculating the timeline's initial physical size
env.pageserver.stop()
@@ -355,9 +355,9 @@ def test_timeline_physical_size_post_checkpoint(
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint")
pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint")
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
@@ -366,7 +366,7 @@ def test_timeline_physical_size_post_checkpoint(
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
assert_physical_size_invariants(
@@ -394,7 +394,7 @@ def test_timeline_physical_size_post_compaction(
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction")
pg = env.postgres.create_start("test_timeline_physical_size_post_compaction")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction")
# We don't want autovacuum to run on the table, while we are calculating the
# physical size, because that could cause a new layer to be created and a
@@ -402,7 +402,7 @@ def test_timeline_physical_size_post_compaction(
# happens, because of some other background activity or autovacuum on other
# tables, we could simply retry the size calculations. It's unlikely that
# that would happen more than once.)
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
"""INSERT INTO foo
@@ -411,7 +411,7 @@ def test_timeline_physical_size_post_compaction(
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
# shutdown safekeepers to prevent new data from coming in
for sk in env.safekeepers:
@@ -446,10 +446,10 @@ def test_timeline_physical_size_post_gc(
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc")
pg = env.postgres.create_start("test_timeline_physical_size_post_gc")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc")
# Like in test_timeline_physical_size_post_compaction, disable autovacuum
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
"""INSERT INTO foo
@@ -458,10 +458,10 @@ def test_timeline_physical_size_post_gc(
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
pg.safe_psql(
endpoint.safe_psql(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
@@ -469,7 +469,7 @@ def test_timeline_physical_size_post_gc(
"""
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None)
@@ -495,9 +495,9 @@ def test_timeline_size_metrics(
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics")
pg = env.postgres.create_start("test_timeline_size_metrics")
endpoint = env.endpoints.create_start("test_timeline_size_metrics")
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
@@ -506,7 +506,7 @@ def test_timeline_size_metrics(
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
# get the metrics and parse the metric for the current timeline's physical size
@@ -558,7 +558,7 @@ def test_timeline_size_metrics(
# The sum of the sizes of all databases, as seen by pg_database_size(), should also
# be close. Again allow some slack, the logical size metric includes some things like
# the SLRUs that are not included in pg_database_size().
dbsize_sum = pg.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0]
dbsize_sum = endpoint.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0]
assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024)
@@ -592,16 +592,16 @@ def test_tenant_physical_size(
n_rows = random.randint(100, 1000)
timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
]
)
wait_for_last_flush_lsn(env, pg, tenant, timeline)
wait_for_last_flush_lsn(env, endpoint, tenant, timeline)
pageserver_http.timeline_checkpoint(tenant, timeline)
if remote_storage_kind is not None:
@@ -609,7 +609,7 @@ def test_tenant_physical_size(
timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline)
pg.stop()
endpoint.stop()
# ensure that tenant_status current_physical size reports sum of timeline current_physical_size
tenant_current_physical_size = int(

View File

@@ -27,8 +27,8 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark):
)
env.neon_cli.create_timeline("test_truncate", tenant_id=tenant)
pg = env.postgres.create_start("test_truncate", tenant_id=tenant)
cur = pg.connect().cursor()
endpoint = env.endpoints.create_start("test_truncate", tenant_id=tenant)
cur = endpoint.connect().cursor()
cur.execute("create table t1(x integer)")
cur.execute(f"insert into t1 values (generate_series(1,{n_records}))")
cur.execute("vacuum t1")

View File

@@ -10,10 +10,12 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
def test_twophase(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_twophase", "empty")
pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"])
endpoint = env.endpoints.create_start(
"test_twophase", config_lines=["max_prepared_transactions=5"]
)
log.info("postgres is running on 'test_twophase' branch")
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("CREATE TABLE foo (t text)")
@@ -42,7 +44,7 @@ def test_twophase(neon_simple_env: NeonEnv):
# pg_twophase directory and fsynced
cur.execute("CHECKPOINT")
twophase_files = os.listdir(pg.pg_twophase_dir_path())
twophase_files = os.listdir(endpoint.pg_twophase_dir_path())
log.info(twophase_files)
assert len(twophase_files) == 4
@@ -50,25 +52,25 @@ def test_twophase(neon_simple_env: NeonEnv):
cur.execute("ROLLBACK PREPARED 'insert_four'")
cur.execute("CHECKPOINT")
twophase_files = os.listdir(pg.pg_twophase_dir_path())
twophase_files = os.listdir(endpoint.pg_twophase_dir_path())
log.info(twophase_files)
assert len(twophase_files) == 2
# Create a branch with the transaction in prepared state
fork_at_current_lsn(env, pg, "test_twophase_prepared", "test_twophase")
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
# Start compute on the new branch
pg2 = env.postgres.create_start(
endpoint2 = env.endpoints.create_start(
"test_twophase_prepared",
config_lines=["max_prepared_transactions=5"],
)
# Check that we restored only needed twophase files
twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
twophase_files2 = os.listdir(endpoint2.pg_twophase_dir_path())
log.info(twophase_files2)
assert twophase_files2.sort() == twophase_files.sort()
conn2 = pg2.connect()
conn2 = endpoint2.connect()
cur2 = conn2.cursor()
# On the new branch, commit one of the prepared transactions,
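The two-phase flow being tested is standard PostgreSQL 2PC; the Neon-specific part is that the on-disk `pg_twophase/` state carries over when a branch is forked. A minimal sketch of one prepared transaction surviving a fork, reusing the `cur` and `endpoint` from the test above (branch name is illustrative):

```python
cur.execute("BEGIN")
cur.execute("INSERT INTO foo VALUES ('one')")
cur.execute("PREPARE TRANSACTION 'insert_one'")  # written to pg_twophase/ at CHECKPOINT
cur.execute("CHECKPOINT")

fork_at_current_lsn(env, endpoint, "twophase_child", "test_twophase")
child = env.endpoints.create_start(
    "twophase_child", config_lines=["max_prepared_transactions=5"]
)
child_cur = child.connect().cursor()
child_cur.execute("COMMIT PREPARED 'insert_one'")  # resolvable on the new branch
```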

View File

@@ -9,9 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
def test_unlogged(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_unlogged", "empty")
pg = env.postgres.create_start("test_unlogged")
endpoint = env.endpoints.create_start("test_unlogged")
conn = pg.connect()
conn = endpoint.connect()
cur = conn.cursor()
cur.execute("CREATE UNLOGGED TABLE iut (id int);")
@@ -20,12 +20,10 @@ def test_unlogged(neon_simple_env: NeonEnv):
cur.execute("INSERT INTO iut values (42);")
# create another compute to fetch initial empty contents from pageserver
fork_at_current_lsn(env, pg, "test_unlogged_basebackup", "test_unlogged")
pg2 = env.postgres.create_start(
"test_unlogged_basebackup",
)
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged")
endpoint2 = env.endpoints.create_start("test_unlogged_basebackup")
conn2 = pg2.connect()
conn2 = endpoint2.connect()
cur2 = conn2.cursor()
# after restart table should be empty but valid
cur2.execute("PREPARE iut_plan (int) AS INSERT INTO iut VALUES ($1)")
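Unlogged tables are the interesting case here because their contents never hit the WAL: a compute started from a basebackup on the forked branch therefore receives only an empty (but structurally valid) relation from the pageserver, and the prepared insert above confirms it is still writable.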

View File

@@ -10,10 +10,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
pg = env.postgres.create_start("test_vm_bit_clear")
endpoint = env.endpoints.create_start("test_vm_bit_clear")
log.info("postgres is running on 'test_vm_bit_clear' branch")
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
# Install extension containing function needed for test
@@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1")
# Branch at this point, to test that later
fork_at_current_lsn(env, pg, "test_vm_bit_clear_new", "test_vm_bit_clear")
fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear")
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server
@@ -63,10 +63,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
# a dirty VM page is evicted. If the VM bit was not correctly cleared by the
# earlier WAL record, the full-page image hides the problem. Starting a new
# server at the right point-in-time avoids that full-page image.
pg_new = env.postgres.create_start("test_vm_bit_clear_new")
endpoint_new = env.endpoints.create_start("test_vm_bit_clear_new")
log.info("postgres is running on 'test_vm_bit_clear_new' branch")
pg_new_conn = pg_new.connect()
pg_new_conn = endpoint_new.connect()
cur_new = pg_new_conn.cursor()
cur_new.execute(

View File

@@ -16,6 +16,7 @@ from typing import Any, List, Optional
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonBroker,
NeonEnv,
NeonEnvBuilder,
@@ -23,7 +24,6 @@ from fixtures.neon_fixtures import (
PgBin,
PgProtocol,
PortDistributor,
Postgres,
RemoteStorageKind,
RemoteStorageUsers,
Safekeeper,
@@ -39,11 +39,11 @@ from fixtures.utils import get_dir_size, query_scalar, start_in_background
def wait_lsn_force_checkpoint(
tenant_id: TenantId,
timeline_id: TimelineId,
pg: Postgres,
endpoint: Endpoint,
ps: NeonPageserver,
pageserver_conn_options={},
):
lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver")
auth_token = None
@@ -97,10 +97,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
branch_names_to_timeline_ids = {}
# start postgres on each timeline
pgs = []
endpoints = []
for branch_name in branch_names:
new_timeline_id = env.neon_cli.create_branch(branch_name)
pgs.append(env.postgres.create_start(branch_name))
endpoints.append(env.endpoints.create_start(branch_name))
branch_names_to_timeline_ids[branch_name] = new_timeline_id
tenant_id = env.initial_tenant
@@ -160,8 +160,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
# Do everything in different loops to have actions on different timelines
# interleaved.
# create schema
for pg in pgs:
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
for endpoint in endpoints:
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
init_m = collect_metrics("after CREATE TABLE")
# Populate data for 2/3 timelines
@@ -197,16 +197,16 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
metrics_checker = MetricsChecker()
metrics_checker.start()
for pg in pgs[:-1]:
pg.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
for endpoint in endpoints[:-1]:
endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
metrics_checker.stop()
collect_metrics("after INSERT INTO")
# Check data for 2/3 timelines
for pg in pgs[:-1]:
res = pg.safe_psql("SELECT sum(key) FROM t")
for endpoint in endpoints[:-1]:
res = endpoint.safe_psql("SELECT sum(key) FROM t")
assert res[0] == (5000050000,)
final_m = collect_metrics("after SELECT")
@@ -233,11 +233,11 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_restarts")
pg = env.postgres.create_start("test_safekeepers_restarts")
endpoint = env.endpoints.create_start("test_safekeepers_restarts")
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
failed_node = None
@@ -268,22 +268,22 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg = env.postgres.create_start("test_broker")
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
endpoint = env.endpoints.create_start("test_broker")
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
# learn neon timeline from compute
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
# wait until remote_consistent_lsn gets advanced on all safekeepers
clients = [sk.http_client() for sk in env.safekeepers]
stat_before = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
log.info(f"statuses is {stat_before}")
pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'")
# force checkpoint in pageserver to advance remote_consistent_lsn
wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver)
wait_lsn_force_checkpoint(tenant_id, timeline_id, endpoint, env.pageserver)
# and wait till remote_consistent_lsn propagates to all safekeepers
started_at = time.time()
@@ -317,26 +317,28 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
)
env.neon_cli.create_branch("test_safekeepers_wal_removal")
pg = env.postgres.create_start("test_safekeepers_wal_removal")
endpoint = env.endpoints.create_start("test_safekeepers_wal_removal")
# Note: it is important to insert at least two segments, as currently
# control file is synced roughly once in segment range and WAL is not
# removed until all horizons are persisted.
pg.safe_psql_many(
endpoint.safe_psql_many(
[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,200000), 'payload'",
]
)
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
# force checkpoint to advance remote_consistent_lsn
pageserver_conn_options = {}
if auth_enabled:
pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id)
wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options)
wait_lsn_force_checkpoint(
tenant_id, timeline_id, endpoint, env.pageserver, pageserver_conn_options
)
# We will wait for first segment removal. Make sure the segments exist to begin with.
first_segments = [
@@ -436,13 +438,13 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_wal_backup")
pg = env.postgres.create_start("test_safekeepers_wal_backup")
endpoint = env.endpoints.create_start("test_safekeepers_wal_backup")
# learn neon timeline from compute
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
cur.execute("create table t(key int, value text)")
@@ -465,9 +467,9 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot
# put one of safekeepers down again
env.safekeepers[0].stop()
# restart postgres
pg.stop_and_destroy().create_start("test_safekeepers_wal_backup")
endpoint.stop_and_destroy().create_start("test_safekeepers_wal_backup")
# and ensure offloading still works
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("insert into t select generate_series(1,250000), 'payload'")
seg_end = Lsn("0/5000000")
@@ -491,15 +493,15 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_s3_wal_replay")
pg = env.postgres.create_start("test_s3_wal_replay")
endpoint = env.endpoints.create_start("test_s3_wal_replay")
# learn neon timeline from compute
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
expected_sum = 0
with closing(pg.connect()) as conn:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("create table t(key int, value text)")
cur.execute("insert into t values (1, 'payload')")
@@ -547,7 +549,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb"
)
pg.stop_and_destroy()
endpoint.stop_and_destroy()
ps_cli.timeline_delete(tenant_id, timeline_id)
# Also delete and manually create timeline on safekeepers -- this tests
@@ -609,9 +611,9 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
log.info(f"WAL redo took {elapsed} s")
# verify data
pg.create_start("test_s3_wal_replay")
endpoint.create_start("test_s3_wal_replay")
assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum
assert endpoint.safe_psql("select sum(key) from t")[0][0] == expected_sum
class ProposerPostgres(PgProtocol):
@@ -762,13 +764,13 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_timeline_status")
pg = env.postgres.create_start("test_timeline_status")
endpoint = env.endpoints.create_start("test_timeline_status")
wa = env.safekeepers[0]
# learn neon timeline from compute
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
if not auth_enabled:
wa_http_cli = wa.http_client()
@@ -806,11 +808,11 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert debug_dump_0["timelines_count"] == 1
assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id)
pg.safe_psql("create table t(i int)")
endpoint.safe_psql("create table t(i int)")
# ensure epoch goes up after reboot
pg.stop().start()
pg.safe_psql("insert into t values(10)")
endpoint.stop().start()
endpoint.safe_psql("insert into t values(10)")
tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id)
epoch_after_reboot = tli_status.acceptor_epoch
@@ -992,8 +994,8 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names])
def execute_payload(pg: Postgres):
with closing(pg.connect()) as conn:
def execute_payload(endpoint: Endpoint):
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -1021,26 +1023,26 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
active_safekeepers = [1, 2, 3]
pg = env.postgres.create("test_replace_safekeeper")
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
pg.start()
endpoint = env.endpoints.create("test_replace_safekeeper")
endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
endpoint.start()
# learn neon timeline from compute
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
execute_payload(pg)
execute_payload(endpoint)
show_statuses(env.safekeepers, tenant_id, timeline_id)
log.info("Restart all safekeepers to flush everything")
env.safekeepers[0].stop(immediate=True)
execute_payload(pg)
execute_payload(endpoint)
env.safekeepers[0].start()
env.safekeepers[1].stop(immediate=True)
execute_payload(pg)
execute_payload(endpoint)
env.safekeepers[1].start()
env.safekeepers[2].stop(immediate=True)
execute_payload(pg)
execute_payload(endpoint)
env.safekeepers[2].start()
env.safekeepers[0].stop(immediate=True)
@@ -1050,27 +1052,27 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
env.safekeepers[1].start()
env.safekeepers[2].start()
execute_payload(pg)
execute_payload(endpoint)
show_statuses(env.safekeepers, tenant_id, timeline_id)
log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3")
env.safekeepers[0].stop(immediate=True)
execute_payload(pg)
execute_payload(endpoint)
show_statuses(env.safekeepers, tenant_id, timeline_id)
log.info("Recreate postgres to replace failed sk1 with new sk4")
pg.stop_and_destroy().create("test_replace_safekeeper")
endpoint.stop_and_destroy().create("test_replace_safekeeper")
active_safekeepers = [2, 3, 4]
env.safekeepers[3].start()
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
pg.start()
endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
endpoint.start()
execute_payload(pg)
execute_payload(endpoint)
show_statuses(env.safekeepers, tenant_id, timeline_id)
log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work")
env.safekeepers[1].stop(immediate=True)
execute_payload(pg)
execute_payload(endpoint)
show_statuses(env.safekeepers, tenant_id, timeline_id)
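The replacement procedure itself is compact enough to summarize: the endpoint is destroyed and recreated with a safekeeper list that swaps the failed node for the new one. A condensed sketch of the sequence, as exercised above:

```python
# Replace failed sk1 with new sk4 in the endpoint's safekeeper set.
endpoint.stop_and_destroy().create("test_replace_safekeeper")
env.safekeepers[3].start()                                     # bring up the replacement
endpoint.adjust_for_safekeepers(safekeepers_guc(env, [2, 3, 4]))
endpoint.start()
execute_payload(endpoint)                                      # writes succeed on the new quorum
```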
@@ -1082,13 +1084,13 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
last_lsn = Lsn(0)
# returns pg_wal size in MB
def collect_stats(pg: Postgres, cur, enable_logs=True):
def collect_stats(endpoint: Endpoint, cur, enable_logs=True):
nonlocal last_lsn
assert pg.pgdata_dir is not None
assert endpoint.pgdata_dir is not None
log.info("executing INSERT to generate WAL")
current_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()"))
pg_wal_size_mb = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024
pg_wal_size_mb = get_dir_size(os.path.join(endpoint.pgdata_dir, "pg_wal")) / 1024 / 1024
if enable_logs:
lsn_delta_mb = (current_lsn - last_lsn) / 1024 / 1024
log.info(f"LSN delta: {lsn_delta_mb} MB, current WAL size: {pg_wal_size_mb} MB")
@@ -1104,25 +1106,25 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_branch("test_wal_deleted_after_broadcast")
# Adjust checkpoint config to prevent keeping old WAL segments
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_wal_deleted_after_broadcast",
config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"],
)
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
cur.execute("CREATE TABLE t(key int, value text)")
collect_stats(pg, cur)
collect_stats(endpoint, cur)
# generate WAL to simulate normal workload
for i in range(5):
generate_wal(cur)
collect_stats(pg, cur)
collect_stats(endpoint, cur)
log.info("executing checkpoint")
cur.execute("CHECKPOINT")
wal_size_after_checkpoint = collect_stats(pg, cur)
wal_size_after_checkpoint = collect_stats(endpoint, cur)
# there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
assert wal_size_after_checkpoint < 16 * 2.5
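(A WAL segment is 16 MB by default, so the bound above allows roughly two full segments plus slack for the `archive_status` entries mentioned in the comment.)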
@@ -1151,13 +1153,13 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
tenant_id_other, timeline_id_other = env.neon_cli.create_tenant()
# Populate branches
pg_1 = env.postgres.create_start("br1")
pg_2 = env.postgres.create_start("br2")
pg_3 = env.postgres.create_start("br3")
pg_4 = env.postgres.create_start("br4")
pg_other = env.postgres.create_start("main", tenant_id=tenant_id_other)
for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]:
with closing(pg.connect()) as conn:
endpoint_1 = env.endpoints.create_start("br1")
endpoint_2 = env.endpoints.create_start("br2")
endpoint_3 = env.endpoints.create_start("br3")
endpoint_4 = env.endpoints.create_start("br4")
endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other)
for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key)")
sk = env.safekeepers[0]
@@ -1178,14 +1180,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir()
# Stop branches which should be inactive and restart Safekeeper to drop its in-memory state.
pg_2.stop_and_destroy()
pg_4.stop_and_destroy()
endpoint_2.stop_and_destroy()
endpoint_4.stop_and_destroy()
sk.stop()
sk.start()
# Ensure connections to Safekeeper are established
for pg in [pg_1, pg_3, pg_other]:
with closing(pg.connect()) as conn:
for endpoint in [endpoint_1, endpoint_3, endpoint_other]:
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("INSERT INTO t (key) VALUES (1)")
@@ -1244,6 +1246,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
# Ensure the other tenant still works
sk_http_other.timeline_status(tenant_id_other, timeline_id_other)
with closing(pg_other.connect()) as conn:
with closing(endpoint_other.connect()) as conn:
with conn.cursor() as cur:
cur.execute("INSERT INTO t (key) VALUES (123)")

View File

@@ -6,7 +6,7 @@ from typing import List, Optional
import asyncpg
from fixtures.log_helper import getLogger
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper
from fixtures.types import Lsn, TenantId, TimelineId
log = getLogger("root.safekeeper_async")
@@ -82,8 +82,10 @@ class WorkerStats(object):
log.info("All workers made {} transactions".format(progress))
async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
pg_conn = await pg.connect_async()
async def run_random_worker(
stats: WorkerStats, endpoint: Endpoint, worker_id, n_accounts, max_transfer
):
pg_conn = await endpoint.connect_async()
log.debug("Started worker {}".format(worker_id))
while stats.running:
@@ -141,7 +143,7 @@ async def wait_for_lsn(
# consistent.
async def run_restarts_under_load(
env: NeonEnv,
pg: Postgres,
endpoint: Endpoint,
acceptors: List[Safekeeper],
n_workers=10,
n_accounts=100,
@@ -154,7 +156,7 @@ async def run_restarts_under_load(
# taking into account that this timeout is checked only at the beginning of every iteration.
test_timeout_at = time.monotonic() + 5 * 60
pg_conn = await pg.connect_async()
pg_conn = await endpoint.connect_async()
tenant_id = TenantId(await pg_conn.fetchval("show neon.tenant_id"))
timeline_id = TimelineId(await pg_conn.fetchval("show neon.timeline_id"))
@@ -165,7 +167,7 @@ async def run_restarts_under_load(
stats = WorkerStats(n_workers)
workers = []
for worker_id in range(n_workers):
worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
worker = run_random_worker(stats, endpoint, worker_id, bank.n_accounts, max_transfer)
workers.append(asyncio.create_task(worker))
for it in range(iterations):
@@ -212,11 +214,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_branch("test_safekeepers_restarts_under_load")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"]
)
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))
asyncio.run(run_restarts_under_load(env, endpoint, env.safekeepers))
# Restart acceptors one by one and test that everything is working as expected
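Shape-wise, this is a supervisor pattern: asyncio worker tasks keep transacting against the endpoint while the test restarts safekeepers underneath them. A condensed sketch, with `do_transfer()` as a hypothetical stand-in for the bank-transfer query and `WorkerStats` as defined in this test file:

```python
import asyncio

async def worker(endpoint, stats, worker_id):
    conn = await endpoint.connect_async()        # asyncpg connection
    while stats.running:
        await do_transfer(conn)                  # hypothetical: one money transfer
    await conn.close()

async def drive(env, endpoint, n_workers=10):
    stats = WorkerStats(n_workers)
    tasks = [asyncio.create_task(worker(endpoint, stats, i)) for i in range(n_workers)]
    for sk in env.safekeepers:                   # restart acceptors one by one under load
        sk.stop()
        sk.start()
    stats.running = False
    await asyncio.gather(*tasks)
```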
@@ -228,7 +230,7 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_branch("test_restarts_frequent_checkpoints")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start(
endpoint = env.endpoints.create_start(
"test_restarts_frequent_checkpoints",
config_lines=[
"max_replication_write_lag=1MB",
@@ -240,11 +242,13 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
# we try to simulate a large (flush_lsn - truncate_lsn) lag, to test that WAL segments
# are not removed before being broadcast to all safekeepers, with the help of a replication slot
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))
asyncio.run(
run_restarts_under_load(env, endpoint, env.safekeepers, period_time=15, iterations=5)
)
def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
pg = Postgres(
def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
endpoint = Endpoint(
env,
tenant_id=env.initial_tenant,
port=env.port_distributor.get_port(),
@@ -253,19 +257,19 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
check_stop_result=False,
)
# embed current time in node name
node_name = pgdir_name or f"pg_node_{time.time()}"
return pg.create_start(
branch_name=branch, node_name=node_name, config_lines=["log_statement=all"]
# embed current time in endpoint ID
endpoint_id = pgdir_name or f"ep-{time.time()}"
return endpoint.create_start(
branch_name=branch, endpoint_id=endpoint_id, config_lines=["log_statement=all"]
)
async def exec_compute_query(
env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None
):
with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg:
with endpoint_create_start(env, branch=branch, pgdir_name=pgdir_name) as endpoint:
before_conn = time.time()
conn = await pg.connect_async()
conn = await endpoint.connect_async()
res = await conn.fetch(query)
await conn.close()
after_conn = time.time()
@@ -436,8 +440,8 @@ async def check_unavailability(
assert bg_query.done()
async def run_unavailability(env: NeonEnv, pg: Postgres):
conn = await pg.connect_async()
async def run_unavailability(env: NeonEnv, endpoint: Endpoint):
conn = await endpoint.connect_async()
# check basic work with table
await conn.execute("CREATE TABLE t(key int primary key, value text)")
@@ -462,9 +466,9 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_unavailability")
pg = env.postgres.create_start("test_safekeepers_unavailability")
endpoint = env.endpoints.create_start("test_safekeepers_unavailability")
asyncio.run(run_unavailability(env, pg))
asyncio.run(run_unavailability(env, endpoint))
@dataclass
@@ -493,8 +497,8 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest):
await asyncio.sleep(1)
async def run_race_conditions(env: NeonEnv, pg: Postgres):
conn = await pg.connect_async()
async def run_race_conditions(env: NeonEnv, endpoint: Endpoint):
conn = await endpoint.connect_async()
await conn.execute("CREATE TABLE t(key int primary key, value text)")
data = RaceConditionTest(0, False)
@@ -525,14 +529,14 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_race_conditions")
pg = env.postgres.create_start("test_safekeepers_race_conditions")
endpoint = env.endpoints.create_start("test_safekeepers_race_conditions")
asyncio.run(run_race_conditions(env, pg))
asyncio.run(run_race_conditions(env, endpoint))
# Check that pageserver can select safekeeper with largest commit_lsn
# and switch if LSN is not updated for some time (NoWalTimeout).
async def run_wal_lagging(env: NeonEnv, pg: Postgres):
async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint):
def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str:
# use ports 10, 11 and 12 to simulate unavailable safekeepers
return ",".join(
@@ -542,10 +546,10 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
]
)
conn = await pg.connect_async()
conn = await endpoint.connect_async()
await conn.execute("CREATE TABLE t(key int primary key, value text)")
await conn.close()
pg.stop()
endpoint.stop()
n_iterations = 20
n_txes = 10000
@@ -561,11 +565,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
it -= 1
continue
pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
log.info(f"Iteration {it}: {active_sk}")
pg.start()
conn = await pg.connect_async()
endpoint.start()
conn = await endpoint.connect_async()
for _ in range(n_txes):
await conn.execute(f"INSERT INTO t values ({i}, 'payload')")
@@ -573,11 +577,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
i += 1
await conn.close()
pg.stop()
endpoint.stop()
pg.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers)))
pg.start()
conn = await pg.connect_async()
endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers)))
endpoint.start()
conn = await endpoint.connect_async()
log.info(f"Executed {i-1} queries")
@@ -591,6 +595,6 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_lagging")
pg = env.postgres.create_start("test_wal_lagging")
endpoint = env.endpoints.create_start("test_wal_lagging")
asyncio.run(run_wal_lagging(env, pg))
asyncio.run(run_wal_lagging(env, endpoint))
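The mechanism behind `run_wal_lagging` is worth spelling out: a safekeeper is "taken down" from the compute's point of view simply by pointing its entry in the safekeepers connection string at a dead port, without touching the process. A sketch of that construction, reconstructed from the test (the exact dead-port scheme is an assumption based on the comment about ports 10-12):

```python
from typing import List

def safekeepers_guc(env, active: List[bool]) -> str:
    # Real port when the safekeeper should be reachable; an unused
    # low port (10, 11, 12, ...) when it should appear down.
    return ",".join(
        f"localhost:{sk.port.pg if up else 10 + i}"
        for i, (sk, up) in enumerate(zip(env.safekeepers, active))
    )

endpoint.stop()
endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True, False, True]))
endpoint.start()  # compute now reaches only the safekeepers marked True
```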

View File

@@ -19,9 +19,9 @@ def test_wal_restore(
):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
pg = env.postgres.create_start("test_wal_restore")
pg.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
endpoint = env.endpoints.create_start("test_wal_restore")
endpoint.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = test_output_dir / "pgsql.restored"

View File

@@ -45,9 +45,9 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / str(tenant_id)).exists()
pg = env.postgres.create_start("main", tenant_id=tenant_id)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
pg_conn = pg.connect()
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
# Create table, and insert some rows. Make it big enough that it doesn't fit in

View File

@@ -24,7 +24,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin):
env = neon_simple_env
env.neon_cli.create_branch("test_broken", "empty")
env.postgres.create_start("test_broken")
env.endpoints.create_start("test_broken")
log.info("postgres is running")
log.info("THIS NEXT COMMAND WILL FAIL:")