diff --git a/README.md b/README.md index 55df67f6c7..810937aff7 100644 --- a/README.md +++ b/README.md @@ -147,15 +147,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one # start postgres compute node -> ./target/debug/neon_local pg start main -Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ... +> ./target/debug/neon_local endpoint start main +Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ... Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 -Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' # check list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS - main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS + main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running ``` 2. Now, it is possible to connect to postgres and run some queries: @@ -184,14 +184,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601] # start postgres on that branch -> ./target/debug/neon_local pg start migration_check --branch-name migration_check -Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ... +> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check +Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ... Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433 -Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' # check the new list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index a9b66f479a..665cad8783 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -7,7 +7,7 @@ //! 
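The CLI rename shown in the README hunks above also affects anything that scripts against `neon_local`. As a sketch only: a minimal smoke check that the old `pg` spelling now fails fast while `endpoint` works. The binary path mirrors the README session, and the exact stderr wording is an assumption based on the `bail!` message in the hunk below.

```python
import subprocess

NEON_LOCAL = "./target/debug/neon_local"  # assumed debug-build path, as in the README session above

# Old spelling: expected to exit non-zero with a rename hint (see the bail! below).
old = subprocess.run([NEON_LOCAL, "pg", "list"], capture_output=True, text=True)
assert old.returncode != 0
assert "renamed to 'endpoint'" in (old.stderr + old.stdout)

# New spelling: lists endpoints for the default tenant.
new = subprocess.run([NEON_LOCAL, "endpoint", "list"], capture_output=True, text=True)
assert new.returncode == 0
```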
use anyhow::{anyhow, bail, Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; -use control_plane::compute::ComputeControlPlane; +use control_plane::endpoint::ComputeControlPlane; use control_plane::local_env::LocalEnv; use control_plane::pageserver::PageServerNode; use control_plane::safekeeper::SafekeeperNode; @@ -106,8 +106,9 @@ fn main() -> Result<()> { "start" => handle_start_all(sub_args, &env), "stop" => handle_stop_all(sub_args, &env), "pageserver" => handle_pageserver(sub_args, &env), - "pg" => handle_pg(sub_args, &env), "safekeeper" => handle_safekeeper(sub_args, &env), + "endpoint" => handle_endpoint(sub_args, &env), + "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"), _ => bail!("unexpected subcommand {sub_name}"), }; @@ -470,10 +471,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - let mut cplane = ComputeControlPlane::load(env.clone())?; println!("Importing timeline into pageserver ..."); pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?; - println!("Creating node for imported timeline ..."); env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?; - cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?; + println!("Creating endpoint for imported timeline ..."); + cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?; println!("Done"); } Some(("branch", branch_match)) => { @@ -521,10 +522,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - Ok(()) } -fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { - let (sub_name, sub_args) = match pg_match.subcommand() { - Some(pg_subcommand_data) => pg_subcommand_data, - None => bail!("no pg subcommand provided"), +fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { + let (sub_name, sub_args) = match ep_match.subcommand() { + Some(ep_subcommand_data) => ep_subcommand_data, + None => bail!("no endpoint subcommand provided"), }; let mut cplane = ComputeControlPlane::load(env.clone())?; @@ -546,7 +547,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { table.load_preset(comfy_table::presets::NOTHING); table.set_header([ - "NODE", + "ENDPOINT", "ADDRESS", "TIMELINE", "BRANCH NAME", @@ -554,39 +555,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { "STATUS", ]); - for ((_, node_name), node) in cplane - .nodes + for (endpoint_id, endpoint) in cplane + .endpoints .iter() - .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id) + .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id) { - let lsn_str = match node.lsn { + let lsn_str = match endpoint.lsn { None => { - // -> primary node + // -> primary endpoint // Use the LSN at the end of the timeline. timeline_infos - .get(&node.timeline_id) + .get(&endpoint.timeline_id) .map(|bi| bi.last_record_lsn.to_string()) .unwrap_or_else(|| "?".to_string()) } Some(lsn) => { - // -> read-only node - // Use the node's LSN. + // -> read-only endpoint + // Use the endpoint's LSN. 
lsn.to_string() } }; let branch_name = timeline_name_mappings - .get(&TenantTimelineId::new(tenant_id, node.timeline_id)) + .get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id)) .map(|name| name.as_str()) .unwrap_or("?"); table.add_row([ - node_name.as_str(), - &node.address.to_string(), - &node.timeline_id.to_string(), + endpoint_id.as_str(), + &endpoint.address.to_string(), + &endpoint.timeline_id.to_string(), branch_name, lsn_str.as_str(), - node.status(), + endpoint.status(), ]); } @@ -597,10 +598,10 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .get_one::("branch-name") .map(|s| s.as_str()) .unwrap_or(DEFAULT_BRANCH_NAME); - let node_name = sub_args - .get_one::("node") - .map(|node_name| node_name.to_string()) - .unwrap_or_else(|| format!("{branch_name}_node")); + let endpoint_id = sub_args + .get_one::("endpoint_id") + .map(String::to_string) + .unwrap_or_else(|| format!("ep-{branch_name}")); let lsn = sub_args .get_one::("lsn") @@ -618,15 +619,15 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .copied() .context("Failed to parse postgres version from the argument string")?; - cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?; + cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?; } "start" => { let port: Option = sub_args.get_one::("port").copied(); - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was provided to start"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; - let node = cplane.nodes.get(&(tenant_id, node_name.to_string())); + let endpoint = cplane.endpoints.get(endpoint_id.as_str()); let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) { let claims = Claims::new(Some(tenant_id), Scope::Tenant); @@ -636,9 +637,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { None }; - if let Some(node) = node { - println!("Starting existing postgres {node_name}..."); - node.start(&auth_token)?; + if let Some(endpoint) = endpoint { + println!("Starting existing endpoint {endpoint_id}..."); + endpoint.start(&auth_token)?; } else { let branch_name = sub_args .get_one::("branch-name") @@ -663,27 +664,33 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { // start --port X // stop // start <-- will also use port X even without explicit port argument - println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ..."); + println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); - let node = - cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?; - node.start(&auth_token)?; + let ep = cplane.new_endpoint( + tenant_id, + endpoint_id, + timeline_id, + lsn, + port, + pg_version, + )?; + ep.start(&auth_token)?; } } "stop" => { - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was provided to stop"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?; let destroy = sub_args.get_flag("destroy"); - let node = cplane - .nodes - .get(&(tenant_id, node_name.to_string())) - .with_context(|| format!("postgres {node_name} is not found"))?; - node.stop(destroy)?; + let endpoint = cplane + .endpoints + .get(endpoint_id.as_str()) + .with_context(|| format!("postgres endpoint 
{endpoint_id} is not found"))?; + endpoint.stop(destroy)?; } - _ => bail!("Unexpected pg subcommand '{sub_name}'"), + _ => bail!("Unexpected endpoint subcommand '{sub_name}'"), } Ok(()) @@ -802,7 +809,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul } fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> { - // Postgres nodes are not started automatically + // Endpoints are not started automatically broker::start_broker_process(env)?; @@ -836,10 +843,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result< fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) { let pageserver = PageServerNode::from_env(env); - // Stop all compute nodes + // Stop all endpoints match ComputeControlPlane::load(env.clone()) { Ok(cplane) => { - for (_k, node) in cplane.nodes { + for (_k, node) in cplane.endpoints { if let Err(e) = node.stop(false) { eprintln!("postgres stop failed: {e:#}"); } @@ -872,7 +879,9 @@ fn cli() -> Command { .help("Name of the branch to be created or used as an alias for other services") .required(false); - let pg_node_arg = Arg::new("node").help("Postgres node name").required(false); + let endpoint_id_arg = Arg::new("endpoint_id") + .help("Postgres endpoint id") + .required(false); let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false); @@ -1026,27 +1035,27 @@ fn cli() -> Command { ) ) .subcommand( - Command::new("pg") + Command::new("endpoint") .arg_required_else_help(true) .about("Manage postgres instances") .subcommand(Command::new("list").arg(tenant_id_arg.clone())) .subcommand(Command::new("create") - .about("Create a postgres compute node") - .arg(pg_node_arg.clone()) + .about("Create a compute endpoint") + .arg(endpoint_id_arg.clone()) .arg(branch_name_arg.clone()) .arg(tenant_id_arg.clone()) .arg(lsn_arg.clone()) .arg(port_arg.clone()) .arg( Arg::new("config-only") - .help("Don't do basebackup, create compute node with only config files") + .help("Don't do basebackup, create endpoint directory with only config files") .long("config-only") .required(false)) .arg(pg_version_arg.clone()) ) .subcommand(Command::new("start") - .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files") - .arg(pg_node_arg.clone()) + .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") + .arg(endpoint_id_arg.clone()) .arg(tenant_id_arg.clone()) .arg(branch_name_arg) .arg(timeline_id_arg) @@ -1056,7 +1065,7 @@ fn cli() -> Command { ) .subcommand( Command::new("stop") - .arg(pg_node_arg) + .arg(endpoint_id_arg) .arg(tenant_id_arg) .arg( Arg::new("destroy") @@ -1068,6 +1077,13 @@ fn cli() -> Command { ) ) + // Obsolete old name for 'endpoint'. We now just print an error if it's used. 
+ .subcommand( + Command::new("pg") + .hide(true) + .arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false)) + .trailing_var_arg(true) + ) .subcommand( Command::new("start") .about("Start page server and safekeepers") diff --git a/control_plane/src/compute.rs b/control_plane/src/endpoint.rs similarity index 88% rename from control_plane/src/compute.rs rename to control_plane/src/endpoint.rs index bc81107706..9e85138e68 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/endpoint.rs @@ -25,54 +25,45 @@ use crate::postgresql_conf::PostgresConf; // pub struct ComputeControlPlane { base_port: u16, - pageserver: Arc, - pub nodes: BTreeMap<(TenantId, String), Arc>, + + // endpoint ID is the key + pub endpoints: BTreeMap>, + env: LocalEnv, + pageserver: Arc, } impl ComputeControlPlane { - // Load current nodes with ports from data directories on disk - // Directory structure has the following layout: - // pgdatadirs - // |- tenants - // | |- - // | | |- + // Load current endpoints from the endpoints/ subdirectories pub fn load(env: LocalEnv) -> Result { let pageserver = Arc::new(PageServerNode::from_env(&env)); - let mut nodes = BTreeMap::default(); - let pgdatadirspath = &env.pg_data_dirs_path(); - - for tenant_dir in fs::read_dir(pgdatadirspath) - .with_context(|| format!("failed to list {}", pgdatadirspath.display()))? + let mut endpoints = BTreeMap::default(); + for endpoint_dir in fs::read_dir(env.endpoints_path()) + .with_context(|| format!("failed to list {}", env.endpoints_path().display()))? { - let tenant_dir = tenant_dir?; - for timeline_dir in fs::read_dir(tenant_dir.path()) - .with_context(|| format!("failed to list {}", tenant_dir.path().display()))? - { - let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?; - nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node)); - } + let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?; + endpoints.insert(ep.name.clone(), Arc::new(ep)); } Ok(ComputeControlPlane { base_port: 55431, - pageserver, - nodes, + endpoints, env, + pageserver, }) } fn get_port(&mut self) -> u16 { 1 + self - .nodes + .endpoints .values() - .map(|node| node.address.port()) + .map(|ep| ep.address.port()) .max() .unwrap_or(self.base_port) } - pub fn new_node( + pub fn new_endpoint( &mut self, tenant_id: TenantId, name: &str, @@ -80,9 +71,9 @@ impl ComputeControlPlane { lsn: Option, port: Option, pg_version: u32, - ) -> Result> { + ) -> Result> { let port = port.unwrap_or_else(|| self.get_port()); - let node = Arc::new(PostgresNode { + let ep = Arc::new(Endpoint { name: name.to_owned(), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), env: self.env.clone(), @@ -93,39 +84,45 @@ impl ComputeControlPlane { pg_version, }); - node.create_pgdata()?; - node.setup_pg_conf()?; + ep.create_pgdata()?; + ep.setup_pg_conf()?; - self.nodes - .insert((tenant_id, node.name.clone()), Arc::clone(&node)); + self.endpoints.insert(ep.name.clone(), Arc::clone(&ep)); - Ok(node) + Ok(ep) } } /////////////////////////////////////////////////////////////////////////////// #[derive(Debug)] -pub struct PostgresNode { - pub address: SocketAddr, +pub struct Endpoint { + /// used as the directory name name: String, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary. 
+ pub lsn: Option, + + // port and address of the Postgres server + pub address: SocketAddr, + pg_version: u32, + + // These are not part of the endpoint as such, but the environment + // the endpoint runs in. pub env: LocalEnv, pageserver: Arc, - pub timeline_id: TimelineId, - pub lsn: Option, // if it's a read-only node. None for primary - pub tenant_id: TenantId, - pg_version: u32, } -impl PostgresNode { +impl Endpoint { fn from_dir_entry( entry: std::fs::DirEntry, env: &LocalEnv, pageserver: &Arc, - ) -> Result { + ) -> Result { if !entry.file_type()?.is_dir() { anyhow::bail!( - "PostgresNode::from_dir_entry failed: '{}' is not a directory", + "Endpoint::from_dir_entry failed: '{}' is not a directory", entry.path().display() ); } @@ -135,7 +132,7 @@ impl PostgresNode { let name = fname.to_str().unwrap().to_string(); // Read config file into memory - let cfg_path = entry.path().join("postgresql.conf"); + let cfg_path = entry.path().join("pgdata").join("postgresql.conf"); let cfg_path_str = cfg_path.to_string_lossy(); let mut conf_file = File::open(&cfg_path) .with_context(|| format!("failed to open config file in {}", cfg_path_str))?; @@ -161,7 +158,7 @@ impl PostgresNode { conf.parse_field_optional("recovery_target_lsn", &context)?; // ok now - Ok(PostgresNode { + Ok(Endpoint { address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), name, env: env.clone(), @@ -269,7 +266,7 @@ impl PostgresNode { } // Write postgresql.conf with default configuration - // and PG_VERSION file to the data directory of a new node. + // and PG_VERSION file to the data directory of a new endpoint. fn setup_pg_conf(&self) -> Result<()> { let mut conf = PostgresConf::new(); conf.append("max_wal_senders", "10"); @@ -289,7 +286,7 @@ impl PostgresNode { // walproposer panics when basebackup is invalid, it is pointless to restart in this case. conf.append("restart_after_crash", "off"); - // Configure the node to fetch pages from pageserver + // Configure the Neon Postgres extension to fetch pages from pageserver let pageserver_connstr = { let config = &self.pageserver.pg_connection_config; let (host, port) = (config.host(), config.port()); @@ -325,7 +322,7 @@ impl PostgresNode { conf.append("max_replication_flush_lag", "10GB"); if !self.env.safekeepers.is_empty() { - // Configure the node to connect to the safekeepers + // Configure Postgres to connect to the safekeepers conf.append("synchronous_standby_names", "walproposer"); let safekeepers = self @@ -380,8 +377,12 @@ impl PostgresNode { Ok(()) } + pub fn endpoint_path(&self) -> PathBuf { + self.env.endpoints_path().join(&self.name) + } + pub fn pgdata(&self) -> PathBuf { - self.env.pg_data_dir(&self.tenant_id, &self.name) + self.endpoint_path().join("pgdata") } pub fn status(&self) -> &str { @@ -443,12 +444,11 @@ impl PostgresNode { } pub fn start(&self, auth_token: &Option) -> Result<()> { - // Bail if the node already running. if self.status() == "running" { - anyhow::bail!("The node is already running"); + anyhow::bail!("The endpoint is already running"); } - // 1. We always start compute node from scratch, so + // 1. We always start Postgres from scratch, so // if old dir exists, preserve 'postgresql.conf' and drop the directory let postgresql_conf_path = self.pgdata().join("postgresql.conf"); let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| { @@ -470,8 +470,8 @@ impl PostgresNode { File::create(self.pgdata().join("standby.signal"))?; } - // 4. 
Finally start the compute node postgres - println!("Starting postgres node at '{}'", self.connstr()); + // 4. Finally start postgres + println!("Starting postgres at '{}'", self.connstr()); self.pg_ctl(&["start"], auth_token) } @@ -480,7 +480,7 @@ impl PostgresNode { // use immediate shutdown mode, otherwise, // shutdown gracefully to leave the data directory sane. // - // Compute node always starts from scratch, so stop + // Postgres is always started from scratch, so stop // without destroy only used for testing and debugging. // if destroy { @@ -489,7 +489,7 @@ impl PostgresNode { "Destroying postgres data directory '{}'", self.pgdata().to_str().unwrap() ); - fs::remove_dir_all(self.pgdata())?; + fs::remove_dir_all(self.endpoint_path())?; } else { self.pg_ctl(&["stop"], &None)?; } diff --git a/control_plane/src/lib.rs b/control_plane/src/lib.rs index 6829479ad5..a773b8dcc3 100644 --- a/control_plane/src/lib.rs +++ b/control_plane/src/lib.rs @@ -9,7 +9,7 @@ mod background_process; pub mod broker; -pub mod compute; +pub mod endpoint; pub mod local_env; pub mod pageserver; pub mod postgresql_conf; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 8cc6329ce6..2b1eec7c4b 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -200,14 +200,8 @@ impl LocalEnv { self.neon_distrib_dir.join("storage_broker") } - pub fn pg_data_dirs_path(&self) -> PathBuf { - self.base_data_dir.join("pgdatadirs").join("tenants") - } - - pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf { - self.pg_data_dirs_path() - .join(tenant_id.to_string()) - .join(branch_name) + pub fn endpoints_path(&self) -> PathBuf { + self.base_data_dir.join("endpoints") } // TODO: move pageserver files into ./pageserver @@ -427,7 +421,7 @@ impl LocalEnv { } } - fs::create_dir_all(self.pg_data_dirs_path())?; + fs::create_dir_all(self.endpoints_path())?; for safekeeper in &self.safekeepers { fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?; diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index b328cea5c6..f0d9ce4af2 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -114,7 +114,7 @@ class NeonCompare(PgCompare): self.timeline = self.env.neon_cli.create_timeline(branch_name, tenant_id=self.tenant) # Start pg - self._pg = self.env.postgres.create_start(branch_name, "main", self.tenant) + self._pg = self.env.endpoints.create_start(branch_name, "main", self.tenant) @property def pg(self) -> PgProtocol: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 5b6f2e5c96..e9f0363843 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -830,7 +830,7 @@ class NeonEnvBuilder: # Stop all the nodes. 
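The on-disk move from `pgdatadirs/tenants/<tenant_id>/<branch_name>` to `endpoints/<endpoint_id>/pgdata` (see `endpoints_path()` and `Endpoint::pgdata()` above) is easiest to see spelled out. A sketch with illustrative names; `ep-main` is hypothetical:

```python
from pathlib import Path

repo_dir = Path(".neon")   # repo directory, as in the README session
endpoint_id = "ep-main"    # illustrative endpoint ID

# Old layout (removed by this change):
#   .neon/pgdatadirs/tenants/<tenant_id>/<branch_name>/postgresql.conf
# New layout:
endpoint_path = repo_dir / "endpoints" / endpoint_id  # per-endpoint directory
pgdata = endpoint_path / "pgdata"                     # Postgres data directory
conf = pgdata / "postgresql.conf"                     # config now lives under pgdata/
```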
if self.env: log.info("Cleaning up all storage and compute nodes") - self.env.postgres.stop_all() + self.env.endpoints.stop_all() for sk in self.env.safekeepers: sk.stop(immediate=True) self.env.pageserver.stop(immediate=True) @@ -894,7 +894,7 @@ class NeonEnv: self.port_distributor = config.port_distributor self.s3_mock_server = config.mock_s3_server self.neon_cli = NeonCli(env=self) - self.postgres = PostgresFactory(self) + self.endpoints = EndpointFactory(self) self.safekeepers: List[Safekeeper] = [] self.broker = config.broker self.remote_storage = config.remote_storage @@ -902,6 +902,7 @@ class NeonEnv: self.pg_version = config.pg_version self.neon_binpath = config.neon_binpath self.pg_distrib_dir = config.pg_distrib_dir + self.endpoint_counter = 0 # generate initial tenant ID here instead of letting 'neon init' generate it, # so that we don't need to dig it out of the config file afterwards. @@ -1015,6 +1016,13 @@ class NeonEnv: priv = (Path(self.repo_dir) / "auth_private_key.pem").read_text() return AuthKeys(pub=pub, priv=priv) + def generate_endpoint_id(self) -> str: + """ + Generate a unique endpoint ID + """ + self.endpoint_counter += 1 + return "ep-" + str(self.endpoint_counter) + @pytest.fixture(scope=shareable_scope) def _shared_simple_env( @@ -1073,7 +1081,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: """ yield _shared_simple_env - _shared_simple_env.postgres.stop_all() + _shared_simple_env.endpoints.stop_all() @pytest.fixture(scope="function") @@ -1097,7 +1105,7 @@ def neon_env_builder( neon_env_builder.init_start(). After the initialization, you can launch compute nodes by calling - the functions in the 'env.postgres' factory object, stop/start the + the functions in the 'env.endpoints' factory object, stop/start the nodes, etc. 
""" @@ -1438,16 +1446,16 @@ class NeonCli(AbstractNeonCli): args.extend(["-m", "immediate"]) return self.raw_cli(args) - def pg_create( + def endpoint_create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "create", "--tenant-id", str(tenant_id or self.env.initial_tenant), @@ -1460,22 +1468,22 @@ class NeonCli(AbstractNeonCli): args.extend(["--lsn", str(lsn)]) if port is not None: args.extend(["--port", str(port)]) - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) res = self.raw_cli(args) res.check_returncode() return res - def pg_start( + def endpoint_start( self, - node_name: str, + endpoint_id: str, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "start", "--tenant-id", str(tenant_id or self.env.initial_tenant), @@ -1486,30 +1494,30 @@ class NeonCli(AbstractNeonCli): args.append(f"--lsn={lsn}") if port is not None: args.append(f"--port={port}") - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) res = self.raw_cli(args) res.check_returncode() return res - def pg_stop( + def endpoint_stop( self, - node_name: str, + endpoint_id: str, tenant_id: Optional[TenantId] = None, destroy=False, check_return_code=True, ) -> "subprocess.CompletedProcess[str]": args = [ - "pg", + "endpoint", "stop", "--tenant-id", str(tenant_id or self.env.initial_tenant), ] if destroy: args.append("--destroy") - if node_name is not None: - args.append(node_name) + if endpoint_id is not None: + args.append(endpoint_id) return self.raw_cli(args, check_return_code=check_return_code) @@ -2167,8 +2175,8 @@ def static_proxy( yield proxy -class Postgres(PgProtocol): - """An object representing a running postgres daemon.""" +class Endpoint(PgProtocol): + """An object representing a Postgres compute endpoint managed by the control plane.""" def __init__( self, env: NeonEnv, tenant_id: TenantId, port: int, check_stop_result: bool = True @@ -2176,33 +2184,40 @@ class Postgres(PgProtocol): super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") self.env = env self.running = False - self.node_name: Optional[str] = None # dubious, see asserts below + self.endpoint_id: Optional[str] = None # dubious, see asserts below self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA self.tenant_id = tenant_id self.port = port self.check_stop_result = check_stop_result - # path to conf is /pgdatadirs/tenants///postgresql.conf + # path to conf is /endpoints//pgdata/postgresql.conf def create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> "Postgres": + ) -> "Endpoint": """ - Create the pg data directory. + Create a new Postgres endpoint. Returns self. 
""" if not config_lines: config_lines = [] - self.node_name = node_name or f"{branch_name}_pg_node" - self.env.neon_cli.pg_create( - branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port + if endpoint_id is None: + endpoint_id = self.env.generate_endpoint_id() + self.endpoint_id = endpoint_id + + self.env.neon_cli.endpoint_create( + branch_name, + endpoint_id=self.endpoint_id, + tenant_id=self.tenant_id, + lsn=lsn, + port=self.port, ) - path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name + path = Path("endpoints") / self.endpoint_id / "pgdata" self.pgdata_dir = os.path.join(self.env.repo_dir, path) if config_lines is None: @@ -2215,26 +2230,30 @@ class Postgres(PgProtocol): return self - def start(self) -> "Postgres": + def start(self) -> "Endpoint": """ Start the Postgres instance. Returns self. """ - assert self.node_name is not None + assert self.endpoint_id is not None - log.info(f"Starting postgres node {self.node_name}") + log.info(f"Starting postgres endpoint {self.endpoint_id}") - self.env.neon_cli.pg_start(self.node_name, tenant_id=self.tenant_id, port=self.port) + self.env.neon_cli.endpoint_start(self.endpoint_id, tenant_id=self.tenant_id, port=self.port) self.running = True return self + def endpoint_path(self) -> Path: + """Path to endpoint directory""" + assert self.endpoint_id + path = Path("endpoints") / self.endpoint_id + return self.env.repo_dir / path + def pg_data_dir_path(self) -> str: - """Path to data directory""" - assert self.node_name - path = Path("pgdatadirs") / "tenants" / str(self.tenant_id) / self.node_name - return os.path.join(self.env.repo_dir, path) + """Path to Postgres data directory""" + return os.path.join(self.endpoint_path(), "pgdata") def pg_xact_dir_path(self) -> str: """Path to pg_xact dir""" @@ -2248,7 +2267,7 @@ class Postgres(PgProtocol): """Path to postgresql.conf""" return os.path.join(self.pg_data_dir_path(), "postgresql.conf") - def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres": + def adjust_for_safekeepers(self, safekeepers: str) -> "Endpoint": """ Adjust instance config for working with wal acceptors instead of pageserver (pre-configured by CLI) directly. @@ -2272,7 +2291,7 @@ class Postgres(PgProtocol): f.write("neon.safekeepers = '{}'\n".format(safekeepers)) return self - def config(self, lines: List[str]) -> "Postgres": + def config(self, lines: List[str]) -> "Endpoint": """ Add lines to postgresql.conf. Lines should be an array of valid postgresql.conf rows. @@ -2286,32 +2305,32 @@ class Postgres(PgProtocol): return self - def stop(self) -> "Postgres": + def stop(self) -> "Endpoint": """ Stop the Postgres instance if it's running. Returns self. """ if self.running: - assert self.node_name is not None - self.env.neon_cli.pg_stop( - self.node_name, self.tenant_id, check_return_code=self.check_stop_result + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, self.tenant_id, check_return_code=self.check_stop_result ) self.running = False return self - def stop_and_destroy(self) -> "Postgres": + def stop_and_destroy(self) -> "Endpoint": """ - Stop the Postgres instance, then destroy it. + Stop the Postgres instance, then destroy the endpoint. Returns self. 
""" - assert self.node_name is not None - self.env.neon_cli.pg_stop( - self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, self.tenant_id, True, check_return_code=self.check_stop_result ) - self.node_name = None + self.endpoint_id = None self.running = False return self @@ -2319,13 +2338,12 @@ class Postgres(PgProtocol): def create_start( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> "Postgres": + ) -> "Endpoint": """ - Create a Postgres instance, apply config - and then start it. + Create an endpoint, apply config, and start Postgres. Returns self. """ @@ -2333,7 +2351,7 @@ class Postgres(PgProtocol): self.create( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, config_lines=config_lines, lsn=lsn, ).start() @@ -2342,7 +2360,7 @@ class Postgres(PgProtocol): return self - def __enter__(self) -> "Postgres": + def __enter__(self) -> "Endpoint": return self def __exit__( @@ -2354,33 +2372,33 @@ class Postgres(PgProtocol): self.stop() -class PostgresFactory: - """An object representing multiple running postgres daemons.""" +class EndpointFactory: + """An object representing multiple compute endpoints.""" def __init__(self, env: NeonEnv): self.env = env self.num_instances: int = 0 - self.instances: List[Postgres] = [] + self.endpoints: List[Endpoint] = [] def create_start( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> Postgres: - pg = Postgres( + ) -> Endpoint: + ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, port=self.env.port_distributor.get_port(), ) self.num_instances += 1 - self.instances.append(pg) + self.endpoints.append(ep) - return pg.create_start( + return ep.create_start( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, config_lines=config_lines, lsn=lsn, ) @@ -2388,30 +2406,33 @@ class PostgresFactory: def create( self, branch_name: str, - node_name: Optional[str] = None, + endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, config_lines: Optional[List[str]] = None, - ) -> Postgres: - pg = Postgres( + ) -> Endpoint: + ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, port=self.env.port_distributor.get_port(), ) - self.num_instances += 1 - self.instances.append(pg) + if endpoint_id is None: + endpoint_id = self.env.generate_endpoint_id() - return pg.create( + self.num_instances += 1 + self.endpoints.append(ep) + + return ep.create( branch_name=branch_name, - node_name=node_name, + endpoint_id=endpoint_id, lsn=lsn, config_lines=config_lines, ) - def stop_all(self) -> "PostgresFactory": - for pg in self.instances: - pg.stop() + def stop_all(self) -> "EndpointFactory": + for ep in self.endpoints: + ep.stop() return self @@ -2786,16 +2807,16 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]: def check_restored_datadir_content( test_output_dir: Path, env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, ): # Get the timeline ID. 
We need it for the 'basebackup' command - timeline = TimelineId(pg.safe_psql("SHOW neon.timeline_id")[0][0]) + timeline = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0]) # stop postgres to ensure that files won't change - pg.stop() + endpoint.stop() # Take a basebackup from pageserver - restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir" + restored_dir_path = env.repo_dir / f"{endpoint.endpoint_id}_restored_datadir" restored_dir_path.mkdir(exist_ok=True) pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version) @@ -2805,7 +2826,7 @@ def check_restored_datadir_content( {psql_path} \ --no-psqlrc \ postgres://localhost:{env.pageserver.service_port.pg} \ - -c 'basebackup {pg.tenant_id} {timeline}' \ + -c 'basebackup {endpoint.tenant_id} {timeline}' \ | tar -x -C {restored_dir_path} """ @@ -2822,8 +2843,8 @@ def check_restored_datadir_content( assert result.returncode == 0 # list files we're going to compare - assert pg.pgdata_dir - pgdata_files = list_files_to_compare(Path(pg.pgdata_dir)) + assert endpoint.pgdata_dir + pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir)) restored_files = list_files_to_compare(restored_dir_path) # check that file sets are equal @@ -2834,12 +2855,12 @@ def check_restored_datadir_content( # We've already filtered all mismatching files in list_files_to_compare(), # so here expect that the content is identical (match, mismatch, error) = filecmp.cmpfiles( - pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False + endpoint.pgdata_dir, restored_dir_path, pgdata_files, shallow=False ) log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}") for f in mismatch: - f1 = os.path.join(pg.pgdata_dir, f) + f1 = os.path.join(endpoint.pgdata_dir, f) f2 = os.path.join(restored_dir_path, f) stdout_filename = "{}.filediff".format(f2) @@ -2854,24 +2875,24 @@ def check_restored_datadir_content( def wait_for_last_flush_lsn( - env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId + env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId ) -> Lsn: """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn.""" - last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn) def wait_for_wal_insert_lsn( - env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId + env: NeonEnv, endpoint: Endpoint, tenant: TenantId, timeline: TimelineId ) -> Lsn: """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn.""" - last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0]) + last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0]) return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn) def fork_at_current_lsn( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, new_branch_name: str, ancestor_branch_name: str, tenant_id: Optional[TenantId] = None, @@ -2881,7 +2902,7 @@ def fork_at_current_lsn( The "last LSN" is taken from the given Postgres instance. The pageserver will wait for all the the WAL up to that LSN to arrive in the pageserver before creating the branch. 
""" - current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0] + current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0] return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn) diff --git a/test_runner/performance/test_branch_creation.py b/test_runner/performance/test_branch_creation.py index 16c5438b8f..6edcb8f1f2 100644 --- a/test_runner/performance/test_branch_creation.py +++ b/test_runner/performance/test_branch_creation.py @@ -52,13 +52,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) def run_pgbench(branch: str): log.info(f"Start a pgbench workload on branch {branch}") - pg = env.postgres.create_start(branch, tenant_id=tenant) - connstr = pg.connstr() + endpoint = env.endpoints.create_start(branch, tenant_id=tenant) + connstr = endpoint.connstr() pg_bin.run_capture(["pgbench", "-i", connstr]) pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr]) - pg.stop() + endpoint.stop() env.neon_cli.create_branch("b0", tenant_id=tenant) @@ -96,8 +96,8 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): env.neon_cli.create_branch("b0") - pg = env.postgres.create_start("b0") - neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()]) + endpoint = env.endpoints.create_start("b0") + neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()]) branch_creation_durations = [] @@ -124,15 +124,15 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare): timeline_id = env.neon_cli.create_branch("root") - pg = env.postgres.create_start("root") - with closing(pg.connect()) as conn: + endpoint = env.endpoints.create_start("root") + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(10000): cur.execute(f"CREATE TABLE t{i} as SELECT g FROM generate_series(1, 1000) g") # Wait for the pageserver to finish processing all the pending WALs, # as we don't want the LSN wait time to be included during the branch creation - flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) wait_for_last_record_lsn( env.pageserver.http_client(), env.initial_tenant, timeline_id, flush_lsn ) @@ -142,7 +142,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare): # run a concurrent insertion to make the ancestor "busy" during the branch creation thread = threading.Thread( - target=pg.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",) + target=endpoint.safe_psql, args=("INSERT INTO t0 VALUES (generate_series(1, 100000))",) ) thread.start() diff --git a/test_runner/performance/test_branching.py b/test_runner/performance/test_branching.py index 4eaec40096..667d1a4c4a 100644 --- a/test_runner/performance/test_branching.py +++ b/test_runner/performance/test_branching.py @@ -42,41 +42,41 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare): neon_compare.zenbenchmark.record_pg_bench_result(branch, res) env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") - pg_bin.run_capture(["pgbench", "-i", pg_root.connstr(), "-s10"]) + endpoint_root = env.endpoints.create_start("root") + pg_bin.run_capture(["pgbench", "-i", endpoint_root.connstr(), "-s10"]) - fork_at_current_lsn(env, pg_root, "child", "root") + fork_at_current_lsn(env, endpoint_root, "child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") 
- run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", pg_root.connstr()]) - run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", pg_child.connstr()]) + run_pgbench_on_branch("root", ["pgbench", "-c10", "-T10", endpoint_root.connstr()]) + run_pgbench_on_branch("child", ["pgbench", "-c10", "-T10", endpoint_child.connstr()]) def test_compare_child_and_root_write_perf(neon_compare: NeonCompare): env = neon_compare.env env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") + endpoint_root = env.endpoints.create_start("root") - pg_root.safe_psql( + endpoint_root.safe_psql( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')", ) env.neon_cli.create_branch("child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") with neon_compare.record_duration("root_run_duration"): - pg_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") + endpoint_root.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") with neon_compare.record_duration("child_run_duration"): - pg_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") + endpoint_child.safe_psql("INSERT INTO foo SELECT FROM generate_series(1,1000000)") def test_compare_child_and_root_read_perf(neon_compare: NeonCompare): env = neon_compare.env env.neon_cli.create_branch("root") - pg_root = env.postgres.create_start("root") + endpoint_root = env.endpoints.create_start("root") - pg_root.safe_psql_many( + endpoint_root.safe_psql_many( [ "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')", "INSERT INTO foo SELECT FROM generate_series(1,1000000)", @@ -84,12 +84,12 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare): ) env.neon_cli.create_branch("child", "root") - pg_child = env.postgres.create_start("child") + endpoint_child = env.endpoints.create_start("child") with neon_compare.record_duration("root_run_duration"): - pg_root.safe_psql("SELECT count(*) from foo") + endpoint_root.safe_psql("SELECT count(*) from foo") with neon_compare.record_duration("child_run_duration"): - pg_child.safe_psql("SELECT count(*) from foo") + endpoint_child.safe_psql("SELECT count(*) from foo") # ----------------------------------------------------------------------- diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index cef7ce0c6b..9b05903cfa 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -35,14 +35,14 @@ def test_bulk_tenant_create( # if use_safekeepers == 'with_sa': # wa_factory.start_n_new(3) - pg_tenant = env.postgres.create_start( + endpoint_tenant = env.endpoints.create_start( f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant ) end = timeit.default_timer() time_slices.append(end - start) - pg_tenant.stop() + endpoint_tenant.stop() zenbenchmark.record( "tenant_creation_time", diff --git a/test_runner/performance/test_bulk_update.py b/test_runner/performance/test_bulk_update.py index 7aa6f09a40..2ace31a2d7 100644 --- a/test_runner/performance/test_bulk_update.py +++ b/test_runner/performance/test_bulk_update.py @@ -18,8 +18,8 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) timeline_id = env.neon_cli.create_branch("test_bulk_update") tenant_id = env.initial_tenant - pg = 
env.postgres.create_start("test_bulk_update") - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_bulk_update") + cur = endpoint.connect().cursor() cur.execute("set statement_timeout=0") cur.execute(f"create table t(x integer) WITH (fillfactor={fillfactor})") @@ -28,13 +28,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) cur.execute(f"insert into t values (generate_series(1,{n_records}))") cur.execute("vacuum t") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("update-no-prefetch"): cur.execute("update t set x=x+1") cur.execute("vacuum t") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("delete-no-prefetch"): cur.execute("delete from t") @@ -50,13 +50,13 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor) cur.execute(f"insert into t2 values (generate_series(1,{n_records}))") cur.execute("vacuum t2") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("update-with-prefetch"): cur.execute("update t2 set x=x+1") cur.execute("vacuum t2") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) with zenbenchmark.record_duration("delete-with-prefetch"): cur.execute("delete from t2") diff --git a/test_runner/performance/test_compaction.py b/test_runner/performance/test_compaction.py index 89818ee8bd..326c4f5c6f 100644 --- a/test_runner/performance/test_compaction.py +++ b/test_runner/performance/test_compaction.py @@ -33,11 +33,11 @@ def test_compaction(neon_compare: NeonCompare): # Create some tables, and run a bunch of INSERTs and UPDATes on them, # to generate WAL and layers - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "main", tenant_id=tenant_id, config_lines=["shared_buffers=512MB"] ) - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(100): cur.execute(f"create table tbl{i} (i int, j int);") @@ -45,7 +45,7 @@ def test_compaction(neon_compare: NeonCompare): for j in range(100): cur.execute(f"update tbl{i} set j = {j};") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) # First compaction generates L1 layers with neon_compare.zenbenchmark.record_duration("compaction"): diff --git a/test_runner/performance/test_latency.py b/test_runner/performance/test_latency.py index 257e0421af..6c94ecc482 100644 --- a/test_runner/performance/test_latency.py +++ b/test_runner/performance/test_latency.py @@ -2,13 +2,13 @@ import threading import pytest from fixtures.compare_fixtures import PgCompare -from fixtures.neon_fixtures import Postgres +from fixtures.neon_fixtures import PgProtocol from performance.test_perf_pgbench import get_scales_matrix from performance.test_wal_backpressure import record_read_latency -def start_write_workload(pg: Postgres, scale: int = 10): +def start_write_workload(pg: PgProtocol, scale: int = 10): with pg.connect().cursor() as cur: cur.execute(f"create table big as select generate_series(1,{scale*100_000})") diff --git a/test_runner/performance/test_layer_map.py b/test_runner/performance/test_layer_map.py index 
fb29c05273..18308e1077 100644 --- a/test_runner/performance/test_layer_map.py +++ b/test_runner/performance/test_layer_map.py @@ -25,8 +25,8 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark): ) env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant) - pg = env.postgres.create_start("test_layer_map", tenant_id=tenant) - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_layer_map", tenant_id=tenant) + cur = endpoint.connect().cursor() cur.execute("create table t(x integer)") for i in range(n_iters): cur.execute(f"insert into t values (generate_series(1,{n_records}))") diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index e91b180154..fa2e058491 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -14,19 +14,19 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Start env.neon_cli.create_branch("test_startup") with zenbenchmark.record_duration("startup_time"): - pg = env.postgres.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint = env.endpoints.create_start("test_startup") + endpoint.safe_psql("select 1;") # Restart - pg.stop_and_destroy() + endpoint.stop_and_destroy() with zenbenchmark.record_duration("restart_time"): - pg.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint.create_start("test_startup") + endpoint.safe_psql("select 1;") # Fill up num_rows = 1000000 # 30 MB num_tables = 100 - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: for i in range(num_tables): cur.execute(f"create table t_{i} (i integer);") @@ -34,18 +34,18 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Read with zenbenchmark.record_duration("read_time"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") # Read again with zenbenchmark.record_duration("second_read_time"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") # Restart - pg.stop_and_destroy() + endpoint.stop_and_destroy() with zenbenchmark.record_duration("restart_with_data"): - pg.create_start("test_startup") - pg.safe_psql("select 1;") + endpoint.create_start("test_startup") + endpoint.safe_psql("select 1;") # Read with zenbenchmark.record_duration("read_after_restart"): - pg.safe_psql("select * from t_0;") + endpoint.safe_psql("select * from t_0;") diff --git a/test_runner/regress/test_ancestor_branch.py b/test_runner/regress/test_ancestor_branch.py index 2406102756..e8c1a2f34c 100644 --- a/test_runner/regress/test_ancestor_branch.py +++ b/test_runner/regress/test_ancestor_branch.py @@ -22,8 +22,8 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): pageserver_http.configure_failpoints(("flush-frozen-before-sync", "sleep(10000)")) - pg_branch0 = env.postgres.create_start("main", tenant_id=tenant) - branch0_cur = pg_branch0.connect().cursor() + endpoint_branch0 = env.endpoints.create_start("main", tenant_id=tenant) + branch0_cur = endpoint_branch0.connect().cursor() branch0_timeline = TimelineId(query_scalar(branch0_cur, "SHOW neon.timeline_id")) log.info(f"b0 timeline {branch0_timeline}") @@ -44,10 +44,10 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Create branch1. 
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100) - pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant) + endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant) log.info("postgres is running on 'branch1' branch") - branch1_cur = pg_branch1.connect().cursor() + branch1_cur = endpoint_branch1.connect().cursor() branch1_timeline = TimelineId(query_scalar(branch1_cur, "SHOW neon.timeline_id")) log.info(f"b1 timeline {branch1_timeline}") @@ -67,9 +67,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Create branch2. env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200) - pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant) + endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant) log.info("postgres is running on 'branch2' branch") - branch2_cur = pg_branch2.connect().cursor() + branch2_cur = endpoint_branch2.connect().cursor() branch2_timeline = TimelineId(query_scalar(branch2_cur, "SHOW neon.timeline_id")) log.info(f"b2 timeline {branch2_timeline}") diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py index f7c4736e04..3305869dce 100644 --- a/test_runner/regress/test_auth.py +++ b/test_runner/regress/test_auth.py @@ -64,9 +64,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder): branch = "test_compute_auth_to_pageserver" env.neon_cli.create_branch(branch) - pg = env.postgres.create_start(branch) + endpoint = env.endpoints.create_start(branch) - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -83,7 +83,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): branch = f"test_auth_failures_auth_enabled_{auth_enabled}" timeline_id = env.neon_cli.create_branch(branch) - env.postgres.create_start(branch) + env.endpoints.create_start(branch) tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant) invalid_tenant_token = env.auth_keys.generate_tenant_token(TenantId.generate()) diff --git a/test_runner/regress/test_backpressure.py b/test_runner/regress/test_backpressure.py index a81fa380a9..352e149171 100644 --- a/test_runner/regress/test_backpressure.py +++ b/test_runner/regress/test_backpressure.py @@ -5,7 +5,7 @@ from contextlib import closing, contextmanager import psycopg2.extras import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnvBuilder pytest_plugins = "fixtures.neon_fixtures" @@ -20,10 +20,10 @@ def pg_cur(pg): # Periodically check that all backpressure lags are below the configured threshold, # assert if they are not. # If the check query fails, stop the thread. Main thread should notice that and stop the test. -def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interval=5): +def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_interval=5): log.info("checks started") - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures? 
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))") @@ -41,7 +41,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv max_replication_apply_lag_bytes = res[0] log.info(f"max_replication_apply_lag: {max_replication_apply_lag_bytes} bytes") - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: while not stop_event.is_set(): try: cur.execute( @@ -102,14 +102,14 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): # Create a branch for us env.neon_cli.create_branch("test_backpressure") - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_backpressure", config_lines=["max_replication_write_lag=30MB"] ) log.info("postgres is running on 'test_backpressure' branch") # setup check thread check_stop_event = threading.Event() - check_thread = threading.Thread(target=check_backpressure, args=(pg, check_stop_event)) + check_thread = threading.Thread(target=check_backpressure, args=(endpoint, check_stop_event)) check_thread.start() # Configure failpoint to slow down walreceiver ingest @@ -125,7 +125,7 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): # because of the lag and waiting for lsn to replay to arrive. time.sleep(2) - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: # Create and initialize test table cur.execute("CREATE TABLE foo(x bigint)") diff --git a/test_runner/regress/test_basebackup_error.py b/test_runner/regress/test_basebackup_error.py index 94d3999d17..170b494884 100644 --- a/test_runner/regress/test_basebackup_error.py +++ b/test_runner/regress/test_basebackup_error.py @@ -15,4 +15,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv): pageserver_http.configure_failpoints(("basebackup-before-control-file", "return")) with pytest.raises(Exception, match="basebackup-before-control-file"): - env.postgres.create_start("test_basebackup_error") + env.endpoints.create_start("test_basebackup_error") diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index cc807b7ff3..4a03421fcf 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -67,9 +67,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): ) timeline_main = env.neon_cli.create_timeline("test_main", tenant_id=tenant) - pg_main = env.postgres.create_start("test_main", tenant_id=tenant) + endpoint_main = env.endpoints.create_start("test_main", tenant_id=tenant) - main_cur = pg_main.connect().cursor() + main_cur = endpoint_main.connect().cursor() main_cur.execute( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')" @@ -90,9 +90,9 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): env.neon_cli.create_branch( "test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1 ) - pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant) + endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant) - branch_cur = pg_branch.connect().cursor() + branch_cur = endpoint_branch.connect().cursor() branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000 @@ -142,8 +142,8 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): ) b0 = env.neon_cli.create_branch("b0", tenant_id=tenant) - pg0 = env.postgres.create_start("b0", tenant_id=tenant) - res = pg0.safe_psql_many( + endpoint0 = 
env.endpoints.create_start("b0", tenant_id=tenant) + res = endpoint0.safe_psql_many( queries=[ "CREATE TABLE t(key serial primary key)", "INSERT INTO t SELECT FROM generate_series(1, 100000)", diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index d19f6a7d39..3f7d49ab03 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -18,10 +18,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): # Branch at the point where only 100 rows were inserted env.neon_cli.create_branch("test_branch_behind") - pgmain = env.postgres.create_start("test_branch_behind") + endpoint_main = env.endpoints.create_start("test_branch_behind") log.info("postgres is running on 'test_branch_behind' branch") - main_cur = pgmain.connect().cursor() + main_cur = endpoint_main.connect().cursor() timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id")) @@ -74,15 +74,15 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): "test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b ) - pg_hundred = env.postgres.create_start("test_branch_behind_hundred") - pg_more = env.postgres.create_start("test_branch_behind_more") + endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred") + endpoint_more = env.endpoints.create_start("test_branch_behind_more") # On the 'hundred' branch, we should see only 100 rows - hundred_cur = pg_hundred.connect().cursor() + hundred_cur = endpoint_hundred.connect().cursor() assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 # On the 'more' branch, we should see 100200 rows - more_cur = pg_more.connect().cursor() + more_cur = endpoint_more.connect().cursor() assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 # All the rows are visible on the main branch @@ -94,8 +94,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch( "test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000") ) - pg = env.postgres.create_start("test_branch_segment_boundary") - assert pg.safe_psql("SELECT 1")[0][0] == 1 + endpoint = env.endpoints.create_start("test_branch_segment_boundary") + assert endpoint.safe_psql("SELECT 1")[0][0] == 1 # branch at pre-initdb lsn with pytest.raises(Exception, match="invalid branch start lsn: .*"): diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 3b78700e9f..31f9df6ebe 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -5,7 +5,7 @@ from typing import List import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv, PgBin from fixtures.types import Lsn from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix @@ -40,20 +40,20 @@ def test_branching_with_pgbench( } ) - def run_pgbench(pg: Postgres): - connstr = pg.connstr() - + def run_pgbench(connstr: str): log.info(f"Start a pgbench workload on pg {connstr}") pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) pg_bin.run_capture(["pgbench", "-T15", connstr]) env.neon_cli.create_branch("b0", tenant_id=tenant) - pgs: List[Postgres] = [] - pgs.append(env.postgres.create_start("b0", tenant_id=tenant)) + endpoints: List[Endpoint] = [] + endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant)) threads: List[threading.Thread] = [] - 
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True)) + threads.append( + threading.Thread(target=run_pgbench, args=(endpoints[0].connstr(),), daemon=True) + ) threads[-1].start() thread_limit = 4 @@ -79,16 +79,18 @@ def test_branching_with_pgbench( else: env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant) - pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant)) + endpoints.append(env.endpoints.create_start("b{}".format(i + 1), tenant_id=tenant)) - threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True)) + threads.append( + threading.Thread(target=run_pgbench, args=(endpoints[-1].connstr(),), daemon=True) + ) threads[-1].start() for thread in threads: thread.join() - for pg in pgs: - res = pg.safe_psql("SELECT count(*) from pgbench_accounts") + for ep in endpoints: + res = ep.safe_psql("SELECT count(*) from pgbench_accounts") assert res[0] == (100000 * scale,) @@ -110,11 +112,11 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi env = neon_simple_env env.neon_cli.create_branch("b0") - pg0 = env.postgres.create_start("b0") + endpoint0 = env.endpoints.create_start("b0") - pg_bin.run_capture(["pgbench", "-i", pg0.connstr()]) + pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()]) - with pg0.cursor() as cur: + with endpoint0.cursor() as cur: curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ` @@ -123,6 +125,6 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...") env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn) - pg1 = env.postgres.create_start("b1") + endpoint1 = env.endpoints.create_start("b1") - pg_bin.run_capture(["pgbench", "-i", pg1.connstr()]) + pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()]) diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index d12a0223a1..fb592bfbc3 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -4,7 +4,7 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder from fixtures.types import TenantId, TimelineId @@ -24,17 +24,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): ] ) - tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = [] + tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = [] for n in range(4): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'") - pg.stop() - tenant_timelines.append((tenant_id, timeline_id, pg)) + endpoint.stop() + tenant_timelines.append((tenant_id, timeline_id, endpoint)) # Stop the pageserver env.pageserver.stop() diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py index f47e4a99bf..f22eca02cc 100644 --- a/test_runner/regress/test_clog_truncate.py +++ b/test_runner/regress/test_clog_truncate.py @@ -24,14 +24,14 
@@ def test_clog_truncate(neon_simple_env: NeonEnv): "autovacuum_freeze_max_age=100000", ] - pg = env.postgres.create_start("test_clog_truncate", config_lines=config) + endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config) log.info("postgres is running on test_clog_truncate branch") # Install extension containing function needed for test - pg.safe_psql("CREATE EXTENSION neon_test_utils") + endpoint.safe_psql("CREATE EXTENSION neon_test_utils") # Consume many xids to advance clog - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("select test_consume_xids(1000*1000*10);") log.info("xids consumed") @@ -44,7 +44,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): # wait for autovacuum to truncate the pg_xact # XXX Is it worth to add a timeout here? - pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000") + pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") while os.path.isfile(pg_xact_0000_path): @@ -52,7 +52,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): time.sleep(5) # checkpoint to advance latest lsn - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CHECKPOINT;") lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()") @@ -61,10 +61,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv): env.neon_cli.create_branch( "test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation ) - pg2 = env.postgres.create_start("test_clog_truncate_new") + endpoint2 = env.endpoints.create_start("test_clog_truncate_new") log.info("postgres is running on test_clog_truncate_new branch") # check that new node doesn't contain truncated segment - pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000") + pg_xact_0000_path_new = os.path.join(endpoint2.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}") assert os.path.isfile(pg_xact_0000_path_new) is False diff --git a/test_runner/regress/test_close_fds.py b/test_runner/regress/test_close_fds.py index 22f245f79b..7059f3360e 100644 --- a/test_runner/regress/test_close_fds.py +++ b/test_runner/regress/test_close_fds.py @@ -24,8 +24,8 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv): def start_workload(): env.neon_cli.create_branch("test_lsof_pageserver_pid") - pg = env.postgres.create_start("test_lsof_pageserver_pid") - with closing(pg.connect()) as conn: + endpoint = env.endpoints.create_start("test_lsof_pageserver_pid") + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x") cur.execute("update foo set x=x+1") diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 0cc111bd8c..e262202a73 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -1,3 +1,4 @@ +import copy import os import shutil import subprocess @@ -55,29 +56,31 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o neon_env_builder.preserve_database_files = True env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") # FIXME: Is this expected? 
env.pageserver.allowed_errors.append( ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()]) - pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()]) - pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"]) + pg_bin.run(["pgbench", "--initialize", "--scale=10", endpoint.connstr()]) + pg_bin.run(["pgbench", "--time=60", "--progress=2", endpoint.connstr()]) + pg_bin.run( + ["pg_dumpall", f"--dbname={endpoint.connstr()}", f"--file={test_output_dir / 'dump.sql'}"] + ) snapshot_config = toml.load(test_output_dir / "repo" / "config") tenant_id = snapshot_config["default_tenant_id"] timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id] pageserver_http = env.pageserver.http_client() - lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) wait_for_upload(pageserver_http, tenant_id, timeline_id, lsn) - env.postgres.stop_all() + env.endpoints.stop_all() for sk in env.safekeepers: sk.stop() env.pageserver.stop() @@ -98,6 +101,9 @@ def test_backward_compatibility( pg_version: str, request: FixtureRequest, ): + """ + Test that the new binaries can read old data + """ compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR") assert ( compatibility_snapshot_dir_env is not None @@ -120,6 +126,7 @@ def test_backward_compatibility( check_neon_works( test_output_dir / "compatibility_snapshot" / "repo", neon_binpath, + neon_binpath, pg_distrib_dir, pg_version, port_distributor, @@ -148,7 +155,11 @@ def test_forward_compatibility( port_distributor: PortDistributor, pg_version: str, request: FixtureRequest, + neon_binpath: Path, ): + """ + Test that the old binaries can read new data + """ compatibility_neon_bin_env = os.environ.get("COMPATIBILITY_NEON_BIN") assert compatibility_neon_bin_env is not None, ( "COMPATIBILITY_NEON_BIN is not set. It should be set to a path with Neon binaries " @@ -183,6 +194,7 @@ def test_forward_compatibility( check_neon_works( test_output_dir / "compatibility_snapshot" / "repo", compatibility_neon_bin, + neon_binpath, compatibility_postgres_distrib_dir, pg_version, port_distributor, @@ -223,9 +235,13 @@ def prepare_snapshot( for logfile in repo_dir.glob("**/*.log"): logfile.unlink() - # Remove tenants data for compute - for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"): - shutil.rmtree(tenant) + # Remove old computes in 'endpoints'. Old versions of the control plane used a directory + # called "pgdatadirs". Delete it, too. + if (repo_dir / "endpoints").exists(): + shutil.rmtree(repo_dir / "endpoints") + if (repo_dir / "pgdatadirs").exists(): + shutil.rmtree(repo_dir / "pgdatadirs") + os.mkdir(repo_dir / "endpoints") # Remove wal-redo temp directory if it exists. Newer pageserver versions don't create # them anymore, but old versions did. 
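Note on the `import copy` added above: in `check_neon_works` below, `config_target = config` merely aliases the config object, while `config_current = copy.copy(config)` creates a shallow copy whose `neon_binpath` can be set independently. A minimal, self-contained illustration (the toy `Config` class and the paths are hypothetical, for demonstration only):

```python
import copy

class Config:
    def __init__(self, neon_binpath: str):
        self.neon_binpath = neon_binpath

base = Config("target/debug")      # hypothetical path
alias = base                       # same object, not a copy
clone = copy.copy(alias)           # shallow copy: independent attributes

alias.neon_binpath = "old/debug"   # mutates 'base' too, via the alias
assert base.neon_binpath == "old/debug"
assert clone.neon_binpath == "target/debug"  # the copy is unaffected
```

This is why the backward-compatibility test can pass the current `neon_binpath` for both roles, while the forward-compatibility test launches storage from `compatibility_neon_bin` and computes from the current `neon_binpath`.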
@@ -326,7 +342,8 @@ def get_neon_version(neon_binpath: Path): def check_neon_works( repo_dir: Path, - neon_binpath: Path, + neon_target_binpath: Path, + neon_current_binpath: Path, pg_distrib_dir: Path, pg_version: str, port_distributor: PortDistributor, @@ -336,7 +353,7 @@ def check_neon_works( ): snapshot_config_toml = repo_dir / "config" snapshot_config = toml.load(snapshot_config_toml) - snapshot_config["neon_distrib_dir"] = str(neon_binpath) + snapshot_config["neon_distrib_dir"] = str(neon_target_binpath) snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir) with (snapshot_config_toml).open("w") as f: toml.dump(snapshot_config, f) @@ -347,17 +364,25 @@ def check_neon_works( config.repo_dir = repo_dir config.pg_version = pg_version config.initial_tenant = snapshot_config["default_tenant_id"] - config.neon_binpath = neon_binpath config.pg_distrib_dir = pg_distrib_dir config.preserve_database_files = True - cli = NeonCli(config) - cli.raw_cli(["start"]) - request.addfinalizer(lambda: cli.raw_cli(["stop"])) + # Use the "target" binaries to launch the storage nodes + config_target = config + config_target.neon_binpath = neon_target_binpath + cli_target = NeonCli(config_target) + + # And the current binaries to launch computes + config_current = copy.copy(config) + config_current.neon_binpath = neon_current_binpath + cli_current = NeonCli(config_current) + + cli_target.raw_cli(["start"]) + request.addfinalizer(lambda: cli_target.raw_cli(["stop"])) pg_port = port_distributor.get_port() - cli.pg_start("main", port=pg_port) - request.addfinalizer(lambda: cli.pg_stop("main")) + cli_current.endpoint_start("main", port=pg_port) + request.addfinalizer(lambda: cli_current.endpoint_stop("main")) connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres" pg_bin.run(["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]) diff --git a/test_runner/regress/test_compute_ctl.py b/test_runner/regress/test_compute_ctl.py index 05ac3841dc..aa99a01c83 100644 --- a/test_runner/regress/test_compute_ctl.py +++ b/test_runner/regress/test_compute_ctl.py @@ -13,10 +13,10 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): ctl = ComputeCtl(env) env.neon_cli.create_branch("test_compute_ctl", "main") - pg = env.postgres.create_start("test_compute_ctl") - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint = env.endpoints.create_start("test_compute_ctl") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") - with open(pg.config_file_path(), "r") as f: + with open(endpoint.config_file_path(), "r") as f: cfg_lines = f.readlines() cfg_map = {} for line in cfg_lines: @@ -24,10 +24,13 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): k, v = line.split("=") cfg_map[k] = v.strip("\n '\"") log.info(f"postgres config: {cfg_map}") - pgdata = pg.pg_data_dir_path() + pgdata = endpoint.pg_data_dir_path() pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres") - pg.stop_and_destroy() + endpoint.stop_and_destroy() + + # stop_and_destroy removes the whole endpoint directory. Recreate it. 
+ Path(pgdata).mkdir(parents=True) spec = ( """ diff --git a/test_runner/regress/test_config.py b/test_runner/regress/test_config.py index 3477d96b89..0ea5784b67 100755 --- a/test_runner/regress/test_config.py +++ b/test_runner/regress/test_config.py @@ -12,10 +12,10 @@ def test_config(neon_simple_env: NeonEnv): env.neon_cli.create_branch("test_config", "empty") # change config - pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"]) + endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"]) log.info("postgres is running on test_config branch") - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute( """ diff --git a/test_runner/regress/test_crafted_wal_end.py b/test_runner/regress/test_crafted_wal_end.py index 9899d424d1..7ec901af34 100644 --- a/test_runner/regress/test_crafted_wal_end.py +++ b/test_runner/regress/test_crafted_wal_end.py @@ -21,11 +21,11 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_crafted_wal_end") - pg = env.postgres.create("test_crafted_wal_end") + endpoint = env.endpoints.create("test_crafted_wal_end") wal_craft = WalCraft(env) - pg.config(wal_craft.postgres_config()) - pg.start() - res = pg.safe_psql_many( + endpoint.config(wal_craft.postgres_config()) + endpoint.start() + res = endpoint.safe_psql_many( queries=[ "CREATE TABLE keys(key int primary key)", "INSERT INTO keys SELECT generate_series(1, 100)", @@ -34,7 +34,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): ) assert res[-1][0] == (5050,) - wal_craft.in_existing(wal_type, pg.connstr()) + wal_craft.in_existing(wal_type, endpoint.connstr()) log.info("Restarting all safekeepers and pageservers") env.pageserver.stop() @@ -43,7 +43,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries") - res = pg.safe_psql_many( + res = endpoint.safe_psql_many( queries=[ "SELECT SUM(key) FROM keys", "INSERT INTO keys SELECT generate_series(101, 200)", @@ -60,7 +60,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries (again)") - res = pg.safe_psql_many( + res = endpoint.safe_psql_many( queries=[ "SELECT SUM(key) FROM keys", "INSERT INTO keys SELECT generate_series(201, 300)", diff --git a/test_runner/regress/test_createdropdb.py b/test_runner/regress/test_createdropdb.py index 036e50e6e8..68035b1b14 100644 --- a/test_runner/regress/test_createdropdb.py +++ b/test_runner/regress/test_createdropdb.py @@ -13,10 +13,10 @@ def test_createdb(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_createdb", "empty") - pg = env.postgres.create_start("test_createdb") + endpoint = env.endpoints.create_start("test_createdb") log.info("postgres is running on 'test_createdb' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: # Cause a 'relmapper' change in the original branch cur.execute("VACUUM FULL pg_class") @@ -26,10 +26,10 @@ def test_createdb(neon_simple_env: NeonEnv): # Create a branch env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start("test_createdb2") + endpoint2 = env.endpoints.create_start("test_createdb2") # Test that you can connect to the new database on both branches - for db in (pg, pg2): + for db 
in (endpoint, endpoint2): with db.cursor(dbname="foodb") as cur: # Check database size in both branches cur.execute( @@ -55,17 +55,17 @@ def test_createdb(neon_simple_env: NeonEnv): def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_dropdb", "empty") - pg = env.postgres.create_start("test_dropdb") + endpoint = env.endpoints.create_start("test_dropdb") log.info("postgres is running on 'test_dropdb' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE DATABASE foodb") lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("DROP DATABASE foodb") cur.execute("CHECKPOINT") @@ -76,29 +76,29 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env.neon_cli.create_branch( "test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop ) - pg_before = env.postgres.create_start("test_before_dropdb") + endpoint_before = env.endpoints.create_start("test_before_dropdb") env.neon_cli.create_branch( "test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop ) - pg_after = env.postgres.create_start("test_after_dropdb") + endpoint_after = env.endpoints.create_start("test_after_dropdb") # Test that database exists on the branch before drop - pg_before.connect(dbname="foodb").close() + endpoint_before.connect(dbname="foodb").close() # Test that database subdir exists on the branch before drop - assert pg_before.pgdata_dir - dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid) + assert endpoint_before.pgdata_dir + dbpath = pathlib.Path(endpoint_before.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) is True # Test that database subdir doesn't exist on the branch after drop - assert pg_after.pgdata_dir - dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid) + assert endpoint_after.pgdata_dir + dbpath = pathlib.Path(endpoint_after.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) is False # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_createuser.py b/test_runner/regress/test_createuser.py index c5f8246f5b..f1bc405287 100644 --- a/test_runner/regress/test_createuser.py +++ b/test_runner/regress/test_createuser.py @@ -9,10 +9,10 @@ from fixtures.utils import query_scalar def test_createuser(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_createuser", "empty") - pg = env.postgres.create_start("test_createuser") + endpoint = env.endpoints.create_start("test_createuser") log.info("postgres is running on 'test_createuser' branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: # Cause a 'relmapper' change in the original branch cur.execute("CREATE USER testuser with password %s", ("testpwd",)) @@ -22,7 +22,7 @@ def test_createuser(neon_simple_env: NeonEnv): # Create a branch env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start("test_createuser2") + endpoint2 = env.endpoints.create_start("test_createuser2") # Test that you can connect to new branch as a new user - assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)] + assert 
endpoint2.safe_psql("select current_user", user="testuser") == [("testuser",)]
diff --git a/test_runner/regress/test_disk_usage_eviction.py b/test_runner/regress/test_disk_usage_eviction.py
index 413d6c9d5a..31c7ef2b17 100644
--- a/test_runner/regress/test_disk_usage_eviction.py
+++ b/test_runner/regress/test_disk_usage_eviction.py
@@ -91,8 +91,8 @@ class EvictionEnv:
         This assumes that the tenant is still at the state after pgbench -i.
         """
         lsn = self.pgbench_init_lsns[tenant_id]
-        with self.neon_env.postgres.create_start("main", tenant_id=tenant_id, lsn=lsn) as pg:
-            self.pg_bin.run(["pgbench", "-S", pg.connstr()])
+        with self.neon_env.endpoints.create_start("main", tenant_id=tenant_id, lsn=lsn) as endpoint:
+            self.pg_bin.run(["pgbench", "-S", endpoint.connstr()])

     def pageserver_start_with_disk_usage_eviction(
         self, period, max_usage_pct, min_avail_bytes, mock_behavior
@@ -168,9 +168,9 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
         }
     )

-    with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
-        pg_bin.run(["pgbench", "-i", f"-s{scale}", pg.connstr()])
-        wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
+        pg_bin.run(["pgbench", "-i", f"-s{scale}", endpoint.connstr()])
+        wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)

     timelines.append((tenant_id, timeline_id))

diff --git a/test_runner/regress/test_fsm_truncate.py b/test_runner/regress/test_fsm_truncate.py
index 4551ff97e0..80e4da8380 100644
--- a/test_runner/regress/test_fsm_truncate.py
+++ b/test_runner/regress/test_fsm_truncate.py
@@ -4,7 +4,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
 def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
     env.neon_cli.create_branch("test_fsm_truncate")
-    pg = env.postgres.create_start("test_fsm_truncate")
-    pg.safe_psql(
+    endpoint = env.endpoints.create_start("test_fsm_truncate")
+    endpoint.safe_psql(
         "CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
     )
diff --git a/test_runner/regress/test_fullbackup.py b/test_runner/regress/test_fullbackup.py
index b3d58edf6b..ece9dccf93 100644
--- a/test_runner/regress/test_fullbackup.py
+++ b/test_runner/regress/test_fullbackup.py
@@ -24,10 +24,10 @@ def test_fullbackup(
     env = neon_env_builder.init_start()
     env.neon_cli.create_branch("test_fullbackup")

-    pgmain = env.postgres.create_start("test_fullbackup")
+    endpoint_main = env.endpoints.create_start("test_fullbackup")
     log.info("postgres is running on 'test_fullbackup' branch")

-    with pgmain.cursor() as cur:
+    with endpoint_main.cursor() as cur:
         timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))

         # data loading may take a while, so increase statement timeout
diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py
index 702d94c691..d38be057d3 100644
--- a/test_runner/regress/test_gc_aggressive.py
+++ b/test_runner/regress/test_gc_aggressive.py
@@ -5,9 +5,9 @@ import random
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
+    Endpoint,
     NeonEnv,
     NeonEnvBuilder,
-    Postgres,
     RemoteStorageKind,
     wait_for_last_flush_lsn,
 )
@@ -26,9 +26,9 @@ updates_performed = 0

 # Run random UPDATEs on test table
-async def update_table(pg: Postgres):
+async def update_table(endpoint: Endpoint):
     global updates_performed
-    pg_conn = await pg.connect_async()
+    pg_conn = await endpoint.connect_async()

     while
updates_performed < updates_to_perform: updates_performed += 1 @@ -52,10 +52,10 @@ async def gc(env: NeonEnv, timeline: TimelineId): # At the same time, run UPDATEs and GC -async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: TimelineId): +async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId): workers = [] for worker_id in range(num_connections): - workers.append(asyncio.create_task(update_table(pg))) + workers.append(asyncio.create_task(update_table(endpoint))) workers.append(asyncio.create_task(gc(env, timeline))) # await all workers @@ -72,10 +72,10 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_aggressive", "main") - pg = env.postgres.create_start("test_gc_aggressive") + endpoint = env.endpoints.create_start("test_gc_aggressive") log.info("postgres is running on test_gc_aggressive branch") - with pg.cursor() as cur: + with endpoint.cursor() as cur: timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id")) # Create table, and insert the first 100 rows @@ -89,7 +89,7 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): ) cur.execute("CREATE INDEX ON foo(id)") - asyncio.run(update_and_gc(env, pg, timeline)) + asyncio.run(update_and_gc(env, endpoint, timeline)) cur.execute("SELECT COUNT(*), SUM(counter) FROM foo") r = cur.fetchone() @@ -110,11 +110,11 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind: env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_index_upload", "main") - pg = env.postgres.create_start("test_gc_index_upload") + endpoint = env.endpoints.create_start("test_gc_index_upload") pageserver_http = env.pageserver.http_client() - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id")) @@ -146,7 +146,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind: return int(total) # Sanity check that the metric works - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) pageserver_http.timeline_gc(tenant_id, timeline_id, 10000) before = get_num_remote_ops("index", "upload") diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 1b98a414da..79453c1bdc 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -31,8 +31,8 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): "image_creation_threshold": "2", } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) - connstr = pg.connstr(options="-csynchronous_commit=off") + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + connstr = endpoint.connstr(options="-csynchronous_commit=off") pg_bin.run_capture(["pgbench", "-i", "-s10", connstr]) pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py index 774ed98563..137ce457bc 100644 --- a/test_runner/regress/test_import.py +++ b/test_runner/regress/test_import.py @@ -9,10 +9,10 @@ from pathlib import Path import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, PgBin, - Postgres, ) 
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload from fixtures.types import Lsn, TenantId, TimelineId @@ -72,7 +72,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"] end_lsn = manifest["WAL-Ranges"][0]["End-LSN"] - node_name = "import_from_vanilla" + endpoint_id = "ep-import_from_vanilla" tenant = TenantId.generate() timeline = TimelineId.generate() @@ -113,7 +113,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build "--timeline-id", str(timeline), "--node-name", - node_name, + endpoint_id, "--base-lsn", start_lsn, "--base-tarfile", @@ -153,8 +153,8 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build wait_for_upload(client, tenant, timeline, Lsn(end_lsn)) # Check it worked - pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql("select count(*) from t") == [(300000,)] + endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant) + assert endpoint.safe_psql("select count(*) from t") == [(300000,)] @pytest.mark.timeout(600) @@ -168,10 +168,10 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu ) timeline = env.neon_cli.create_branch("test_import_from_pageserver_small") - pg = env.postgres.create_start("test_import_from_pageserver_small") + endpoint = env.endpoints.create_start("test_import_from_pageserver_small") num_rows = 3000 - lsn = _generate_data(num_rows, pg) + lsn = _generate_data(num_rows, endpoint) _import(num_rows, lsn, env, pg_bin, timeline, env.pg_distrib_dir) @@ -185,14 +185,14 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne env = neon_env_builder.init_start() timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment") - pg = env.postgres.create_start("test_import_from_pageserver_multisegment") + endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment") # For `test_import_from_pageserver_multisegment`, we want to make sure that the data # is large enough to create multi-segment files. Typically, a segment file's size is # at most 1GB. A large number of inserted rows (`30000000`) is used to increase the # DB size to above 1GB. Related: https://github.com/neondatabase/neon/issues/2097. num_rows = 30000000 - lsn = _generate_data(num_rows, pg) + lsn = _generate_data(num_rows, endpoint) logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[ "current_logical_size" @@ -213,12 +213,12 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne assert cnt_seg_files > 0 -def _generate_data(num_rows: int, pg: Postgres) -> Lsn: +def _generate_data(num_rows: int, endpoint: Endpoint) -> Lsn: """Generate a table with `num_rows` rows. 
Returns: the latest insert WAL's LSN""" - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") @@ -263,7 +263,7 @@ def _import( tar_output_file = result_basepath + ".stdout" # Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() dir_to_clear = Path(env.repo_dir) / "tenants" @@ -278,7 +278,7 @@ def _import( tenant = TenantId.generate() # Import to pageserver - node_name = "import_from_pageserver" + endpoint_id = "ep-import_from_pageserver" client = env.pageserver.http_client() client.tenant_create(tenant) env.neon_cli.raw_cli( @@ -290,7 +290,7 @@ def _import( "--timeline-id", str(timeline), "--node-name", - node_name, + endpoint_id, "--base-lsn", str(lsn), "--base-tarfile", @@ -305,8 +305,8 @@ def _import( wait_for_upload(client, tenant, timeline, lsn) # Check it worked - pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] + endpoint = env.endpoints.create_start(endpoint_id, tenant_id=tenant) + assert endpoint.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] # Take another fullbackup query = f"fullbackup { tenant} {timeline} {lsn}" diff --git a/test_runner/regress/test_large_schema.py b/test_runner/regress/test_large_schema.py index f14265f6fd..ac83131ba2 100644 --- a/test_runner/regress/test_large_schema.py +++ b/test_runner/regress/test_large_schema.py @@ -15,9 +15,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_large_schema(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() tables = 2 # 10 is too much for debug build @@ -27,18 +27,18 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): # Restart compute. Restart is actually not strictly needed. # It is done mostly because this test originally tries to model the problem reported by Ketteq. - pg.stop() + endpoint.stop() # Kill and restart the pageserver. 
# env.pageserver.stop(immediate=True) # env.pageserver.start() - pg.start() + endpoint.start() retry_sleep = 0.5 max_retries = 200 retries = 0 while True: try: - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)") for j in range(1, partitions + 1): @@ -63,7 +63,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): raise break - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() for i in range(1, tables + 1): @@ -74,8 +74,8 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid") # Check layer file sizes - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id) for filename in os.listdir(timeline_path): if filename.startswith("00000"): diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index 2d07d02ce7..1ae32fb398 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -27,13 +27,13 @@ def test_basic_eviction( env = neon_env_builder.init_start() client = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # Create a number of layers in the tenant - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( """ @@ -172,15 +172,15 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): env.initial_tenant = tenant_id # update_and_gc relies on this ps_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") log.info("fill with data, creating delta & image layers, some of which are GC'able after") # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a (id bigserial primary key, some_value bigint not null)") cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) ps_http.timeline_checkpoint(tenant_id, timeline_id) # Create delta layers, then turn them into image layers. 
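A recurring idiom in the layer and eviction tests in this diff: flush the compute's WAL to the pageserver, then force a checkpoint so the open in-memory layer is written out as an on-disk delta layer. A sketch of the sequence as a standalone helper (the helper name is ours; the calls are the fixture APIs used throughout this diff):

```python
from fixtures.neon_fixtures import Endpoint, NeonEnv, wait_for_last_flush_lsn
from fixtures.types import TenantId, TimelineId


def flush_and_checkpoint(
    env: NeonEnv, endpoint: Endpoint, tenant_id: TenantId, timeline_id: TimelineId
):
    # Wait until the pageserver has ingested all WAL the compute has flushed ...
    wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
    # ... then freeze the in-memory layer into an on-disk delta layer.
    env.pageserver.http_client().timeline_checkpoint(tenant_id, timeline_id)
```

Repeating this between batches of updates, as `test_gc_of_remote_layers` below does, yields a series of small delta layers rather than one large in-memory layer.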
@@ -191,19 +191,19 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): for i in range(0, 2): for j in range(0, 3): # create a minimal amount of "delta difficulty" for this table - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set some_value = -some_value + %s", (j,)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: # vacuuming should aid to reuse keys, though it's not really important # with image_creation_threshold=1 which we will use on the last compaction cur.execute("vacuum") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) if i == 1 and j == 2 and k == 1: # last iteration; stop before checkpoint to avoid leaving an inmemory layer - pg.stop_and_destroy() + endpoint.stop_and_destroy() ps_http.timeline_checkpoint(tenant_id, timeline_id) diff --git a/test_runner/regress/test_layer_writers_fail.py b/test_runner/regress/test_layer_writers_fail.py index e8ba0e7d91..d2d85a43e0 100644 --- a/test_runner/regress/test_layer_writers_fail.py +++ b/test_runner/regress/test_layer_writers_fail.py @@ -20,7 +20,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) + pg = env.endpoints.create_start("main", tenant_id=tenant_id) pg.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", @@ -64,8 +64,8 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): } ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) - pg.safe_psql_many( + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo diff --git a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py index c5a49a6704..8ccfc21cf7 100644 --- a/test_runner/regress/test_lsn_mapping.py +++ b/test_runner/regress/test_lsn_mapping.py @@ -12,10 +12,10 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") - pgmain = env.postgres.create_start("test_lsn_mapping") + endpoint_main = env.endpoints.create_start("test_lsn_mapping") log.info("postgres is running on 'test_lsn_mapping' branch") - cur = pgmain.connect().cursor() + cur = endpoint_main.connect().cursor() # Create table, and insert rows, each in a separate transaction # Disable synchronous_commit to make this initialization go faster. # @@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): cur.execute("INSERT INTO foo VALUES (-1)") # Wait until WAL is received by pageserver - wait_for_last_flush_lsn(env, pgmain, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id) with env.pageserver.http_client() as client: # Check edge cases: timestamp in the future @@ -61,9 +61,9 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): # Call get_lsn_by_timestamp to get the LSN # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible. 
- pg_here = env.postgres.create_start( - branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn + endpoint_here = env.endpoints.create_start( + branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn ) - assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i + assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i - pg_here.stop_and_destroy() + endpoint_here.stop_and_destroy() diff --git a/test_runner/regress/test_metric_collection.py b/test_runner/regress/test_metric_collection.py index a33af9a3b2..ecbce1f8f7 100644 --- a/test_runner/regress/test_metric_collection.py +++ b/test_runner/regress/test_metric_collection.py @@ -123,9 +123,9 @@ def test_metric_collection( # before pageserver, pageserver log might contain such errors in the end. env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*") env.neon_cli.create_branch("test_metric_collection") - pg = env.postgres.create_start("test_metric_collection") + endpoint = env.endpoints.create_start("test_metric_collection") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id")) @@ -158,7 +158,7 @@ def test_metric_collection( # upload some data to remote storage if remote_storage_kind == RemoteStorageKind.LOCAL_FS: - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http = env.pageserver.http_client() pageserver_http.timeline_checkpoint(tenant_id, timeline_id) pageserver_http.timeline_gc(tenant_id, timeline_id, 10000) diff --git a/test_runner/regress/test_multixact.py b/test_runner/regress/test_multixact.py index 635beb16b7..fe50969a0a 100644 --- a/test_runner/regress/test_multixact.py +++ b/test_runner/regress/test_multixact.py @@ -12,10 +12,10 @@ from fixtures.utils import query_scalar def test_multixact(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_multixact", "empty") - pg = env.postgres.create_start("test_multixact") + endpoint = env.endpoints.create_start("test_multixact") log.info("postgres is running on 'test_multixact' branch") - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute( """ CREATE TABLE t1(i int primary key); @@ -32,7 +32,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): connections = [] for i in range(nclients): # Do not turn on autocommit. We want to hold the key-share locks. 
- conn = pg.connect(autocommit=False) + conn = endpoint.connect(autocommit=False) connections.append(conn) # On each iteration, we commit the previous transaction on a connection, @@ -65,10 +65,10 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): # Branch at this point env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn) - pg_new = env.postgres.create_start("test_multixact_new") + endpoint_new = env.endpoints.create_start("test_multixact_new") log.info("postgres is running on 'test_multixact_new' branch") - next_multixact_id_new = pg_new.safe_psql( + next_multixact_id_new = endpoint_new.safe_psql( "SELECT next_multixact_id FROM pg_control_checkpoint()" )[0][0] @@ -76,4 +76,4 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): assert next_multixact_id_new == next_multixact_id # Check that we can restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py index bd0f550ba5..f6629c54f9 100644 --- a/test_runner/regress/test_neon_local_cli.py +++ b/test_runner/regress/test_neon_local_cli.py @@ -9,9 +9,11 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por try: env.neon_cli.start() env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True) - env.neon_cli.pg_start(node_name="main", port=port_distributor.get_port()) + env.neon_cli.endpoint_start(endpoint_id="ep-main", port=port_distributor.get_port()) env.neon_cli.create_branch(new_branch_name="migration_check") - env.neon_cli.pg_start(node_name="migration_check", port=port_distributor.get_port()) + env.neon_cli.endpoint_start( + endpoint_id="ep-migration_check", port=port_distributor.get_port() + ) finally: env.neon_cli.stop() diff --git a/test_runner/regress/test_next_xid.py b/test_runner/regress/test_next_xid.py index 698ea0e1d3..6e94e15227 100644 --- a/test_runner/regress/test_next_xid.py +++ b/test_runner/regress/test_next_xid.py @@ -8,9 +8,9 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_next_xid(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE t(x integer)") @@ -19,17 +19,17 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): print(f"iteration {i} / {iterations}") # Kill and restart the pageserver. 
- pg.stop() + endpoint.stop() env.pageserver.stop(immediate=True) env.pageserver.start() - pg.start() + endpoint.start() retry_sleep = 0.5 max_retries = 200 retries = 0 while True: try: - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute(f"INSERT INTO t values({i})") conn.close() @@ -48,7 +48,7 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): raise break - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("SELECT count(*) FROM t") assert cur.fetchone() == (iterations,) diff --git a/test_runner/regress/test_normal_work.py b/test_runner/regress/test_normal_work.py index aa37a2411c..50de99adb5 100644 --- a/test_runner/regress/test_normal_work.py +++ b/test_runner/regress/test_normal_work.py @@ -6,9 +6,9 @@ from fixtures.pageserver.http import PageserverHttpClient def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - res_1 = pg.safe_psql_many( + res_1 = endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -19,14 +19,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): assert res_1[-1][0] == (5000050000,) # TODO check detach on live instance log.info("stopping compute") - pg.stop() + endpoint.stop() log.info("compute stopped") - pg.start() - res_2 = pg.safe_psql("SELECT sum(key) FROM t") + endpoint.start() + res_2 = endpoint.safe_psql("SELECT sum(key) FROM t") assert res_2[0] == (5000050000,) - pg.stop() + endpoint.stop() pageserver_http.tenant_detach(tenant_id) diff --git a/test_runner/regress/test_old_request_lsn.py b/test_runner/regress/test_old_request_lsn.py index 9885a811e1..814b9f3de0 100644 --- a/test_runner/regress/test_old_request_lsn.py +++ b/test_runner/regress/test_old_request_lsn.py @@ -19,10 +19,10 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_old_request_lsn", "main") - pg = env.postgres.create_start("test_old_request_lsn") + endpoint = env.endpoints.create_start("test_old_request_lsn") log.info("postgres is running on test_old_request_lsn branch") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Get the timeline ID of our branch. We need it for the 'do_gc' command diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 07410b64df..cb08b014fd 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -73,17 +73,17 @@ def test_ondemand_download_large_rel( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] # We want to make sure that the data is large enough that the keyspace is partitioned. 
num_rows = 1000000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") cur.execute( @@ -106,7 +106,7 @@ def test_ondemand_download_large_rel( log.info("uploads have finished") ##### Stop the first pageserver instance, erase all its data - pg.stop() + endpoint.stop() env.pageserver.stop() # remove all the layer files @@ -117,7 +117,7 @@ def test_ondemand_download_large_rel( ##### Second start, restore the data and ensure it's the same env.pageserver.start() - pg.start() + endpoint.start() before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id) # Probe in the middle of the table. There's a high chance that the beginning @@ -125,7 +125,7 @@ def test_ondemand_download_large_rel( # from other tables, and with the entry that stores the size of the # relation, so they are likely already downloaded. But the middle of the # table should not have been needed by anything yet. - with pg.cursor() as cur: + with endpoint.cursor() as cur: assert query_scalar(cur, "select count(*) from tbl where id = 500000") == 1 after_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id) @@ -167,17 +167,17 @@ def test_ondemand_download_timetravel( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] lsns = [] table_len = 10000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text); @@ -192,7 +192,7 @@ def test_ondemand_download_timetravel( lsns.append((0, current_lsn)) for checkpoint_number in range(1, 20): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"UPDATE testtab SET checkpoint_number = {checkpoint_number}") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) lsns.append((checkpoint_number, current_lsn)) @@ -204,7 +204,7 @@ def test_ondemand_download_timetravel( client.timeline_checkpoint(tenant_id, timeline_id) ##### Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() # wait until pageserver has successfully uploaded all the data to remote storage wait_for_sk_commit_lsn_to_reach_remote_storage( @@ -251,10 +251,10 @@ def test_ondemand_download_timetravel( num_layers_downloaded = [0] resident_size = [get_resident_physical_size()] for checkpoint_number, lsn in lsns: - pg_old = env.postgres.create_start( - branch_name="main", node_name=f"test_old_lsn_{checkpoint_number}", lsn=lsn + endpoint_old = env.endpoints.create_start( + branch_name="main", endpoint_id=f"ep-old_lsn_{checkpoint_number}", lsn=lsn ) - with pg_old.cursor() as cur: + with endpoint_old.cursor() as cur: # assert query_scalar(cur, f"select count(*) from testtab where checkpoint_number={checkpoint_number}") == 100000 assert ( query_scalar( @@ -331,15 +331,15 @@ def test_download_remote_layers_api( ) env.initial_tenant = tenant - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show 
neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] table_len = 10000 - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE testtab(id serial primary key, checkpoint_number int, data text); @@ -347,7 +347,7 @@ def test_download_remote_layers_api( """ ) - env.postgres.stop_all() + env.endpoints.stop_all() wait_for_sk_commit_lsn_to_reach_remote_storage( tenant_id, timeline_id, env.safekeepers, env.pageserver @@ -463,8 +463,8 @@ def test_download_remote_layers_api( sk.start() # ensure that all the data is back - pg_old = env.postgres.create_start(branch_name="main") - with pg_old.cursor() as cur: + endpoint_old = env.endpoints.create_start(branch_name="main") + with endpoint_old.cursor() as cur: assert query_scalar(cur, "select count(*) from testtab") == table_len @@ -513,17 +513,17 @@ def test_compaction_downloads_on_demand_without_image_creation( env.initial_tenant = tenant_id pageserver_http = env.pageserver.http_client() - with env.postgres.create_start("main") as pg: + with env.endpoints.create_start("main") as endpoint: # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a as select id::bigint from generate_series(1, 204800) s(id)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set id = -id") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) layers = pageserver_http.layer_map_info(tenant_id, timeline_id) @@ -589,32 +589,32 @@ def test_compaction_downloads_on_demand_with_image_creation( env.initial_tenant = tenant_id pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") # no particular reason to create the layers like this, but we are sure # not to hit the image_creation_threshold here. 
- with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("create table a (id bigserial primary key, some_value bigint not null)") cur.execute("insert into a(some_value) select i from generate_series(1, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) pageserver_http.timeline_checkpoint(tenant_id, timeline_id) for i in range(0, 2): for j in range(0, 3): # create a minimal amount of "delta difficulty" for this table - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("update a set some_value = -some_value + %s", (j,)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: # vacuuming should aid to reuse keys, though it's not really important # with image_creation_threshold=1 which we will use on the last compaction cur.execute("vacuum") - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) if i == 1 and j == 2: # last iteration; stop before checkpoint to avoid leaving an inmemory layer - pg.stop_and_destroy() + endpoint.stop_and_destroy() pageserver_http.timeline_checkpoint(tenant_id, timeline_id) diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py index 5b05989ae4..e86cd18f58 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -150,7 +150,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): env = neon_simple_env with env.pageserver.http_client() as client: tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id) + endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id) # Wait to make sure that we get a latest WAL receiver data. # We need to wait here because it's possible that we don't have access to @@ -163,7 +163,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): ) # Make a DB modification then expect getting a new WAL receiver's data. - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") wait_until( number_of_iterations=5, interval=1, diff --git a/test_runner/regress/test_pageserver_catchup.py b/test_runner/regress/test_pageserver_catchup.py index cba3203591..c16cbcb4ba 100644 --- a/test_runner/regress/test_pageserver_catchup.py +++ b/test_runner/regress/test_pageserver_catchup.py @@ -11,11 +11,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down") # Make shared_buffers large to ensure we won't query pageserver while it is down. - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"] ) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. 
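One small API shift worth noting: the pgbench worker threads (in `test_branching.py` above and `test_pageserver_restarts_under_workload.py` below) now receive a plain connection string rather than the compute handle, so a worker no longer holds a reference to the renamed `Endpoint` object. A sketch of the pattern, assuming an `endpoint` created with `env.endpoints.create_start(...)` and the `pg_bin` fixture as used in this diff:

```python
import threading


def run_pgbench(connstr: str):
    # The worker needs only the libpq connection string, not the Endpoint itself.
    pg_bin.run_capture(["pgbench", "-i", connstr])
    pg_bin.run_capture(["pgbench", "-T15", connstr])


# Capture the connection string once, at thread creation time.
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
thread.start()
thread.join()
```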
@@ -59,10 +59,10 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.safekeepers[2].start() # restart compute node - pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") + endpoint.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") # Ensure that the basebackup completed correctly and the pageserver returned all data - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 77db729880..6da5503fb1 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -11,9 +11,9 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_pageserver_restart") - pg = env.postgres.create_start("test_pageserver_restart") + endpoint = env.endpoints.create_start("test_pageserver_restart") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. Make it big enough that it doesn't fit in @@ -84,13 +84,13 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder): } ) env.neon_cli.create_timeline("test_pageserver_chaos", tenant_id=tenant) - pg = env.postgres.create_start("test_pageserver_chaos", tenant_id=tenant) + endpoint = env.endpoints.create_start("test_pageserver_chaos", tenant_id=tenant) # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. 
- with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo (id int, t text, updates int)") cur.execute("CREATE INDEX ON foo (id)") @@ -116,12 +116,12 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder): # Update the whole table, then immediately kill and restart the pageserver for i in range(1, 15): - pg.safe_psql("UPDATE foo set updates = updates + 1") + endpoint.safe_psql("UPDATE foo set updates = updates + 1") # This kills the pageserver immediately, to simulate a crash env.pageserver.stop(immediate=True) env.pageserver.start() # Check that all the updates are visible - num_updates = pg.safe_psql("SELECT sum(updates) FROM foo")[0][0] + num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0] assert num_updates == i * 100000 diff --git a/test_runner/regress/test_pageserver_restarts_under_workload.py b/test_runner/regress/test_pageserver_restarts_under_workload.py index eab8b112f0..bc3f3f2be4 100644 --- a/test_runner/regress/test_pageserver_restarts_under_workload.py +++ b/test_runner/regress/test_pageserver_restarts_under_workload.py @@ -5,7 +5,7 @@ import threading import time from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres +from fixtures.neon_fixtures import NeonEnv, PgBin # Test restarting page server, while safekeeper and compute node keep @@ -13,7 +13,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin): env = neon_simple_env env.neon_cli.create_branch("test_pageserver_restarts") - pg = env.postgres.create_start("test_pageserver_restarts") + endpoint = env.endpoints.create_start("test_pageserver_restarts") n_restarts = 10 scale = 10 @@ -23,13 +23,12 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB r".*Gc failed, retrying in \S+: Cannot run GC iteration on inactive tenant" ) - def run_pgbench(pg: Postgres): - connstr = pg.connstr() + def run_pgbench(connstr: str): log.info(f"Start a pgbench workload on pg {connstr}") pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr]) - thread = threading.Thread(target=run_pgbench, args=(pg,), daemon=True) + thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True) thread.start() for i in range(n_restarts): diff --git a/test_runner/regress/test_parallel_copy.py b/test_runner/regress/test_parallel_copy.py index 59f19026cc..577bbc21bf 100644 --- a/test_runner/regress/test_parallel_copy.py +++ b/test_runner/regress/test_parallel_copy.py @@ -2,7 +2,7 @@ import asyncio from io import BytesIO from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, Postgres +from fixtures.neon_fixtures import Endpoint, NeonEnv async def repeat_bytes(buf, repetitions: int): @@ -10,7 +10,7 @@ async def repeat_bytes(buf, repetitions: int): yield buf -async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str): +async def copy_test_data_to_table(endpoint: Endpoint, worker_id: int, table_name: str): buf = BytesIO() for i in range(1000): buf.write( @@ -20,7 +20,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) copy_input = repeat_bytes(buf.read(), 5000) - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() # PgProtocol.connect_async sets statement_timeout to 2 minutes. 
# That's not enough for this test, on a slow system in debug mode. @@ -29,10 +29,10 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) await pg_conn.copy_to_table(table_name, source=copy_input) -async def parallel_load_same_table(pg: Postgres, n_parallel: int): +async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, "copytest") + worker = copy_test_data_to_table(endpoint, worker_id, "copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -43,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int): def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5): env = neon_simple_env env.neon_cli.create_branch("test_parallel_copy", "empty") - pg = env.postgres.create_start("test_parallel_copy") + endpoint = env.endpoints.create_start("test_parallel_copy") log.info("postgres is running on 'test_parallel_copy' branch") # Create test table - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE copytest (i int, t text)") # Run COPY TO to load the table with parallel connections. - asyncio.run(parallel_load_same_table(pg, n_parallel)) + asyncio.run(parallel_load_same_table(endpoint, n_parallel)) diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 5eb1ebb3de..64625ea4ee 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -24,8 +24,8 @@ def test_pg_regress( env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_pg_regress") - pg.safe_psql("CREATE DATABASE regression") + endpoint = env.endpoints.create_start("test_pg_regress") + endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. runpath = test_output_dir / "regress" @@ -49,9 +49,9 @@ def test_pg_regress( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. @@ -61,10 +61,10 @@ def test_pg_regress( pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") + endpoint.safe_psql("CHECKPOINT") # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) # Run the PostgreSQL "isolation" tests, in src/test/isolation. @@ -85,8 +85,10 @@ def test_isolation( env.neon_cli.create_branch("test_isolation", "empty") # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them - pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) - pg.safe_psql("CREATE DATABASE isolation_regression") + endpoint = env.endpoints.create_start( + "test_isolation", config_lines=["max_prepared_transactions=100"] + ) + endpoint.safe_psql("CREATE DATABASE isolation_regression") # Create some local directories for pg_isolation_regress to run in. 
runpath = test_output_dir / "regress" @@ -109,9 +111,9 @@ def test_isolation( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. @@ -135,8 +137,8 @@ def test_sql_regress( env.neon_cli.create_branch("test_sql_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_sql_regress") - pg.safe_psql("CREATE DATABASE regression") + endpoint = env.endpoints.create_start("test_sql_regress") + endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. runpath = test_output_dir / "regress" @@ -160,9 +162,9 @@ def test_sql_regress( ] env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], + "PGPORT": str(endpoint.default_options["port"]), + "PGUSER": endpoint.default_options["user"], + "PGHOST": endpoint.default_options["host"], } # Run the command. @@ -172,8 +174,8 @@ def test_sql_regress( pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") - pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] + endpoint.safe_psql("CHECKPOINT") + endpoint.safe_psql("select pg_current_wal_insert_lsn()")[0][0] # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_pitr_gc.py b/test_runner/regress/test_pitr_gc.py index fe4fbc0927..c2ea5b332a 100644 --- a/test_runner/regress/test_pitr_gc.py +++ b/test_runner/regress/test_pitr_gc.py @@ -15,10 +15,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): ) env = neon_env_builder.init_start() - pgmain = env.postgres.create_start("main") + endpoint_main = env.endpoints.create_start("main") log.info("postgres is running on 'main' branch") - main_pg_conn = pgmain.connect() + main_pg_conn = endpoint_main.connect() main_cur = main_pg_conn.cursor() timeline = TimelineId(query_scalar(main_cur, "SHOW neon.timeline_id")) @@ -62,10 +62,10 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): # It must have been preserved by PITR setting env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a) - pg_hundred = env.postgres.create_start("test_pitr_gc_hundred") + endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred") # On the 'hundred' branch, we should see only 100 rows - hundred_pg_conn = pg_hundred.connect() + hundred_pg_conn = endpoint_hundred.connect() hundred_cur = hundred_pg_conn.cursor() hundred_cur.execute("SELECT count(*) FROM foo") assert hundred_cur.fetchone() == (100,) diff --git a/test_runner/regress/test_read_trace.py b/test_runner/regress/test_read_trace.py index be0eb76ccd..9ebe53fc17 100644 --- a/test_runner/regress/test_read_trace.py +++ b/test_runner/regress/test_read_trace.py @@ -21,22 +21,22 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder): ) timeline = env.neon_cli.create_timeline("test_trace_replay", tenant_id=tenant) - pg = env.postgres.create_start("test_trace_replay", "main", tenant) + endpoint = env.endpoints.create_start("test_trace_replay", "main", tenant) - with 
closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("create table t (i integer);") cur.execute(f"insert into t values (generate_series(1,{10000}));") cur.execute("select count(*) from t;") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # wait until pageserver receives that data pageserver_http = env.pageserver.http_client() wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn) - # Stop pg so we drop the connection and flush the traces - pg.stop() + # Stop postgres so we drop the connection and flush the traces + endpoint.stop() trace_path = env.repo_dir / "traces" / str(tenant) / str(timeline) assert trace_path.exists() diff --git a/test_runner/regress/test_read_validation.py b/test_runner/regress/test_read_validation.py index 47135dc56c..47a06359bb 100644 --- a/test_runner/regress/test_read_validation.py +++ b/test_runner/regress/test_read_validation.py @@ -17,10 +17,10 @@ def test_read_validation(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_read_validation", "empty") - pg = env.postgres.create_start("test_read_validation") + endpoint = env.endpoints.create_start("test_read_validation") log.info("postgres is running on 'test_read_validation' branch") - with closing(pg.connect()) as con: + with closing(endpoint.connect()) as con: with con.cursor() as c: for e in extensions: c.execute("create extension if not exists {};".format(e)) @@ -144,10 +144,10 @@ def test_read_validation_neg(neon_simple_env: NeonEnv): env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*") - pg = env.postgres.create_start("test_read_validation_neg") + endpoint = env.endpoints.create_start("test_read_validation_neg") log.info("postgres is running on 'test_read_validation_neg' branch") - with closing(pg.connect()) as con: + with closing(endpoint.connect()) as con: with con.cursor() as c: for e in extensions: c.execute("create extension if not exists {};".format(e)) diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index 69d6e427ce..2d641e36a7 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -15,12 +15,12 @@ from fixtures.utils import query_scalar def test_readonly_node(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_readonly_node", "empty") - pgmain = env.postgres.create_start("test_readonly_node") + endpoint_main = env.endpoints.create_start("test_readonly_node") log.info("postgres is running on 'test_readonly_node' branch") env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*") - main_pg_conn = pgmain.connect() + main_pg_conn = endpoint_main.connect() main_cur = main_pg_conn.cursor() # Create table, and insert the first 100 rows @@ -61,23 +61,23 @@ def test_readonly_node(neon_simple_env: NeonEnv): log.info("LSN after 400100 rows: " + lsn_c) # Create first read-only node at the point where only 100 rows were inserted - pg_hundred = env.postgres.create_start( - branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a + endpoint_hundred = env.endpoints.create_start( + 
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a ) # And another at the point where 200100 rows were inserted - pg_more = env.postgres.create_start( - branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b + endpoint_more = env.endpoints.create_start( + branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b ) # On the 'hundred' node, we should see only 100 rows - hundred_pg_conn = pg_hundred.connect() + hundred_pg_conn = endpoint_hundred.connect() hundred_cur = hundred_pg_conn.cursor() hundred_cur.execute("SELECT count(*) FROM foo") assert hundred_cur.fetchone() == (100,) # On the 'more' node, we should see 200100 rows - more_pg_conn = pg_more.connect() + more_pg_conn = endpoint_more.connect() more_cur = more_pg_conn.cursor() more_cur.execute("SELECT count(*) FROM foo") assert more_cur.fetchone() == (200100,) @@ -87,21 +87,21 @@ def test_readonly_node(neon_simple_env: NeonEnv): assert main_cur.fetchone() == (400100,) # Check creating a node at segment boundary - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( branch_name="test_readonly_node", - node_name="test_branch_segment_boundary", + endpoint_id="ep-branch_segment_boundary", lsn=Lsn("0/3000000"), ) - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute("SELECT 1") assert cur.fetchone() == (1,) # Create node at pre-initdb lsn with pytest.raises(Exception, match="invalid basebackup lsn"): # compute node startup with invalid LSN should fail - env.postgres.create_start( + env.endpoints.create_start( branch_name="test_readonly_node", - node_name="test_readonly_node_preinitdb", + endpoint_id="ep-readonly_node_preinitdb", lsn=Lsn("0/42"), ) @@ -111,16 +111,16 @@ def test_timetravel(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http_client = env.pageserver.http_client() env.neon_cli.create_branch("test_timetravel", "empty") - pg = env.postgres.create_start("test_timetravel") + endpoint = env.endpoints.create_start("test_timetravel") client = env.pageserver.http_client() - tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] - timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] + tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0] + timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0] lsns = [] - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( """ CREATE TABLE testtab(id serial primary key, iteration int, data text); @@ -131,7 +131,7 @@ def test_timetravel(neon_simple_env: NeonEnv): lsns.append((0, current_lsn)) for i in range(1, 5): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"UPDATE testtab SET iteration = {i}") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) lsns.append((i, current_lsn)) @@ -143,14 +143,14 @@ def test_timetravel(neon_simple_env: NeonEnv): pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id) ##### Restart pageserver - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() env.pageserver.start() for i, lsn in lsns: - pg_old = env.postgres.create_start( - branch_name="test_timetravel", node_name=f"test_old_lsn_{i}", lsn=lsn + endpoint_old = env.endpoints.create_start( + branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn ) - with pg_old.cursor() as cur: + with endpoint_old.cursor() as cur: assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000 assert query_scalar(cur, f"select count(*) from testtab 
where iteration<>{i}") == 0 diff --git a/test_runner/regress/test_recovery.py b/test_runner/regress/test_recovery.py index 09644eaaa1..76e97a35a4 100644 --- a/test_runner/regress/test_recovery.py +++ b/test_runner/regress/test_recovery.py @@ -22,10 +22,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main") - pg = env.postgres.create_start("test_pageserver_recovery") + endpoint = env.endpoints.create_start("test_pageserver_recovery") log.info("postgres is running on 'test_pageserver_recovery' branch") - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: with env.pageserver.http_client() as pageserver_http: # Create and initialize test table @@ -54,7 +54,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): env.pageserver.stop() env.pageserver.start() - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("select count(*) from foo") assert cur.fetchone() == (100000,) diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 222305f006..6de5f7db04 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -87,17 +87,17 @@ def test_remote_storage_backup_and_restore( env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*") pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) checkpoint_numbers = range(1, 3) for checkpoint_number in checkpoint_numbers: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE t{checkpoint_number}(id int primary key, data text); @@ -126,7 +126,7 @@ def test_remote_storage_backup_and_restore( ) ##### Stop the first pageserver instance, erase all its data - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() dir_to_clear = Path(env.repo_dir) / "tenants" @@ -187,8 +187,8 @@ def test_remote_storage_backup_and_restore( ), "current db Lsn should not be less than the one stored on remote storage" log.info("select some data, this will cause layers to be downloaded") - pg = env.postgres.create_start("main") - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main") + with endpoint.cursor() as cur: for checkpoint_number in checkpoint_numbers: assert ( query_scalar(cur, f"SELECT data FROM t{checkpoint_number} WHERE id = {data_id};") @@ -238,9 +238,9 @@ def test_remote_storage_upload_queue_retries( client = env.pageserver.http_client() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") + endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") def configure_storage_sync_failpoints(action): client.configure_failpoints( @@ -253,7 +253,7 @@ def test_remote_storage_upload_queue_retries( def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data): # create initial set of layers 
& upload them with failpoints configured - pg.safe_psql_many( + endpoint.safe_psql_many( [ f""" INSERT INTO foo (id, val) @@ -266,7 +266,7 @@ def test_remote_storage_upload_queue_retries( "VACUUM foo", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) def get_queued_count(file_kind, op_kind): val = client.get_remote_timeline_client_metric( @@ -343,7 +343,7 @@ def test_remote_storage_upload_queue_retries( # but how do we validate the result after restore? env.pageserver.stop(immediate=True) - env.postgres.stop_all() + env.endpoints.stop_all() dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) @@ -357,8 +357,8 @@ def test_remote_storage_upload_queue_retries( wait_until_tenant_active(client, tenant_id) log.info("restarting postgres to validate") - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000 @@ -394,13 +394,13 @@ def test_remote_timeline_client_calls_started_metric( client = env.pageserver.http_client() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") + endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)") def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data): # create initial set of layers & upload them with failpoints configured - pg.safe_psql_many( + endpoint.safe_psql_many( [ f""" INSERT INTO foo (id, val) @@ -413,7 +413,7 @@ def test_remote_timeline_client_calls_started_metric( "VACUUM foo", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) calls_started: Dict[Tuple[str, str], List[int]] = { ("layer", "upload"): [0], @@ -478,7 +478,7 @@ def test_remote_timeline_client_calls_started_metric( ) env.pageserver.stop(immediate=True) - env.postgres.stop_all() + env.endpoints.stop_all() dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) @@ -492,8 +492,8 @@ def test_remote_timeline_client_calls_started_metric( wait_until_tenant_active(client, tenant_id) log.info("restarting postgres to validate") - pg = env.postgres.create_start("main", tenant_id=tenant_id) - with pg.cursor() as cur: + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT COUNT(*) FROM foo WHERE val = 'd'") == 10000 # ensure that we updated the calls_started download metric @@ -543,17 +543,17 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue( ) return int(val) if val is not None else val - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) client.configure_failpoints(("before-upload-layer", "return")) - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (x INTEGER)", "INSERT INTO foo SELECT g FROM generate_series(1, 10000) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) # Kick off a checkpoint operation. 
# It will get stuck in remote_client.wait_completion(), since the select query will have @@ -627,8 +627,8 @@ def test_empty_branch_remote_storage_upload( new_branch_name = "new_branch" new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant) - with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg: - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id) + with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint: + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id) wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id) timelines_before_detach = set( @@ -676,8 +676,8 @@ def test_empty_branch_remote_storage_upload_on_restart( new_branch_name = "new_branch" new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant) - with env.postgres.create_start(new_branch_name, tenant_id=env.initial_tenant) as pg: - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_branch_timeline_id) + with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint: + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id) wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id) env.pageserver.stop() diff --git a/test_runner/regress/test_subxacts.py b/test_runner/regress/test_subxacts.py index 42234bf535..494820ef8e 100644 --- a/test_runner/regress/test_subxacts.py +++ b/test_runner/regress/test_subxacts.py @@ -11,10 +11,10 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_subxacts", "empty") - pg = env.postgres.create_start("test_subxacts") + endpoint = env.endpoints.create_start("test_subxacts") log.info("postgres is running on 'test_subxacts' branch") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute( @@ -37,4 +37,4 @@ def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): cur.execute("checkpoint") # Check that we can restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) + check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py index 80d4b99504..28f1a960df 100644 --- a/test_runner/regress/test_tenant_conf.py +++ b/test_runner/regress/test_tenant_conf.py @@ -43,11 +43,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" tenant, _ = env.neon_cli.create_tenant(conf=new_conf) env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant) - env.postgres.create_start( - "test_tenant_conf", - "main", - tenant, - ) + env.endpoints.create_start("test_tenant_conf", "main", tenant) # check the configuration of the default tenant # it should match global configuration diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 58a010951e..847ae4b2b8 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -7,9 +7,9 @@ import asyncpg import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, - Postgres, RemoteStorageKind, available_remote_storages, ) @@ -59,8 +59,8 @@ def test_tenant_reattach( # create new 
tenant tenant_id, timeline_id = env.neon_cli.create_tenant() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) @@ -99,8 +99,8 @@ def test_tenant_reattach( assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: assert query_scalar(cur, "SELECT count(*) FROM t") == 100000 # Check that we had to retry the downloads @@ -157,11 +157,11 @@ async def sleep_and_reattach(pageserver_http: PageserverHttpClient, tenant_id: T # async guts of test_tenant_reattach_while_busy test async def reattach_while_busy( - env: NeonEnv, pg: Postgres, pageserver_http: PageserverHttpClient, tenant_id: TenantId + env: NeonEnv, endpoint: Endpoint, pageserver_http: PageserverHttpClient, tenant_id: TenantId ): workers = [] for worker_id in range(num_connections): - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() workers.append(asyncio.create_task(update_table(pg_conn))) workers.append(asyncio.create_task(sleep_and_reattach(pageserver_http, tenant_id))) @@ -238,15 +238,15 @@ def test_tenant_reattach_while_busy( conf={"checkpoint_distance": "100000"} ) - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - cur = pg.connect().cursor() + cur = endpoint.connect().cursor() cur.execute("CREATE TABLE t(id int primary key, counter int)") cur.execute(f"INSERT INTO t SELECT generate_series(1,{num_rows}), 0") # Run the test - asyncio.run(reattach_while_busy(env, pg, pageserver_http, tenant_id)) + asyncio.run(reattach_while_busy(env, endpoint, pageserver_http, tenant_id)) # Verify table contents assert query_scalar(cur, "SELECT count(*) FROM t") == num_rows @@ -278,9 +278,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -339,9 +339,9 @@ def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -388,9 +388,9 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = 
env.endpoints.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many( + endpoint.safe_psql_many( queries=[ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,100000), 'payload'", @@ -425,18 +425,18 @@ def test_detach_while_attaching( ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( """ @@ -477,7 +477,7 @@ def test_detach_while_attaching( # cycle are still running, things could get really confusing.. pageserver_http.tenant_attach(tenant_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("SELECT COUNT(*) FROM foo") @@ -572,14 +572,14 @@ def test_ignored_tenant_download_missing_layers( ) env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) data_id = 1 data_secret = "very secret secret" - insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg) + insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] tenants_before_ignore.sort() @@ -611,9 +611,9 @@ def test_ignored_tenant_download_missing_layers( ] assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back" - pg.stop() - pg.start() - ensure_test_data(data_id, data_secret, pg) + endpoint.stop() + endpoint.start() + ensure_test_data(data_id, data_secret, endpoint) # Tests that it's possible to `load` broken tenants: @@ -631,10 +631,10 @@ def test_ignored_tenant_stays_broken_without_metadata( ) env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # ignore the tenant and remove its metadata pageserver_http.tenant_ignore(tenant_id) @@ -666,9 +666,9 @@ def test_load_attach_negatives( ) env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = 
env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") with pytest.raises( @@ -707,16 +707,16 @@ def test_ignore_while_attaching( env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") pageserver_http = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) data_id = 1 data_secret = "very secret secret" - insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg) + insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()] @@ -754,9 +754,9 @@ def test_ignore_while_attaching( wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5) - pg.stop() - pg.start() - ensure_test_data(data_id, data_secret, pg) + endpoint.stop() + endpoint.start() + ensure_test_data(data_id, data_secret, endpoint) def insert_test_data( @@ -765,9 +765,9 @@ def insert_test_data( timeline_id: TimelineId, data_id: int, data: str, - pg: Postgres, + endpoint: Endpoint, ): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE test(id int primary key, secret text); @@ -787,8 +787,8 @@ def insert_test_data( wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) -def ensure_test_data(data_id: int, data: str, pg: Postgres): - with pg.cursor() as cur: +def ensure_test_data(data_id: int, data: str, endpoint: Endpoint): + with endpoint.cursor() as cur: assert ( query_scalar(cur, f"SELECT secret FROM test WHERE id = {data_id};") == data ), "Should have timeline data back" diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py index 3569ab0c53..180afd88cd 100644 --- a/test_runner/regress/test_tenant_relocation.py +++ b/test_runner/regress/test_tenant_relocation.py @@ -7,11 +7,11 @@ from typing import Any, Dict, Optional, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonBroker, NeonEnv, NeonEnvBuilder, PortDistributor, - Postgres, ) from fixtures.pageserver.http import PageserverHttpClient from fixtures.pageserver.utils import ( @@ -87,20 +87,20 @@ def new_pageserver_service( @contextmanager -def pg_cur(pg): - with closing(pg.connect()) as conn: +def pg_cur(endpoint): + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: yield cur -def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event): +def load(endpoint: Endpoint, stop_event: threading.Event, load_ok_event: threading.Event): log.info("load started") inserted_ctr = 0 failed = False while not stop_event.is_set(): try: - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: cur.execute("INSERT INTO load VALUES ('some payload')") inserted_ctr += 1 except: # noqa: E722 @@ -110,7 +110,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve load_ok_event.clear() else: if failed: - with pg_cur(pg) 
as cur: + with pg_cur(endpoint) as cur: # if we recovered after a failure, verify that we have the correct number of rows log.info("recovering at %s", inserted_ctr) cur.execute("SELECT count(*) FROM load") @@ -124,14 +124,14 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve def populate_branch( - pg: Postgres, + endpoint: Endpoint, tenant_id: TenantId, ps_http: PageserverHttpClient, create_table: bool, expected_sum: Optional[int], ) -> Tuple[TimelineId, Lsn]: # insert some data - with pg_cur(pg) as cur: + with pg_cur(endpoint) as cur: cur.execute("SHOW neon.timeline_id") timeline_id = TimelineId(cur.fetchone()[0]) log.info("timeline to relocate %s", timeline_id) @@ -196,19 +196,19 @@ def check_timeline_attached( def switch_pg_to_new_pageserver( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, new_pageserver_port: int, tenant_id: TenantId, timeline_id: TimelineId, ) -> Path: - pg.stop() + endpoint.stop() - pg_config_file_path = Path(pg.config_file_path()) + pg_config_file_path = Path(endpoint.config_file_path()) pg_config_file_path.open("a").write( f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'" ) - pg.start() + endpoint.start() timeline_to_detach_local_path = ( env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) @@ -226,8 +226,8 @@ def switch_pg_to_new_pageserver( return timeline_to_detach_local_path -def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: Path): - with pg_cur(pg) as cur: +def post_migration_check(endpoint: Endpoint, sum_before_migration: int, old_local_path: Path): + with pg_cur(endpoint) as cur: # check that data is still there cur.execute("SELECT sum(key) FROM t") assert cur.fetchone() == (sum_before_migration,) @@ -288,12 +288,12 @@ def test_tenant_relocation( log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id) env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id) - pg_main = env.postgres.create_start( + ep_main = env.endpoints.create_start( branch_name="test_tenant_relocation_main", tenant_id=tenant_id ) timeline_id_main, current_lsn_main = populate_branch( - pg_main, + ep_main, tenant_id=tenant_id, ps_http=pageserver_http, create_table=True, @@ -306,12 +306,12 @@ def test_tenant_relocation( ancestor_start_lsn=current_lsn_main, tenant_id=tenant_id, ) - pg_second = env.postgres.create_start( + ep_second = env.endpoints.create_start( branch_name="test_tenant_relocation_second", tenant_id=tenant_id ) timeline_id_second, current_lsn_second = populate_branch( - pg_second, + ep_second, tenant_id=tenant_id, ps_http=pageserver_http, create_table=False, @@ -327,14 +327,14 @@ def test_tenant_relocation( if with_load == "with_load": # create load table - with pg_cur(pg_main) as cur: + with pg_cur(ep_main) as cur: cur.execute("CREATE TABLE load(value text)") load_stop_event = threading.Event() load_ok_event = threading.Event() load_thread = threading.Thread( target=load, - args=(pg_main, load_stop_event, load_ok_event), + args=(ep_main, load_stop_event, load_ok_event), daemon=True, # To make sure the child dies when the parent errors ) load_thread.start() @@ -450,7 +450,7 @@ def test_tenant_relocation( old_local_path_main = switch_pg_to_new_pageserver( env, - pg_main, + ep_main, new_pageserver_pg_port, tenant_id, timeline_id_main, @@ -458,7 +458,7 @@ def test_tenant_relocation( old_local_path_second = switch_pg_to_new_pageserver( env, - pg_second, + ep_second, new_pageserver_pg_port, tenant_id, 
timeline_id_second, @@ -475,11 +475,11 @@ def test_tenant_relocation( interval=1, func=lambda: tenant_exists(pageserver_http, tenant_id), ) - post_migration_check(pg_main, 500500, old_local_path_main) - post_migration_check(pg_second, 1001000, old_local_path_second) + post_migration_check(ep_main, 500500, old_local_path_main) + post_migration_check(ep_second, 1001000, old_local_path_second) # ensure that we can successfully read all relations on the new pageserver - with pg_cur(pg_second) as cur: + with pg_cur(ep_second) as cur: cur.execute( """ DO $$ diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 9037fe0045..e8d534142e 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -4,9 +4,9 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, - Postgres, wait_for_last_flush_lsn, wait_for_wal_insert_lsn, ) @@ -28,12 +28,12 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path): branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0] assert branch_name == main_branch_name - with env.postgres.create_start( + with env.endpoints.create_start( main_branch_name, tenant_id=tenant_id, config_lines=["autovacuum=off", "checkpoint_timeout=10min"], - ) as pg: - with pg.cursor() as cur: + ) as endpoint: + with endpoint.cursor() as cur: cur.execute("SELECT 1") row = cur.fetchone() assert row is not None @@ -105,12 +105,12 @@ def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir: first_branch_timeline_id = env.neon_cli.create_branch("first-branch", tenant_id=tenant_id) - with env.postgres.create_start("first-branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("first-branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, first_branch_timeline_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, first_branch_timeline_id) size_after_branching = http_client.tenant_size(tenant_id) log.info(f"size_after_branching: {size_after_branching}") @@ -164,12 +164,12 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou assert last_branch is not None - with env.postgres.create_start(last_branch_name, tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start(last_branch_name, tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, last_branch) + wait_for_last_flush_lsn(env, endpoint, tenant_id, last_branch) size_after_writes = http_client.tenant_size(tenant_id) assert size_after_writes > initial_size @@ -194,11 +194,11 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: (tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)}) http_client = env.pageserver.http_client() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) + with 
endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) size_before_branching = http_client.tenant_size(tenant_id) @@ -208,10 +208,10 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: "branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn ) - with env.postgres.create_start("branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, branch_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id) size_after = http_client.tenant_size(tenant_id) @@ -237,17 +237,17 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): (tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)}) http_client = env.pageserver.http_client() - with env.postgres.create_start("main", tenant_id=tenant_id) as pg: - initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) - with pg.cursor() as cur: + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + initdb_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000) s(i)") - flushed_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + flushed_lsn = wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t00 AS SELECT i::bigint n FROM generate_series(0, 2000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, main_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, main_id) size_before_branching = http_client.tenant_size(tenant_id) @@ -257,10 +257,10 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): "branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn ) - with env.postgres.create_start("branch", tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 10000) s(i)") - wait_for_last_flush_lsn(env, pg, tenant_id, branch_id) + wait_for_last_flush_lsn(env, endpoint, tenant_id, branch_id) size_after = http_client.tenant_size(tenant_id) @@ -297,12 +297,12 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa # gc is not expected to change the results for branch_name, amount in [("main", 2000), ("first", 15000), ("second", 3000)]: - with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg: - with pg.cursor() as cur: + with env.endpoints.create_start(branch_name, tenant_id=tenant_id) as endpoint: + with endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {amount}) s(i)" ) - wait_for_last_flush_lsn(env, pg, tenant_id, ids[branch_name]) + wait_for_last_flush_lsn(env, endpoint, tenant_id, ids[branch_name]) size_now = http_client.tenant_size(tenant_id) if latest_size is not None: assert size_now > latest_size @@ -359,7 +359,7 @@ def 
test_single_branch_get_tenant_size_grows( def get_current_consistent_size( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, size_debug_file, # apparently there is no public signature for open()... http_client: PageserverHttpClient, tenant_id: TenantId, @@ -368,7 +368,7 @@ def test_single_branch_get_tenant_size_grows( consistent = False size_debug = None - current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id) + current_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id) # We want to make sure we have a self-consistent set of values. # Size changes with WAL, so only if both before and after getting # the size of the tenant reports the same WAL insert LSN, we're OK @@ -382,35 +382,35 @@ def test_single_branch_get_tenant_size_grows( size, sizes = http_client.tenant_size_and_modelinputs(tenant_id) size_debug = http_client.tenant_size_debug(tenant_id) - after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id) + after_lsn = wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id) consistent = current_lsn == after_lsn current_lsn = after_lsn size_debug_file.write(size_debug) return (current_lsn, size) - with env.postgres.create_start( + with env.endpoints.create_start( branch_name, tenant_id=tenant_id, ### autovacuum is disabled to limit WAL logging. config_lines=["autovacuum=off"], - ) as pg: + ) as endpoint: (initdb_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) collected_responses.append(("INITDB", initdb_lsn, size)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)") (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) collected_responses.append(("CREATE", current_lsn, size)) batch_size = 100 for i in range(3): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)", (i, i), @@ -419,7 +419,7 @@ def test_single_branch_get_tenant_size_grows( i += 1 (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -438,7 +438,7 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("INSERT", current_lsn, size)) while True: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f"UPDATE t0 SET i = -i WHERE i IN (SELECT i FROM t0 WHERE i > 0 LIMIT {batch_size})" ) @@ -448,7 +448,7 @@ def test_single_branch_get_tenant_size_grows( break (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -458,7 +458,7 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("UPDATE", current_lsn, size)) while True: - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute(f"DELETE FROM t0 WHERE i IN (SELECT i FROM t0 LIMIT {batch_size})") deleted = cur.rowcount @@ -466,7 +466,7 @@ def test_single_branch_get_tenant_size_grows( break (current_lsn, size) = get_current_consistent_size( - env, pg, 
size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -475,14 +475,14 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("DELETE", current_lsn, size)) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("DROP TABLE t0") # The size of the tenant should still be as large as before we dropped # the table, because the drop operation can still be undone in the PITR # defined by gc_horizon. (current_lsn, size) = get_current_consistent_size( - env, pg, size_debug_file, http_client, tenant_id, timeline_id + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) prev_size = collected_responses[-1][2] @@ -532,16 +532,16 @@ def test_get_tenant_size_with_multiple_branches( http_client = env.pageserver.http_client() - main_pg = env.postgres.create_start(main_branch_name, tenant_id=tenant_id) + main_endpoint = env.endpoints.create_start(main_branch_name, tenant_id=tenant_id) batch_size = 10000 - with main_pg.cursor() as cur: + with main_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id) + wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id) size_at_branch = http_client.tenant_size(tenant_id) assert size_at_branch > 0 @@ -552,23 +552,23 @@ def test_get_tenant_size_with_multiple_branches( size_after_first_branch = http_client.tenant_size(tenant_id) assert size_after_first_branch == size_at_branch - first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id) + first_branch_endpoint = env.endpoints.create_start("first-branch", tenant_id=tenant_id) - with first_branch_pg.cursor() as cur: + with first_branch_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, {batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, first_branch_pg, tenant_id, first_branch_timeline_id) + wait_for_last_flush_lsn(env, first_branch_endpoint, tenant_id, first_branch_timeline_id) size_after_growing_first_branch = http_client.tenant_size(tenant_id) assert size_after_growing_first_branch > size_after_first_branch - with main_pg.cursor() as cur: + with main_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 2*{batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, main_pg, tenant_id, main_timeline_id) + wait_for_last_flush_lsn(env, main_endpoint, tenant_id, main_timeline_id) size_after_continuing_on_main = http_client.tenant_size(tenant_id) assert size_after_continuing_on_main > size_after_growing_first_branch @@ -578,31 +578,31 @@ def test_get_tenant_size_with_multiple_branches( size_after_second_branch = http_client.tenant_size(tenant_id) assert size_after_second_branch == size_after_continuing_on_main - second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id) + second_branch_endpoint = env.endpoints.create_start("second-branch", tenant_id=tenant_id) - with second_branch_pg.cursor() as cur: + with second_branch_endpoint.cursor() as cur: cur.execute( f"CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 3*{batch_size}) s(i)" ) - wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id) + wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id) size_after_growing_second_branch = 
http_client.tenant_size(tenant_id) assert size_after_growing_second_branch > size_after_second_branch - with second_branch_pg.cursor() as cur: + with second_branch_endpoint.cursor() as cur: cur.execute("DROP TABLE t0") cur.execute("DROP TABLE t1") cur.execute("VACUUM FULL") - wait_for_last_flush_lsn(env, second_branch_pg, tenant_id, second_branch_timeline_id) + wait_for_last_flush_lsn(env, second_branch_endpoint, tenant_id, second_branch_timeline_id) size_after_thinning_branch = http_client.tenant_size(tenant_id) assert ( size_after_thinning_branch > size_after_growing_second_branch ), "tenant_size should grow with dropped tables and full vacuum" - first_branch_pg.stop_and_destroy() - second_branch_pg.stop_and_destroy() - main_pg.stop() + first_branch_endpoint.stop_and_destroy() + second_branch_endpoint.stop_and_destroy() + main_endpoint.stop() env.pageserver.stop() env.pageserver.start() diff --git a/test_runner/regress/test_tenant_tasks.py b/test_runner/regress/test_tenant_tasks.py index 8c89100745..21e4af4127 100644 --- a/test_runner/regress/test_tenant_tasks.py +++ b/test_runner/regress/test_tenant_tasks.py @@ -29,7 +29,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): # Create tenant, start compute tenant, _ = env.neon_cli.create_tenant() env.neon_cli.create_timeline(name, tenant_id=tenant) - pg = env.postgres.create_start(name, tenant_id=tenant) + endpoint = env.endpoints.create_start(name, tenant_id=tenant) assert_tenant_state( client, tenant, @@ -38,7 +38,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): ) # Stop compute - pg.stop() + endpoint.stop() # Delete all timelines on all tenants. # diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 2162520217..8026d7f5c6 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -66,17 +66,17 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_1) env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start( + endpoint_tenant1 = env.endpoints.create_start( "test_tenants_normal_work", tenant_id=tenant_1, ) - pg_tenant2 = env.postgres.create_start( + endpoint_tenant2 = env.endpoints.create_start( "test_tenants_normal_work", tenant_id=tenant_2, ) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -97,11 +97,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1) timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2) + endpoint_tenant1 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_1) + endpoint_tenant2 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_2) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, 
value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") @@ -242,11 +242,15 @@ def test_pageserver_metrics_removed_after_detach( env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1) env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start("test_metrics_removed_after_detach", tenant_id=tenant_2) + endpoint_tenant1 = env.endpoints.create_start( + "test_metrics_removed_after_detach", tenant_id=tenant_1 + ) + endpoint_tenant2 = env.endpoints.create_start( + "test_metrics_removed_after_detach", tenant_id=tenant_2 + ) - for pg in [pg_tenant1, pg_tenant2]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_tenant1, endpoint_tenant2]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") @@ -317,7 +321,7 @@ def test_pageserver_with_empty_tenants( ), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory" # Trigger timeline re-initialization after pageserver restart - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() tenant_without_timelines_dir = env.initial_tenant diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index 514e2b6fa0..d7c0814570 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -15,10 +15,10 @@ from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, LocalFsStorage, NeonEnv, NeonEnvBuilder, - Postgres, RemoteStorageKind, available_remote_storages, wait_for_sk_commit_lsn_to_reach_remote_storage, @@ -32,10 +32,10 @@ from fixtures.types import Lsn, TenantId, TimelineId from fixtures.utils import query_scalar, wait_until -async def tenant_workload(env: NeonEnv, pg: Postgres): +async def tenant_workload(env: NeonEnv, endpoint: Endpoint): await env.pageserver.connect_async() - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() await pg_conn.execute("CREATE TABLE t(key int primary key, value text)") for i in range(1, 100): @@ -49,10 +49,10 @@ async def tenant_workload(env: NeonEnv, pg: Postgres): assert res == i * 1000 -async def all_tenants_workload(env: NeonEnv, tenants_pgs): +async def all_tenants_workload(env: NeonEnv, tenants_endpoints): workers = [] - for _, pg in tenants_pgs: - worker = tenant_workload(env, pg) + for _, endpoint in tenants_endpoints: + worker = tenant_workload(env, endpoint) workers.append(asyncio.create_task(worker)) # await all workers @@ -73,7 +73,7 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - tenants_pgs: List[Tuple[TenantId, Postgres]] = [] + tenants_endpoints: List[Tuple[TenantId, Endpoint]] = [] for _ in range(1, 5): # Use a tiny checkpoint distance, to create a lot of layers quickly @@ -84,18 +84,18 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem ) env.neon_cli.create_timeline("test_tenants_many", tenant_id=tenant) - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( 
"test_tenants_many", tenant_id=tenant, ) - tenants_pgs.append((tenant, pg)) + tenants_endpoints.append((tenant, endpoint)) - asyncio.run(all_tenants_workload(env, tenants_pgs)) + asyncio.run(all_tenants_workload(env, tenants_endpoints)) # Wait for the remote storage uploads to finish pageserver_http = env.pageserver.http_client() - for tenant, pg in tenants_pgs: - res = pg.safe_psql_many( + for tenant, endpoint in tenants_endpoints: + res = endpoint.safe_psql_many( ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"] ) tenant_id = TenantId(res[0][0][0]) @@ -137,15 +137,15 @@ def test_tenants_attached_after_download( ) pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") client = env.pageserver.http_client() - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) for checkpoint_number in range(1, 3): - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute( f""" CREATE TABLE t{checkpoint_number}(id int primary key, secret text); @@ -174,7 +174,7 @@ def test_tenants_attached_after_download( ) ##### Stop the pageserver, erase its layer file to force it being downloaded from S3 - env.postgres.stop_all() + env.endpoints.stop_all() wait_for_sk_commit_lsn_to_reach_remote_storage( tenant_id, timeline_id, env.safekeepers, env.pageserver @@ -244,12 +244,12 @@ def test_tenant_redownloads_truncated_file_on_startup( env.pageserver.allowed_errors.append(".*No timelines to attach received.*") pageserver_http = env.pageserver.http_client() - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("CREATE TABLE t1 AS VALUES (123, 'foobar');") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) @@ -257,7 +257,7 @@ def test_tenant_redownloads_truncated_file_on_startup( pageserver_http.timeline_checkpoint(tenant_id, timeline_id) wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn) - env.postgres.stop_all() + env.endpoints.stop_all() env.pageserver.stop() timeline_dir = Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) @@ -313,9 +313,9 @@ def test_tenant_redownloads_truncated_file_on_startup( os.stat(remote_layer_path).st_size == expected_size ), "truncated file should not had been uploaded around re-download" - pg = env.postgres.create_start("main") + endpoint = env.endpoints.create_start("main") - with pg.cursor() as cur: + with endpoint.cursor() as cur: cur.execute("INSERT INTO t1 VALUES (234, 'test data');") current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 28da3c5a48..db278d5646 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -12,11 +12,11 @@ import psycopg2.extras import pytest from fixtures.log_helper import log 
from fixtures.neon_fixtures import ( + Endpoint, NeonEnv, NeonEnvBuilder, PgBin, PortDistributor, - Postgres, RemoteStorageKind, VanillaPostgres, wait_for_last_flush_lsn, @@ -38,10 +38,10 @@ def test_timeline_size(neon_simple_env: NeonEnv): client = env.pageserver.http_client() wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) - pgmain = env.postgres.create_start("test_timeline_size") + endpoint_main = env.endpoints.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE foo (t text)") cur.execute( @@ -74,10 +74,10 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True ) - pgmain = env.postgres.create_start("test_timeline_size_createdropdb") + endpoint_main = env.endpoints.create_start("test_timeline_size_createdropdb") log.info("postgres is running on 'test_timeline_size_createdropdb' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: res = client.timeline_detail( env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True @@ -89,7 +89,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): ), "no writes should not change the incremental logical size" cur.execute("CREATE DATABASE foodb") - with closing(pgmain.connect(dbname="foodb")) as conn: + with closing(endpoint_main.connect(dbname="foodb")) as conn: with conn.cursor() as cur2: cur2.execute("CREATE TABLE foo (t text)") cur2.execute( @@ -118,7 +118,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): # wait until received_lsn_lag is 0 -def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60): +def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, timeout=60): started_at = time.time() received_lsn_lag = 1 @@ -129,7 +129,7 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 "timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" ) - res = pgmain.safe_psql( + res = endpoint_main.safe_psql( """ SELECT pg_size_pretty(pg_cluster_size()), @@ -150,20 +150,20 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id) - pgmain = env.postgres.create_start( + endpoint_main = env.endpoints.create_start( "test_timeline_size_quota", # Set small limit for the test config_lines=["neon.max_cluster_size=30MB"], ) log.info("postgres is running on 'test_timeline_size_quota' branch") - with closing(pgmain.connect()) as conn: + with closing(endpoint_main.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures? cur.execute("CREATE TABLE foo (t text)") - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) # Insert many rows. 
This query must fail because of space limit try: @@ -175,7 +175,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): """ ) - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) cur.execute( """ @@ -195,7 +195,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): # drop table to free space cur.execute("DROP TABLE foo") - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) # create it again and insert some rows. This query must succeed cur.execute("CREATE TABLE foo (t text)") @@ -207,7 +207,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): """ ) - wait_for_pageserver_catchup(pgmain) + wait_for_pageserver_catchup(endpoint_main) cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())") pg_cluster_size = cur.fetchone() @@ -231,15 +231,15 @@ def test_timeline_initial_logical_size_calculation_cancellation( tenant_id, timeline_id = env.neon_cli.create_tenant() # load in some data - pg = env.postgres.create_start("main", tenant_id=tenant_id) - pg.safe_psql_many( + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + endpoint.safe_psql_many( [ "CREATE TABLE foo (x INTEGER)", "INSERT INTO foo SELECT g FROM generate_series(1, 10000) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id) - pg.stop() + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + endpoint.stop() # restart with failpoint inside initial size calculation task env.pageserver.stop() @@ -311,9 +311,9 @@ def test_timeline_physical_size_init( env = neon_env_builder.init_start() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init") - pg = env.postgres.create_start("test_timeline_physical_size_init") + endpoint = env.endpoints.create_start("test_timeline_physical_size_init") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -322,7 +322,7 @@ def test_timeline_physical_size_init( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) # restart the pageserer to force calculating timeline's initial physical size env.pageserver.stop() @@ -355,9 +355,9 @@ def test_timeline_physical_size_post_checkpoint( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint") - pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -366,7 +366,7 @@ def test_timeline_physical_size_post_checkpoint( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) assert_physical_size_invariants( @@ -394,7 +394,7 @@ def test_timeline_physical_size_post_compaction( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") - pg = env.postgres.create_start("test_timeline_physical_size_post_compaction") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction") # We don't want autovacuum to run on the table, while we are calculating the # physical size, because that could cause a new layer to 
be created and a @@ -402,7 +402,7 @@ def test_timeline_physical_size_post_compaction( # happens, because of some other background activity or autovacuum on other # tables, we could simply retry the size calculations. It's unlikely that # that would happen more than once.) - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo @@ -411,7 +411,7 @@ def test_timeline_physical_size_post_compaction( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) # shutdown safekeepers to prevent new data from coming in for sk in env.safekeepers: @@ -446,10 +446,10 @@ def test_timeline_physical_size_post_gc( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc") - pg = env.postgres.create_start("test_timeline_physical_size_post_gc") + endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc") # Like in test_timeline_physical_size_post_compaction, disable autovacuum - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", """INSERT INTO foo @@ -458,10 +458,10 @@ def test_timeline_physical_size_post_gc( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) - pg.safe_psql( + endpoint.safe_psql( """ INSERT INTO foo SELECT 'long string to consume some space' || g @@ -469,7 +469,7 @@ def test_timeline_physical_size_post_gc( """ ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None) @@ -495,9 +495,9 @@ def test_timeline_size_metrics( pageserver_http = env.pageserver.http_client() new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics") - pg = env.postgres.create_start("test_timeline_size_metrics") + endpoint = env.endpoints.create_start("test_timeline_size_metrics") - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", """INSERT INTO foo @@ -506,7 +506,7 @@ def test_timeline_size_metrics( ] ) - wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id) pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id) # get the metrics and parse the metric for the current timeline's physical size @@ -558,7 +558,7 @@ def test_timeline_size_metrics( # The sum of the sizes of all databases, as seen by pg_database_size(), should also # be close. Again allow some slack, the logical size metric includes some things like # the SLRUs that are not included in pg_database_size(). 
- dbsize_sum = pg.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0] + dbsize_sum = endpoint.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0] assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024) @@ -592,16 +592,16 @@ def test_tenant_physical_size( n_rows = random.randint(100, 1000) timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) + endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE foo (t text)", f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", ] ) - wait_for_last_flush_lsn(env, pg, tenant, timeline) + wait_for_last_flush_lsn(env, endpoint, tenant, timeline) pageserver_http.timeline_checkpoint(tenant, timeline) if remote_storage_kind is not None: @@ -609,7 +609,7 @@ def test_tenant_physical_size( timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline) - pg.stop() + endpoint.stop() # ensure that tenant_status current_physical size reports sum of timeline current_physical_size tenant_current_physical_size = int( diff --git a/test_runner/regress/test_truncate.py b/test_runner/regress/test_truncate.py index cfe8a7f067..b1ddd93a40 100644 --- a/test_runner/regress/test_truncate.py +++ b/test_runner/regress/test_truncate.py @@ -27,8 +27,8 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark): ) env.neon_cli.create_timeline("test_truncate", tenant_id=tenant) - pg = env.postgres.create_start("test_truncate", tenant_id=tenant) - cur = pg.connect().cursor() + endpoint = env.endpoints.create_start("test_truncate", tenant_id=tenant) + cur = endpoint.connect().cursor() cur.execute("create table t1(x integer)") cur.execute(f"insert into t1 values (generate_series(1,{n_records}))") cur.execute("vacuum t1") diff --git a/test_runner/regress/test_twophase.py b/test_runner/regress/test_twophase.py index f3b0f9ca06..305271c715 100644 --- a/test_runner/regress/test_twophase.py +++ b/test_runner/regress/test_twophase.py @@ -10,10 +10,12 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn def test_twophase(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_twophase", "empty") - pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"]) + endpoint = env.endpoints.create_start( + "test_twophase", config_lines=["max_prepared_transactions=5"] + ) log.info("postgres is running on 'test_twophase' branch") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE TABLE foo (t text)") @@ -42,7 +44,7 @@ def test_twophase(neon_simple_env: NeonEnv): # pg_twophase directory and fsynced cur.execute("CHECKPOINT") - twophase_files = os.listdir(pg.pg_twophase_dir_path()) + twophase_files = os.listdir(endpoint.pg_twophase_dir_path()) log.info(twophase_files) assert len(twophase_files) == 4 @@ -50,25 +52,25 @@ def test_twophase(neon_simple_env: NeonEnv): cur.execute("ROLLBACK PREPARED 'insert_four'") cur.execute("CHECKPOINT") - twophase_files = os.listdir(pg.pg_twophase_dir_path()) + twophase_files = os.listdir(endpoint.pg_twophase_dir_path()) log.info(twophase_files) assert len(twophase_files) == 2 # Create a branch with the transaction in prepared state - fork_at_current_lsn(env, pg, "test_twophase_prepared", "test_twophase") + 
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase") # Start compute on the new branch - pg2 = env.postgres.create_start( + endpoint2 = env.endpoints.create_start( "test_twophase_prepared", config_lines=["max_prepared_transactions=5"], ) # Check that we restored only needed twophase files - twophase_files2 = os.listdir(pg2.pg_twophase_dir_path()) + twophase_files2 = os.listdir(endpoint2.pg_twophase_dir_path()) log.info(twophase_files2) assert twophase_files2.sort() == twophase_files.sort() - conn2 = pg2.connect() + conn2 = endpoint2.connect() cur2 = conn2.cursor() # On the new branch, commit one of the prepared transactions, diff --git a/test_runner/regress/test_unlogged.py b/test_runner/regress/test_unlogged.py index b6b20f1230..708bf0dfeb 100644 --- a/test_runner/regress/test_unlogged.py +++ b/test_runner/regress/test_unlogged.py @@ -9,9 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn def test_unlogged(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_unlogged", "empty") - pg = env.postgres.create_start("test_unlogged") + endpoint = env.endpoints.create_start("test_unlogged") - conn = pg.connect() + conn = endpoint.connect() cur = conn.cursor() cur.execute("CREATE UNLOGGED TABLE iut (id int);") @@ -20,12 +20,10 @@ def test_unlogged(neon_simple_env: NeonEnv): cur.execute("INSERT INTO iut values (42);") # create another compute to fetch inital empty contents from pageserver - fork_at_current_lsn(env, pg, "test_unlogged_basebackup", "test_unlogged") - pg2 = env.postgres.create_start( - "test_unlogged_basebackup", - ) + fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged") + endpoint2 = env.endpoints.create_start("test_unlogged_basebackup") - conn2 = pg2.connect() + conn2 = endpoint2.connect() cur2 = conn2.cursor() # after restart table should be empty but valid cur2.execute("PREPARE iut_plan (int) AS INSERT INTO iut VALUES ($1)") diff --git a/test_runner/regress/test_vm_bits.py b/test_runner/regress/test_vm_bits.py index 16a870471b..d8034b31b0 100644 --- a/test_runner/regress/test_vm_bits.py +++ b/test_runner/regress/test_vm_bits.py @@ -10,10 +10,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_vm_bit_clear", "empty") - pg = env.postgres.create_start("test_vm_bit_clear") + endpoint = env.endpoints.create_start("test_vm_bit_clear") log.info("postgres is running on 'test_vm_bit_clear' branch") - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Install extension containing function needed for test @@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1") # Branch at this point, to test that later - fork_at_current_lsn(env, pg, "test_vm_bit_clear_new", "test_vm_bit_clear") + fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear") # Clear the buffer cache, to force the VM page to be re-fetched from # the page server @@ -63,10 +63,10 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): # a dirty VM page is evicted. If the VM bit was not correctly cleared by the # earlier WAL record, the full-page image hides the problem. Starting a new # server at the right point-in-time avoids that full-page image. 
- pg_new = env.postgres.create_start("test_vm_bit_clear_new") + endpoint_new = env.endpoints.create_start("test_vm_bit_clear_new") log.info("postgres is running on 'test_vm_bit_clear_new' branch") - pg_new_conn = pg_new.connect() + pg_new_conn = endpoint_new.connect() cur_new = pg_new_conn.cursor() cur_new.execute( diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index c24c77bb95..77a2987a96 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -16,6 +16,7 @@ from typing import Any, List, Optional import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( + Endpoint, NeonBroker, NeonEnv, NeonEnvBuilder, @@ -23,7 +24,6 @@ from fixtures.neon_fixtures import ( PgBin, PgProtocol, PortDistributor, - Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, @@ -39,11 +39,11 @@ from fixtures.utils import get_dir_size, query_scalar, start_in_background def wait_lsn_force_checkpoint( tenant_id: TenantId, timeline_id: TimelineId, - pg: Postgres, + endpoint: Endpoint, ps: NeonPageserver, pageserver_conn_options={}, ): - lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver") auth_token = None @@ -97,10 +97,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): branch_names_to_timeline_ids = {} # start postgres on each timeline - pgs = [] + endpoints = [] for branch_name in branch_names: new_timeline_id = env.neon_cli.create_branch(branch_name) - pgs.append(env.postgres.create_start(branch_name)) + endpoints.append(env.endpoints.create_start(branch_name)) branch_names_to_timeline_ids[branch_name] = new_timeline_id tenant_id = env.initial_tenant @@ -160,8 +160,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): # Do everything in different loops to have actions on different timelines # interleaved. 
# create schema - for pg in pgs: - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + for endpoint in endpoints: + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") init_m = collect_metrics("after CREATE TABLE") # Populate data for 2/3 timelines @@ -197,16 +197,16 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): metrics_checker = MetricsChecker() metrics_checker.start() - for pg in pgs[:-1]: - pg.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'") + for endpoint in endpoints[:-1]: + endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'") metrics_checker.stop() collect_metrics("after INSERT INTO") # Check data for 2/3 timelines - for pg in pgs[:-1]: - res = pg.safe_psql("SELECT sum(key) FROM t") + for endpoint in endpoints[:-1]: + res = endpoint.safe_psql("SELECT sum(key) FROM t") assert res[0] == (5000050000,) final_m = collect_metrics("after SELECT") @@ -233,11 +233,11 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_restarts") - pg = env.postgres.create_start("test_safekeepers_restarts") + endpoint = env.endpoints.create_start("test_safekeepers_restarts") # we rely upon autocommit after each statement # as waiting for acceptors happens there - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() failed_node = None @@ -268,22 +268,22 @@ def test_broker(neon_env_builder: NeonEnvBuilder): ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*" ) - pg = env.postgres.create_start("test_broker") - pg.safe_psql("CREATE TABLE t(key int primary key, value text)") + endpoint = env.endpoints.create_start("test_broker") + endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # wait until remote_consistent_lsn gets advanced on all safekeepers clients = [sk.http_client() for sk in env.safekeepers] stat_before = [cli.timeline_status(tenant_id, timeline_id) for cli in clients] log.info(f"statuses is {stat_before}") - pg.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'") + endpoint.safe_psql("INSERT INTO t SELECT generate_series(1,100), 'payload'") # force checkpoint in pageserver to advance remote_consistent_lsn - wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver) + wait_lsn_force_checkpoint(tenant_id, timeline_id, endpoint, env.pageserver) # and wait till remote_consistent_lsn propagates to all safekeepers started_at = time.time() @@ -317,26 +317,28 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): ) env.neon_cli.create_branch("test_safekeepers_wal_removal") - pg = env.postgres.create_start("test_safekeepers_wal_removal") + endpoint = env.endpoints.create_start("test_safekeepers_wal_removal") # Note: it is important to insert at least two segments, as currently # control file is synced roughly once in segment range and WAL is not # removed until all horizons are persisted. 
- pg.safe_psql_many( + endpoint.safe_psql_many( [ "CREATE TABLE t(key int primary key, value text)", "INSERT INTO t SELECT generate_series(1,200000), 'payload'", ] ) - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) # force checkpoint to advance remote_consistent_lsn pageserver_conn_options = {} if auth_enabled: pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id) - wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options) + wait_lsn_force_checkpoint( + tenant_id, timeline_id, endpoint, env.pageserver, pageserver_conn_options + ) # We will wait for first segment removal. Make sure they exist for starter. first_segments = [ @@ -436,13 +438,13 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_wal_backup") - pg = env.postgres.create_start("test_safekeepers_wal_backup") + endpoint = env.endpoints.create_start("test_safekeepers_wal_backup") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("create table t(key int, value text)") @@ -465,9 +467,9 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot # put one of safekeepers down again env.safekeepers[0].stop() # restart postgres - pg.stop_and_destroy().create_start("test_safekeepers_wal_backup") + endpoint.stop_and_destroy().create_start("test_safekeepers_wal_backup") # and ensure offloading still works - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("insert into t select generate_series(1,250000), 'payload'") seg_end = Lsn("0/5000000") @@ -491,15 +493,15 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re env = neon_env_builder.init_start() env.neon_cli.create_branch("test_s3_wal_replay") - pg = env.postgres.create_start("test_s3_wal_replay") + endpoint = env.endpoints.create_start("test_s3_wal_replay") # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) expected_sum = 0 - with closing(pg.connect()) as conn: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("create table t(key int, value text)") cur.execute("insert into t values (1, 'payload')") @@ -547,7 +549,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb" ) - pg.stop_and_destroy() + endpoint.stop_and_destroy() ps_cli.timeline_delete(tenant_id, timeline_id) # Also delete and manually create timeline on safekeepers -- this tests @@ -609,9 +611,9 @@ def 
test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re log.info(f"WAL redo took {elapsed} s") # verify data - pg.create_start("test_s3_wal_replay") + endpoint.create_start("test_s3_wal_replay") - assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum + assert endpoint.safe_psql("select sum(key) from t")[0][0] == expected_sum class ProposerPostgres(PgProtocol): @@ -762,13 +764,13 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_timeline_status") - pg = env.postgres.create_start("test_timeline_status") + endpoint = env.endpoints.create_start("test_timeline_status") wa = env.safekeepers[0] # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) if not auth_enabled: wa_http_cli = wa.http_client() @@ -806,11 +808,11 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert debug_dump_0["timelines_count"] == 1 assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id) - pg.safe_psql("create table t(i int)") + endpoint.safe_psql("create table t(i int)") # ensure epoch goes up after reboot - pg.stop().start() - pg.safe_psql("insert into t values(10)") + endpoint.stop().start() + endpoint.safe_psql("insert into t values(10)") tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id) epoch_after_reboot = tli_status.acceptor_epoch @@ -992,8 +994,8 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str: return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names]) - def execute_payload(pg: Postgres): - with closing(pg.connect()) as conn: + def execute_payload(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -1021,26 +1023,26 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() active_safekeepers = [1, 2, 3] - pg = env.postgres.create("test_replace_safekeeper") - pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) - pg.start() + endpoint = env.endpoints.create("test_replace_safekeeper") + endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.start() # learn neon timeline from compute - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) - timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0]) + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Restart all safekeepers to flush everything") env.safekeepers[0].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[0].start() env.safekeepers[1].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[1].start() env.safekeepers[2].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) env.safekeepers[2].start() 
env.safekeepers[0].stop(immediate=True) @@ -1050,27 +1052,27 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): env.safekeepers[1].start() env.safekeepers[2].start() - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3") env.safekeepers[0].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Recreate postgres to replace failed sk1 with new sk4") - pg.stop_and_destroy().create("test_replace_safekeeper") + endpoint.stop_and_destroy().create("test_replace_safekeeper") active_safekeepers = [2, 3, 4] env.safekeepers[3].start() - pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) - pg.start() + endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.start() - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work") env.safekeepers[1].stop(immediate=True) - execute_payload(pg) + execute_payload(endpoint) show_statuses(env.safekeepers, tenant_id, timeline_id) @@ -1082,13 +1084,13 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): last_lsn = Lsn(0) # returns pg_wal size in MB - def collect_stats(pg: Postgres, cur, enable_logs=True): + def collect_stats(endpoint: Endpoint, cur, enable_logs=True): nonlocal last_lsn - assert pg.pgdata_dir is not None + assert endpoint.pgdata_dir is not None log.info("executing INSERT to generate WAL") current_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) - pg_wal_size_mb = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024 + pg_wal_size_mb = get_dir_size(os.path.join(endpoint.pgdata_dir, "pg_wal")) / 1024 / 1024 if enable_logs: lsn_delta_mb = (current_lsn - last_lsn) / 1024 / 1024 log.info(f"LSN delta: {lsn_delta_mb} MB, current WAL size: {pg_wal_size_mb} MB") @@ -1104,25 +1106,25 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_wal_deleted_after_broadcast") # Adjust checkpoint config to prevent keeping old WAL segments - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_wal_deleted_after_broadcast", config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"], ) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() cur.execute("CREATE TABLE t(key int, value text)") - collect_stats(pg, cur) + collect_stats(endpoint, cur) # generate WAL to simulate normal workload for i in range(5): generate_wal(cur) - collect_stats(pg, cur) + collect_stats(endpoint, cur) log.info("executing checkpoint") cur.execute("CHECKPOINT") - wal_size_after_checkpoint = collect_stats(pg, cur) + wal_size_after_checkpoint = collect_stats(endpoint, cur) # there shouldn't be more than 2 WAL segments (but dir may have archive_status files) assert wal_size_after_checkpoint < 16 * 2.5 @@ -1151,13 +1153,13 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): tenant_id_other, timeline_id_other = env.neon_cli.create_tenant() # Populate branches - pg_1 = env.postgres.create_start("br1") - pg_2 = env.postgres.create_start("br2") - pg_3 = env.postgres.create_start("br3") - pg_4 = env.postgres.create_start("br4") - pg_other = env.postgres.create_start("main", tenant_id=tenant_id_other) - for pg in [pg_1, pg_2, pg_3, pg_4, 
pg_other]: - with closing(pg.connect()) as conn: + endpoint_1 = env.endpoints.create_start("br1") + endpoint_2 = env.endpoints.create_start("br2") + endpoint_3 = env.endpoints.create_start("br3") + endpoint_4 = env.endpoints.create_start("br4") + endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other) + for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key)") sk = env.safekeepers[0] @@ -1178,14 +1180,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() # Stop branches which should be inactive and restart Safekeeper to drop its in-memory state. - pg_2.stop_and_destroy() - pg_4.stop_and_destroy() + endpoint_2.stop_and_destroy() + endpoint_4.stop_and_destroy() sk.stop() sk.start() # Ensure connections to Safekeeper are established - for pg in [pg_1, pg_3, pg_other]: - with closing(pg.connect()) as conn: + for endpoint in [endpoint_1, endpoint_3, endpoint_other]: + with closing(endpoint.connect()) as conn: with conn.cursor() as cur: cur.execute("INSERT INTO t (key) VALUES (1)") @@ -1244,6 +1246,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Ensure the other tenant still works sk_http_other.timeline_status(tenant_id_other, timeline_id_other) - with closing(pg_other.connect()) as conn: + with closing(endpoint_other.connect()) as conn: with conn.cursor() as cur: cur.execute("INSERT INTO t (key) VALUES (123)") diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index f10a40690e..7debeed140 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -6,7 +6,7 @@ from typing import List, Optional import asyncpg from fixtures.log_helper import getLogger -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper +from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper from fixtures.types import Lsn, TenantId, TimelineId log = getLogger("root.safekeeper_async") @@ -82,8 +82,10 @@ class WorkerStats(object): log.info("All workers made {} transactions".format(progress)) -async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer): - pg_conn = await pg.connect_async() +async def run_random_worker( + stats: WorkerStats, endpoint: Endpoint, worker_id, n_accounts, max_transfer +): + pg_conn = await endpoint.connect_async() log.debug("Started worker {}".format(worker_id)) while stats.running: @@ -141,7 +143,7 @@ async def wait_for_lsn( # consistent. async def run_restarts_under_load( env: NeonEnv, - pg: Postgres, + endpoint: Endpoint, acceptors: List[Safekeeper], n_workers=10, n_accounts=100, @@ -154,7 +156,7 @@ async def run_restarts_under_load( # taking into account that this timeout is checked only at the beginning of every iteration. 
test_timeout_at = time.monotonic() + 5 * 60 - pg_conn = await pg.connect_async() + pg_conn = await endpoint.connect_async() tenant_id = TenantId(await pg_conn.fetchval("show neon.tenant_id")) timeline_id = TimelineId(await pg_conn.fetchval("show neon.timeline_id")) @@ -165,7 +167,7 @@ async def run_restarts_under_load( stats = WorkerStats(n_workers) workers = [] for worker_id in range(n_workers): - worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer) + worker = run_random_worker(stats, endpoint, worker_id, bank.n_accounts, max_transfer) workers.append(asyncio.create_task(worker)) for it in range(iterations): @@ -212,11 +214,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_safekeepers_restarts_under_load") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"] ) - asyncio.run(run_restarts_under_load(env, pg, env.safekeepers)) + asyncio.run(run_restarts_under_load(env, endpoint, env.safekeepers)) # Restart acceptors one by one and test that everything is working as expected @@ -228,7 +230,7 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): env.neon_cli.create_branch("test_restarts_frequent_checkpoints") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start( + endpoint = env.endpoints.create_start( "test_restarts_frequent_checkpoints", config_lines=[ "max_replication_write_lag=1MB", @@ -240,11 +242,13 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): # we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments # are not removed before broadcasted to all safekeepers, with the help of replication slot - asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5)) + asyncio.run( + run_restarts_under_load(env, endpoint, env.safekeepers, period_time=15, iterations=5) + ) -def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): - pg = Postgres( +def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): + endpoint = Endpoint( env, tenant_id=env.initial_tenant, port=env.port_distributor.get_port(), @@ -253,19 +257,19 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): check_stop_result=False, ) - # embed current time in node name - node_name = pgdir_name or f"pg_node_{time.time()}" - return pg.create_start( - branch_name=branch, node_name=node_name, config_lines=["log_statement=all"] + # embed current time in endpoint ID + endpoint_id = pgdir_name or f"ep-{time.time()}" + return endpoint.create_start( + branch_name=branch, endpoint_id=endpoint_id, config_lines=["log_statement=all"] ) async def exec_compute_query( env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None ): - with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg: + with endpoint_create_start(env, branch=branch, pgdir_name=pgdir_name) as endpoint: before_conn = time.time() - conn = await pg.connect_async() + conn = await endpoint.connect_async() res = await conn.fetch(query) await conn.close() after_conn = time.time() @@ -436,8 +440,8 @@ async def check_unavailability( assert bg_query.done() -async def run_unavailability(env: NeonEnv, pg: 
Postgres): - conn = await pg.connect_async() +async def run_unavailability(env: NeonEnv, endpoint: Endpoint): + conn = await endpoint.connect_async() # check basic work with table await conn.execute("CREATE TABLE t(key int primary key, value text)") @@ -462,9 +466,9 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_unavailability") - pg = env.postgres.create_start("test_safekeepers_unavailability") + endpoint = env.endpoints.create_start("test_safekeepers_unavailability") - asyncio.run(run_unavailability(env, pg)) + asyncio.run(run_unavailability(env, endpoint)) @dataclass @@ -493,8 +497,8 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest): await asyncio.sleep(1) -async def run_race_conditions(env: NeonEnv, pg: Postgres): - conn = await pg.connect_async() +async def run_race_conditions(env: NeonEnv, endpoint: Endpoint): + conn = await endpoint.connect_async() await conn.execute("CREATE TABLE t(key int primary key, value text)") data = RaceConditionTest(0, False) @@ -525,14 +529,14 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_safekeepers_race_conditions") - pg = env.postgres.create_start("test_safekeepers_race_conditions") + endpoint = env.endpoints.create_start("test_safekeepers_race_conditions") - asyncio.run(run_race_conditions(env, pg)) + asyncio.run(run_race_conditions(env, endpoint)) # Check that pageserver can select safekeeper with largest commit_lsn # and switch if LSN is not updated for some time (NoWalTimeout). -async def run_wal_lagging(env: NeonEnv, pg: Postgres): +async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str: # use ports 10, 11 and 12 to simulate unavailable safekeepers return ",".join( @@ -542,10 +546,10 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): ] ) - conn = await pg.connect_async() + conn = await endpoint.connect_async() await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.close() - pg.stop() + endpoint.stop() n_iterations = 20 n_txes = 10000 @@ -561,11 +565,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): it -= 1 continue - pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) + endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) log.info(f"Iteration {it}: {active_sk}") - pg.start() - conn = await pg.connect_async() + endpoint.start() + conn = await endpoint.connect_async() for _ in range(n_txes): await conn.execute(f"INSERT INTO t values ({i}, 'payload')") @@ -573,11 +577,11 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): i += 1 await conn.close() - pg.stop() + endpoint.stop() - pg.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers))) - pg.start() - conn = await pg.connect_async() + endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers))) + endpoint.start() + conn = await endpoint.connect_async() log.info(f"Executed {i-1} queries") @@ -591,6 +595,6 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_lagging") - pg = env.postgres.create_start("test_wal_lagging") + endpoint = env.endpoints.create_start("test_wal_lagging") - asyncio.run(run_wal_lagging(env, pg)) + asyncio.run(run_wal_lagging(env, endpoint)) diff --git a/test_runner/regress/test_wal_restore.py 
b/test_runner/regress/test_wal_restore.py index 63d0b46f63..dd944af7eb 100644 --- a/test_runner/regress/test_wal_restore.py +++ b/test_runner/regress/test_wal_restore.py @@ -19,9 +19,9 @@ def test_wal_restore( ): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_restore") - pg = env.postgres.create_start("test_wal_restore") - pg.safe_psql("create table t as select generate_series(1,300000)") - tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0]) + endpoint = env.endpoints.create_start("test_wal_restore") + endpoint.safe_psql("create table t as select generate_series(1,300000)") + tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) env.neon_cli.pageserver_stop() port = port_distributor.get_port() data_dir = test_output_dir / "pgsql.restored" diff --git a/test_runner/regress/test_walredo_not_left_behind_on_detach.py b/test_runner/regress/test_walredo_not_left_behind_on_detach.py index d6302f8632..7d944bebb3 100644 --- a/test_runner/regress/test_walredo_not_left_behind_on_detach.py +++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py @@ -45,9 +45,9 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() - pg = env.postgres.create_start("main", tenant_id=tenant_id) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) - pg_conn = pg.connect() + pg_conn = endpoint.connect() cur = pg_conn.cursor() # Create table, and insert some rows. Make it big enough that it doesn't fit in diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py index 0281f4f48b..7e8aef5a5f 100644 --- a/test_runner/test_broken.py +++ b/test_runner/test_broken.py @@ -24,7 +24,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin): env = neon_simple_env env.neon_cli.create_branch("test_broken", "empty") - env.postgres.create_start("test_broken") + env.endpoints.create_start("test_broken") log.info("postgres is running") log.info("THIS NEXT COMMAND WILL FAIL:")