From cbeb67067c7f5e3c6688bfead646b136af4589c9 Mon Sep 17 00:00:00 2001 From: anastasia Date: Fri, 13 Aug 2021 20:18:44 +0300 Subject: [PATCH] Issue #367. Change CLI so that we always create node from scratch at 'pg start'. This operation preserve previously existing config Add new flag '--config-only' to 'pg create'. If this flag is passed, don't perform basebackup, just fill initial postgresql.conf for the node. --- Cargo.lock | 44 +++---- control_plane/src/compute.rs | 155 +++++++++++++++--------- test_runner/fixtures/zenith_fixtures.py | 58 ++++++++- zenith/src/main.rs | 21 +++- 4 files changed, 191 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index adc0b26428..a36f8aee0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "ahash" version = "0.4.7" @@ -1317,24 +1319,6 @@ dependencies = [ "tokio-postgres 0.7.1", ] -[[package]] -name = "postgres-protocol" -version = "0.6.1" -source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" -dependencies = [ - "base64 0.13.0", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "lazy_static", - "md-5", - "memchr", - "rand", - "sha2", - "stringprep", -] - [[package]] name = "postgres-protocol" version = "0.6.1" @@ -1354,13 +1338,21 @@ dependencies = [ ] [[package]] -name = "postgres-types" -version = "0.2.1" +name = "postgres-protocol" +version = "0.6.1" source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" dependencies = [ + "base64 0.13.0", + "byteorder", "bytes", "fallible-iterator", - "postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)", + "hmac", + "lazy_static", + "md-5", + "memchr", + "rand", + "sha2", + "stringprep", ] [[package]] @@ -1374,6 +1366,16 @@ dependencies = [ "postgres-protocol 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "postgres-types" +version = "0.2.1" +source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" +dependencies = [ + "bytes", + "fallible-iterator", + "postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)", +] + [[package]] name = "postgres_ffi" version = "0.1.0" diff --git a/control_plane/src/compute.rs b/control_plane/src/compute.rs index 434f4f167c..a03dc15b2e 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/compute.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use std::time::Duration; use std::{collections::BTreeMap, path::PathBuf}; use std::{ - fs::{self, OpenOptions}, + fs::{self, File, OpenOptions}, io::Read, }; @@ -85,48 +85,36 @@ impl ComputeControlPlane { } } - /// Connect to a page server, get base backup, and untar it to initialize a - /// new data directory - pub fn new_from_page_server( - &mut self, - is_test: bool, - timelineid: ZTimelineId, - name: &str, - tenantid: ZTenantId, - ) -> Result> { - let node = Arc::new(PostgresNode { - name: name.to_owned(), - address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()), - env: self.env.clone(), - pageserver: Arc::clone(&self.pageserver), - is_test, - timelineid, - tenantid, - }); - - node.init_from_page_server(self.env.auth_type)?; - self.nodes - .insert((tenantid, node.name.clone()), Arc::clone(&node)); - - Ok(node) - } - pub fn new_node( &mut self, tenantid: ZTenantId, branch_name: &str, + config_only: bool, ) -> Result> { let timeline_id = self .pageserver .branch_get_by_name(&tenantid, branch_name)? .timeline_id; - let node = self.new_from_page_server(false, timeline_id, branch_name, tenantid)?; + + let node = Arc::new(PostgresNode { + name: branch_name.to_owned(), + address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()), + env: self.env.clone(), + pageserver: Arc::clone(&self.pageserver), + is_test: false, + timelineid: timeline_id, + tenantid, + }); + + node.init_from_page_server(self.env.auth_type, config_only)?; + self.nodes + .insert((tenantid, node.name.clone()), Arc::clone(&node)); + // Configure the node to stream WAL directly to the pageserver node.append_conf( "postgresql.conf", format!( concat!( - "shared_preload_libraries = zenith\n", "synchronous_standby_names = 'pageserver'\n", // TODO: add a new function arg? "zenith.callmemaybe_connstring = '{}'\n", // FIXME escaping ), @@ -246,39 +234,15 @@ impl PostgresNode { }) } - // Connect to a page server, get base backup, and untar it to initialize a - // new data directory - pub fn init_from_page_server(&self, auth_type: AuthType) -> Result<()> { + pub fn do_basebackup(&self) -> Result<()> { let pgdata = self.pgdata(); - println!( - "Extracting base backup to create postgres instance: path={} port={}", - pgdata.display(), - self.address.port() - ); - - // initialize data directory - if self.is_test { - fs::remove_dir_all(&pgdata).ok(); - } - let sql = format!("basebackup {} {}", self.tenantid, self.timelineid); let mut client = self .pageserver .page_server_psql_client() .with_context(|| "connecting to page server failed")?; - fs::create_dir_all(&pgdata) - .with_context(|| format!("could not create data directory {}", pgdata.display()))?; - fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context( - || { - format!( - "could not set permissions in data directory {}", - pgdata.display() - ) - }, - )?; - let mut copyreader = client .copy_out(sql.as_str()) .with_context(|| "page server 'basebackup' command failed")?; @@ -294,6 +258,45 @@ impl PostgresNode { ar.unpack(&pgdata) .with_context(|| "extracting page backup failed")?; + Ok(()) + } + + // Connect to a page server, get base backup, and untar it to initialize a + // new data directory + pub fn init_from_page_server(&self, auth_type: AuthType, config_only: bool) -> Result<()> { + let pgdata = self.pgdata(); + + println!( + "Extracting base backup to create postgres instance: path={} port={}", + pgdata.display(), + self.address.port() + ); + + // initialize data directory + if self.is_test { + fs::remove_dir_all(&pgdata).ok(); + } + + fs::create_dir_all(&pgdata) + .with_context(|| format!("could not create data directory {}", pgdata.display()))?; + fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context( + || { + format!( + "could not set permissions in data directory {}", + pgdata.display() + ) + }, + )?; + + if config_only { + //Just create an empty config file + File::create(self.pgdata().join("postgresql.conf").to_str().unwrap())?; + } else { + self.do_basebackup()?; + fs::create_dir_all(self.pgdata().join("pg_wal"))?; + fs::create_dir_all(self.pgdata().join("pg_wal").join("archive_status"))?; + } + // wal_log_hints is mandatory when running against pageserver (see gh issue#192) // TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE? self.append_conf( @@ -321,8 +324,6 @@ impl PostgresNode { // page server yet. (gh issue #349) self.append_conf("postgresql.conf", "wal_keep_size='10TB'\n")?; - // Connect it to the page server. - // set up authentication let password = if let AuthType::ZenithJWT = auth_type { "$ZENITH_AUTH_TOKEN" @@ -348,8 +349,6 @@ impl PostgresNode { .as_str(), )?; - fs::create_dir_all(self.pgdata().join("pg_wal"))?; - fs::create_dir_all(self.pgdata().join("pg_wal").join("archive_status"))?; Ok(()) } @@ -410,6 +409,46 @@ impl PostgresNode { } pub fn start(&self, auth_token: &Option) -> Result<()> { + // Bail if the node already running. + if self.status() == "running" { + anyhow::bail!("The node is already running"); + } + + // 1. We always start compute node from scratch, so + // if old dir exists, preserve config files and drop the directory + + // XXX Now we only use 'postgresql.conf'. + // If we will need 'pg_hba.conf', support it here too + + let postgresql_conf_path = self.pgdata().join("postgresql.conf"); + let postgresql_conf = fs::read(postgresql_conf_path.clone()).with_context(|| { + format!( + "failed to read config file in {}", + postgresql_conf_path.to_str().unwrap() + ) + })?; + + println!( + "Destroying postgres data directory '{}'", + self.pgdata().to_str().unwrap() + ); + fs::remove_dir_all(&self.pgdata())?; + + // 2. Create new node + self.init_from_page_server(self.env.auth_type, false)?; + + // 3. Bring back config files + + if let Ok(mut file) = OpenOptions::new() + .append(false) + .write(true) + .open(&postgresql_conf_path) + { + file.write_all(&postgresql_conf)?; + file.sync_all()?; + } + + // 4. Finally start the compute node postgres println!("Starting postgres node at '{}'", self.connstr()); self.pg_ctl(&["start"], auth_token) } diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index f4813d2230..2a1081af8f 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -267,6 +267,7 @@ class Postgres(PgProtocol): branch: str, wal_acceptors: Optional[str] = None, config_lines: Optional[List[str]] = None, + config_only: bool = False, ) -> 'Postgres': """ Create the pg data directory. @@ -278,7 +279,10 @@ class Postgres(PgProtocol): if not config_lines: config_lines = [] - self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}']) + if config_only: + self.zenith_cli.run(['pg', 'create', '--config-only', branch, f'--tenantid={self.tenant_id}']) + else: + self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}']) self.branch = branch if wal_acceptors is not None: self.adjust_for_wal_acceptors(wal_acceptors) @@ -377,7 +381,8 @@ class Postgres(PgProtocol): config_lines: Optional[List[str]] = None, ) -> 'Postgres': """ - Create a Postgres instance, then start it. + Create a Postgres instance, apply config + and then start it. Returns self. """ @@ -385,6 +390,7 @@ class Postgres(PgProtocol): branch=branch, wal_acceptors=wal_acceptors, config_lines=config_lines, + config_only=True, ).start() return self @@ -430,6 +436,54 @@ class PostgresFactory: config_lines=config_lines, ) + def create( + self, + branch: str = "main", + tenant_id: Optional[str] = None, + wal_acceptors: Optional[str] = None, + config_lines: Optional[List[str]] = None + ) -> Postgres: + + pg = Postgres( + zenith_cli=self.zenith_cli, + repo_dir=self.repo_dir, + tenant_id=tenant_id or self.initial_tenant, + port=self.base_port + self.num_instances + 1, + ) + + self.num_instances += 1 + self.instances.append(pg) + + return pg.create( + branch=branch, + wal_acceptors=wal_acceptors, + config_lines=config_lines, + ) + + def config( + self, + branch: str = "main", + tenant_id: Optional[str] = None, + wal_acceptors: Optional[str] = None, + config_lines: Optional[List[str]] = None + ) -> Postgres: + + pg = Postgres( + zenith_cli=self.zenith_cli, + repo_dir=self.repo_dir, + tenant_id=tenant_id or self.initial_tenant, + port=self.base_port + self.num_instances + 1, + ) + + self.num_instances += 1 + self.instances.append(pg) + + return pg.config( + branch=branch, + wal_acceptors=wal_acceptors, + config_lines=config_lines, + ) + def stop_all(self) -> 'PostgresFactory': for pg in self.instances: pg.stop() diff --git a/zenith/src/main.rs b/zenith/src/main.rs index abaa972cc1..82aa39c46f 100644 --- a/zenith/src/main.rs +++ b/zenith/src/main.rs @@ -92,8 +92,18 @@ fn main() -> Result<()> { .setting(AppSettings::ArgRequiredElseHelp) .about("Manage postgres instances") .subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone())) - .subcommand(SubCommand::with_name("create").arg(timeline_arg.clone()).arg(tenantid_arg.clone())) - .subcommand(SubCommand::with_name("start").arg(timeline_arg.clone()).arg(tenantid_arg.clone())) + .subcommand(SubCommand::with_name("create") + .about("Create a postgres compute node") + .arg(timeline_arg.clone()).arg(tenantid_arg.clone()) + .arg( + Arg::with_name("config-only") + .help("Don't do basebackup, create compute node with only config files") + .long("config-only") + .required(false) + )) + .subcommand(SubCommand::with_name("start") + .about("Start a postrges compute node.\n This command actually creates new node from scrath, but preserves existing config files") + .arg(timeline_arg.clone()).arg(tenantid_arg.clone())) .subcommand( SubCommand::with_name("stop") .arg(timeline_arg.clone()) @@ -459,10 +469,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .value_of("tenantid") .map_or(Ok(env.tenantid), |value| value.parse())?; let timeline_name = create_match.value_of("timeline").unwrap_or("main"); - // check is that timeline doesnt already exist - // this check here is because it + let config_only = create_match.is_present("config-only"); - cplane.new_node(tenantid, timeline_name)?; + cplane.new_node(tenantid, timeline_name, config_only)?; } ("start", Some(start_match)) => { let tenantid: ZTenantId = start_match @@ -483,7 +492,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { if let Some(node) = node { node.start(&auth_token)?; } else { - let node = cplane.new_node(tenantid, timeline_name)?; + let node = cplane.new_node(tenantid, timeline_name, false)?; node.start(&auth_token)?; } }