Change CLI so that we always create node from scratch at 'pg start'.
This operation preserve previously existing config

Add new flag '--config-only' to 'pg create'.
If this flag is passed, don't perform basebackup, just fill initial postgresql.conf for the node.
This commit is contained in:
anastasia
2021-08-13 20:18:44 +03:00
committed by lubennikovaav
parent 921ec390bc
commit cbeb67067c
4 changed files with 191 additions and 87 deletions

44
Cargo.lock generated
View File

@@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.4.7"
@@ -1317,24 +1319,6 @@ dependencies = [
"tokio-postgres 0.7.1",
]
[[package]]
name = "postgres-protocol"
version = "0.6.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858"
dependencies = [
"base64 0.13.0",
"byteorder",
"bytes",
"fallible-iterator",
"hmac",
"lazy_static",
"md-5",
"memchr",
"rand",
"sha2",
"stringprep",
]
[[package]]
name = "postgres-protocol"
version = "0.6.1"
@@ -1354,13 +1338,21 @@ dependencies = [
]
[[package]]
name = "postgres-types"
version = "0.2.1"
name = "postgres-protocol"
version = "0.6.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858"
dependencies = [
"base64 0.13.0",
"byteorder",
"bytes",
"fallible-iterator",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"hmac",
"lazy_static",
"md-5",
"memchr",
"rand",
"sha2",
"stringprep",
]
[[package]]
@@ -1374,6 +1366,16 @@ dependencies = [
"postgres-protocol 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "postgres-types"
version = "0.2.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858#9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858"
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
]
[[package]]
name = "postgres_ffi"
version = "0.1.0"

View File

@@ -7,7 +7,7 @@ use std::sync::Arc;
use std::time::Duration;
use std::{collections::BTreeMap, path::PathBuf};
use std::{
fs::{self, OpenOptions},
fs::{self, File, OpenOptions},
io::Read,
};
@@ -85,48 +85,36 @@ impl ComputeControlPlane {
}
}
/// Connect to a page server, get base backup, and untar it to initialize a
/// new data directory
pub fn new_from_page_server(
&mut self,
is_test: bool,
timelineid: ZTimelineId,
name: &str,
tenantid: ZTenantId,
) -> Result<Arc<PostgresNode>> {
let node = Arc::new(PostgresNode {
name: name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()),
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
is_test,
timelineid,
tenantid,
});
node.init_from_page_server(self.env.auth_type)?;
self.nodes
.insert((tenantid, node.name.clone()), Arc::clone(&node));
Ok(node)
}
pub fn new_node(
&mut self,
tenantid: ZTenantId,
branch_name: &str,
config_only: bool,
) -> Result<Arc<PostgresNode>> {
let timeline_id = self
.pageserver
.branch_get_by_name(&tenantid, branch_name)?
.timeline_id;
let node = self.new_from_page_server(false, timeline_id, branch_name, tenantid)?;
let node = Arc::new(PostgresNode {
name: branch_name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()),
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
is_test: false,
timelineid: timeline_id,
tenantid,
});
node.init_from_page_server(self.env.auth_type, config_only)?;
self.nodes
.insert((tenantid, node.name.clone()), Arc::clone(&node));
// Configure the node to stream WAL directly to the pageserver
node.append_conf(
"postgresql.conf",
format!(
concat!(
"shared_preload_libraries = zenith\n",
"synchronous_standby_names = 'pageserver'\n", // TODO: add a new function arg?
"zenith.callmemaybe_connstring = '{}'\n", // FIXME escaping
),
@@ -246,39 +234,15 @@ impl PostgresNode {
})
}
// Connect to a page server, get base backup, and untar it to initialize a
// new data directory
pub fn init_from_page_server(&self, auth_type: AuthType) -> Result<()> {
pub fn do_basebackup(&self) -> Result<()> {
let pgdata = self.pgdata();
println!(
"Extracting base backup to create postgres instance: path={} port={}",
pgdata.display(),
self.address.port()
);
// initialize data directory
if self.is_test {
fs::remove_dir_all(&pgdata).ok();
}
let sql = format!("basebackup {} {}", self.tenantid, self.timelineid);
let mut client = self
.pageserver
.page_server_psql_client()
.with_context(|| "connecting to page server failed")?;
fs::create_dir_all(&pgdata)
.with_context(|| format!("could not create data directory {}", pgdata.display()))?;
fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
|| {
format!(
"could not set permissions in data directory {}",
pgdata.display()
)
},
)?;
let mut copyreader = client
.copy_out(sql.as_str())
.with_context(|| "page server 'basebackup' command failed")?;
@@ -294,6 +258,45 @@ impl PostgresNode {
ar.unpack(&pgdata)
.with_context(|| "extracting page backup failed")?;
Ok(())
}
// Connect to a page server, get base backup, and untar it to initialize a
// new data directory
pub fn init_from_page_server(&self, auth_type: AuthType, config_only: bool) -> Result<()> {
let pgdata = self.pgdata();
println!(
"Extracting base backup to create postgres instance: path={} port={}",
pgdata.display(),
self.address.port()
);
// initialize data directory
if self.is_test {
fs::remove_dir_all(&pgdata).ok();
}
fs::create_dir_all(&pgdata)
.with_context(|| format!("could not create data directory {}", pgdata.display()))?;
fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
|| {
format!(
"could not set permissions in data directory {}",
pgdata.display()
)
},
)?;
if config_only {
//Just create an empty config file
File::create(self.pgdata().join("postgresql.conf").to_str().unwrap())?;
} else {
self.do_basebackup()?;
fs::create_dir_all(self.pgdata().join("pg_wal"))?;
fs::create_dir_all(self.pgdata().join("pg_wal").join("archive_status"))?;
}
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
self.append_conf(
@@ -321,8 +324,6 @@ impl PostgresNode {
// page server yet. (gh issue #349)
self.append_conf("postgresql.conf", "wal_keep_size='10TB'\n")?;
// Connect it to the page server.
// set up authentication
let password = if let AuthType::ZenithJWT = auth_type {
"$ZENITH_AUTH_TOKEN"
@@ -348,8 +349,6 @@ impl PostgresNode {
.as_str(),
)?;
fs::create_dir_all(self.pgdata().join("pg_wal"))?;
fs::create_dir_all(self.pgdata().join("pg_wal").join("archive_status"))?;
Ok(())
}
@@ -410,6 +409,46 @@ impl PostgresNode {
}
pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
// Bail if the node already running.
if self.status() == "running" {
anyhow::bail!("The node is already running");
}
// 1. We always start compute node from scratch, so
// if old dir exists, preserve config files and drop the directory
// XXX Now we only use 'postgresql.conf'.
// If we will need 'pg_hba.conf', support it here too
let postgresql_conf_path = self.pgdata().join("postgresql.conf");
let postgresql_conf = fs::read(postgresql_conf_path.clone()).with_context(|| {
format!(
"failed to read config file in {}",
postgresql_conf_path.to_str().unwrap()
)
})?;
println!(
"Destroying postgres data directory '{}'",
self.pgdata().to_str().unwrap()
);
fs::remove_dir_all(&self.pgdata())?;
// 2. Create new node
self.init_from_page_server(self.env.auth_type, false)?;
// 3. Bring back config files
if let Ok(mut file) = OpenOptions::new()
.append(false)
.write(true)
.open(&postgresql_conf_path)
{
file.write_all(&postgresql_conf)?;
file.sync_all()?;
}
// 4. Finally start the compute node postgres
println!("Starting postgres node at '{}'", self.connstr());
self.pg_ctl(&["start"], auth_token)
}

View File

@@ -267,6 +267,7 @@ class Postgres(PgProtocol):
branch: str,
wal_acceptors: Optional[str] = None,
config_lines: Optional[List[str]] = None,
config_only: bool = False,
) -> 'Postgres':
"""
Create the pg data directory.
@@ -278,7 +279,10 @@ class Postgres(PgProtocol):
if not config_lines:
config_lines = []
self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}'])
if config_only:
self.zenith_cli.run(['pg', 'create', '--config-only', branch, f'--tenantid={self.tenant_id}'])
else:
self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}'])
self.branch = branch
if wal_acceptors is not None:
self.adjust_for_wal_acceptors(wal_acceptors)
@@ -377,7 +381,8 @@ class Postgres(PgProtocol):
config_lines: Optional[List[str]] = None,
) -> 'Postgres':
"""
Create a Postgres instance, then start it.
Create a Postgres instance, apply config
and then start it.
Returns self.
"""
@@ -385,6 +390,7 @@ class Postgres(PgProtocol):
branch=branch,
wal_acceptors=wal_acceptors,
config_lines=config_lines,
config_only=True,
).start()
return self
@@ -430,6 +436,54 @@ class PostgresFactory:
config_lines=config_lines,
)
def create(
self,
branch: str = "main",
tenant_id: Optional[str] = None,
wal_acceptors: Optional[str] = None,
config_lines: Optional[List[str]] = None
) -> Postgres:
pg = Postgres(
zenith_cli=self.zenith_cli,
repo_dir=self.repo_dir,
tenant_id=tenant_id or self.initial_tenant,
port=self.base_port + self.num_instances + 1,
)
self.num_instances += 1
self.instances.append(pg)
return pg.create(
branch=branch,
wal_acceptors=wal_acceptors,
config_lines=config_lines,
)
def config(
self,
branch: str = "main",
tenant_id: Optional[str] = None,
wal_acceptors: Optional[str] = None,
config_lines: Optional[List[str]] = None
) -> Postgres:
pg = Postgres(
zenith_cli=self.zenith_cli,
repo_dir=self.repo_dir,
tenant_id=tenant_id or self.initial_tenant,
port=self.base_port + self.num_instances + 1,
)
self.num_instances += 1
self.instances.append(pg)
return pg.config(
branch=branch,
wal_acceptors=wal_acceptors,
config_lines=config_lines,
)
def stop_all(self) -> 'PostgresFactory':
for pg in self.instances:
pg.stop()

View File

@@ -92,8 +92,18 @@ fn main() -> Result<()> {
.setting(AppSettings::ArgRequiredElseHelp)
.about("Manage postgres instances")
.subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone()))
.subcommand(SubCommand::with_name("create").arg(timeline_arg.clone()).arg(tenantid_arg.clone()))
.subcommand(SubCommand::with_name("start").arg(timeline_arg.clone()).arg(tenantid_arg.clone()))
.subcommand(SubCommand::with_name("create")
.about("Create a postgres compute node")
.arg(timeline_arg.clone()).arg(tenantid_arg.clone())
.arg(
Arg::with_name("config-only")
.help("Don't do basebackup, create compute node with only config files")
.long("config-only")
.required(false)
))
.subcommand(SubCommand::with_name("start")
.about("Start a postrges compute node.\n This command actually creates new node from scrath, but preserves existing config files")
.arg(timeline_arg.clone()).arg(tenantid_arg.clone()))
.subcommand(
SubCommand::with_name("stop")
.arg(timeline_arg.clone())
@@ -459,10 +469,9 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
.value_of("tenantid")
.map_or(Ok(env.tenantid), |value| value.parse())?;
let timeline_name = create_match.value_of("timeline").unwrap_or("main");
// check is that timeline doesnt already exist
// this check here is because it
let config_only = create_match.is_present("config-only");
cplane.new_node(tenantid, timeline_name)?;
cplane.new_node(tenantid, timeline_name, config_only)?;
}
("start", Some(start_match)) => {
let tenantid: ZTenantId = start_match
@@ -483,7 +492,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
if let Some(node) = node {
node.start(&auth_token)?;
} else {
let node = cplane.new_node(tenantid, timeline_name)?;
let node = cplane.new_node(tenantid, timeline_name, false)?;
node.start(&auth_token)?;
}
}