Mirror of https://github.com/neondatabase/neon.git (synced 2026-02-01 17:50:38 +00:00)

Compare commits: chunk_load ... proxy-scra (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | c743e7a5cc | |
| | d44873246d | |
| | 4c65cc3be7 | |
| | b791f47eb3 | |
| | 7dc933b741 | |
| | 7e714ce8be | |
| | 61a5b59224 | |
79 Cargo.lock (generated)
@@ -1193,8 +1193,8 @@ dependencies = [
"once_cell",
"parking_lot",
"postgres",
"postgres-protocol",
"postgres-types",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"postgres-types 0.2.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"postgres_ffi",
"rand",
"regex",

@@ -1208,7 +1208,7 @@ dependencies = [
"tempfile",
"thiserror",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"tokio-stream",
"toml_edit",
"tracing",

@@ -1326,9 +1326,9 @@ dependencies = [
"fallible-iterator",
"futures",
"log",
"postgres-protocol",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
]

[[package]]

@@ -1349,6 +1349,24 @@ dependencies = [
"stringprep",
]

[[package]]
name = "postgres-protocol"
version = "0.6.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d#f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d"
dependencies = [
"base64 0.13.0",
"byteorder",
"bytes",
"fallible-iterator",
"hmac 0.10.1",
"lazy_static",
"md-5",
"memchr",
"rand",
"sha2",
"stringprep",
]

[[package]]
name = "postgres-types"
version = "0.2.1"

@@ -1356,7 +1374,17 @@ source = "git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
]

[[package]]
name = "postgres-types"
version = "0.2.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d#f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d"
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d)",
]

[[package]]

@@ -1421,22 +1449,23 @@ name = "proxy"
version = "0.1.0"
dependencies = [
"anyhow",
"base64 0.13.0",
"bytes",
"clap",
"hex",
"hyper",
"hmac 0.10.1",
"lazy_static",
"md5",
"parking_lot",
"rand",
"reqwest",
"routerify",
"rustls 0.19.1",
"serde",
"serde_json",
"sha2",
"thiserror",
"tokio",
"tokio-postgres",
"zenith_metrics",
"tokio-postgres 0.7.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d)",
"zenith_utils",
]

@@ -2097,8 +2126,30 @@ dependencies = [
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol",
"postgres-types",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"postgres-types 0.2.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"socket2",
"tokio",
"tokio-util",
]

[[package]]
name = "tokio-postgres"
version = "0.7.1"
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d#f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d"
dependencies = [
"async-trait",
"byteorder",
"bytes",
"fallible-iterator",
"futures",
"log",
"parking_lot",
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d)",
"postgres-types 0.2.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d)",
"socket2",
"tokio",
"tokio-util",

@@ -2336,7 +2387,7 @@ dependencies = [
"hyper",
"lazy_static",
"postgres",
"postgres-protocol",
"postgres-protocol 0.6.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"postgres_ffi",
"regex",
"routerify",

@@ -2346,7 +2397,7 @@ dependencies = [
"signal-hook",
"tempfile",
"tokio",
"tokio-postgres",
"tokio-postgres 0.7.1 (git+https://github.com/zenithdb/rust-postgres.git?rev=9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858)",
"tracing",
"walkdir",
"workspace_hack",
@@ -294,7 +294,6 @@ impl PostgresNode {
conf.append("max_replication_slots", "10");
conf.append("hot_standby", "on");
conf.append("shared_buffers", "1MB");
conf.append("zenith.file_cache_size", "4096");
conf.append("fsync", "off");
conf.append("max_connections", "100");
conf.append("wal_level", "replica");
@@ -9,7 +9,6 @@
use anyhow::{anyhow, bail, Context, Result};
use std::fs;
use std::path::Path;
use std::process::Command;

pub mod compute;
pub mod local_env;

@@ -32,19 +31,3 @@ pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
}
Ok(pid)
}

fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");

let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}

const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
cmd
}
}
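Both variants in the hunk above implement the same policy for spawned zenith binaries: clear the inherited environment, force `RUST_BACKTRACE=1`, and forward `LLVM_PROFILE_FILE` and `RUST_LOG` only when they are set. A minimal, self-contained sketch of that pattern follows; the `safekeeper` binary name and the `--help` argument are placeholders for illustration, not taken from the diff.

```rust
use std::process::Command;

// Sketch of the env-forwarding policy used when launching child processes:
// start from a clean environment, always enable backtraces, and forward
// LLVM_PROFILE_FILE / RUST_LOG only if the caller has them set.
fn command_with_rust_env(program: &str) -> Command {
    let mut cmd = Command::new(program);
    cmd.env_clear().env("RUST_BACKTRACE", "1");
    for var in ["LLVM_PROFILE_FILE", "RUST_LOG"] {
        if let Some(val) = std::env::var_os(var) {
            cmd.env(var, val);
        }
    }
    cmd
}

fn main() -> std::io::Result<()> {
    // "safekeeper" is a placeholder binary name; spawning will fail unless
    // such a binary is actually on PATH.
    let status = command_with_rust_env("safekeeper").arg("--help").status()?;
    println!("exited with {status}");
    Ok(())
}
```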
@@ -17,8 +17,8 @@ use thiserror::Error;
use zenith_utils::http::error::HttpErrorBody;

use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::read_pidfile;
use crate::storage::PageServerNode;
use crate::{fill_rust_env_vars, read_pidfile};
use zenith_utils::connstring::connection_address;

#[derive(Error, Debug)]

@@ -118,17 +118,22 @@ impl SafekeeperNode {
let listen_http = format!("localhost:{}", self.conf.http_port);

let mut cmd = Command::new(self.env.safekeeper_bin()?);
fill_rust_env_vars(
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.args(&["--recall", "1 second"])
.arg("--daemonize"),
);
cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.args(&["--recall", "1 second"])
.arg("--daemonize")
.env_clear()
.env("RUST_BACKTRACE", "1");
if !self.conf.sync {
cmd.arg("--no-sync");
}

let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}

if !cmd.status()?.success() {
bail!(
"Safekeeper failed to start. See '{}' for details.",
@@ -19,7 +19,7 @@ use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;

use crate::local_env::LocalEnv;
use crate::{fill_rust_env_vars, read_pidfile};
use crate::read_pidfile;
use pageserver::branches::BranchInfo;
use pageserver::tenant_mgr::TenantInfo;
use zenith_utils::connstring::connection_address;

@@ -96,49 +96,46 @@ impl PageServerNode {
.unwrap()
}

pub fn init(
&self,
create_tenant: Option<&str>,
config_overrides: &[&str],
) -> anyhow::Result<()> {
pub fn init(&self, create_tenant: Option<&str>) -> anyhow::Result<()> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}

// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let base_data_dir_param = self.env.base_data_dir.display().to_string();
let pg_distrib_dir_param =
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
let listen_http_addr_param = format!(
"listen_http_addr='{}'",
self.env.pageserver.listen_http_addr
);
let listen_pg_addr_param =
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
let mut args = Vec::with_capacity(20);

args.push("--init");
args.extend(["-D", &base_data_dir_param]);
args.extend(["-c", &pg_distrib_dir_param]);
args.extend(["-c", &authg_type_param]);
args.extend(["-c", &listen_http_addr_param]);
args.extend(["-c", &listen_pg_addr_param]);

for config_override in config_overrides {
args.extend(["-c", config_override]);
}
let mut args = vec![
"--init".to_string(),
"-D".to_string(),
self.env.base_data_dir.display().to_string(),
"-c".to_string(),
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display()),
"-c".to_string(),
format!("auth_type='{}'", self.env.pageserver.auth_type),
"-c".to_string(),
format!(
"listen_http_addr='{}'",
self.env.pageserver.listen_http_addr
),
"-c".to_string(),
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr),
];

if self.env.pageserver.auth_type != AuthType::Trust {
args.extend([
"-c",
"auth_validation_public_key_path='auth_public_key.pem'",
"-c".to_string(),
"auth_validation_public_key_path='auth_public_key.pem'".to_string(),
]);
}

if let Some(tenantid) = create_tenant {
args.extend(["--create-tenant", tenantid])
args.extend(["--create-tenant".to_string(), tenantid.to_string()])
}

let status = fill_rust_env_vars(cmd.args(args))
let status = cmd
.args(args)
.env_clear()
.env("RUST_BACKTRACE", "1")
.status()
.expect("pageserver init failed");

@@ -157,7 +154,7 @@ impl PageServerNode {
self.repo_path().join("pageserver.pid")
}

pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
pub fn start(&self) -> anyhow::Result<()> {
print!(
"Starting pageserver at '{}' in '{}'",
connection_address(&self.pg_connection_config),

@@ -166,16 +163,16 @@ impl PageServerNode {
io::stdout().flush().unwrap();

let mut cmd = Command::new(self.env.pageserver_bin()?);
cmd.args(&["-D", self.repo_path().to_str().unwrap()])
.arg("--daemonize")
.env_clear()
.env("RUST_BACKTRACE", "1");

let repo_path = self.repo_path();
let mut args = vec!["-D", repo_path.to_str().unwrap()];

for config_override in config_overrides {
args.extend(["-c", config_override]);
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
cmd.env(var, val);
}

fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));

if !cmd.status()?.success() {
bail!(
"Pageserver failed to start. See '{}' for details.",
@@ -147,10 +147,6 @@ bucket_name = 'some-sample-bucket'
# Name of the region where the bucket is located at
bucket_region = 'eu-north-1'

# A "subfolder" in the bucket, to use the same bucket separately by multiple pageservers at once.
# Optional, pageserver uses entire bucket if the prefix is not specified.
prefix_in_bucket = '/some/prefix/'

# Access key to connect to the bucket ("login" part of the credentials)
access_key_id = 'SOMEKEYAAAAASADSAH*#'

@@ -129,13 +129,13 @@ There are the following implementations present:
* local filesystem — to use in tests mainly
* AWS S3 - to use in production

Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs, parameters documentation can be found at [settings docs](../docs/settings.md).
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs.

The backup service is disabled by default and can be enabled to interact with a single remote storage.

CLI examples:
* Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
* AWS S3 : `${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/',access_key_id='SOMEKEYAAAAASADSAH*#',secret_access_key='SOMEsEcReTsd292v'}"`
* AWS S3 : `${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1',access_key_id='SOMEKEYAAAAASADSAH*#',secret_access_key='SOMEsEcReTsd292v'}"`

For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
For local S3 installations, refer to the their documentation for name format and credentials.

@@ -154,7 +154,6 @@ or
[remote_storage]
bucket_name = 'some-sample-bucket'
bucket_region = 'eu-north-1'
prefix_in_bucket = '/test_prefix/'
access_key_id = 'SOMEKEYAAAAASADSAH*#'
secret_access_key = 'SOMEsEcReTsd292v'
```
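The CLI examples above pass remote storage settings to the pageserver as `-c` overrides, and each override value has to be a valid standalone TOML document. Below is a minimal sketch of parsing such a value with `toml_edit`, the crate the pageserver itself uses per the `Cargo.lock` and `main.rs` hunks; it assumes a `toml_edit` version that still exposes `Document`, and the override string is only an example.

```rust
use std::str::FromStr;

fn main() -> anyhow::Result<()> {
    // A single `-c` override, e.g. as in the CLI examples above.
    let option_line = "remote_storage={local_path='/some/local/path/'}";

    // Every override has to parse as a standalone TOML document;
    // anything that does not parse is rejected up front.
    let doc = toml_edit::Document::from_str(option_line)?;
    println!("parsed override:\n{}", doc);
    Ok(())
}
```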
@@ -53,12 +53,12 @@ fn main() -> Result<()> {
)
// See `settings.md` for more details on the extra configuration patameters pageserver can process
.arg(
Arg::with_name("config-override")
Arg::with_name("config-option")
.short("c")
.takes_value(true)
.number_of_values(1)
.multiple(true)
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
.help("Additional configuration options or overrides of the ones from the toml config file.
Any option has to be a valid toml document, example: `-c \"foo='hey'\"` `-c \"foo={value=1}\"`"),
)
.get_matches();

@@ -105,7 +105,7 @@ fn main() -> Result<()> {
};

// Process any extra options given with -c
if let Some(values) = arg_matches.values_of("config-override") {
if let Some(values) = arg_matches.values_of("config-option") {
for option_line in values {
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
format!(

@@ -195,10 +195,9 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
}

let signals = signals::install_shutdown_handlers()?;
let (async_shutdown_tx, async_shutdown_rx) = tokio::sync::watch::channel(());
let mut threads = Vec::new();

let sync_startup = remote_storage::start_local_timeline_sync(conf, async_shutdown_rx)
let sync_startup = remote_storage::start_local_timeline_sync(conf)
.context("Failed to set up local files sync with external storage")?;

if let Some(handle) = sync_startup.sync_loop_handle {

@@ -256,7 +255,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
signal.name()
);

async_shutdown_tx.send(())?;
postgres_backend::set_pgbackend_shutdown_requested();
tenant_mgr::shutdown_all_tenants()?;
endpoint::shutdown();
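The `start_pageserver` hunks above thread a `tokio::sync::watch` channel into the storage sync machinery so a signal handler can ask the async loop to stop. A minimal sketch of that shutdown-signal pattern follows (assumes tokio with the usual rt, macros, sync, and time features enabled); the task body and timings are illustrative only, not taken from the repository.

```rust
use std::time::Duration;

#[tokio::main]
async fn main() {
    // One sender kept by the shutdown path, one receiver per long-running loop.
    let (shutdown_tx, mut shutdown_rx) = tokio::sync::watch::channel(());

    let worker = tokio::spawn(async move {
        loop {
            tokio::select! {
                // A stand-in for one storage sync loop step.
                _ = tokio::time::sleep(Duration::from_millis(100)) => {
                    println!("sync loop step completed");
                }
                // Stop as soon as the shutdown signal fires.
                _ = shutdown_rx.changed() => {
                    println!("shutdown requested, stopping");
                    break;
                }
            }
        }
    });

    // Simulate the signal handler requesting shutdown.
    tokio::time::sleep(Duration::from_millis(250)).await;
    shutdown_tx.send(()).expect("worker still listening");
    worker.await.unwrap();
}
```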
@@ -45,16 +45,14 @@ impl BranchInfo {
repo: &Arc<dyn Repository>,
include_non_incremental_logical_size: bool,
) -> Result<Self> {
let path = path.as_ref();
let name = path.file_name().unwrap().to_string_lossy().to_string();
let timeline_id = std::fs::read_to_string(path)
.with_context(|| {
format!(
"Failed to read branch file contents at path '{}'",
path.display()
)
})?
.parse::<ZTimelineId>()?;
let name = path
.as_ref()
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_string();
let timeline_id = std::fs::read_to_string(path)?.parse::<ZTimelineId>()?;

let timeline = match repo.get_timeline(timeline_id)? {
RepositoryTimeline::Local(local_entry) => local_entry,
@@ -43,9 +43,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
|
||||
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
|
||||
|
||||
pub const DEFAULT_MAX_DELTA_LAYERS: usize = 10;
|
||||
pub const DEFAULT_IMAGE_LAYER_GENERATION_THRESHOLD: usize = 50;
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
///
|
||||
@@ -93,21 +90,6 @@ pub struct PageServerConf {
|
||||
pub page_cache_size: usize,
|
||||
pub max_file_descriptors: usize,
|
||||
|
||||
//
|
||||
// Minimal total size of delta layeres which triggers generation of image layer by checkpointer.
|
||||
// It is specified as percent of maximal sigment size (RELISH_SEG_SIZE).
|
||||
// I.e. it means that checkpoint will create image layer in addition to delta layer only when total size
|
||||
// of delta layers since last image layer exceeds specified percent of segment size.
|
||||
//
|
||||
pub image_layer_generation_threshold: usize,
|
||||
|
||||
//
|
||||
// Maximal number of delta layers which can be stored before image layere should be generated.
|
||||
// The garbage collector needs image layers in order to delete files.
|
||||
// If this number is too large it can result in too many small files on disk.
|
||||
//
|
||||
pub max_delta_layers: usize,
|
||||
|
||||
// Repository directory, relative to current working directory.
|
||||
// Normally, the page server changes the current working directory
|
||||
// to the repository, and 'workdir' is always '.'. But we don't do
|
||||
@@ -153,8 +135,6 @@ pub struct S3Config {
|
||||
pub bucket_name: String,
|
||||
/// The region where the bucket is located at.
|
||||
pub bucket_region: String,
|
||||
/// A "subfolder" in the bucket, to use the same bucket separately by multiple pageservers at once.
|
||||
pub prefix_in_bucket: Option<String>,
|
||||
/// "Login" to use when connecting to bucket.
|
||||
/// Can be empty for cases like AWS k8s IAM
|
||||
/// where we can allow certain pods to connect
|
||||
@@ -169,7 +149,6 @@ impl std::fmt::Debug for S3Config {
|
||||
f.debug_struct("S3Config")
|
||||
.field("bucket_name", &self.bucket_name)
|
||||
.field("bucket_region", &self.bucket_region)
|
||||
.field("prefix_in_bucket", &self.prefix_in_bucket)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -246,9 +225,6 @@ impl PageServerConf {
|
||||
page_cache_size: DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
|
||||
max_delta_layers: DEFAULT_MAX_DELTA_LAYERS,
|
||||
image_layer_generation_threshold: DEFAULT_IMAGE_LAYER_GENERATION_THRESHOLD,
|
||||
|
||||
pg_distrib_dir: PathBuf::new(),
|
||||
auth_validation_public_key_path: None,
|
||||
auth_type: AuthType::Trust,
|
||||
@@ -271,10 +247,6 @@ impl PageServerConf {
|
||||
"max_file_descriptors" => {
|
||||
conf.max_file_descriptors = parse_toml_u64(key, item)? as usize
|
||||
}
|
||||
"max_delta_layers" => conf.max_delta_layers = parse_toml_u64(key, item)? as usize,
|
||||
"image_layer_generation_threshold" => {
|
||||
conf.image_layer_generation_threshold = parse_toml_u64(key, item)? as usize
|
||||
}
|
||||
"pg_distrib_dir" => {
|
||||
conf.pg_distrib_dir = PathBuf::from(parse_toml_string(key, item)?)
|
||||
}
|
||||
@@ -360,26 +332,18 @@ impl PageServerConf {
|
||||
bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
|
||||
}
|
||||
(None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
|
||||
bucket_name: parse_toml_string("bucket_name", bucket_name)?,
|
||||
bucket_region: parse_toml_string("bucket_region", bucket_region)?,
|
||||
bucket_name: bucket_name.as_str().unwrap().to_string(),
|
||||
bucket_region: bucket_region.as_str().unwrap().to_string(),
|
||||
access_key_id: toml
|
||||
.get("access_key_id")
|
||||
.map(|access_key_id| parse_toml_string("access_key_id", access_key_id))
|
||||
.transpose()?,
|
||||
.map(|x| x.as_str().unwrap().to_string()),
|
||||
secret_access_key: toml
|
||||
.get("secret_access_key")
|
||||
.map(|secret_access_key| {
|
||||
parse_toml_string("secret_access_key", secret_access_key)
|
||||
})
|
||||
.transpose()?,
|
||||
prefix_in_bucket: toml
|
||||
.get("prefix_in_bucket")
|
||||
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
|
||||
.transpose()?,
|
||||
.map(|x| x.as_str().unwrap().to_string()),
|
||||
}),
|
||||
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
|
||||
parse_toml_string("local_path", local_path)?,
|
||||
)),
|
||||
(Some(local_path), None, None) => {
|
||||
RemoteStorageKind::LocalFs(PathBuf::from(local_path.as_str().unwrap()))
|
||||
}
|
||||
(Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
|
||||
};
|
||||
|
||||
@@ -404,8 +368,6 @@ impl PageServerConf {
|
||||
gc_period: Duration::from_secs(10),
|
||||
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
max_delta_layers: defaults::DEFAULT_MAX_DELTA_LAYERS,
|
||||
image_layer_generation_threshold: defaults::DEFAULT_IMAGE_LAYER_GENERATION_THRESHOLD,
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
superuser: "zenith_admin".to_string(),
|
||||
@@ -477,9 +439,6 @@ gc_horizon = 222
|
||||
page_cache_size = 444
|
||||
max_file_descriptors = 333
|
||||
|
||||
max_delta_layers = 10
|
||||
image_layer_generation_threshold = 50
|
||||
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
initial_superuser_name = 'zzzz'
|
||||
|
||||
@@ -510,9 +469,6 @@ initial_superuser_name = 'zzzz'
|
||||
superuser: defaults::DEFAULT_SUPERUSER.to_string(),
|
||||
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
|
||||
max_file_descriptors: defaults::DEFAULT_MAX_FILE_DESCRIPTORS,
|
||||
max_delta_layers: defaults::DEFAULT_MAX_DELTA_LAYERS,
|
||||
image_layer_generation_threshold:
|
||||
defaults::DEFAULT_IMAGE_LAYER_GENERATION_THRESHOLD,
|
||||
workdir,
|
||||
pg_distrib_dir,
|
||||
auth_type: AuthType::Trust,
|
||||
@@ -554,8 +510,6 @@ initial_superuser_name = 'zzzz'
|
||||
superuser: "zzzz".to_string(),
|
||||
page_cache_size: 444,
|
||||
max_file_descriptors: 333,
|
||||
max_delta_layers: 10,
|
||||
image_layer_generation_threshold: 50,
|
||||
workdir,
|
||||
pg_distrib_dir,
|
||||
auth_type: AuthType::Trust,
|
||||
@@ -631,7 +585,6 @@ pg_distrib_dir='{}'
|
||||
|
||||
let bucket_name = "some-sample-bucket".to_string();
|
||||
let bucket_region = "eu-north-1".to_string();
|
||||
let prefix_in_bucket = "test_prefix".to_string();
|
||||
let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
|
||||
let secret_access_key = "SOMEsEcReTsd292v".to_string();
|
||||
let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
|
||||
@@ -644,14 +597,13 @@ max_concurrent_sync = {}
|
||||
max_sync_errors = {}
|
||||
bucket_name = '{}'
|
||||
bucket_region = '{}'
|
||||
prefix_in_bucket = '{}'
|
||||
access_key_id = '{}'
|
||||
secret_access_key = '{}'"#,
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, access_key_id, secret_access_key
|
||||
),
|
||||
format!(
|
||||
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}'}}",
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
|
||||
"remote_storage={{max_concurrent_sync = {}, max_sync_errors = {}, bucket_name='{}', bucket_region='{}', access_key_id='{}', secret_access_key='{}'}}",
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, access_key_id, secret_access_key
|
||||
),
|
||||
];
|
||||
|
||||
@@ -685,7 +637,6 @@ pg_distrib_dir='{}'
|
||||
bucket_region: bucket_region.clone(),
|
||||
access_key_id: Some(access_key_id.clone()),
|
||||
secret_access_key: Some(secret_access_key.clone()),
|
||||
prefix_in_bucket: Some(prefix_in_bucket.clone())
|
||||
}),
|
||||
},
|
||||
"Remote storage config should correctly parse the S3 config"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use hyper::header;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
@@ -190,27 +190,18 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum TimelineInfo {
|
||||
Local {
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
#[serde(with = "opt_display_serde")]
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
last_record_lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
start_lsn: Lsn,
|
||||
disk_consistent_lsn: Lsn,
|
||||
timeline_state: Option<TimelineSyncState>,
|
||||
},
|
||||
Remote {
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
},
|
||||
struct TimelineInfo {
|
||||
#[serde(with = "hex")]
|
||||
timeline_id: ZTimelineId,
|
||||
#[serde(with = "hex")]
|
||||
tenant_id: ZTenantId,
|
||||
#[serde(with = "opt_display_serde")]
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
last_record_lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
start_lsn: Lsn,
|
||||
disk_consistent_lsn: Lsn,
|
||||
timeline_state: Option<TimelineSyncState>,
|
||||
}
|
||||
|
||||
async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -224,12 +215,9 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
Ok::<_, anyhow::Error>(match repo.get_timeline(timeline_id)?.local_timeline() {
|
||||
None => TimelineInfo::Remote {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
},
|
||||
Some(timeline) => TimelineInfo::Local {
|
||||
match repo.get_timeline(timeline_id)?.local_timeline() {
|
||||
None => bail!("Timeline with id {} is not present locally", timeline_id),
|
||||
Some(timeline) => Ok::<_, anyhow::Error>(TimelineInfo {
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
|
||||
@@ -238,8 +226,8 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
prev_record_lsn: timeline.get_prev_record_lsn(),
|
||||
start_lsn: timeline.get_start_lsn(),
|
||||
timeline_state: repo.get_timeline_state(timeline_id),
|
||||
},
|
||||
})
|
||||
}),
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
@@ -41,7 +41,6 @@ use crate::repository::{
|
||||
TimelineWriter, ZenithWalRecord,
|
||||
};
|
||||
use crate::tenant_mgr;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use crate::walreceiver;
|
||||
use crate::walreceiver::IS_WAL_RECEIVER;
|
||||
use crate::walredo::WalRedoManager;
|
||||
@@ -368,7 +367,7 @@ fn shutdown_timeline(
|
||||
timeline
|
||||
.upload_relishes
|
||||
.store(false, atomic::Ordering::Relaxed);
|
||||
walreceiver::stop_wal_receiver(tenant_id, timeline_id);
|
||||
walreceiver::stop_wal_receiver(timeline_id);
|
||||
trace!("repo shutdown. checkpoint timeline {}", timeline_id);
|
||||
// Do not reconstruct pages to reduce shutdown time
|
||||
timeline.checkpoint(CheckpointConfig::Flush)?;
|
||||
@@ -515,10 +514,10 @@ impl LayeredRepository {
|
||||
let _enter = info_span!("saving metadata").entered();
|
||||
let path = metadata_path(conf, timelineid, tenantid);
|
||||
// use OpenOptions to ensure file presence is consistent with first_save
|
||||
let mut file = VirtualFile::open_with_options(
|
||||
&path,
|
||||
OpenOptions::new().write(true).create_new(first_save),
|
||||
)?;
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(first_save)
|
||||
.open(&path)?;
|
||||
|
||||
let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?;
|
||||
|
||||
@@ -623,6 +622,7 @@ impl LayeredRepository {
|
||||
};
|
||||
|
||||
if let Some(ancestor_timeline) = &timeline.ancestor_timeline {
|
||||
drop(timelines);
|
||||
let ancestor_timeline =
|
||||
match ancestor_timeline.local_or_schedule_download(self.tenantid) {
|
||||
Some(timeline) => timeline,
|
||||
@@ -646,6 +646,7 @@ impl LayeredRepository {
|
||||
else {
|
||||
all_branchpoints.insert((ancestor_timeline.timelineid, timeline.ancestor_lsn));
|
||||
}
|
||||
timelines = self.timelines.lock().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -674,7 +675,6 @@ impl LayeredRepository {
|
||||
}
|
||||
|
||||
if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
|
||||
drop(timelines);
|
||||
let branchpoints: Vec<Lsn> = all_branchpoints
|
||||
.range((
|
||||
Included((timelineid, Lsn(0))),
|
||||
@@ -694,7 +694,6 @@ impl LayeredRepository {
|
||||
let result = timeline.gc_timeline(branchpoints, cutoff)?;
|
||||
|
||||
totals += result;
|
||||
timelines = self.timelines.lock().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -772,12 +771,6 @@ pub struct LayeredTimeline {
|
||||
/// to avoid deadlock.
|
||||
write_lock: Mutex<()>,
|
||||
|
||||
// Prevent concurrent checkpoints.
|
||||
// Checkpoints are normally performed by one thread. But checkpoint can also be manually requested by admin
|
||||
// (that's used in tests), and shutdown also forces a checkpoint. These forced checkpoints run in a different thread
|
||||
// and could be triggered at the same time as a normal checkpoint.
|
||||
checkpoint_cs: Mutex<()>,
|
||||
|
||||
// Needed to ensure that we can't create a branch at a point that was already garbage collected
|
||||
latest_gc_cutoff_lsn: AtomicLsn,
|
||||
|
||||
@@ -1137,7 +1130,6 @@ impl LayeredTimeline {
|
||||
upload_relishes: AtomicBool::new(upload_relishes),
|
||||
|
||||
write_lock: Mutex::new(()),
|
||||
checkpoint_cs: Mutex::new(()),
|
||||
|
||||
latest_gc_cutoff_lsn: AtomicLsn::from(metadata.latest_gc_cutoff_lsn()),
|
||||
initdb_lsn: metadata.initdb_lsn(),
|
||||
@@ -1455,9 +1447,6 @@ impl LayeredTimeline {
|
||||
///
|
||||
/// NOTE: This has nothing to do with checkpoint in PostgreSQL.
|
||||
fn checkpoint_internal(&self, checkpoint_distance: u64, reconstruct_pages: bool) -> Result<()> {
|
||||
// Prevent concurrent checkpoints
|
||||
let _checkpoint_cs = self.checkpoint_cs.lock().unwrap();
|
||||
|
||||
let mut write_guard = self.write_lock.lock().unwrap();
|
||||
let mut layers = self.layers.lock().unwrap();
|
||||
|
||||
@@ -1598,7 +1587,7 @@ impl LayeredTimeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evict_layer(&self, layer_id: LayerId, mut reconstruct_pages: bool) -> Result<Vec<PathBuf>> {
|
||||
fn evict_layer(&self, layer_id: LayerId, reconstruct_pages: bool) -> Result<Vec<PathBuf>> {
|
||||
// Mark the layer as no longer accepting writes and record the end_lsn.
|
||||
// This happens in-place, no new layers are created now.
|
||||
// We call `get_last_record_lsn` again, which may be different from the
|
||||
@@ -1611,27 +1600,8 @@ impl LayeredTimeline {
|
||||
|
||||
let global_layer_map = GLOBAL_LAYER_MAP.read().unwrap();
|
||||
if let Some(oldest_layer) = global_layer_map.get(&layer_id) {
|
||||
let last_lsn = self.get_last_record_lsn();
|
||||
// Avoid creation of image layers if there are not so much deltas
|
||||
if reconstruct_pages
|
||||
&& oldest_layer.get_seg_tag().rel.is_blocky()
|
||||
&& self.conf.image_layer_generation_threshold != 0
|
||||
{
|
||||
let (n_delta_layers, total_delta_size) =
|
||||
layers.count_delta_layers(oldest_layer.get_seg_tag(), last_lsn)?;
|
||||
let logical_segment_size =
|
||||
oldest_layer.get_seg_size(last_lsn)? as u64 * BLCKSZ as u64;
|
||||
let physical_deltas_size = total_delta_size + oldest_layer.get_physical_size()?;
|
||||
if logical_segment_size * self.conf.image_layer_generation_threshold as u64
|
||||
> physical_deltas_size * 100
|
||||
&& n_delta_layers < self.conf.max_delta_layers
|
||||
{
|
||||
reconstruct_pages = false;
|
||||
}
|
||||
}
|
||||
|
||||
drop(global_layer_map);
|
||||
oldest_layer.freeze(last_lsn);
|
||||
oldest_layer.freeze(self.get_last_record_lsn());
|
||||
|
||||
// The layer is no longer open, update the layer map to reflect this.
|
||||
// We will replace it with on-disk historics below.
|
||||
|
||||
@@ -161,14 +161,6 @@ pub struct DeltaLayerInner {
|
||||
}
|
||||
|
||||
impl DeltaLayerInner {
|
||||
fn get_physical_size(&self) -> Result<u64> {
|
||||
Ok(if let Some(book) = &self.book {
|
||||
book.chapter_reader(PAGE_VERSIONS_CHAPTER)?.len()
|
||||
} else {
|
||||
0
|
||||
})
|
||||
}
|
||||
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<SegmentBlk> {
|
||||
// Scan the VecMap backwards, starting from the given entry.
|
||||
let slice = self
|
||||
@@ -297,12 +289,6 @@ impl Layer for DeltaLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// Get physical size of the layer
|
||||
fn get_physical_size(&self) -> Result<u64> {
|
||||
// TODO: is it actually necessary to load layer to get it's size?
|
||||
self.load()?.get_physical_size()
|
||||
}
|
||||
|
||||
/// Get size of the relation at given LSN
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<SegmentBlk> {
|
||||
assert!(lsn >= self.start_lsn);
|
||||
|
||||
@@ -41,7 +41,7 @@ pub struct EphemeralFile {
|
||||
_timelineid: ZTimelineId,
|
||||
file: Arc<VirtualFile>,
|
||||
|
||||
pub pos: u64,
|
||||
pos: u64,
|
||||
}
|
||||
|
||||
impl EphemeralFile {
|
||||
|
||||
@@ -201,11 +201,6 @@ impl Layer for ImageLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// Get physical size of the layer
|
||||
fn get_physical_size(&self) -> Result<u64> {
|
||||
Ok(self.get_seg_size(Lsn(0))? as u64 * BLOCK_SIZE as u64)
|
||||
}
|
||||
|
||||
/// Does this segment exist at given LSN?
|
||||
fn get_seg_exists(&self, _lsn: Lsn) -> Result<bool> {
|
||||
Ok(true)
|
||||
|
||||
@@ -80,10 +80,6 @@ impl InMemoryLayerInner {
|
||||
assert!(self.end_lsn.is_none());
|
||||
}
|
||||
|
||||
fn get_physical_size(&self) -> u64 {
|
||||
self.page_versions.size()
|
||||
}
|
||||
|
||||
fn get_seg_size(&self, lsn: Lsn) -> SegmentBlk {
|
||||
// Scan the BTreeMap backwards, starting from the given entry.
|
||||
let slice = self.seg_sizes.slice_range(..=lsn);
|
||||
@@ -225,12 +221,7 @@ impl Layer for InMemoryLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// Get physical size of the layer
|
||||
fn get_physical_size(&self) -> Result<u64> {
|
||||
Ok(self.inner.read().unwrap().get_physical_size() as u64)
|
||||
}
|
||||
|
||||
/// Get logical size of the relation at given LSN
|
||||
/// Get size of the relation at given LSN
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<SegmentBlk> {
|
||||
assert!(lsn >= self.start_lsn);
|
||||
ensure!(
|
||||
@@ -625,7 +616,7 @@ impl InMemoryLayer {
|
||||
let image_lsn: Option<Lsn>;
|
||||
let delta_end_lsn: Option<Lsn>;
|
||||
if self.is_dropped() || !reconstruct_pages {
|
||||
// Create just a delta layer containing all the
|
||||
// The segment was dropped. Create just a delta layer containing all the
|
||||
// changes up to and including the drop.
|
||||
delta_end_lsn = Some(end_lsn_exclusive);
|
||||
image_lsn = None;
|
||||
|
||||
@@ -111,14 +111,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterate over all items with start bound <= 'key'
|
||||
pub fn iter_older(&self, key: I::Key) -> IntervalIter<I> {
|
||||
IntervalIter {
|
||||
point_iter: self.points.range(..key),
|
||||
elem_iter: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterate over all items
|
||||
pub fn iter(&self) -> IntervalIter<I> {
|
||||
IntervalIter {
|
||||
@@ -238,35 +230,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, I> DoubleEndedIterator for IntervalIter<'a, I>
|
||||
where
|
||||
I: IntervalItem + ?Sized,
|
||||
{
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
// Iterate over all elements in all the points in 'point_iter'. To avoid
|
||||
// returning the same element twice, we only return each element at its
|
||||
// starting point.
|
||||
loop {
|
||||
// Return next remaining element from the current point
|
||||
if let Some((point_key, elem_iter)) = &mut self.elem_iter {
|
||||
while let Some(elem) = elem_iter.next_back() {
|
||||
if elem.start_key() == *point_key {
|
||||
return Some(Arc::clone(elem));
|
||||
}
|
||||
}
|
||||
}
|
||||
// No more elements at this point. Move to next point.
|
||||
if let Some((point_key, point)) = self.point_iter.next_back() {
|
||||
self.elem_iter = Some((*point_key, point.elements.iter()));
|
||||
continue;
|
||||
} else {
|
||||
// No more points, all done
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: ?Sized> Default for IntervalTree<I>
|
||||
where
|
||||
I: IntervalItem,
|
||||
|
||||
@@ -199,14 +199,6 @@ impl LayerMap {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn count_delta_layers(&self, seg: SegmentTag, lsn: Lsn) -> Result<(usize, u64)> {
|
||||
if let Some(segentry) = self.segs.get(&seg) {
|
||||
segentry.count_delta_layers(lsn)
|
||||
} else {
|
||||
Ok((0, 0))
|
||||
}
|
||||
}
|
||||
|
||||
/// Is there any layer for given segment that is alive at the lsn?
|
||||
///
|
||||
/// This is a public wrapper for SegEntry fucntion,
|
||||
@@ -328,22 +320,6 @@ impl SegEntry {
|
||||
.any(|layer| !layer.is_incremental())
|
||||
}
|
||||
|
||||
// Count number of delta layers preceeding specified `lsn`.
|
||||
// Perform backward iteration from exclusive upper bound until image layer is reached.
|
||||
pub fn count_delta_layers(&self, lsn: Lsn) -> Result<(usize, u64)> {
|
||||
let mut count: usize = 0;
|
||||
let mut total_size: u64 = 0;
|
||||
let mut iter = self.historic.iter_older(lsn);
|
||||
while let Some(layer) = iter.next_back() {
|
||||
if !layer.is_incremental() {
|
||||
break;
|
||||
}
|
||||
count += 1;
|
||||
total_size += layer.get_physical_size()?;
|
||||
}
|
||||
Ok((count, total_size))
|
||||
}
|
||||
|
||||
// Set new open layer for a SegEntry.
|
||||
// It's ok to rewrite previous open layer,
|
||||
// but only if it is not writeable anymore.
|
||||
|
||||
@@ -39,10 +39,6 @@ impl PageVersions {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.file.pos
|
||||
}
|
||||
|
||||
pub fn append_or_update_last(
|
||||
&mut self,
|
||||
blknum: u32,
|
||||
|
||||
@@ -154,15 +154,12 @@ pub trait Layer: Send + Sync {
|
||||
reconstruct_data: &mut PageReconstructData,
|
||||
) -> Result<PageReconstructResult>;
|
||||
|
||||
/// Return logical size of the segment at given LSN. (Only for blocky relations.)
|
||||
/// Return size of the segment at given LSN. (Only for blocky relations.)
|
||||
fn get_seg_size(&self, lsn: Lsn) -> Result<SegmentBlk>;
|
||||
|
||||
/// Does the segment exist at given LSN? Or was it dropped before it.
|
||||
fn get_seg_exists(&self, lsn: Lsn) -> Result<bool>;
|
||||
|
||||
// Get physical size of the layer
|
||||
fn get_physical_size(&self) -> Result<u64>;
|
||||
|
||||
/// Does this layer only contain some data for the segment (incremental),
|
||||
/// or does it contain a version of every page? This is important to know
|
||||
/// for garbage collecting old layers: an incremental layer depends on
|
||||
|
||||
@@ -13,7 +13,6 @@
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use postgres_ffi::pg_constants::BLCKSZ;
use regex::Regex;
use std::net::TcpListener;
use std::str;

@@ -43,8 +42,6 @@ use crate::tenant_mgr;
use crate::walreceiver;
use crate::CheckpointConfig;

const CHUNK_SIZE: u32 = 128; // 1Mb

// Wrapped in libpq CopyData
enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),

@@ -94,8 +91,7 @@ struct PagestreamNblocksResponse {

#[derive(Debug)]
struct PagestreamGetPageResponse {
n_blocks: u32,
data: Bytes,
page: Bytes,
}

#[derive(Debug)]

@@ -166,8 +162,7 @@ impl PagestreamBeMessage {

Self::GetPage(resp) => {
bytes.put_u8(102); /* tag from pagestore_client.h */
bytes.put_u32(resp.n_blocks);
bytes.put(&resp.data[..]);
bytes.put(&resp.page[..]);
}

Self::Error(resp) => {

@@ -443,18 +438,11 @@ impl PageServerHandler {
.entered();
let tag = RelishTag::Relation(req.rel);
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
let rel_size = timeline.get_relish_size(tag, lsn)?.unwrap_or(0);
let blkno = req.blkno;
let n_blocks = u32::min(blkno + CHUNK_SIZE, rel_size) - blkno;
let mut data = BytesMut::with_capacity(n_blocks as usize * BLCKSZ as usize);
for i in 0..n_blocks {
let page = timeline.get_page_at_lsn(tag, blkno + i, lsn)?;
data.extend_from_slice(&page);
}
assert!(data.len() == n_blocks as usize * BLCKSZ as usize);

let page = timeline.get_page_at_lsn(tag, req.blkno, lsn)?;

Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
n_blocks,
data: data.freeze(),
page,
}))
}

@@ -606,7 +594,7 @@ impl postgres_backend::Handler for PageServerHandler {
tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)
.context("Failed to fetch local timeline for callmemaybe requests")?;

walreceiver::launch_wal_receiver(self.conf, tenantid, timelineid, &connstr);
walreceiver::launch_wal_receiver(self.conf, timelineid, &connstr, tenantid.to_owned());

pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("branch_create ") {
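On the `chunk_load` side of the hunks above, a GetPage response carries a whole chunk of pages: a one-byte tag (102), the number of blocks, and the concatenated page images. A small sketch of that framing with the `bytes` crate; the 8192-byte block size is assumed (the usual Postgres `BLCKSZ`) and the helper name is made up for illustration.

```rust
use bytes::{BufMut, Bytes, BytesMut};

// Values mirror the hunks above; BLCKSZ is assumed to be the standard
// 8192-byte Postgres block size.
const GETPAGE_TAG: u8 = 102;
const BLCKSZ: usize = 8192;

/// Frame a chunked GetPage response: tag byte, block count, page data.
fn encode_getpage_response(n_blocks: u32, data: &Bytes) -> Bytes {
    assert_eq!(data.len(), n_blocks as usize * BLCKSZ);
    let mut bytes = BytesMut::with_capacity(1 + 4 + data.len());
    bytes.put_u8(GETPAGE_TAG); // tag from pagestore_client.h
    bytes.put_u32(n_blocks);   // how many consecutive pages follow
    bytes.put(&data[..]);      // n_blocks * BLCKSZ bytes of page images
    bytes.freeze()
}

fn main() {
    // Two zeroed pages stand in for pages read from the timeline.
    let data = Bytes::from(vec![0u8; 2 * BLCKSZ]);
    let framed = encode_getpage_response(2, &data);
    assert_eq!(framed.len(), 1 + 4 + 2 * BLCKSZ);
    println!("framed {} bytes", framed.len());
}
```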
@@ -5,7 +5,7 @@
|
||||
//! There are a few components the storage machinery consists of:
|
||||
//! * [`RemoteStorage`] trait a CRUD-like generic abstraction to use for adapting external storages with a few implementations:
|
||||
//! * [`local_fs`] allows to use local file system as an external storage
|
||||
//! * [`rust_s3`] uses AWS S3 bucket as an external storage
|
||||
//! * [`rust_s3`] uses AWS S3 bucket entirely as an external storage
|
||||
//!
|
||||
//! * synchronization logic at [`storage_sync`] module that keeps pageserver state (both runtime one and the workdir files) and storage state in sync.
|
||||
//! Synchronization internals are split into submodules
|
||||
@@ -93,7 +93,7 @@ use std::{
|
||||
};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use tokio::{io, sync};
|
||||
use tokio::io;
|
||||
use tracing::{error, info};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
@@ -135,7 +135,6 @@ pub struct SyncStartupData {
|
||||
/// Along with that, scans tenant files local and remote (if the sync gets enabled) to check the initial timeline states.
|
||||
pub fn start_local_timeline_sync(
|
||||
config: &'static PageServerConf,
|
||||
shutdown_hook: sync::watch::Receiver<()>,
|
||||
) -> anyhow::Result<SyncStartupData> {
|
||||
let local_timeline_files = local_tenant_timeline_files(config)
|
||||
.context("Failed to collect local tenant timeline files")?;
|
||||
@@ -143,7 +142,6 @@ pub fn start_local_timeline_sync(
|
||||
match &config.remote_storage_config {
|
||||
Some(storage_config) => match &storage_config.storage {
|
||||
RemoteStorageKind::LocalFs(root) => storage_sync::spawn_storage_sync_thread(
|
||||
shutdown_hook,
|
||||
config,
|
||||
local_timeline_files,
|
||||
LocalFs::new(root.clone(), &config.workdir)?,
|
||||
@@ -151,7 +149,6 @@ pub fn start_local_timeline_sync(
|
||||
storage_config.max_sync_errors,
|
||||
),
|
||||
RemoteStorageKind::AwsS3(s3_config) => storage_sync::spawn_storage_sync_thread(
|
||||
shutdown_hook,
|
||||
config,
|
||||
local_timeline_files,
|
||||
S3::new(s3_config, &config.workdir)?,
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
//! AWS S3 storage wrapper around `rust_s3` library.
|
||||
//!
|
||||
//! Respects `prefix_in_bucket` property from [`S3Config`],
|
||||
//! allowing multiple pageservers to independently work with the same S3 bucket, if
|
||||
//! their bucket prefixes are both specified and different.
|
||||
//! Currently does not allow multiple pageservers to use the same bucket concurrently: objects are
|
||||
//! placed in the root of the bucket.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
@@ -25,26 +23,8 @@ impl S3ObjectKey {
|
||||
&self.0
|
||||
}
|
||||
|
||||
fn download_destination(
|
||||
&self,
|
||||
pageserver_workdir: &Path,
|
||||
prefix_to_strip: Option<&str>,
|
||||
) -> PathBuf {
|
||||
let path_without_prefix = match prefix_to_strip {
|
||||
Some(prefix) => self.0.strip_prefix(prefix).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"Could not strip prefix '{}' from S3 object key '{}'",
|
||||
prefix, self.0
|
||||
)
|
||||
}),
|
||||
None => &self.0,
|
||||
};
|
||||
|
||||
pageserver_workdir.join(
|
||||
path_without_prefix
|
||||
.split(S3_FILE_SEPARATOR)
|
||||
.collect::<PathBuf>(),
|
||||
)
|
||||
fn download_destination(&self, pageserver_workdir: &Path) -> PathBuf {
|
||||
pageserver_workdir.join(self.0.split(S3_FILE_SEPARATOR).collect::<PathBuf>())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +32,6 @@ impl S3ObjectKey {
|
||||
pub struct S3 {
|
||||
pageserver_workdir: &'static Path,
|
||||
bucket: Bucket,
|
||||
prefix_in_bucket: Option<String>,
|
||||
}
|
||||
|
||||
impl S3 {
|
||||
@@ -70,20 +49,6 @@ impl S3 {
|
||||
None,
|
||||
)
|
||||
.context("Failed to create the s3 credentials")?;
|
||||
|
||||
let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
|
||||
let mut prefix = prefix;
|
||||
while prefix.starts_with(S3_FILE_SEPARATOR) {
|
||||
prefix = &prefix[1..]
|
||||
}
|
||||
|
||||
let mut prefix = prefix.to_string();
|
||||
while prefix.ends_with(S3_FILE_SEPARATOR) {
|
||||
prefix.pop();
|
||||
}
|
||||
prefix
|
||||
});
|
||||
|
||||
Ok(Self {
|
||||
bucket: Bucket::new_with_path_style(
|
||||
aws_config.bucket_name.as_str(),
|
||||
@@ -92,7 +57,6 @@ impl S3 {
|
||||
)
|
||||
.context("Failed to create the s3 bucket")?,
|
||||
pageserver_workdir,
|
||||
prefix_in_bucket,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -103,7 +67,7 @@ impl RemoteStorage for S3 {
|
||||
|
||||
fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::StoragePath> {
|
||||
let relative_path = strip_path_prefix(self.pageserver_workdir, local_path)?;
|
||||
let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
let mut key = String::new();
|
||||
for segment in relative_path {
|
||||
key.push(S3_FILE_SEPARATOR);
|
||||
key.push_str(&segment.to_string_lossy());
|
||||
@@ -112,14 +76,13 @@ impl RemoteStorage for S3 {
|
||||
}
|
||||
|
||||
fn local_path(&self, storage_path: &Self::StoragePath) -> anyhow::Result<PathBuf> {
|
||||
Ok(storage_path
|
||||
.download_destination(self.pageserver_workdir, self.prefix_in_bucket.as_deref()))
|
||||
Ok(storage_path.download_destination(self.pageserver_workdir))
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<Self::StoragePath>> {
|
||||
let list_response = self
|
||||
.bucket
|
||||
.list(self.prefix_in_bucket.clone().unwrap_or_default(), None)
|
||||
.list(String::new(), None)
|
||||
.await
|
||||
.context("Failed to list s3 objects")?;
|
||||
|
||||
@@ -262,7 +225,7 @@ mod tests {
|
||||
|
||||
assert_eq!(
|
||||
local_path,
|
||||
key.download_destination(&repo_harness.conf.workdir, None),
|
||||
key.download_destination(&repo_harness.conf.workdir),
|
||||
"Download destination should consist of s3 path joined with the pageserver workdir prefix"
|
||||
);
|
||||
|
||||
@@ -276,18 +239,14 @@ mod tests {
|
||||
let segment_1 = "matching";
|
||||
let segment_2 = "file";
|
||||
let local_path = &repo_harness.conf.workdir.join(segment_1).join(segment_2);
|
||||
|
||||
let storage = dummy_storage(&repo_harness.conf.workdir);
|
||||
|
||||
let expected_key = S3ObjectKey(format!(
|
||||
"{}{SEPARATOR}{}{SEPARATOR}{}",
|
||||
storage.prefix_in_bucket.as_deref().unwrap_or_default(),
|
||||
"{SEPARATOR}{}{SEPARATOR}{}",
|
||||
segment_1,
|
||||
segment_2,
|
||||
SEPARATOR = S3_FILE_SEPARATOR,
|
||||
));
|
||||
|
||||
let actual_key = storage
|
||||
let actual_key = dummy_storage(&repo_harness.conf.workdir)
|
||||
.storage_path(local_path)
|
||||
.expect("Matching path should map to S3 path normally");
|
||||
assert_eq!(
|
||||
@@ -349,30 +308,18 @@ mod tests {
|
||||
let timeline_dir = repo_harness.timeline_path(&TIMELINE_ID);
|
||||
let relative_timeline_path = timeline_dir.strip_prefix(&repo_harness.conf.workdir)?;
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join("not a metadata"),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
let s3_key = create_s3_key(&relative_timeline_path.join("not a metadata"));
|
||||
assert_eq!(
|
||||
s3_key.download_destination(
|
||||
&repo_harness.conf.workdir,
|
||||
storage.prefix_in_bucket.as_deref()
|
||||
),
|
||||
s3_key.download_destination(&repo_harness.conf.workdir),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta file"
|
||||
);
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join(METADATA_FILE_NAME),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
let s3_key = create_s3_key(&relative_timeline_path.join(METADATA_FILE_NAME));
|
||||
assert_eq!(
|
||||
s3_key.download_destination(
|
||||
&repo_harness.conf.workdir,
|
||||
storage.prefix_in_bucket.as_deref()
|
||||
),
|
||||
s3_key.download_destination(&repo_harness.conf.workdir),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
@@ -409,18 +356,18 @@ mod tests {
|
||||
Credentials::anonymous().unwrap(),
|
||||
)
|
||||
.unwrap(),
|
||||
prefix_in_bucket: Some("dummy_prefix/".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> S3ObjectKey {
|
||||
S3ObjectKey(relative_file_path.iter().fold(
|
||||
prefix.unwrap_or_default().to_string(),
|
||||
|mut path_string, segment| {
|
||||
path_string.push(S3_FILE_SEPARATOR);
|
||||
path_string.push_str(segment.to_str().unwrap());
|
||||
path_string
|
||||
},
|
||||
))
|
||||
fn create_s3_key(relative_file_path: &Path) -> S3ObjectKey {
|
||||
S3ObjectKey(
|
||||
relative_file_path
|
||||
.iter()
|
||||
.fold(String::new(), |mut path_string, segment| {
|
||||
path_string.push(S3_FILE_SEPARATOR);
|
||||
path_string.push_str(segment.to_str().unwrap());
|
||||
path_string
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,15 +86,10 @@ use std::{
|
||||
use anyhow::{bail, Context};
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use tokio::{fs, sync::RwLock};
|
||||
use tokio::{
|
||||
fs,
|
||||
runtime::Runtime,
|
||||
sync::{
|
||||
mpsc::{self, UnboundedReceiver},
|
||||
watch::Receiver,
|
||||
RwLock,
|
||||
},
|
||||
time::{Duration, Instant},
|
||||
sync::mpsc::{self, UnboundedReceiver},
|
||||
time::Instant,
|
||||
};
|
||||
use tracing::*;
|
||||
|
||||
@@ -351,7 +346,6 @@ pub(super) fn spawn_storage_sync_thread<
|
||||
P: std::fmt::Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
shutdown_hook: Receiver<()>,
|
||||
conf: &'static PageServerConf,
|
||||
local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
|
||||
storage: S,
|
||||
@@ -390,7 +384,6 @@ pub(super) fn spawn_storage_sync_thread<
|
||||
.spawn(move || {
|
||||
storage_sync_loop(
|
||||
runtime,
|
||||
shutdown_hook,
|
||||
conf,
|
||||
receiver,
|
||||
remote_index,
|
||||
@@ -406,18 +399,11 @@ pub(super) fn spawn_storage_sync_thread<
|
||||
})
|
||||
}
|
||||
|
||||
enum LoopStep {
|
||||
NewStates(HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>>),
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn storage_sync_loop<
|
||||
P: std::fmt::Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
|
||||
>(
|
||||
runtime: Runtime,
|
||||
mut shutdown_hook: Receiver<()>,
|
||||
runtime: tokio::runtime::Runtime,
|
||||
conf: &'static PageServerConf,
|
||||
mut receiver: UnboundedReceiver<SyncTask>,
|
||||
index: RemoteTimelineIndex,
|
||||
@@ -426,34 +412,23 @@ fn storage_sync_loop<
|
||||
max_sync_errors: NonZeroU32,
|
||||
) -> anyhow::Result<()> {
|
||||
let remote_assets = Arc::new((storage, RwLock::new(index)));
|
||||
loop {
|
||||
let loop_step = runtime.block_on(async {
|
||||
tokio::select! {
|
||||
new_timeline_states = loop_step(
|
||||
conf,
|
||||
&mut receiver,
|
||||
Arc::clone(&remote_assets),
|
||||
max_concurrent_sync,
|
||||
max_sync_errors,
|
||||
)
|
||||
.instrument(debug_span!("storage_sync_loop_step")) => LoopStep::NewStates(new_timeline_states),
|
||||
_ = shutdown_hook.changed() => LoopStep::Shutdown,
|
||||
}
|
||||
});
|
||||
|
||||
match loop_step {
|
||||
LoopStep::NewStates(new_timeline_states) => {
|
||||
// Batch timeline download registration to ensure that the external registration code won't block any running tasks before.
|
||||
set_timeline_states(conf, new_timeline_states);
|
||||
debug!("Sync loop step completed");
|
||||
}
|
||||
LoopStep::Shutdown => {
|
||||
debug!("Shutdown requested, stopping");
|
||||
break;
|
||||
}
|
||||
}
|
||||
while !crate::tenant_mgr::shutdown_requested() {
|
||||
let new_timeline_states = runtime.block_on(
|
||||
loop_step(
|
||||
conf,
|
||||
&mut receiver,
|
||||
Arc::clone(&remote_assets),
|
||||
max_concurrent_sync,
|
||||
max_sync_errors,
|
||||
)
|
||||
.instrument(debug_span!("storage_sync_loop_step")),
|
||||
);
|
||||
// Batch timeline download registration to ensure that the external registration code won't block any running tasks before.
|
||||
set_timeline_states(conf, new_timeline_states);
|
||||
debug!("Sync loop step completed");
|
||||
}
|
||||
|
||||
debug!("Shutdown requested, stopping");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -564,7 +539,7 @@ async fn process_task<
|
||||
"Waiting {} seconds before starting the task",
|
||||
seconds_to_wait
|
||||
);
|
||||
tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs_f64(seconds_to_wait)).await;
|
||||
}
|
||||
|
||||
let sync_start = Instant::now();
|
||||
|
||||
@@ -202,7 +202,7 @@ async fn try_download_archive<
|
||||
archive_to_download.disk_consistent_lsn(),
|
||||
local_metadata.disk_consistent_lsn()
|
||||
),
|
||||
Err(e) => warn!("Failed to read local metadata file, assuming it's safe to override its with the download. Read: {:#}", e),
|
||||
Err(e) => warn!("Failed to read local metadata file, assuing it's safe to override its with the download. Read: {:#}", e),
|
||||
}
|
||||
compression::uncompress_file_stream_with_index(
|
||||
conf.timeline_path(&timeline_id, &tenant_id),
|
||||
|
||||
@@ -198,7 +198,7 @@ pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Reposito
|
||||
|
||||
match &tenant.repo {
|
||||
Some(repo) => Ok(Arc::clone(repo)),
|
||||
None => bail!("Repository for tenant {} is not yet valid", tenantid),
|
||||
None => anyhow::bail!("Repository for tenant {} is not yet valid", tenantid),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,46 +10,15 @@
|
||||
//! This is similar to PostgreSQL's virtual file descriptor facility in
|
||||
//! src/backend/storage/file/fd.c
|
||||
//!
|
||||
use lazy_static::lazy_static;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::{RwLock, RwLockWriteGuard};
|
||||
use zenith_metrics::{register_histogram_vec, register_int_gauge_vec, HistogramVec, IntGaugeVec};
|
||||
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
// Metrics collected on disk IO operations
|
||||
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
|
||||
0.000001, // 1 usec
|
||||
0.00001, // 10 usec
|
||||
0.0001, // 100 usec
|
||||
0.001, // 1 msec
|
||||
0.01, // 10 msec
|
||||
0.1, // 100 msec
|
||||
1.0, // 1 sec
|
||||
];
|
||||
|
||||
lazy_static! {
|
||||
static ref STORAGE_IO_TIME: HistogramVec = register_histogram_vec!(
|
||||
"pageserver_io_time",
|
||||
"Time spent in IO operations",
|
||||
&["operation", "tenant_id", "timeline_id"],
|
||||
STORAGE_IO_TIME_BUCKETS.into()
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
lazy_static! {
|
||||
static ref STORAGE_IO_SIZE: IntGaugeVec = register_int_gauge_vec!(
|
||||
"pageserver_io_size",
|
||||
"Amount of bytes",
|
||||
&["operation", "tenant_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
|
||||
///
|
||||
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
|
||||
/// the underlying file is closed if the system is low on file descriptors,
|
||||
@@ -82,10 +51,6 @@ pub struct VirtualFile {
|
||||
/// storing it here.
|
||||
pub path: PathBuf,
|
||||
open_options: OpenOptions,
|
||||
|
||||
/// For metrics
|
||||
tenantid: String,
|
||||
timelineid: String,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Copy)]
|
||||
@@ -180,13 +145,7 @@ impl OpenFiles {
|
||||
// old file.
|
||||
//
|
||||
if let Some(old_file) = slot_guard.file.take() {
|
||||
// We do not have information about tenantid/timelineid of evicted file.
|
||||
// It is possible to store path together with file or use filepath crate,
|
||||
// but as far as close() is not expected to be fast, it is not so critical to gather
|
||||
// precise per-tenant statistic here.
|
||||
STORAGE_IO_TIME
|
||||
.with_label_values(&["close", "-", "-"])
|
||||
.observe_closure_duration(|| drop(old_file));
|
||||
drop(old_file);
|
||||
}
|
||||
|
||||
// Prepare the slot for reuse and return it
|
||||
@@ -226,20 +185,9 @@ impl VirtualFile {
|
||||
path: &Path,
|
||||
open_options: &OpenOptions,
|
||||
) -> Result<VirtualFile, std::io::Error> {
|
||||
let parts = path.to_str().unwrap().split('/').collect::<Vec<&str>>();
|
||||
let tenantid;
|
||||
let timelineid;
|
||||
if parts.len() > 5 && parts[parts.len() - 5] == "tenants" {
|
||||
tenantid = parts[parts.len() - 4].to_string();
|
||||
timelineid = parts[parts.len() - 2].to_string();
|
||||
} else {
|
||||
tenantid = "*".to_string();
|
||||
timelineid = "*".to_string();
|
||||
}
|
||||
let (handle, mut slot_guard) = get_open_files().find_victim_slot();
|
||||
let file = STORAGE_IO_TIME
|
||||
.with_label_values(&["open", &tenantid, &timelineid])
|
||||
.observe_closure_duration(|| open_options.open(path))?;
|
||||
|
||||
let file = open_options.open(path)?;
|
||||
|
||||
// Strip all options other than read and write.
|
||||
//
|
||||
@@ -256,8 +204,6 @@ impl VirtualFile {
|
||||
pos: 0,
|
||||
path: path.to_path_buf(),
|
||||
open_options: reopen_options,
|
||||
tenantid,
|
||||
timelineid,
|
||||
};
|
||||
|
||||
slot_guard.file.replace(file);
|
||||
@@ -267,13 +213,13 @@ impl VirtualFile {
|
||||
|
||||
/// Call File::sync_all() on the underlying File.
|
||||
pub fn sync_all(&self) -> Result<(), Error> {
|
||||
self.with_file("fsync", |file| file.sync_all())?
|
||||
self.with_file(|file| file.sync_all())?
|
||||
}
|
||||
|
||||
/// Helper function that looks up the underlying File for this VirtualFile,
|
||||
/// opening it and evicting some other File if necessary. It calls 'func'
|
||||
/// with the physical File.
|
||||
fn with_file<F, R>(&self, op: &str, mut func: F) -> Result<R, Error>
|
||||
fn with_file<F, R>(&self, mut func: F) -> Result<R, Error>
|
||||
where
|
||||
F: FnMut(&File) -> R,
|
||||
{
|
||||
@@ -296,9 +242,7 @@ impl VirtualFile {
|
||||
if let Some(file) = &slot_guard.file {
|
||||
// Found a cached file descriptor.
|
||||
slot.recently_used.store(true, Ordering::Relaxed);
|
||||
return Ok(STORAGE_IO_TIME
|
||||
.with_label_values(&[op, &self.tenantid, &self.timelineid])
|
||||
.observe_closure_duration(|| func(file)));
|
||||
return Ok(func(file));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -323,9 +267,7 @@ impl VirtualFile {
|
||||
let (handle, mut slot_guard) = open_files.find_victim_slot();
|
||||
|
||||
// Open the physical file
|
||||
let file = STORAGE_IO_TIME
|
||||
.with_label_values(&["open", &self.tenantid, &self.timelineid])
|
||||
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
||||
let file = self.open_options.open(&self.path)?;
|
||||
|
||||
// Perform the requested operation on it
|
||||
//
|
||||
@@ -334,9 +276,7 @@ impl VirtualFile {
|
||||
// library RwLock doesn't allow downgrading without releasing the lock,
|
||||
// and that doesn't seem worth the trouble. (parking_lot RwLock would
|
||||
// allow it)
|
||||
let result = STORAGE_IO_TIME
|
||||
.with_label_values(&[op, &self.tenantid, &self.timelineid])
|
||||
.observe_closure_duration(|| func(&file));
|
||||
let result = func(&file);
|
||||
|
||||
// Store the File in the slot and update the handle in the VirtualFile
|
||||
// to point to it.
|
||||
@@ -359,13 +299,7 @@ impl Drop for VirtualFile {
|
||||
let mut slot_guard = slot.inner.write().unwrap();
|
||||
if slot_guard.tag == handle.tag {
|
||||
slot.recently_used.store(false, Ordering::Relaxed);
|
||||
// Unlike files evicted by replacement algorithm, here
|
||||
// we group close time by tenantid/timelineid.
|
||||
// At allows to compare number/time of "normal" file closes
|
||||
// with file eviction.
|
||||
STORAGE_IO_TIME
|
||||
.with_label_values(&["close", &self.tenantid, &self.timelineid])
|
||||
.observe_closure_duration(|| slot_guard.file.take());
|
||||
slot_guard.file.take();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -401,7 +335,7 @@ impl Seek for VirtualFile {
|
||||
self.pos = offset;
|
||||
}
|
||||
SeekFrom::End(offset) => {
|
||||
self.pos = self.with_file("seek", |mut file| file.seek(SeekFrom::End(offset)))??
|
||||
self.pos = self.with_file(|mut file| file.seek(SeekFrom::End(offset)))??
|
||||
}
|
||||
SeekFrom::Current(offset) => {
|
||||
let pos = self.pos as i128 + offset as i128;
|
||||
@@ -423,23 +357,11 @@ impl Seek for VirtualFile {
|
||||
|
||||
impl FileExt for VirtualFile {
|
||||
fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
|
||||
let result = self.with_file("read", |file| file.read_at(buf, offset))?;
|
||||
if let Ok(size) = result {
|
||||
STORAGE_IO_SIZE
|
||||
.with_label_values(&["read", &self.tenantid, &self.timelineid])
|
||||
.add(size as i64);
|
||||
}
|
||||
result
|
||||
self.with_file(|file| file.read_at(buf, offset))?
|
||||
}
|
||||
|
||||
fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
|
||||
let result = self.with_file("write", |file| file.write_at(buf, offset))?;
|
||||
if let Ok(size) = result {
|
||||
STORAGE_IO_SIZE
|
||||
.with_label_values(&["write", &self.tenantid, &self.timelineid])
|
||||
.add(size as i64);
|
||||
}
|
||||
result
|
||||
self.with_file(|file| file.write_at(buf, offset))?
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ struct WalReceiverEntry {
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref WAL_RECEIVERS: Mutex<HashMap<(ZTenantId, ZTimelineId), WalReceiverEntry>> =
|
||||
static ref WAL_RECEIVERS: Mutex<HashMap<ZTimelineId, WalReceiverEntry>> =
|
||||
Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
@@ -60,10 +60,10 @@ thread_local! {
|
||||
// In future we can make this more granular and send shutdown signals
|
||||
// per tenant/timeline to cancel inactive walreceivers.
|
||||
// TODO deal with blocking pg connections
|
||||
pub fn stop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
|
||||
pub fn stop_wal_receiver(timelineid: ZTimelineId) {
|
||||
let mut receivers = WAL_RECEIVERS.lock();
|
||||
|
||||
if let Some(r) = receivers.get_mut(&(tenantid, timelineid)) {
|
||||
if let Some(r) = receivers.get_mut(&timelineid) {
|
||||
match r.wal_receiver_interrupt_sender.take() {
|
||||
Some(s) => {
|
||||
if s.send(()).is_err() {
|
||||
@@ -84,9 +84,9 @@ pub fn stop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
|
||||
}
|
||||
}
|
||||
|
||||
fn drop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
|
||||
pub fn drop_wal_receiver(timelineid: ZTimelineId, tenantid: ZTenantId) {
|
||||
let mut receivers = WAL_RECEIVERS.lock();
|
||||
receivers.remove(&(tenantid, timelineid));
|
||||
receivers.remove(&timelineid);
|
||||
|
||||
// Check if it was the last walreceiver of the tenant.
|
||||
// TODO now we store one WalReceiverEntry per timeline,
|
||||
@@ -104,13 +104,13 @@ fn drop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
|
||||
// Launch a new WAL receiver, or tell one that's running about change in connection string
|
||||
pub fn launch_wal_receiver(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
timelineid: ZTimelineId,
|
||||
wal_producer_connstr: &str,
|
||||
tenantid: ZTenantId,
|
||||
) {
|
||||
let mut receivers = WAL_RECEIVERS.lock();
|
||||
|
||||
match receivers.get_mut(&(tenantid, timelineid)) {
|
||||
match receivers.get_mut(&timelineid) {
|
||||
Some(receiver) => {
|
||||
receiver.wal_producer_connstr = wal_producer_connstr.into();
|
||||
}
|
||||
@@ -121,7 +121,7 @@ pub fn launch_wal_receiver(
|
||||
.name("WAL receiver thread".into())
|
||||
.spawn(move || {
|
||||
IS_WAL_RECEIVER.with(|c| c.set(true));
|
||||
thread_main(conf, tenantid, timelineid, rx);
|
||||
thread_main(conf, timelineid, tenantid, rx);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -131,7 +131,7 @@ pub fn launch_wal_receiver(
|
||||
wal_receiver_interrupt_sender: Some(tx),
|
||||
tenantid,
|
||||
};
|
||||
receivers.insert((tenantid, timelineid), receiver);
|
||||
receivers.insert(timelineid, receiver);
|
||||
|
||||
// Update tenant state and start tenant threads, if they are not running yet.
|
||||
tenant_mgr::set_tenant_state(tenantid, TenantState::Active).unwrap();
|
||||
@@ -141,11 +141,11 @@ pub fn launch_wal_receiver(
|
||||
}
|
||||
|
||||
// Look up current WAL producer connection string in the hash table
|
||||
fn get_wal_producer_connstr(tenantid: ZTenantId, timelineid: ZTimelineId) -> String {
|
||||
fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
|
||||
let receivers = WAL_RECEIVERS.lock();
|
||||
|
||||
receivers
|
||||
.get(&(tenantid, timelineid))
|
||||
.get(&timelineid)
|
||||
.unwrap()
|
||||
.wal_producer_connstr
|
||||
.clone()
|
||||
@@ -156,15 +156,15 @@ fn get_wal_producer_connstr(tenantid: ZTenantId, timelineid: ZTimelineId) -> Str
|
||||
//
|
||||
fn thread_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
interrupt_receiver: oneshot::Receiver<()>,
|
||||
) {
|
||||
let _enter = info_span!("WAL receiver", timeline = %timelineid, tenant = %tenantid).entered();
|
||||
info!("WAL receiver thread started");
|
||||
|
||||
// Look up the current WAL producer address
|
||||
let wal_producer_connstr = get_wal_producer_connstr(tenantid, timelineid);
|
||||
let wal_producer_connstr = get_wal_producer_connstr(timelineid);
|
||||
|
||||
// Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
|
||||
// and start streaming WAL from it.
|
||||
@@ -188,7 +188,7 @@ fn thread_main(
|
||||
|
||||
// Drop it from list of active WAL_RECEIVERS
|
||||
// so that next callmemaybe request launched a new thread
|
||||
drop_wal_receiver(tenantid, timelineid);
|
||||
drop_wal_receiver(timelineid, tenantid);
|
||||
}
|
||||
|
||||
fn walreceiver_main(
|
||||
|
||||
@@ -8,21 +8,23 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
base64 = "0.13.0"
|
||||
bytes = { version = "1.0.1", features = ['serde'] }
|
||||
clap = "2.33.0"
|
||||
hex = "0.4.3"
|
||||
hmac = "0.10.1"
|
||||
lazy_static = "1.4.0"
|
||||
md5 = "0.7.0"
|
||||
rand = "0.8.3"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
routerify = "2"
|
||||
parking_lot = "0.11.2"
|
||||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
rustls = "0.19.1"
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
tokio = { version = "1.11", features = ["macros"] }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
clap = "2.33.0"
|
||||
rustls = "0.19.1"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
sha2 = "0.9.8"
|
||||
thiserror = "1.0.30"
|
||||
tokio = { version = "1.11", features = ['macros'] }
|
||||
# tokio-postgres = { path = "../../rust-postgres/tokio-postgres" } TODO remove this
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev = "f1f16657aaebe2b9b4b16ef7abf6dc42301bad5d"}
|
||||
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
|
||||
140
proxy/src/auth.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
//! Authentication machinery.
|
||||
|
||||
use crate::sasl::{SaslFirstMessage, SaslMechanism, SaslMessage, SaslStream};
|
||||
use crate::scram::key::ScramKey;
|
||||
use crate::scram::{ScramExchangeServer, ScramSecret};
|
||||
use anyhow::{bail, Context};
|
||||
use zenith_utils::postgres_backend::{PostgresBackend, ProtoState};
|
||||
use zenith_utils::pq_proto::{
|
||||
BeAuthenticationSaslMessage as BeSaslMessage, BeMessage as Be, FeMessage as Fe, *,
|
||||
};
|
||||
|
||||
// TODO: add SCRAM-SHA-256-PLUS
|
||||
/// A list of supported SCRAM methods.
|
||||
const SCRAM_METHODS: &[&str] = &["SCRAM-SHA-256"];
|
||||
|
||||
/// Initial state of [`AuthStream`].
|
||||
pub struct Begin;
|
||||
|
||||
/// Use [SCRAM](crate::scram)-based auth in [`AuthStream`].
|
||||
pub struct Scram<'a>(pub &'a ScramSecret);
|
||||
|
||||
/// Use password-based auth in [`AuthStream`].
|
||||
pub struct Md5(
|
||||
/// Salt for client.
|
||||
pub [u8; 4],
|
||||
);
|
||||
|
||||
/// Every authentication selector is supposed to implement this trait.
|
||||
pub trait AuthMethod {
|
||||
/// Any authentication selector should provide initial backend message
|
||||
/// containing auth method name and parameters, e.g. md5 salt.
|
||||
fn first_message(&self) -> BeMessage<'_>;
|
||||
}
|
||||
|
||||
impl AuthMethod for Scram<'_> {
|
||||
#[inline(always)]
|
||||
fn first_message(&self) -> BeMessage<'_> {
|
||||
Be::AuthenticationSasl(BeSaslMessage::Methods(SCRAM_METHODS))
|
||||
}
|
||||
}
|
||||
|
||||
impl AuthMethod for Md5 {
|
||||
#[inline(always)]
|
||||
fn first_message(&self) -> BeMessage<'_> {
|
||||
Be::AuthenticationMD5Password(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// This wrapper for [`PostgresBackend`] performs client authentication.
|
||||
#[must_use]
|
||||
pub struct AuthStream<'a, State> {
|
||||
/// The underlying stream which implements libpq's protocol.
|
||||
pgb: &'a mut PostgresBackend,
|
||||
/// State might contain ancillary data (see [`AuthStream::begin`]).
|
||||
state: State,
|
||||
}
|
||||
|
||||
/// Initial state of the stream wrapper.
|
||||
impl<'a> AuthStream<'a, Begin> {
|
||||
/// Create a new wrapper for client authentication.
|
||||
pub fn new(pgb: &'a mut PostgresBackend) -> Self {
|
||||
Self { pgb, state: Begin }
|
||||
}
|
||||
|
||||
/// Move to the next step by sending auth method's name & params to client.
|
||||
pub fn begin<M: AuthMethod>(self, method: M) -> anyhow::Result<AuthStream<'a, M>> {
|
||||
self.pgb.write_message(&method.first_message())?;
|
||||
self.pgb.state = ProtoState::Authentication;
|
||||
|
||||
Ok(AuthStream {
|
||||
pgb: self.pgb,
|
||||
state: method,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream wrapper for handling simple MD5 password auth.
|
||||
impl AuthStream<'_, Md5> {
|
||||
/// Perform user authentication; raise an error if authentication fails.
|
||||
pub fn authenticate(mut self) -> anyhow::Result<()> {
|
||||
let msg = self.read_password_message()?;
|
||||
let (_trailing_null, _md5_response) = msg.split_last().context("bad password message")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream wrapper for handling [SCRAM](crate::scram) auth.
|
||||
impl AuthStream<'_, Scram<'_>> {
|
||||
/// Perform user authentication; raise an error if authentication fails.
|
||||
pub fn authenticate(mut self) -> anyhow::Result<ScramKey> {
|
||||
// Initial client message contains the chosen auth method's name
|
||||
let msg = self.read_password_message()?;
|
||||
let sasl = SaslFirstMessage::parse(&msg).context("bad SASL message")?;
|
||||
|
||||
// Currently, the only supported SASL method is SCRAM
|
||||
if !SCRAM_METHODS.contains(&sasl.method) {
|
||||
bail!("unsupported SASL method: {}", sasl.method);
|
||||
}
|
||||
|
||||
let secret = self.state.0;
|
||||
let stream = (Some(msg.slice_ref(sasl.message)), &mut self);
|
||||
let client_key = ScramExchangeServer::new(secret).authenticate(stream)?;
|
||||
|
||||
Ok(client_key)
|
||||
}
|
||||
}
|
||||
|
||||
/// Only [`AuthMethod`] states should receive password messages.
|
||||
impl<M: AuthMethod> AuthStream<'_, M> {
|
||||
/// Receive a new [`PasswordMessage`](FeMessage::PasswordMessage) and extract its payload.
|
||||
fn read_password_message(&mut self) -> anyhow::Result<bytes::Bytes> {
|
||||
match self.pgb.read_message()? {
|
||||
Some(Fe::PasswordMessage(msg)) => Ok(msg),
|
||||
None => bail!("connection is lost"),
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Abstract away all intricacies of [`PostgresBackend`],
|
||||
/// since [SASL](crate::sasl) protocols are text-based.
|
||||
impl SaslStream for AuthStream<'_, Scram<'_>> {
|
||||
type In = bytes::Bytes;
|
||||
|
||||
fn recv(&mut self) -> anyhow::Result<Self::In> {
|
||||
self.read_password_message()
|
||||
}
|
||||
|
||||
fn send(&mut self, data: &SaslMessage<impl AsRef<[u8]>>) -> anyhow::Result<()> {
|
||||
let reply = match data {
|
||||
SaslMessage::Continue(reply) => BeSaslMessage::Continue(reply.as_ref()),
|
||||
SaslMessage::Final(reply) => BeSaslMessage::Final(reply.as_ref()),
|
||||
};
|
||||
|
||||
self.pgb.write_message(&Be::AuthenticationSasl(reply))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
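A minimal sketch of how the AuthStream wrapper above is meant to be driven for the SCRAM path (assuming `auth::{AuthStream, Scram}`, `scram::{ScramSecret, key::ScramKey}` and `zenith_utils::postgres_backend::PostgresBackend` are in scope; the secret string is a placeholder, not a real credential):

    use anyhow::Context;

    // Sketch only: drive AuthStream through SCRAM and obtain the client key.
    fn authenticate_client(pgb: &mut PostgresBackend) -> anyhow::Result<ScramKey> {
        // Placeholder secret; in practice it comes from the console (pg_authid's rolpassword).
        let secret = ScramSecret::parse("SCRAM-SHA-256$4096:<salt>$<stored_key>:<server_key>")
            .context("malformed SCRAM secret")?;

        AuthStream::new(pgb)
            .begin(Scram(&secret))?  // sends AuthenticationSASL with the supported method list
            .authenticate()          // runs the SCRAM exchange and yields the client key
    }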
@@ -21,35 +21,6 @@ enum ProxyAuthResponse {
|
||||
NotReady { ready: bool }, // TODO: get rid of `ready`
|
||||
}
|
||||
|
||||
impl DatabaseInfo {
|
||||
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
|
||||
let host_port = format!("{}:{}", self.host, self.port);
|
||||
host_port
|
||||
.to_socket_addrs()
|
||||
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
|
||||
.next()
|
||||
.ok_or_else(|| anyhow!("cannot resolve at least one SocketAddr"))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CPlaneApi<'a> {
|
||||
auth_endpoint: &'a str,
|
||||
waiters: &'a ProxyWaiters,
|
||||
|
||||
69
proxy/src/db.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
///
|
||||
/// Utils for connecting to the postgres database.
|
||||
///
|
||||
|
||||
use crate::scram::key::ScramKey;
|
||||
use std::net::{SocketAddr, ToSocketAddrs};
|
||||
use anyhow::{Context, anyhow};
|
||||
|
||||
/// Sufficient information to authenticate as client.
|
||||
pub struct ScramAuthSecret {
|
||||
pub iterations: u32,
|
||||
pub salt_base64: String,
|
||||
pub client_key: ScramKey,
|
||||
pub server_key: ScramKey,
|
||||
}
|
||||
|
||||
#[non_exhaustive]
|
||||
pub enum AuthSecret {
|
||||
Scram(ScramAuthSecret),
|
||||
Password(String),
|
||||
}
|
||||
|
||||
|
||||
pub struct DatabaseAuthInfo {
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub auth_secret: AuthSecret,
|
||||
}
|
||||
|
||||
impl From<DatabaseAuthInfo> for tokio_postgres::Config {
|
||||
fn from(auth_info: DatabaseAuthInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&auth_info.host)
|
||||
.port(auth_info.port)
|
||||
.dbname(&auth_info.dbname)
|
||||
.user(&auth_info.user);
|
||||
|
||||
match auth_info.auth_secret {
|
||||
AuthSecret::Scram(scram_secret) => {
|
||||
config.add_scram_key(
|
||||
base64::decode(scram_secret.salt_base64).unwrap(),
|
||||
scram_secret.iterations,
|
||||
scram_secret.client_key.bytes.to_vec(),
|
||||
scram_secret.server_key.bytes.to_vec(),
|
||||
);
|
||||
},
|
||||
AuthSecret::Password(password) => {
|
||||
config.password(password);
|
||||
}
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
|
||||
|
||||
impl DatabaseAuthInfo {
|
||||
pub fn socket_addr(&self) -> anyhow::Result<SocketAddr> {
|
||||
let host_port = format!("{}:{}", self.host, self.port);
|
||||
host_port
|
||||
.to_socket_addrs()
|
||||
.with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
|
||||
.next()
|
||||
.ok_or_else(|| anyhow!("cannot resolve at least one SocketAddr"))
|
||||
}
|
||||
}
|
||||
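A minimal usage sketch for `DatabaseAuthInfo` (all values are placeholders; the password variant is used here to avoid the fork-specific SCRAM key path):

    // Sketch only: build connection parameters and convert them into a tokio_postgres::Config.
    fn demo_config() -> tokio_postgres::Config {
        let auth_info = DatabaseAuthInfo {
            host: "127.0.0.1".into(),
            port: 5432,
            dbname: "postgres".into(),
            user: "pepe".into(),
            auth_secret: AuthSecret::Password("password".into()),
        };
        // The From impl above fills in host/port/dbname/user and the credentials.
        tokio_postgres::Config::from(auth_info)
    }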
@@ -1,15 +0,0 @@
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
use routerify::RouterBuilder;
|
||||
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::http::error::ApiError;
|
||||
use zenith_utils::http::json::json_response;
|
||||
|
||||
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Ok(json_response(StatusCode::OK, "")?)
|
||||
}
|
||||
|
||||
pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let router = endpoint::make_router();
|
||||
router.get("/v1/status", status_handler)
|
||||
}
|
||||
@@ -9,18 +9,20 @@ use anyhow::bail;
|
||||
use clap::{App, Arg};
|
||||
use state::{ProxyConfig, ProxyState};
|
||||
use std::thread;
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::{tcp_listener, GIT_VERSION};
|
||||
|
||||
mod db;
|
||||
mod auth;
|
||||
mod cplane_api;
|
||||
mod http;
|
||||
mod mgmt;
|
||||
mod parse;
|
||||
mod proxy;
|
||||
mod sasl;
|
||||
mod scram;
|
||||
mod state;
|
||||
mod waiters;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
zenith_metrics::set_common_metrics_prefix("zenith_proxy");
|
||||
let arg_matches = App::new("Zenith proxy/router")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
@@ -39,14 +41,6 @@ fn main() -> anyhow::Result<()> {
|
||||
.help("listen for management callback connection on ip:port")
|
||||
.default_value("127.0.0.1:7000"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("http")
|
||||
.short("h")
|
||||
.long("http")
|
||||
.takes_value(true)
|
||||
.help("listen for incoming http connections (metrics, etc) on ip:port")
|
||||
.default_value("127.0.0.1:7001"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("uri")
|
||||
.short("u")
|
||||
@@ -93,7 +87,6 @@ fn main() -> anyhow::Result<()> {
|
||||
let config = ProxyConfig {
|
||||
proxy_address: arg_matches.value_of("proxy").unwrap().parse()?,
|
||||
mgmt_address: arg_matches.value_of("mgmt").unwrap().parse()?,
|
||||
http_address: arg_matches.value_of("http").unwrap().parse()?,
|
||||
redirect_uri: arg_matches.value_of("uri").unwrap().parse()?,
|
||||
auth_endpoint: arg_matches.value_of("auth-endpoint").unwrap().parse()?,
|
||||
ssl_config,
|
||||
@@ -103,9 +96,6 @@ fn main() -> anyhow::Result<()> {
|
||||
println!("Version: {}", GIT_VERSION);
|
||||
|
||||
// Check that we can bind to address before further initialization
|
||||
println!("Starting http on {}", state.conf.http_address);
|
||||
let http_listener = tcp_listener::bind(state.conf.http_address)?;
|
||||
|
||||
println!("Starting proxy on {}", state.conf.proxy_address);
|
||||
let pageserver_listener = tcp_listener::bind(state.conf.proxy_address)?;
|
||||
|
||||
@@ -113,12 +103,6 @@ fn main() -> anyhow::Result<()> {
|
||||
let mgmt_listener = tcp_listener::bind(state.conf.mgmt_address)?;
|
||||
|
||||
let threads = [
|
||||
thread::Builder::new()
|
||||
.name("Http thread".into())
|
||||
.spawn(move || {
|
||||
let router = http::make_router();
|
||||
endpoint::serve_thread_main(router, http_listener)
|
||||
})?,
|
||||
// Spawn a thread to listen for connections. It will spawn further threads
|
||||
// for each connection.
|
||||
thread::Builder::new()
|
||||
|
||||
18
proxy/src/parse.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Small parsing helpers.
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::ffi::CStr;
|
||||
|
||||
pub fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> {
|
||||
let pos = bytes.iter().position(|&x| x == 0)?;
|
||||
let (cstr, other) = bytes.split_at(pos + 1);
|
||||
// SAFETY: we've already checked that there's a terminator
|
||||
Some((unsafe { CStr::from_bytes_with_nul_unchecked(cstr) }, other))
|
||||
}
|
||||
|
||||
pub fn split_at_const<const N: usize>(bytes: &[u8]) -> Option<(&[u8; N], &[u8])> {
|
||||
(bytes.len() >= N).then(|| {
|
||||
let (head, tail) = bytes.split_at(N);
|
||||
(head.try_into().unwrap(), tail)
|
||||
})
|
||||
}
|
||||
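A small sketch of how these helpers are expected to behave (assuming `parse::{split_cstr, split_at_const}` is in scope; the inputs are made up):

    // Sketch only: exercise the two parsing helpers on a SASL-like byte prefix.
    fn demo_parse() {
        // split_cstr peels off a NUL-terminated method name and returns the remainder.
        let (method, tail) = split_cstr(b"SCRAM-SHA-256\0rest").unwrap();
        assert_eq!(method.to_str().unwrap(), "SCRAM-SHA-256");
        assert_eq!(tail, b"rest");

        // split_at_const takes a fixed-size prefix, e.g. a big-endian u32 length field.
        let (len_bytes, rest) = split_at_const::<4>(&[0, 0, 0, 4, b'n']).unwrap();
        assert_eq!(u32::from_be_bytes(*len_bytes), 4);
        assert_eq!(rest, b"n");
    }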
@@ -1,5 +1,7 @@
|
||||
use crate::cplane_api::{CPlaneApi, DatabaseInfo};
|
||||
use crate::auth::{self, AuthStream};
|
||||
use crate::cplane_api::CPlaneApi;
|
||||
use crate::ProxyState;
|
||||
use crate::db::{AuthSecret, DatabaseAuthInfo, ScramAuthSecret};
|
||||
use anyhow::{anyhow, bail};
|
||||
use lazy_static::lazy_static;
|
||||
use parking_lot::Mutex;
|
||||
@@ -10,7 +12,7 @@ use std::collections::HashMap;
|
||||
use std::net::{SocketAddr, TcpStream};
|
||||
use std::{io, thread};
|
||||
use tokio_postgres::NoTls;
|
||||
use zenith_utils::postgres_backend::{self, PostgresBackend, ProtoState, Stream};
|
||||
use zenith_utils::postgres_backend::{self, PostgresBackend, Stream};
|
||||
use zenith_utils::pq_proto::{BeMessage as Be, FeMessage as Fe, *};
|
||||
use zenith_utils::sock_split::{ReadStream, WriteStream};
|
||||
|
||||
@@ -117,8 +119,9 @@ impl ProxyConnection {
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
// HACK for local testing only
|
||||
// Both scenarios here should end up producing database credentials
|
||||
if username.ends_with("@zenith") {
|
||||
if true || username.ends_with("@zenith") {
|
||||
self.handle_existing_user(&username, &dbname).map(Some)
|
||||
} else {
|
||||
self.handle_new_user().map(Some)
|
||||
@@ -126,7 +129,7 @@ impl ProxyConnection {
|
||||
};
|
||||
|
||||
let conn = match authenticate() {
|
||||
Ok(Some(db_info)) => connect_to_db(db_info),
|
||||
Ok(Some(db_auth_info)) => connect_to_db(db_auth_info),
|
||||
Ok(None) => return Ok(None),
|
||||
Err(e) => {
|
||||
// Report the error to the client
|
||||
@@ -211,43 +214,43 @@ impl ProxyConnection {
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_existing_user(&mut self, user: &str, db: &str) -> anyhow::Result<DatabaseInfo> {
|
||||
let md5_salt = rand::random::<[u8; 4]>();
|
||||
fn handle_existing_user(&mut self, user: &str, db: &str) -> anyhow::Result<DatabaseAuthInfo> {
|
||||
let _cplane = CPlaneApi::new(&self.state.conf.auth_endpoint, &self.state.waiters);
|
||||
|
||||
// Ask password
|
||||
self.pgb
|
||||
.write_message(&Be::AuthenticationMD5Password(&md5_salt))?;
|
||||
self.pgb.state = ProtoState::Authentication; // XXX
|
||||
// TODO read from console
|
||||
// I got this by running `select rolname, rolpassword from pg_authid;`
|
||||
let secret = crate::scram::ScramSecret::parse("SCRAM-SHA-256$4096:tExym9TW7MBl7OsE1FcZVQ==$Ao3nb0bStHOVIqOEUSfXdlvF9XIynqIGzSmDCs2O4p8=:WV6Eenyz5FGwuuVfKh0AXQVnzz4NLnKVV7FpVz1/1zY=").unwrap();
|
||||
|
||||
// Check password
|
||||
let msg = match self.pgb.read_message()? {
|
||||
Some(Fe::PasswordMessage(msg)) => msg,
|
||||
None => bail!("connection is lost"),
|
||||
bad => bail!("unexpected message type: {:?}", bad),
|
||||
};
|
||||
println!("got message: {:?}", msg);
|
||||
|
||||
let (_trailing_null, md5_response) = msg
|
||||
.split_last()
|
||||
.ok_or_else(|| anyhow!("unexpected password message"))?;
|
||||
|
||||
let cplane = CPlaneApi::new(&self.state.conf.auth_endpoint, &self.state.waiters);
|
||||
let db_info = cplane.authenticate_proxy_request(
|
||||
user,
|
||||
db,
|
||||
md5_response,
|
||||
&md5_salt,
|
||||
&self.psql_session_id,
|
||||
)?;
|
||||
let client_key = AuthStream::new(&mut self.pgb)
|
||||
.begin(auth::Scram(&secret))?
|
||||
.authenticate()?;
|
||||
|
||||
self.pgb
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
Ok(db_info)
|
||||
// TODO get this info from console and tell it to start the db
|
||||
let host = "127.0.0.1";
|
||||
let port = 5432;
|
||||
|
||||
let scram_auth_secret = ScramAuthSecret {
|
||||
iterations: secret.iterations,
|
||||
salt_base64: secret.salt_base64,
|
||||
client_key,
|
||||
server_key: secret.server_key,
|
||||
};
|
||||
let auth_info = DatabaseAuthInfo {
|
||||
host: host.into(),
|
||||
port,
|
||||
dbname: db.into(),
|
||||
user: user.into(),
|
||||
auth_secret: AuthSecret::Scram(scram_auth_secret)
|
||||
};
|
||||
|
||||
Ok(auth_info)
|
||||
}
|
||||
|
||||
fn handle_new_user(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
fn handle_new_user(&mut self) -> anyhow::Result<DatabaseAuthInfo> {
|
||||
let greeting = hello_message(&self.state.conf.redirect_uri, &self.psql_session_id);
|
||||
|
||||
// First, register this session
|
||||
@@ -265,7 +268,15 @@ impl ProxyConnection {
|
||||
self.pgb
|
||||
.write_message_noflush(&Be::NoticeResponse("Connecting to database.".into()))?;
|
||||
|
||||
Ok(db_info)
|
||||
let db_auth_info = DatabaseAuthInfo {
|
||||
host: db_info.host,
|
||||
port: db_info.port,
|
||||
dbname: db_info.dbname,
|
||||
user: db_info.user,
|
||||
auth_secret: AuthSecret::Password(db_info.password.unwrap())
|
||||
};
|
||||
|
||||
Ok(db_auth_info)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,7 +297,7 @@ fn hello_message(redirect_uri: &str, session_id: &str) -> String {
|
||||
|
||||
/// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message
|
||||
async fn connect_to_db(
|
||||
db_info: DatabaseInfo,
|
||||
db_info: DatabaseAuthInfo,
|
||||
) -> anyhow::Result<(String, tokio::net::TcpStream, CancelKeyData)> {
|
||||
// Make raw connection. When connect_raw finishes we've received ReadyForQuery.
|
||||
let socket_addr = db_info.socket_addr()?;
|
||||
|
||||
161
proxy/src/sasl.rs
Normal file
@@ -0,0 +1,161 @@
|
||||
//! Simple Authentication and Security Layer.
|
||||
//!
|
||||
//! RFC: <https://datatracker.ietf.org/doc/html/rfc4422>.
|
||||
//!
|
||||
//! Reference implementation:
|
||||
//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/backend/libpq/auth-sasl.c>
|
||||
//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/interfaces/libpq/fe-auth.c>
|
||||
|
||||
use crate::parse::{split_at_const, split_cstr};
|
||||
use anyhow::Context;
|
||||
use thiserror::Error;
|
||||
|
||||
/// SASL-specific payload of [`PasswordMessage`](zenith_utils::pq_proto::FeMessage::PasswordMessage).
|
||||
#[derive(Debug)]
|
||||
pub struct SaslFirstMessage<'a> {
|
||||
/// Authentication method, e.g. `"SCRAM-SHA-256"`.
|
||||
pub method: &'a str,
|
||||
/// Initial client message.
|
||||
pub message: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> SaslFirstMessage<'a> {
|
||||
// NB: FromStr doesn't work with lifetimes
|
||||
pub fn parse(bytes: &'a [u8]) -> Option<Self> {
|
||||
let (method_cstr, tail) = split_cstr(bytes)?;
|
||||
let method = method_cstr.to_str().ok()?;
|
||||
|
||||
let (len_bytes, message) = split_at_const(tail)?;
|
||||
let len = u32::from_be_bytes(*len_bytes) as usize;
|
||||
if len != message.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Self { method, message })
|
||||
}
|
||||
}
|
||||
|
||||
/// A single SASL message.
|
||||
/// This struct is deliberately decoupled from lower-level
|
||||
/// [`BeAuthenticationSaslMessage`](zenith_utils::pq_proto::BeAuthenticationSaslMessage).
|
||||
#[derive(Debug)]
|
||||
pub enum SaslMessage<T> {
|
||||
/// We expect to see more steps.
|
||||
Continue(T),
|
||||
/// This is the final step.
|
||||
Final(T),
|
||||
}
|
||||
|
||||
/// This specialized trait provides capabilities akin to
|
||||
/// [`std::io::Read`]+[`std::io::Write`] in order to
|
||||
/// abstract away underlying stream implementations.
|
||||
pub trait SaslStream {
|
||||
/// We'd like to use `AsRef<str>` here, but as far as we know there's
|
||||
/// no cheap way to make [`String`] out of [`bytes::Bytes`];
|
||||
/// On the other hand, byte slices are a decent middle ground.
|
||||
type In: AsRef<[u8]>;
|
||||
|
||||
/// Receive a [SASL](crate::sasl) message from a client.
|
||||
fn recv(&mut self) -> anyhow::Result<Self::In>;
|
||||
|
||||
/// Send a [SASL](crate::sasl) message to a client.
|
||||
fn send(&mut self, data: &SaslMessage<impl AsRef<[u8]>>) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
impl<S: SaslStream> SaslStream for &mut S {
|
||||
type In = S::In;
|
||||
|
||||
#[inline(always)]
|
||||
fn recv(&mut self) -> anyhow::Result<Self::In> {
|
||||
S::recv(self)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn send(&mut self, data: &SaslMessage<impl AsRef<[u8]>>) -> anyhow::Result<()> {
|
||||
S::send(self, data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Sometimes it's necessary to mix in a message we got from somewhere else.
|
||||
impl<V: AsRef<[u8]>, S: SaslStream<In = V>> SaslStream for (Option<V>, S) {
|
||||
type In = S::In;
|
||||
|
||||
#[inline(always)]
|
||||
fn recv(&mut self) -> anyhow::Result<Self::In> {
|
||||
// Try returning a stashed message first
|
||||
match self.0.take() {
|
||||
Some(value) => Ok(value),
|
||||
None => self.1.recv(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn send(&mut self, data: &SaslMessage<impl AsRef<[u8]>>) -> anyhow::Result<()> {
|
||||
self.1.send(data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Fine-grained auth errors help in writing tests.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SaslError {
|
||||
#[error("failed to authenticate client: {0}")]
|
||||
AuthenticationFailed(&'static str),
|
||||
#[error("bad client message")]
|
||||
BadClientMessage,
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
/// A convenient result type for SASL exchange.
|
||||
pub type Result<T> = std::result::Result<T, SaslError>;
|
||||
|
||||
pub enum SaslStep<T, R> {
|
||||
Transition(T),
|
||||
Authenticated(R),
|
||||
}
|
||||
|
||||
/// Every SASL mechanism (e.g. [SCRAM](crate::scram)) is expected to implement this trait.
|
||||
pub trait SaslMechanism<T>: Sized {
|
||||
/// Produce a server challenge to be sent to the client.
|
||||
/// The method name follows PostgreSQL's convention (libpq/sasl.h).
|
||||
fn exchange(self, input: &str) -> Result<(SaslStep<Self, T>, String)>;
|
||||
|
||||
/// Perform SASL message exchange according to the underlying algorithm
|
||||
/// until user is either authenticated or denied access.
|
||||
fn authenticate(mut self, mut stream: impl SaslStream) -> Result<T> {
|
||||
loop {
|
||||
let msg = stream.recv()?;
|
||||
let input = std::str::from_utf8(msg.as_ref()).context("bad encoding")?;
|
||||
|
||||
let (this, reply) = self.exchange(input)?;
|
||||
match this {
|
||||
SaslStep::Transition(this) => {
|
||||
stream.send(&SaslMessage::Continue(reply))?;
|
||||
self = this;
|
||||
}
|
||||
SaslStep::Authenticated(outcome) => {
|
||||
stream.send(&SaslMessage::Final(reply))?;
|
||||
return Ok(outcome);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::ffi::CStr;
|
||||
|
||||
#[test]
|
||||
fn parse_sasl_first_message() {
|
||||
let proto = CStr::from_bytes_with_nul(b"SCRAM-SHA-256\0").unwrap();
|
||||
let sasl = "n,,n=,r=KHQ2Gjc7NptyB8aov5/TnUy4".as_bytes();
|
||||
let sasl_len = (sasl.len() as u32).to_be_bytes();
|
||||
let bytes = [proto.to_bytes_with_nul(), sasl_len.as_ref(), sasl].concat();
|
||||
|
||||
let password = SaslFirstMessage::parse(&bytes).unwrap();
|
||||
assert_eq!(password.method, proto.to_str().unwrap());
|
||||
assert_eq!(password.message, sasl);
|
||||
}
|
||||
}
|
||||
132
proxy/src/scram.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
//! Salted Challenge Response Authentication Mechanism.
|
||||
//!
|
||||
//! RFC: <https://datatracker.ietf.org/doc/html/rfc5802>.
|
||||
//!
|
||||
//! Reference implementation:
|
||||
//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/backend/libpq/auth-scram.c>
|
||||
//! * <https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf4b391f1e7393c1962841/src/interfaces/libpq/fe-auth-scram.c>
|
||||
|
||||
mod channel_binding;
|
||||
pub mod key; // TODO do I have to make it pub?
|
||||
mod messages;
|
||||
mod secret;
|
||||
mod signature;
|
||||
|
||||
pub use channel_binding::*;
|
||||
pub use secret::*;
|
||||
|
||||
use crate::sasl::{self, SaslError, SaslMechanism, SaslStep};
|
||||
use messages::{ClientFinalMessage, ClientFirstMessage, OwnedServerFirstMessage};
|
||||
use signature::SignatureBuilder;
|
||||
|
||||
pub use self::secret::ScramSecret;
|
||||
|
||||
/// Decode base64 into array without any heap allocations
|
||||
fn base64_decode_array<const N: usize>(input: impl AsRef<[u8]>) -> Option<[u8; N]> {
|
||||
let mut bytes = [0u8; N];
|
||||
|
||||
let size = base64::decode_config_slice(input, base64::STANDARD, &mut bytes).ok()?;
|
||||
if size != N {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(bytes)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ScramExchangeServerState {
|
||||
/// Waiting for [`ClientFirstMessage`].
|
||||
Initial,
|
||||
/// Waiting for [`ClientFinalMessage`].
|
||||
SaltSent {
|
||||
cbind_flag: ChannelBinding<String>,
|
||||
client_first_message_bare: String,
|
||||
server_first_message: OwnedServerFirstMessage,
|
||||
},
|
||||
}
|
||||
|
||||
/// Server's side of SCRAM auth algorithm.
|
||||
#[derive(Debug)]
|
||||
pub struct ScramExchangeServer<'a> {
|
||||
state: ScramExchangeServerState,
|
||||
secret: &'a ScramSecret,
|
||||
}
|
||||
|
||||
impl<'a> ScramExchangeServer<'a> {
|
||||
pub fn new(secret: &'a ScramSecret) -> Self {
|
||||
Self {
|
||||
state: ScramExchangeServerState::Initial,
|
||||
secret,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SaslMechanism<key::ScramKey> for ScramExchangeServer<'_> {
|
||||
fn exchange(mut self, input: &str) -> sasl::Result<(sasl::SaslStep<Self, key::ScramKey>, String)> {
|
||||
use ScramExchangeServerState::*;
|
||||
use sasl::SaslStep::*;
|
||||
match &self.state {
|
||||
Initial => {
|
||||
let client_first_message =
|
||||
ClientFirstMessage::parse(input).ok_or(SaslError::BadClientMessage)?;
|
||||
|
||||
let server_first_message = client_first_message.build_server_first_message(
|
||||
// TODO: use secure random
|
||||
&rand::random(),
|
||||
&self.secret.salt_base64,
|
||||
self.secret.iterations,
|
||||
);
|
||||
let msg = server_first_message.as_str().to_owned();
|
||||
|
||||
self.state = SaltSent {
|
||||
cbind_flag: client_first_message.cbind_flag.map(str::to_owned),
|
||||
client_first_message_bare: client_first_message.bare.to_owned(),
|
||||
server_first_message,
|
||||
};
|
||||
|
||||
Ok((Transition(self), msg))
|
||||
}
|
||||
SaltSent {
|
||||
cbind_flag,
|
||||
client_first_message_bare,
|
||||
server_first_message,
|
||||
} => {
|
||||
let client_final_message =
|
||||
ClientFinalMessage::parse(input).ok_or(SaslError::BadClientMessage)?;
|
||||
|
||||
let channel_binding = cbind_flag.encode(|_| {
|
||||
// TODO: make global design decision regarding the certificate
|
||||
todo!("fetch TLS certificate data")
|
||||
});
|
||||
|
||||
// This might've been caused by a MITM attack
|
||||
if client_final_message.channel_binding != channel_binding {
|
||||
return Err(SaslError::AuthenticationFailed("channel binding failed"));
|
||||
}
|
||||
|
||||
if client_final_message.nonce != server_first_message.nonce() {
|
||||
return Err(SaslError::AuthenticationFailed("bad nonce"));
|
||||
}
|
||||
|
||||
let signature_builder = SignatureBuilder {
|
||||
client_first_message_bare,
|
||||
server_first_message: server_first_message.as_str(),
|
||||
client_final_message_without_proof: client_final_message.without_proof,
|
||||
};
|
||||
|
||||
let client_key = signature_builder
|
||||
.build(&self.secret.stored_key)
|
||||
.derive_client_key(&client_final_message.proof);
|
||||
|
||||
if client_key.sha256() != self.secret.stored_key {
|
||||
return Err(SaslError::AuthenticationFailed("keys don't match"));
|
||||
}
|
||||
|
||||
let msg = client_final_message
|
||||
.build_server_final_message(signature_builder, &self.secret.server_key);
|
||||
|
||||
Ok((Authenticated(client_key), msg))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
77
proxy/src/scram/channel_binding.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
//! Definition and parser for channel binding flag (a part of GS2 header).
|
||||
|
||||
/// Channel binding flag (possibly with params).
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum ChannelBinding<T> {
|
||||
/// Client doesn't support channel binding.
|
||||
NotSupportedClient,
|
||||
/// Client thinks server doesn't support channel binding.
|
||||
NotSupportedServer,
|
||||
/// Client wants to use this type of channel binding.
|
||||
Required(T),
|
||||
}
|
||||
|
||||
impl<T> ChannelBinding<T> {
|
||||
pub fn map<R>(self, f: impl FnOnce(T) -> R) -> ChannelBinding<R> {
|
||||
use ChannelBinding::*;
|
||||
match self {
|
||||
NotSupportedClient => NotSupportedClient,
|
||||
NotSupportedServer => NotSupportedServer,
|
||||
Required(x) => Required(f(x)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ChannelBinding<&'a str> {
|
||||
// NB: FromStr doesn't work with lifetimes
|
||||
pub fn parse(input: &'a str) -> Option<Self> {
|
||||
use ChannelBinding::*;
|
||||
Some(match input {
|
||||
"n" => NotSupportedClient,
|
||||
"y" => NotSupportedServer,
|
||||
other => Required(other.strip_prefix("p=")?),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<str>> ChannelBinding<T> {
|
||||
/// Encode channel binding data as base64 for subsequent checks.
|
||||
pub fn encode(&self, get_cbind_data: impl FnOnce(&str) -> String) -> String {
|
||||
use ChannelBinding::*;
|
||||
match self {
|
||||
NotSupportedClient => {
|
||||
// base64::encode("n,,")
|
||||
"biws".into()
|
||||
}
|
||||
NotSupportedServer => {
|
||||
// base64::encode("y,,")
|
||||
"eSws".into()
|
||||
}
|
||||
Required(s) => {
|
||||
let s = s.as_ref();
|
||||
let msg = format!("p={mode},,{data}", mode = s, data = get_cbind_data(s));
|
||||
base64::encode(msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn channel_binding_encode() {
|
||||
use ChannelBinding::*;
|
||||
|
||||
let cases = [
|
||||
(NotSupportedClient, base64::encode("n,,")),
|
||||
(NotSupportedServer, base64::encode("y,,")),
|
||||
(Required("foo"), base64::encode("p=foo,,bar")),
|
||||
];
|
||||
|
||||
for (cb, input) in cases {
|
||||
assert_eq!(cb.encode(|_| "bar".to_owned()), input);
|
||||
}
|
||||
}
|
||||
}
|
||||
40
proxy/src/scram/key.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
//! Tools for client/server/stored keys management.
|
||||
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
/// Faithfully taken from PostgreSQL.
|
||||
pub const SCRAM_KEY_LEN: usize = 32;
|
||||
|
||||
/// Thin wrapper for byte array.
|
||||
#[derive(Debug, PartialEq, Eq)] // TODO maybe no debug? Avoid accidental logging.
|
||||
#[repr(transparent)]
|
||||
pub struct ScramKey {
|
||||
pub bytes: [u8; SCRAM_KEY_LEN], // TODO does it have to be public?
|
||||
}
|
||||
|
||||
impl ScramKey {
|
||||
pub fn sha256(&self) -> ScramKey {
|
||||
let mut bytes = [0u8; SCRAM_KEY_LEN];
|
||||
bytes.copy_from_slice({
|
||||
let mut hash = Sha256::new();
|
||||
hash.update(&self.bytes);
|
||||
hash.finalize().as_slice()
|
||||
});
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<[u8; SCRAM_KEY_LEN]> for ScramKey {
|
||||
#[inline(always)]
|
||||
fn from(bytes: [u8; SCRAM_KEY_LEN]) -> Self {
|
||||
Self { bytes }
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<[u8]> for ScramKey {
|
||||
#[inline(always)]
|
||||
fn as_ref(&self) -> &[u8] {
|
||||
&self.bytes
|
||||
}
|
||||
}
|
||||
228
proxy/src/scram/messages.rs
Normal file
@@ -0,0 +1,228 @@
|
||||
//! Definitions for SCRAM messages.
|
||||
|
||||
use super::base64_decode_array;
|
||||
use super::channel_binding::ChannelBinding;
|
||||
use super::key::{ScramKey, SCRAM_KEY_LEN};
|
||||
use super::signature::SignatureBuilder;
|
||||
use std::fmt;
|
||||
use std::ops::Range;
|
||||
|
||||
/// Faithfully taken from PostgreSQL.
|
||||
const SCRAM_RAW_NONCE_LEN: usize = 18;
|
||||
|
||||
/// Although we ignore all extensions, we still have to validate the message.
|
||||
fn validate_sasl_extensions<'a>(parts: impl Iterator<Item = &'a str>) -> Option<()> {
|
||||
for mut chars in parts.map(|s| s.chars()) {
|
||||
let attr = chars.next()?;
|
||||
if !('a'..='z').contains(&attr) && !('A'..='Z').contains(&attr) {
|
||||
return None;
|
||||
}
|
||||
let eq = chars.next()?;
|
||||
if eq != '=' {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(())
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ClientFirstMessage<'a> {
|
||||
/// `client-first-message-bare`.
|
||||
pub bare: &'a str,
|
||||
/// Channel binding mode.
|
||||
pub cbind_flag: ChannelBinding<&'a str>,
|
||||
/// [Client username](https://github.com/postgres/postgres/blob/94226d4506e66d6e7cbf/src/backend/libpq/auth-scram.c#L13).
|
||||
pub username: &'a str,
|
||||
/// Client nonce.
|
||||
pub nonce: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> ClientFirstMessage<'a> {
|
||||
// NB: FromStr doesn't work with lifetimes
|
||||
pub fn parse(input: &'a str) -> Option<Self> {
|
||||
let mut parts = input.split(',');
|
||||
|
||||
let cbind_flag = ChannelBinding::parse(parts.next()?)?;
|
||||
|
||||
// PG doesn't support authorization identity,
|
||||
// so we don't bother defining GS2 header type
|
||||
let authzid = parts.next()?;
|
||||
if !authzid.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Unfortunately, `parts.as_str()` is unstable
|
||||
let pos = authzid.as_ptr() as usize - input.as_ptr() as usize + 1;
|
||||
let (_, bare) = input.split_at(pos);
|
||||
|
||||
// In theory, these might be preceded by "reserved-mext" (i.e. "m=")
|
||||
let username = parts.next()?.strip_prefix("n=")?;
|
||||
let nonce = parts.next()?.strip_prefix("r=")?;
|
||||
|
||||
// Validate but ignore auth extensions
|
||||
validate_sasl_extensions(parts)?;
|
||||
|
||||
Some(Self {
|
||||
bare,
|
||||
cbind_flag,
|
||||
username,
|
||||
nonce,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_server_first_message(
|
||||
&self,
|
||||
nonce: &[u8; SCRAM_RAW_NONCE_LEN],
|
||||
salt_base64: &str,
|
||||
iterations: u32,
|
||||
) -> OwnedServerFirstMessage {
|
||||
use std::fmt::Write;
|
||||
|
||||
let mut message = String::new();
|
||||
write!(&mut message, "r={}", self.nonce).unwrap();
|
||||
base64::encode_config_buf(nonce, base64::STANDARD, &mut message);
|
||||
let combined_nonce = 2..message.len();
|
||||
write!(&mut message, ",s={},i={}", salt_base64, iterations).unwrap();
|
||||
|
||||
// This design guarantees that it's impossible to create a
|
||||
// server-first-message without receiving a client-first-message
|
||||
OwnedServerFirstMessage {
|
||||
message,
|
||||
nonce: combined_nonce,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ClientFinalMessage<'a> {
|
||||
/// `client-final-message-without-proof`.
|
||||
pub without_proof: &'a str,
|
||||
/// Channel binding data (base64).
|
||||
pub channel_binding: &'a str,
|
||||
/// Combined client & server nonce.
|
||||
pub nonce: &'a str,
|
||||
/// Client auth proof.
|
||||
pub proof: [u8; SCRAM_KEY_LEN],
|
||||
}
|
||||
|
||||
impl<'a> ClientFinalMessage<'a> {
|
||||
// NB: FromStr doesn't work with lifetimes
|
||||
pub fn parse(input: &'a str) -> Option<Self> {
|
||||
let (without_proof, proof) = input.rsplit_once(',')?;
|
||||
|
||||
let mut parts = without_proof.split(',');
|
||||
let channel_binding = parts.next()?.strip_prefix("c=")?;
|
||||
let nonce = parts.next()?.strip_prefix("r=")?;
|
||||
|
||||
// Validate but ignore auth extensions
|
||||
validate_sasl_extensions(parts)?;
|
||||
|
||||
let proof = base64_decode_array(proof.strip_prefix("p=")?)?;
|
||||
|
||||
Some(Self {
|
||||
without_proof,
|
||||
channel_binding,
|
||||
nonce,
|
||||
proof,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_server_final_message(
|
||||
&self,
|
||||
signature_builder: SignatureBuilder,
|
||||
server_key: &ScramKey,
|
||||
) -> String {
|
||||
let mut buf = String::from("v=");
|
||||
base64::encode_config_buf(
|
||||
signature_builder.build(server_key),
|
||||
base64::STANDARD,
|
||||
&mut buf,
|
||||
);
|
||||
|
||||
buf
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OwnedServerFirstMessage {
|
||||
/// Owned `server-first-message`.
|
||||
message: String,
|
||||
/// Slice into `message`.
|
||||
nonce: Range<usize>,
|
||||
}
|
||||
|
||||
impl OwnedServerFirstMessage {
|
||||
/// Extract combined nonce from the message.
|
||||
#[inline(always)]
|
||||
pub fn nonce(&self) -> &str {
|
||||
&self.message[self.nonce.clone()]
|
||||
}
|
||||
|
||||
/// Get reference to a text representation of the message.
|
||||
#[inline(always)]
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.message
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for OwnedServerFirstMessage {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("ServerFirstMessage")
|
||||
.field("message", &self.as_str())
|
||||
.field("nonce", &self.nonce())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_client_first_message() {
|
||||
use ChannelBinding::*;
|
||||
|
||||
// (Almost) real strings captured during debug sessions
|
||||
let cases = [
|
||||
(NotSupportedClient, "n,,n=pepe,r=t8JwklwKecDLwSsA72rHmVju"),
|
||||
(NotSupportedServer, "y,,n=pepe,r=t8JwklwKecDLwSsA72rHmVju"),
|
||||
(
|
||||
Required("tls-server-end-point"),
|
||||
"p=tls-server-end-point,,n=pepe,r=t8JwklwKecDLwSsA72rHmVju",
|
||||
),
|
||||
];
|
||||
|
||||
for (cb, input) in cases {
|
||||
let msg = ClientFirstMessage::parse(input).unwrap();
|
||||
|
||||
assert_eq!(msg.bare, "n=pepe,r=t8JwklwKecDLwSsA72rHmVju");
|
||||
assert_eq!(msg.username, "pepe");
|
||||
assert_eq!(msg.nonce, "t8JwklwKecDLwSsA72rHmVju");
|
||||
assert_eq!(msg.cbind_flag, cb);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_client_final_message() {
|
||||
let input = [
|
||||
"c=eSws",
|
||||
"r=iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU",
|
||||
"p=SRpfsIVS4Gk11w1LqQ4QvCUBZYQmqXNSDEcHqbQ3CHI=",
|
||||
]
|
||||
.join(",");
|
||||
|
||||
let msg = ClientFinalMessage::parse(&input).unwrap();
|
||||
assert_eq!(
|
||||
msg.without_proof,
|
||||
"c=eSws,r=iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU"
|
||||
);
|
||||
assert_eq!(
|
||||
msg.nonce,
|
||||
"iiYEfS3rOgn8S3rtpSdrOsHtPLWvIkdgmHxA0hf3JNOAG4dU"
|
||||
);
|
||||
assert_eq!(
|
||||
base64::encode(msg.proof),
|
||||
"SRpfsIVS4Gk11w1LqQ4QvCUBZYQmqXNSDEcHqbQ3CHI="
|
||||
);
|
||||
}
|
||||
}
|
||||
65
proxy/src/scram/secret.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
//! Tools for SCRAM server secret management.
|
||||
|
||||
use super::base64_decode_array;
|
||||
use super::key::ScramKey;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ScramSecret {
|
||||
pub iterations: u32,
|
||||
pub salt_base64: String,
|
||||
pub stored_key: ScramKey,
|
||||
pub server_key: ScramKey,
|
||||
}
|
||||
|
||||
impl ScramSecret {
|
||||
pub fn parse(input: &str) -> Option<Self> {
|
||||
// SCRAM-SHA-256$<iterations>:<salt>$<storedkey>:<serverkey>
|
||||
let s = input.strip_prefix("SCRAM-SHA-256$")?;
|
||||
let (params, keys) = s.split_once('$')?;
|
||||
|
||||
let ((iterations, salt), (stored_key, server_key)) =
|
||||
params.split_once(':').zip(keys.split_once(':'))?;
|
||||
|
||||
let secret = ScramSecret {
|
||||
iterations: iterations.parse().ok()?,
|
||||
salt_base64: salt.to_owned(),
|
||||
stored_key: base64_decode_array(stored_key)?.into(),
|
||||
server_key: base64_decode_array(server_key)?.into(),
|
||||
};
|
||||
|
||||
Some(secret)
|
||||
}
|
||||
|
||||
pub fn mock() -> Self {
|
||||
todo!("see auth-scram.c : mock_scram_secret")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_scram_secret() {
|
||||
let iterations = 4096;
|
||||
let salt = "+/tQQax7twvwTj64mjBsxQ==";
|
||||
let stored_key = "D5h6KTMBlUvDJk2Y8ELfC1Sjtc6k9YHjRyuRZyBNJns=";
|
||||
let server_key = "Pi3QHbcluX//NDfVkKlFl88GGzlJ5LkyPwcdlN/QBvI=";
|
||||
|
||||
let secret = format!(
|
||||
"SCRAM-SHA-256${iterations}:{salt}${stored_key}:{server_key}",
|
||||
iterations = iterations,
|
||||
salt = salt,
|
||||
stored_key = stored_key,
|
||||
server_key = server_key,
|
||||
);
|
||||
|
||||
let parsed = ScramSecret::parse(&secret).unwrap();
|
||||
assert_eq!(parsed.iterations, iterations);
|
||||
assert_eq!(parsed.salt_base64, salt);
|
||||
|
||||
// TODO: derive from 'password'
|
||||
assert_eq!(base64::encode(parsed.stored_key), stored_key);
|
||||
assert_eq!(base64::encode(parsed.server_key), server_key);
|
||||
}
|
||||
}
|
||||
70
proxy/src/scram/signature.rs
Normal file
@@ -0,0 +1,70 @@
//! Tools for client/server signature management.

use super::key::{ScramKey, SCRAM_KEY_LEN};
use hmac::{Hmac, Mac, NewMac};
use sha2::Sha256;

#[derive(Debug)]
pub struct SignatureBuilder<'a> {
    pub client_first_message_bare: &'a str,
    pub server_first_message: &'a str,
    pub client_final_message_without_proof: &'a str,
}

impl SignatureBuilder<'_> {
    pub fn build(&self, key: &ScramKey) -> Signature {
        let mut mac = Hmac::<Sha256>::new_varkey(key.as_ref()).expect("bad key size");

        mac.update(self.client_first_message_bare.as_bytes());
        mac.update(b",");
        mac.update(self.server_first_message.as_bytes());
        mac.update(b",");
        mac.update(self.client_final_message_without_proof.as_bytes());

        // TODO: maybe newer `hmac` et al already migrated to regular arrays?
        let mut signature = [0u8; SCRAM_KEY_LEN];
        signature.copy_from_slice(mac.finalize().into_bytes().as_slice());
        signature.into()
    }
}

#[derive(Debug)]
#[repr(transparent)]
pub struct Signature {
    bytes: [u8; SCRAM_KEY_LEN],
}

impl Signature {
    /// Derive ClientKey from client's signature and proof
    pub fn derive_client_key(&self, proof: &[u8; SCRAM_KEY_LEN]) -> ScramKey {
        let signature = self.as_ref().iter();

        // This is how the proof is calculated:
        //
        // 1. sha256(ClientKey) -> StoredKey
        // 2. hmac_sha256(StoredKey, [messages...]) -> ClientSignature
        // 3. ClientKey ^ ClientSignature -> ClientProof
        //
        // Step 3 implies that we can restore ClientKey from the proof
        // by xoring the latter with the ClientSignature again. Afterwards
        // we can check that the presumed ClientKey meets our expectations.
        let mut bytes = [0u8; SCRAM_KEY_LEN];
        for (i, value) in signature.zip(proof).map(|(x, y)| x ^ y).enumerate() {
            bytes[i] = value;
        }

        bytes.into()
    }
}

impl From<[u8; SCRAM_KEY_LEN]> for Signature {
    fn from(bytes: [u8; SCRAM_KEY_LEN]) -> Self {
        Self { bytes }
    }
}

impl AsRef<[u8]> for Signature {
    fn as_ref(&self) -> &[u8] {
        &self.bytes
    }
}
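The comment block in `derive_client_key` describes the usual SCRAM verification trick: the server never needs ClientKey itself, only StoredKey. Below is a standalone sketch of how a server-side check could use that property; it is illustrative only (the function name and the fixed 32-byte key size are assumptions, and real code would compare digests in constant time), built on the same `hmac`/`sha2` calls as `SignatureBuilder::build` above.

// Illustration only: checking a client proof against StoredKey.
use hmac::{Hmac, Mac, NewMac};
use sha2::{Digest, Sha256};

// auth_message = client-first-bare "," server-first "," client-final-without-proof,
// i.e. exactly what SignatureBuilder feeds into HMAC.
fn verify_client_proof(stored_key: &[u8; 32], auth_message: &str, proof: &[u8; 32]) -> bool {
    // ClientSignature = HMAC(StoredKey, AuthMessage)
    let mut mac = Hmac::<Sha256>::new_varkey(stored_key).expect("bad key size");
    mac.update(auth_message.as_bytes());
    let client_signature = mac.finalize().into_bytes();

    // ClientKey = ClientProof ^ ClientSignature (inverts step 3 of the comment above)
    let mut client_key = [0u8; 32];
    for (i, (p, s)) in proof.iter().zip(client_signature.iter()).enumerate() {
        client_key[i] = p ^ s;
    }

    // The proof is valid iff SHA-256(ClientKey) equals the StoredKey we keep on file.
    // (Use a constant-time comparison in real code.)
    Sha256::digest(&client_key).as_slice() == &stored_key[..]
}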
@@ -10,12 +10,8 @@ pub struct ProxyConfig {
    /// main entrypoint for users to connect to
    pub proxy_address: SocketAddr,

    /// internally used for status and prometheus metrics
    pub http_address: SocketAddr,

    /// management endpoint. Upon user account creation control plane
    /// http management endpoint. Upon user account creation control plane
    /// will notify us here, so that we can 'unfreeze' user session.
    /// TODO It uses postgres protocol over TCP but should be migrated to http.
    pub mgmt_address: SocketAddr,

    /// send unauthenticated users to this URI

@@ -47,9 +47,6 @@ Useful environment variables:
`TEST_OUTPUT`: Set the directory where test state and test output files
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
`--pageserver-config-override=${value}` parameter values when zenith cli is invoked
`RUST_LOG`: logging configuration to pass into Zenith CLI

Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
`pytest -s --log-cli-level=INFO ...`

@@ -5,7 +5,7 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.zenith_fixtures import ZenithEnv

pytest_plugins = ("fixtures.zenith_fixtures")

@@ -13,18 +13,10 @@ pytest_plugins = ("fixtures.zenith_fixtures")
#
# Create a couple of branches off the main branch, at a historical point in time.
#
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):

    # Use safekeeper in this test to avoid a subtle race condition.
    # Without safekeeper, walreceiver reconnection can stuck
    # because of IO deadlock.
    #
    # See https://github.com/zenithdb/zenith/issues/1068
    zenith_env_builder.num_safekeepers = 1
    env = zenith_env_builder.init()

def test_branch_behind(zenith_simple_env: ZenithEnv):
    env = zenith_simple_env
    # Branch at the point where only 100 rows were inserted
    env.zenith_cli(["branch", "test_branch_behind", "main"])
    env.zenith_cli(["branch", "test_branch_behind", "empty"])

    pgmain = env.postgres.create_start('test_branch_behind')
    log.info("postgres is running on 'test_branch_behind' branch")

@@ -1,5 +1,5 @@
import json
from uuid import uuid4, UUID
from uuid import uuid4
import pytest
import psycopg2
import requests
@@ -96,15 +96,6 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: str):
    client.tenant_create(tenant_id)
    assert tenant_id.hex in {t['id'] for t in client.tenant_list()}

    # check its timelines
    timelines = client.timeline_list(tenant_id)
    assert len(timelines) > 0
    for timeline_id_str in timelines:
        timeline_details = client.timeline_details(tenant_id.hex, timeline_id_str)
        assert timeline_details['type'] == 'Local'
        assert timeline_details['tenant_id'] == tenant_id.hex
        assert timeline_details['timeline_id'] == timeline_id_str

    # create branch
    branch_name = uuid4().hex
    client.branch_create(tenant_id, branch_name, "main")

@@ -1,88 +0,0 @@
# It's possible to run any regular test with the local fs remote storage via
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/zenith_zzz/'}" pipenv ......

import tempfile, time, shutil, os
from contextlib import closing
from pathlib import Path
from fixtures.zenith_fixtures import ZenithEnvBuilder, LocalFsStorage, check_restored_datadir_content
from fixtures.log_helper import log

pytest_plugins = ("fixtures.zenith_fixtures")


#
# Tests that a piece of data is backed up and restored correctly:
#
# 1. Initial pageserver
# * starts a pageserver with remote storage, stores specific data in its tables
# * triggers a checkpoint (which produces a local data scheduled for backup), gets the corresponding timeline id
# * polls the timeline status to ensure it's copied remotely
# * stops the pageserver, clears all local directories
#
# 2. Second pageserver
# * starts another pageserver, connected to the same remote storage
# * same timeline id is queried for status, triggering timeline's download
# * timeline status is polled until it's downloaded
# * queries the specific data, ensuring that it matches the one stored before
#
def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder):
    zenith_env_builder.rust_log_override = 'debug'
    zenith_env_builder.num_safekeepers = 1
    zenith_env_builder.enable_local_fs_remote_storage()

    data_id = 1
    data_secret = 'very secret secret'

    ##### First start, insert secret data and upload it to the remote storage
    env = zenith_env_builder.init()
    pg = env.postgres.create_start()

    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute(f'''
                CREATE TABLE t1(id int primary key, secret text);
                INSERT INTO t1 VALUES ({data_id}, '{data_secret}');
            ''')

    # run checkpoint manually to be sure that data landed in remote storage
    with closing(env.pageserver.connect()) as psconn:
        with psconn.cursor() as pscur:
            pscur.execute(f"do_gc {tenant_id} {timeline_id}")
    log.info("waiting for upload") # TODO api to check if upload is done
    time.sleep(2)

    ##### Stop the first pageserver instance, erase all its data
    env.postgres.stop_all()
    env.pageserver.stop()

    dir_to_clear = Path(env.repo_dir) / 'tenants'
    shutil.rmtree(dir_to_clear)
    os.mkdir(dir_to_clear)

    ##### Second start, restore the data and ensure it's the same
    env.pageserver.start()

    log.info("waiting for timeline redownload")
    client = env.pageserver.http_client()
    attempts = 0
    while True:
        timeline_details = client.timeline_details(tenant_id, timeline_id)
        assert timeline_details['timeline_id'] == timeline_id
        assert timeline_details['tenant_id'] == tenant_id
        if timeline_details['type'] == 'Local':
            log.info("timeline downloaded, checking its data")
            break
        attempts += 1
        if attempts > 10:
            raise Exception("timeline redownload failed")
        log.debug("still waiting")
        time.sleep(1)

    pg = env.postgres.create_start()
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute(f'SELECT secret FROM t1 WHERE id = {data_id};')
            assert cur.fetchone() == (data_secret, )
@@ -66,8 +66,8 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 1
            assert row['layer_relfiles_removed'] == 0
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
            assert row['layer_relfiles_removed'] == 2
            assert row['layer_relfiles_dropped'] == 0

            # Insert two more rows and run GC.
@@ -81,7 +81,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
            row = pscur.fetchone()
            print_gc_result(row)
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
            assert row['layer_relfiles_removed'] == 0
            assert row['layer_relfiles_removed'] == 2
            assert row['layer_relfiles_dropped'] == 0

            # Do it again. Should again create two new layer files and remove old ones.
@@ -92,8 +92,8 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 3
            assert row['layer_relfiles_removed'] == 0
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
            assert row['layer_relfiles_removed'] == 2
            assert row['layer_relfiles_dropped'] == 0

            # Run GC again, with no changes in the database. Should not remove anything.
@@ -101,7 +101,7 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
            row = pscur.fetchone()
            print_gc_result(row)
            assert row['layer_relfiles_total'] == layer_relfiles_remain + 3
            assert row['layer_relfiles_total'] == layer_relfiles_remain
            assert row['layer_relfiles_removed'] == 0
            assert row['layer_relfiles_dropped'] == 0

@@ -121,7 +121,9 @@ def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
            # Each relation fork is counted separately, hence 3.
            assert row['layer_relfiles_needed_as_tombstone'] == 3

            assert row['layer_relfiles_removed'] == 0
            # The catalog updates also create new layer files of the catalogs, which
            # are counted as 'removed'
            assert row['layer_relfiles_removed'] > 0

            # TODO Change the test to check actual CG of dropped layers.
            # Each relation fork is counted separately, hence 3.

@@ -25,7 +25,7 @@ from dataclasses import dataclass

# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast
from typing_extensions import Literal
import pytest

@@ -342,14 +342,10 @@ class ZenithEnvBuilder:
    def __init__(self,
                 repo_dir: Path,
                 port_distributor: PortDistributor,
                 pageserver_remote_storage: Optional[RemoteStorage] = None,
                 num_safekeepers: int = 0,
                 pageserver_auth_enabled: bool = False,
                 rust_log_override: Optional[str] = None):
                 pageserver_auth_enabled: bool = False):
        self.repo_dir = repo_dir
        self.rust_log_override = rust_log_override
        self.port_distributor = port_distributor
        self.pageserver_remote_storage = pageserver_remote_storage
        self.num_safekeepers = num_safekeepers
        self.pageserver_auth_enabled = pageserver_auth_enabled
        self.env: Optional[ZenithEnv] = None
@@ -360,11 +356,6 @@ class ZenithEnvBuilder:
        self.env = ZenithEnv(self)
        return self.env

    def enable_local_fs_remote_storage(self):
        assert self.pageserver_remote_storage is None, "remote storage is enabled already"
        self.pageserver_remote_storage = LocalFsStorage(
            Path(self.repo_dir / 'local_fs_remote_storage'))

    def __enter__(self):
        return self

@@ -413,7 +404,6 @@ class ZenithEnv:
    """
    def __init__(self, config: ZenithEnvBuilder):
        self.repo_dir = config.repo_dir
        self.rust_log_override = config.rust_log_override
        self.port_distributor = config.port_distributor

        self.postgres = PostgresFactory(self)
@@ -444,9 +434,7 @@ auth_type = '{pageserver_auth_type}'
"""

        # Create a corresponding ZenithPageserver object
        self.pageserver = ZenithPageserver(self,
                                           port=pageserver_port,
                                           remote_storage=config.pageserver_remote_storage)
        self.pageserver = ZenithPageserver(self, port=pageserver_port)

        # Create config and a Safekeeper object for each safekeeper
        for i in range(1, config.num_safekeepers + 1):
@@ -477,8 +465,6 @@ sync = false # Disable fsyncs to make the tests go faster
            tmp.flush()

            cmd = ['init', f'--config={tmp.name}']
            append_pageserver_param_overrides(cmd, config.pageserver_remote_storage)

            self.zenith_cli(cmd)

        # Start up the page server and all the safekeepers
@@ -523,9 +509,6 @@ sync = false # Disable fsyncs to make the tests go faster
        env_vars['ZENITH_REPO_DIR'] = str(self.repo_dir)
        env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)

        if self.rust_log_override is not None:
            env_vars['RUST_LOG'] = self.rust_log_override

        # Pass coverage settings
        var = 'LLVM_PROFILE_FILE'
        val = os.environ.get(var)
@@ -682,20 +665,6 @@ class ZenithPageserverHttpClient(requests.Session):
        res.raise_for_status()
        return res.json()

    def timeline_list(self, tenant_id: uuid.UUID) -> List[str]:
        res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}")
        res.raise_for_status()
        res_json = res.json()
        assert isinstance(res_json, list)
        return res_json

    def timeline_details(self, tenant_id: str, timeline_id: str) -> Dict[Any, Any]:
        res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id}/{timeline_id}")
        res.raise_for_status()
        res_json = res.json()
        assert isinstance(res_json, dict)
        return res_json

    def get_metrics(self) -> str:
        res = self.get(f"http://localhost:{self.port}/metrics")
        res.raise_for_status()
@@ -708,38 +677,17 @@ class PageserverPort:
    http: int


@dataclass
class LocalFsStorage:
    root: Path


@dataclass
class S3Storage:
    bucket: str
    region: str
    access_key: str
    secret_key: str


RemoteStorage = Union[LocalFsStorage, S3Storage]


class ZenithPageserver(PgProtocol):
    """
    An object representing a running pageserver.

    Initializes the repository via `zenith init`.
    """
    def __init__(self,
                 env: ZenithEnv,
                 port: PageserverPort,
                 remote_storage: Optional[RemoteStorage] = None,
                 enable_auth=False):
    def __init__(self, env: ZenithEnv, port: PageserverPort, enable_auth=False):
        super().__init__(host='localhost', port=port.pg)
        self.env = env
        self.running = False
        self.service_port = port # do not shadow PgProtocol.port which is just int
        self.remote_storage = remote_storage

    def start(self) -> 'ZenithPageserver':
        """
@@ -748,10 +696,7 @@ class ZenithPageserver(PgProtocol):
        """
        assert self.running == False

        start_args = ['pageserver', 'start']
        append_pageserver_param_overrides(start_args, self.remote_storage)

        self.env.zenith_cli(start_args)
        self.env.zenith_cli(['pageserver', 'start'])
        self.running = True
        return self

@@ -784,28 +729,6 @@ class ZenithPageserver(PgProtocol):
        )


def append_pageserver_param_overrides(params_to_update: List[str],
                                      pageserver_remote_storage: Optional[RemoteStorage]):
    if pageserver_remote_storage is not None:
        if isinstance(pageserver_remote_storage, LocalFsStorage):
            pageserver_storage_override = f"local_path='{pageserver_remote_storage.root}'"
        elif isinstance(pageserver_remote_storage, S3Storage):
            pageserver_storage_override = f"bucket_name='{pageserver_remote_storage.bucket}',\
bucket_region='{pageserver_remote_storage.region}',access_key_id='{pageserver_remote_storage.access_key}',\
secret_access_key='{pageserver_remote_storage.secret_key}'"

        else:
            raise Exception(f'Unknown storage configuration {pageserver_remote_storage}')
        params_to_update.append(
            f'--pageserver-config-override=remote_storage={{{pageserver_storage_override}}}')

    env_overrides = os.getenv('ZENITH_PAGESERVER_OVERRIDES')
    if env_overrides is not None:
        params_to_update += [
            f'--pageserver-config-override={o.strip()}' for o in env_overrides.split(';')
        ]


class PgBin:
    """ A helper class for executing postgres binaries """
    def __init__(self, log_dir: str):

Submodule vendor/postgres updated: 6309cf1b52...12250cf3af
@@ -10,7 +10,6 @@ use std::fs::File;
use std::path::{Path, PathBuf};
use std::thread;
use tracing::*;
use walkeeper::timeline::{CreateControlFile, FileStorage};
use zenith_utils::http::endpoint;
use zenith_utils::{logging, tcp_listener, GIT_VERSION};

@@ -87,21 +86,8 @@ fn main() -> Result<()> {
                .takes_value(false)
                .help("Do not wait for changes to be written safely to disk"),
        )
        .arg(
            Arg::with_name("dump-control-file")
                .long("dump-control-file")
                .takes_value(true)
                .help("Dump control file at path specifed by this argument and exit"),
        )
        .get_matches();

    if let Some(addr) = arg_matches.value_of("dump-control-file") {
        let state = FileStorage::load_control_file(Path::new(addr), CreateControlFile::False)?;
        let json = serde_json::to_string(&state)?;
        print!("{}", json);
        return Ok(());
    }

    let mut conf: SafeKeeperConf = Default::default();

    if let Some(dir) = arg_matches.value_of("datadir") {

@@ -62,7 +62,7 @@ impl Default for SafeKeeperConf {
            daemonize: false,
            no_sync: false,
            listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
            listen_http_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            ttl: None,
            recall_period: defaults::DEFAULT_RECALL_PERIOD,
        }

@@ -30,7 +30,7 @@ use zenith_utils::pq_proto::SystemId;
use zenith_utils::zid::{ZTenantId, ZTimelineId};

pub const SK_MAGIC: u32 = 0xcafeceefu32;
pub const SK_FORMAT_VERSION: u32 = 3;
pub const SK_FORMAT_VERSION: u32 = 2;
const SK_PROTOCOL_VERSION: u32 = 1;
const UNKNOWN_SERVER_VERSION: u32 = 0;

@@ -133,11 +133,9 @@ pub struct ServerInfo {
    /// Postgres server version
    pub pg_version: u32,
    pub system_id: SystemId,
    #[serde(with = "hex")]
    pub tenant_id: ZTenantId,
    /// Zenith timelineid
    #[serde(with = "hex")]
    pub timeline_id: ZTimelineId,
    pub ztli: ZTimelineId,
    pub wal_seg_size: u32,
}

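The SK_FORMAT_VERSION bump to 3 goes together with the `#[serde(with = "hex")]` attributes above: the attribute swaps in the `hex` crate's serializer for those fields, so they are written as hex strings rather than whatever ZTenantId/ZTimelineId would produce by default, and it also makes the JSON printed by the new `--dump-control-file` flag readable. A minimal, self-contained illustration of the effect follows; it assumes the `hex` crate's `serde` feature plus serde's derive and serde_json, and uses a plain `[u8; 16]` as a stand-in for the zid types.

// Toy example only: what `#[serde(with = "hex")]` changes in the serialized output.
use serde::Serialize;

#[derive(Serialize)]
struct Ids {
    #[serde(with = "hex")]
    tenant_id: [u8; 16], // stand-in for ZTenantId
    plain: u32,
}

fn main() {
    let ids = Ids { tenant_id: [0xab; 16], plain: 7 };
    // Prints the id as "abab..." instead of a JSON array of 16 numbers.
    println!("{}", serde_json::to_string(&ids).unwrap());
}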
@@ -151,7 +149,6 @@ pub struct SafeKeeperState {
    pub server: ServerInfo,
    /// Unique id of the last *elected* proposer we dealed with. Not needed
    /// for correctness, exists for monitoring purposes.
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
    /// part of WAL acknowledged by quorum and available locally
    pub commit_lsn: Lsn,
@@ -174,7 +171,7 @@ impl SafeKeeperState {
                pg_version: UNKNOWN_SERVER_VERSION, /* Postgres server version */
                system_id: 0, /* Postgres system identifier */
                tenant_id: ZTenantId::from([0u8; 16]),
                timeline_id: ZTimelineId::from([0u8; 16]),
                ztli: ZTimelineId::from([0u8; 16]),
                wal_seg_size: 0,
            },
            proposer_uuid: [0; 16],
@@ -563,13 +560,13 @@ where
        // set basic info about server, if not yet
        self.s.server.system_id = msg.system_id;
        self.s.server.tenant_id = msg.tenant_id;
        self.s.server.timeline_id = msg.ztli;
        self.s.server.ztli = msg.ztli;
        self.s.server.wal_seg_size = msg.wal_seg_size;
        self.storage
            .persist(&self.s)
            .with_context(|| "failed to persist shared state")?;

        self.metrics = SafeKeeperMetrics::new(self.s.server.timeline_id);
        self.metrics = SafeKeeperMetrics::new(self.s.server.ztli);

        info!(
            "processed greeting from proposer {:?}, sending term {:?}",

@@ -79,7 +79,7 @@ struct ReplicationConnGuard {

impl Drop for ReplicationConnGuard {
    fn drop(&mut self) {
        self.timeline.remove_replica(self.replica);
        self.timeline.update_replica_state(self.replica, None);
    }
}

@@ -120,12 +120,14 @@ impl ReplicationConn {
    /// This is spawned into the background by `handle_start_replication`.
    fn background_thread(
        mut stream_in: ReadStream,
        replica_guard: Arc<ReplicationConnGuard>,
        timeline: Arc<Timeline>,
        replica_id: usize,
    ) -> Result<()> {
        let replica_id = replica_guard.replica;
        let timeline = &replica_guard.timeline;

        let mut state = ReplicaState::new();
        let _guard = ReplicationConnGuard {
            replica: replica_id,
            timeline: timeline.clone(),
        };
        // Wait for replica's feedback.
        while let Some(msg) = FeMessage::read(&mut stream_in)? {
            match &msg {
@@ -138,7 +140,7 @@ impl ReplicationConn {
                    // Note: deserializing is on m[1..] because we skip the tag byte.
                    state.hs_feedback = HotStandbyFeedback::des(&m[1..])
                        .context("failed to deserialize HotStandbyFeedback")?;
                    timeline.update_replica_state(replica_id, state);
                    timeline.update_replica_state(replica_id, Some(state));
                }
                Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => {
                    let reply = StandbyReply::des(&m[1..])
@@ -146,7 +148,7 @@ impl ReplicationConn {
                    state.last_received_lsn = reply.write_lsn;
                    state.disk_consistent_lsn = reply.flush_lsn;
                    state.remote_consistent_lsn = reply.apply_lsn;
                    timeline.update_replica_state(replica_id, state);
                    timeline.update_replica_state(replica_id, Some(state));
                }
                _ => warn!("unexpected message {:?}", msg),
            }
@@ -205,23 +207,16 @@ impl ReplicationConn {
        // This replica_id is used below to check if it's time to stop replication.
        let replica_id = bg_timeline.add_replica(state);

        // Use a guard object to remove our entry from the timeline, when the background
        // thread and us have both finished using it.
        let replica_guard = Arc::new(ReplicationConnGuard {
            replica: replica_id,
            timeline: bg_timeline,
        });
        let bg_replica_guard = Arc::clone(&replica_guard);

        // TODO: here we got two threads, one for writing WAL and one for receiving
        // feedback. If one of them fails, we should shutdown the other one too.
        let _ = thread::Builder::new()
            .name("HotStandbyFeedback thread".into())
            .spawn(move || {
                if let Err(err) = Self::background_thread(bg_stream_in, bg_replica_guard) {
                if let Err(err) = Self::background_thread(bg_stream_in, bg_timeline, replica_id) {
                    error!("Replication background thread failed: {}", err);
                }
            })?;
            })
            .unwrap();

        let mut wal_seg_size: usize;
        loop {

@@ -9,7 +9,7 @@ use std::cmp::{max, min};
use std::collections::HashMap;
use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::sync::{Arc, Condvar, Mutex};
use std::time::Duration;
use tracing::*;
@@ -121,7 +121,7 @@ impl SharedState {
    }

    /// Assign new replica ID. We choose first empty cell in the replicas vector
    /// or extend the vector if there are no free slots.
    /// or extend the vector if there are not free items.
    pub fn add_replica(&mut self, state: ReplicaState) -> usize {
        if let Some(pos) = self.replicas.iter().position(|r| r.is_none()) {
            self.replicas[pos] = Some(state);
@@ -136,14 +136,13 @@ impl SharedState {
    /// If create=false and file doesn't exist, bails out.
    fn create_restore(
        conf: &SafeKeeperConf,
        timeline_id: ZTimelineId,
        timelineid: ZTimelineId,
        create: CreateControlFile,
    ) -> Result<Self> {
        let state = FileStorage::load_control_file_conf(conf, timeline_id, create)
        let (file_storage, state) = FileStorage::load_from_control_file(conf, timelineid, create)
            .with_context(|| "failed to load from control file")?;
        let file_storage = FileStorage::new(timeline_id, conf);
        let flush_lsn = if state.server.wal_seg_size != 0 {
            let wal_dir = conf.timeline_dir(&timeline_id);
            let wal_dir = conf.timeline_dir(&timelineid);
            find_end_of_wal(
                &wal_dir,
                state.server.wal_seg_size as usize,
@@ -298,15 +297,9 @@ impl Timeline {
        shared_state.add_replica(state)
    }

    pub fn update_replica_state(&self, id: usize, state: ReplicaState) {
    pub fn update_replica_state(&self, id: usize, state: Option<ReplicaState>) {
        let mut shared_state = self.mutex.lock().unwrap();
        shared_state.replicas[id] = Some(state);
    }

    pub fn remove_replica(&self, id: usize) {
        let mut shared_state = self.mutex.lock().unwrap();
        assert!(shared_state.replicas[id].is_some());
        shared_state.replicas[id] = None;
        shared_state.replicas[id] = state;
    }

    pub fn get_end_of_wal(&self) -> Lsn {
@@ -388,7 +381,7 @@ impl GlobalTimelines {
}

#[derive(Debug)]
pub struct FileStorage {
struct FileStorage {
    // save timeline dir to avoid reconstructing it every time
    timeline_dir: PathBuf,
    conf: SafeKeeperConf,
@@ -396,17 +389,6 @@ pub struct FileStorage {
}

impl FileStorage {
    fn new(timeline_id: ZTimelineId, conf: &SafeKeeperConf) -> FileStorage {
        let timeline_dir = conf.timeline_dir(&timeline_id);
        let timelineid_str = format!("{}", timeline_id);
        FileStorage {
            timeline_dir,
            conf: conf.clone(),
            persist_control_file_seconds: PERSIST_CONTROL_FILE_SECONDS
                .with_label_values(&[&timelineid_str]),
        }
    }

    // Check the magic/version in the on-disk data and deserialize it, if possible.
    fn deser_sk_state(buf: &mut &[u8]) -> Result<SafeKeeperState> {
        // Read the version independent part
@@ -427,24 +409,20 @@ impl FileStorage {
        upgrade_control_file(buf, version)
    }

    fn load_control_file_conf(
        conf: &SafeKeeperConf,
        timeline_id: ZTimelineId,
        create: CreateControlFile,
    ) -> Result<SafeKeeperState> {
        let path = conf.timeline_dir(&timeline_id).join(CONTROL_FILE_NAME);
        Self::load_control_file(path, create)
    }

    /// Read in the control file.
    /// Fetch and lock control file (prevent running more than one instance of safekeeper)
    /// If create=false and file doesn't exist, bails out.
    pub fn load_control_file<P: AsRef<Path>>(
        control_file_path: P,
    fn load_from_control_file(
        conf: &SafeKeeperConf,
        timelineid: ZTimelineId,
        create: CreateControlFile,
    ) -> Result<SafeKeeperState> {
    ) -> Result<(FileStorage, SafeKeeperState)> {
        let timeline_dir = conf.timeline_dir(&timelineid);

        let control_file_path = timeline_dir.join(CONTROL_FILE_NAME);

        info!(
            "loading control file {}, create={:?}",
            control_file_path.as_ref().display(),
            control_file_path.display(),
            create,
        );

@@ -456,7 +434,7 @@ impl FileStorage {
            .with_context(|| {
                format!(
                    "failed to open control file at {}",
                    control_file_path.as_ref().display(),
                    control_file_path.display(),
                )
            })?;

@@ -487,15 +465,21 @@ impl FileStorage {
            );

            FileStorage::deser_sk_state(&mut &buf[..buf.len() - CHECKSUM_SIZE]).with_context(
                || {
                    format!(
                        "while reading control file {}",
                        control_file_path.as_ref().display(),
                    )
                },
                || format!("while reading control file {}", control_file_path.display(),),
            )?
        };
        Ok(state)

        let timelineid_str = format!("{}", timelineid);

        Ok((
            FileStorage {
                timeline_dir,
                conf: conf.clone(),
                persist_control_file_seconds: PERSIST_CONTROL_FILE_SECONDS
                    .with_label_values(&[&timelineid_str]),
            },
            state,
        ))
    }
}

@@ -565,7 +549,7 @@ impl Storage for FileStorage {
        let mut start_pos = startpos;
        const ZERO_BLOCK: &[u8] = &[0u8; XLOG_BLCKSZ];
        let wal_seg_size = server.wal_seg_size as usize;
        let ztli = server.timeline_id;
        let ztli = server.ztli;

        /* Extract WAL location for this block */
        let mut xlogoff = start_pos.segment_offset(wal_seg_size) as usize;
@@ -653,7 +637,7 @@ impl Storage for FileStorage {
        let partial;
        const ZERO_BLOCK: &[u8] = &[0u8; XLOG_BLCKSZ];
        let wal_seg_size = server.wal_seg_size as usize;
        let ztli = server.timeline_id;
        let ztli = server.ztli;

        /* Extract WAL location for this block */
        let mut xlogoff = end_pos.segment_offset(wal_seg_size) as usize;
@@ -753,10 +737,7 @@ mod test {
    ) -> Result<(FileStorage, SafeKeeperState)> {
        fs::create_dir_all(&conf.timeline_dir(&timeline_id))
            .expect("failed to create timeline dir");
        Ok((
            FileStorage::new(timeline_id, conf),
            FileStorage::load_control_file_conf(conf, timeline_id, create)?,
        ))
        FileStorage::load_from_control_file(conf, timeline_id, create)
    }

    #[test]

@@ -5,12 +5,7 @@ use crate::safekeeper::{
use anyhow::{bail, Result};
use serde::{Deserialize, Serialize};
use tracing::*;
use zenith_utils::{
    bin_ser::LeSer,
    lsn::Lsn,
    pq_proto::SystemId,
    zid::{ZTenantId, ZTimelineId},
};
use zenith_utils::{bin_ser::LeSer, lsn::Lsn};

/// Persistent consensus state of the acceptor.
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -40,36 +35,6 @@ struct SafeKeeperStateV1 {
    wal_start_lsn: Lsn,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfoV2 {
    /// Postgres server version
    pub pg_version: u32,
    pub system_id: SystemId,
    pub tenant_id: ZTenantId,
    /// Zenith timelineid
    pub ztli: ZTimelineId,
    pub wal_seg_size: u32,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafeKeeperStateV2 {
    /// persistent acceptor state
    pub acceptor_state: AcceptorState,
    /// information about server
    pub server: ServerInfoV2,
    /// Unique id of the last *elected* proposer we dealed with. Not needed
    /// for correctness, exists for monitoring purposes.
    pub proposer_uuid: PgUuid,
    /// part of WAL acknowledged by quorum and available locally
    pub commit_lsn: Lsn,
    /// minimal LSN which may be needed for recovery of some safekeeper (end_lsn
    /// of last record streamed to everyone)
    pub truncate_lsn: Lsn,
    // Safekeeper starts receiving WAL from this LSN, zeros before it ought to
    // be skipped during decoding.
    pub wal_start_lsn: Lsn,
}

pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState> {
    // migrate to storing full term history
    if version == 1 {
@@ -90,25 +55,6 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
            truncate_lsn: oldstate.truncate_lsn,
            wal_start_lsn: oldstate.wal_start_lsn,
        });
    // migrate to hexing some zids
    } else if version == 2 {
        info!("reading safekeeper control file version {}", version);
        let oldstate = SafeKeeperStateV2::des(&buf[..buf.len()])?;
        let server = ServerInfo {
            pg_version: oldstate.server.pg_version,
            system_id: oldstate.server.system_id,
            tenant_id: oldstate.server.tenant_id,
            timeline_id: oldstate.server.ztli,
            wal_seg_size: oldstate.server.wal_seg_size,
        };
        return Ok(SafeKeeperState {
            acceptor_state: oldstate.acceptor_state,
            server,
            proposer_uuid: oldstate.proposer_uuid,
            commit_lsn: oldstate.commit_lsn,
            truncate_lsn: oldstate.truncate_lsn,
            wal_start_lsn: oldstate.wal_start_lsn,
        });
    }
    bail!("unsupported safekeeper control file version {}", version)
}

@@ -102,21 +102,12 @@ fn main() -> Result<()> {
        .required(false)
        .value_name("stop-mode");

    let pageserver_config_args = Arg::with_name("pageserver-config-override")
        .long("pageserver-config-override")
        .takes_value(true)
        .number_of_values(1)
        .multiple(true)
        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
        .required(false);

    let matches = App::new("Zenith CLI")
        .setting(AppSettings::ArgRequiredElseHelp)
        .version(GIT_VERSION)
        .subcommand(
            SubCommand::with_name("init")
                .about("Initialize a new Zenith repository")
                .arg(pageserver_config_args.clone())
                .arg(
                    Arg::with_name("config")
                        .long("config")
@@ -142,10 +133,10 @@ fn main() -> Result<()> {
            .setting(AppSettings::ArgRequiredElseHelp)
            .about("Manage pageserver")
            .subcommand(SubCommand::with_name("status"))
            .subcommand(SubCommand::with_name("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
            .subcommand(SubCommand::with_name("start").about("Start local pageserver"))
            .subcommand(SubCommand::with_name("stop").about("Stop local pageserver")
                .arg(stop_mode_arg.clone()))
            .subcommand(SubCommand::with_name("restart").about("Restart local pageserver").arg(pageserver_config_args))
            .subcommand(SubCommand::with_name("restart").about("Restart local pageserver"))
        )
        .subcommand(
            SubCommand::with_name("safekeeper")
@@ -412,7 +403,6 @@ fn handle_init(init_match: &ArgMatches) -> Result<()> {
    if let Err(e) = pageserver.init(
        // default_tenantid was generated by the `env.init()` call above
        Some(&env.default_tenantid.unwrap().to_string()),
        &pageserver_config_overrides(init_match),
    ) {
        eprintln!("pageserver init failed: {}", e);
        exit(1);
@@ -421,14 +411,6 @@ fn handle_init(init_match: &ArgMatches) -> Result<()> {
    Ok(())
}

fn pageserver_config_overrides<'a>(init_match: &'a ArgMatches) -> Vec<&'a str> {
    init_match
        .values_of("pageserver-config-override")
        .into_iter()
        .flatten()
        .collect()
}

fn handle_tenant(tenant_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let pageserver = PageServerNode::from_env(env);
    match tenant_match.subcommand() {
@@ -590,8 +572,8 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
    let pageserver = PageServerNode::from_env(env);

    match sub_match.subcommand() {
        ("start", Some(start_match)) => {
            if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
        ("start", Some(_sub_m)) => {
            if let Err(e) = pageserver.start() {
                eprintln!("pageserver start failed: {}", e);
                exit(1);
            }
@@ -606,20 +588,22 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
            }
        }

        ("restart", Some(restart_match)) => {
        ("restart", Some(_sub_m)) => {
            //TODO what shutdown strategy should we use here?
            if let Err(e) = pageserver.stop(false) {
                eprintln!("pageserver stop failed: {}", e);
                exit(1);
            }

            if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
            if let Err(e) = pageserver.start() {
                eprintln!("pageserver start failed: {}", e);
                exit(1);
            }
        }

        (sub_name, _) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
        (sub_name, _) => {
            bail!("Unexpected pageserver subcommand '{}'", sub_name)
        }
    }
    Ok(())
}
@@ -678,12 +662,12 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
    Ok(())
}

fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
fn handle_start_all(_sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let pageserver = PageServerNode::from_env(env);

    // Postgres nodes are not started automatically

    if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
    if let Err(e) = pageserver.start() {
        eprintln!("pageserver start failed: {}", e);
        exit(1);
    }

@@ -373,9 +373,8 @@ impl PostgresBackend {
            }
            AuthType::MD5 => {
                rand::thread_rng().fill(&mut self.md5_salt);
                let md5_salt = self.md5_salt;
                self.write_message(&BeMessage::AuthenticationMD5Password(
                    &md5_salt,
                    self.md5_salt,
                ))?;
                self.state = ProtoState::Authentication;
            }

@@ -353,7 +353,8 @@ fn read_null_terminated(buf: &mut Bytes) -> anyhow::Result<Bytes> {
#[derive(Debug)]
pub enum BeMessage<'a> {
    AuthenticationOk,
    AuthenticationMD5Password(&'a [u8; 4]),
    AuthenticationMD5Password([u8; 4]),
    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),
    AuthenticationCleartextPassword,
    BackendKeyData(CancelKeyData),
    BindComplete,
@@ -381,6 +382,13 @@ pub enum BeMessage<'a> {
    KeepAlive(WalSndKeepAlive),
}

#[derive(Debug)]
pub enum BeAuthenticationSaslMessage<'a> {
    Methods(&'a [&'a str]),
    Continue(&'a [u8]),
    Final(&'a [u8]),
}

#[derive(Debug)]
pub enum BeParameterStatusMessage<'a> {
    Encoding(&'a str),
@@ -552,6 +560,32 @@ impl<'a> BeMessage<'a> {
                .unwrap(); // write into BytesMut can't fail
            }

            BeMessage::AuthenticationSasl(msg) => {
                buf.put_u8(b'R');
                write_body(buf, |buf| {
                    use BeAuthenticationSaslMessage::*;
                    match msg {
                        Methods(methods) => {
                            buf.put_i32(10); // Specifies that SASL auth method is used.
                            for method in methods.iter() {
                                write_cstr(method.as_bytes(), buf)?;
                            }
                            buf.put_u8(0); // zero terminator for the list
                        }
                        Continue(extra) => {
                            buf.put_i32(11); // Continue SASL auth.
                            buf.put_slice(extra);
                        }
                        Final(extra) => {
                            buf.put_i32(12); // Send final SASL message.
                            buf.put_slice(extra);
                        }
                    }
                    Ok::<_, io::Error>(())
                })
                .unwrap()
            }

            BeMessage::BackendKeyData(key_data) => {
                buf.put_u8(b'K');
                write_body(buf, |buf| {