mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-19 03:12:55 +00:00
Compare commits
55 Commits
try-no-loc
...
layer-id
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32dd786650 | ||
|
|
e6ef27637b | ||
|
|
b532470792 | ||
|
|
e0d7ecf91c | ||
|
|
edba2e9744 | ||
|
|
7e552b645f | ||
|
|
ea5900f155 | ||
|
|
28ab40c8b7 | ||
|
|
d423142623 | ||
|
|
1c0e85f9a0 | ||
|
|
5bc09074ea | ||
|
|
1fac4a3c91 | ||
|
|
1bc917324d | ||
|
|
af429fb401 | ||
|
|
710fe02d0b | ||
|
|
de87aad990 | ||
|
|
41d48719e1 | ||
|
|
d88377f9f0 | ||
|
|
ecd577c934 | ||
|
|
f43f8401ee | ||
|
|
1877bbc7cb | ||
|
|
a064ebb64c | ||
|
|
4726870e8d | ||
|
|
3bbc106c70 | ||
|
|
66eb081876 | ||
|
|
f291ab2b87 | ||
|
|
66ec135676 | ||
|
|
28af3e5008 | ||
|
|
f337d73a6c | ||
|
|
57ce541521 | ||
|
|
e14f24034f | ||
|
|
04fb0a0342 | ||
|
|
8c42dcc041 | ||
|
|
9070a4dc02 | ||
|
|
86a28458c6 | ||
|
|
c058d04250 | ||
|
|
c310932121 | ||
|
|
ff563ff080 | ||
|
|
7f9d2a7d05 | ||
|
|
13f4e173c9 | ||
|
|
85116a8375 | ||
|
|
e42c884c2b | ||
|
|
eb706bc9f4 | ||
|
|
798df756de | ||
|
|
732d13fe06 | ||
|
|
feae7f39c1 | ||
|
|
c2b468c958 | ||
|
|
e272a380b4 | ||
|
|
0dc7a3fc15 | ||
|
|
a1bc0ada59 | ||
|
|
e9b5224a8a | ||
|
|
bdd039a9ee | ||
|
|
b405eef324 | ||
|
|
ba557d126b | ||
|
|
2dde20a227 |
@@ -1,13 +1,13 @@
|
||||
version: 2.1
|
||||
|
||||
orbs:
|
||||
python: circleci/python@1.4.0
|
||||
|
||||
executors:
|
||||
zenith-build-executor:
|
||||
resource_class: xlarge
|
||||
docker:
|
||||
- image: cimg/rust:1.52.1
|
||||
- image: cimg/rust:1.55.0
|
||||
zenith-python-executor:
|
||||
docker:
|
||||
- image: cimg/python:3.7.10 # Oldest available 3.7 with Ubuntu 20.04 (for GLIBC and Rust) at CirlceCI
|
||||
|
||||
jobs:
|
||||
check-codestyle:
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
# Require an exact match. While an out of date cache might speed up the build,
|
||||
# there's no way to clean out old packages, so the cache grows every time something
|
||||
# changes.
|
||||
- v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
|
||||
# Build the rust code, including test binaries
|
||||
- run:
|
||||
@@ -128,7 +128,7 @@ jobs:
|
||||
|
||||
- save_cache:
|
||||
name: Save rust cache
|
||||
key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
paths:
|
||||
- ~/.cargo/registry
|
||||
- ~/.cargo/git
|
||||
@@ -182,9 +182,27 @@ jobs:
|
||||
paths:
|
||||
- "*"
|
||||
|
||||
check-python:
|
||||
executor: zenith-python-executor
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Install deps
|
||||
working_directory: test_runner
|
||||
command: pipenv --python 3.7 install --dev
|
||||
- run:
|
||||
name: Run yapf to ensure code format
|
||||
when: always
|
||||
working_directory: test_runner
|
||||
command: pipenv run yapf --recursive --diff .
|
||||
- run:
|
||||
name: Run mypy to check types
|
||||
when: always
|
||||
working_directory: test_runner
|
||||
command: pipenv run mypy .
|
||||
|
||||
run-pytest:
|
||||
#description: "Run pytest"
|
||||
executor: python/default
|
||||
executor: zenith-python-executor
|
||||
parameters:
|
||||
# pytest args to specify the tests to run.
|
||||
#
|
||||
@@ -219,11 +237,9 @@ jobs:
|
||||
steps:
|
||||
- run: git submodule update --init --depth 1
|
||||
- run:
|
||||
name: Install pipenv & deps
|
||||
name: Install deps
|
||||
working_directory: test_runner
|
||||
command: |
|
||||
pip install pipenv
|
||||
pipenv install
|
||||
command: pipenv --python 3.7 install
|
||||
- run:
|
||||
name: Run pytest
|
||||
working_directory: test_runner
|
||||
@@ -241,8 +257,6 @@ jobs:
|
||||
if << parameters.run_in_parallel >>; then
|
||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||
fi;
|
||||
./netstat-script.sh &
|
||||
NS_PID=$!
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
@@ -254,8 +268,6 @@ jobs:
|
||||
# -s is not used to prevent pytest from capturing output, because tests are running
|
||||
# in parallel and logs are mixed between different tests
|
||||
pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
|
||||
kill $NS_PID
|
||||
awk '/===/ {if (count) print count; print; count=0; next} {count++} END {print count}' $TEST_OUTPUT/netstat.stdout > $TEST_OUTPUT/netstat_stats.stdout
|
||||
- run:
|
||||
# CircleCI artifacts are preserved one file at a time, so skipping
|
||||
# this step isn't a good idea. If you want to extract the
|
||||
@@ -337,6 +349,7 @@ workflows:
|
||||
build_and_test:
|
||||
jobs:
|
||||
- check-codestyle
|
||||
- check-python
|
||||
- build-postgres:
|
||||
name: build-postgres-<< matrix.build_type >>
|
||||
matrix:
|
||||
|
||||
457
Cargo.lock
generated
457
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -38,8 +38,6 @@ RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl
|
||||
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
# TODO: temporary alias for compatibility, see https://github.com/zenithdb/zenith/pull/740
|
||||
RUN ln -s /usr/local/bin/safekeeper /usr/local/bin/wal_acceptor
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install postgres_install
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
@@ -82,8 +82,6 @@ RUN apk add --update openssl build-base libseccomp-dev
|
||||
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
# TODO: temporary alias for compatibility, see https://github.com/zenithdb/zenith/pull/740
|
||||
RUN ln -s /usr/local/bin/safekeeper /usr/local/bin/wal_acceptor
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install /usr/local
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
26
README.md
26
README.md
@@ -28,12 +28,12 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
|
||||
libssl-dev clang pkg-config libpq-dev
|
||||
```
|
||||
|
||||
[Rust] 1.52 or later is also required.
|
||||
[Rust] 1.55 or later is also required.
|
||||
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
|
||||
|
||||
To run the integration tests (not required to use the code), install
|
||||
Python (3.6 or higher), and install python3 packages with `pipenv` using `pipenv install` in the project directory.
|
||||
Python (3.7 or higher), and install python3 packages with `pipenv` using `pipenv install` in the project directory.
|
||||
|
||||
2. Build zenith and patched postgres
|
||||
```sh
|
||||
@@ -47,17 +47,26 @@ make -j5
|
||||
# Create repository in .zenith with proper paths to binaries and data
|
||||
# Later that would be responsibility of a package install script
|
||||
> ./target/debug/zenith init
|
||||
initializing tenantid c03ba6b7ad4c5e9cf556f059ade44229
|
||||
created initial timeline 5b014a9e41b4b63ce1a1febc04503636 timeline.lsn 0/169C3C8
|
||||
created main branch
|
||||
pageserver init succeeded
|
||||
|
||||
# start pageserver
|
||||
# start pageserver and safekeeper
|
||||
> ./target/debug/zenith start
|
||||
Starting pageserver at '127.0.0.1:64000' in .zenith
|
||||
Starting pageserver at 'localhost:64000' in '.zenith'
|
||||
Pageserver started
|
||||
initializing for single for 7676
|
||||
Starting safekeeper at 'localhost:5454' in '.zenith/safekeepers/single'
|
||||
Safekeeper started
|
||||
|
||||
# start postgres on top on the pageserver
|
||||
# start postgres compute node
|
||||
> ./target/debug/zenith pg start main
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=stas'
|
||||
Starting new postgres main on main...
|
||||
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/c03ba6b7ad4c5e9cf556f059ade44229/main port=55432
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
|
||||
# check list of running postgres instances
|
||||
> ./target/debug/zenith pg list
|
||||
@@ -108,10 +117,9 @@ postgres=# insert into t values(2,2);
|
||||
INSERT 0 1
|
||||
```
|
||||
|
||||
6. If you want to run tests afterwards (see below), you have to stop pageserver and all postgres instances you have just started:
|
||||
6. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
|
||||
you have just started. You can stop them all with one command:
|
||||
```sh
|
||||
> ./target/debug/zenith pg stop migration_check
|
||||
> ./target/debug/zenith pg stop main
|
||||
> ./target/debug/zenith stop
|
||||
```
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ regex = "1"
|
||||
anyhow = "1.0"
|
||||
thiserror = "1"
|
||||
bytes = "1.0.1"
|
||||
nix = "0.20"
|
||||
nix = "0.23"
|
||||
url = "2.2.2"
|
||||
hex = { version = "0.4.3", features = ["serde"] }
|
||||
reqwest = { version = "0.11", features = ["blocking", "json"] }
|
||||
|
||||
20
control_plane/safekeepers.conf
Normal file
20
control_plane/safekeepers.conf
Normal file
@@ -0,0 +1,20 @@
|
||||
# Page server and three safekeepers.
|
||||
[pageserver]
|
||||
pg_port = 64000
|
||||
http_port = 9898
|
||||
auth_type = 'Trust'
|
||||
|
||||
[[safekeepers]]
|
||||
name = 'sk1'
|
||||
pg_port = 5454
|
||||
http_port = 7676
|
||||
|
||||
[[safekeepers]]
|
||||
name = 'sk2'
|
||||
pg_port = 5455
|
||||
http_port = 7677
|
||||
|
||||
[[safekeepers]]
|
||||
name = 'sk3'
|
||||
pg_port = 5456
|
||||
http_port = 7678
|
||||
11
control_plane/simple.conf
Normal file
11
control_plane/simple.conf
Normal file
@@ -0,0 +1,11 @@
|
||||
# Minimal zenith environment with one safekeeper. This is equivalent to the built-in
|
||||
# defaults that you get with no --config
|
||||
[pageserver]
|
||||
pg_port = 64000
|
||||
http_port = 9898
|
||||
auth_type = 'Trust'
|
||||
|
||||
[[safekeepers]]
|
||||
name = 'single'
|
||||
pg_port = 5454
|
||||
http_port = 7676
|
||||
@@ -39,8 +39,6 @@ impl ComputeControlPlane {
|
||||
// | |- <tenant_id>
|
||||
// | | |- <branch name>
|
||||
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
|
||||
// TODO: since pageserver do not have config file yet we believe here that
|
||||
// it is running on default port. Change that when pageserver will have config.
|
||||
let pageserver = Arc::new(PageServerNode::from_env(&env));
|
||||
|
||||
let mut nodes = BTreeMap::default();
|
||||
@@ -75,40 +73,59 @@ impl ComputeControlPlane {
|
||||
.unwrap_or(self.base_port)
|
||||
}
|
||||
|
||||
pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
|
||||
ComputeControlPlane {
|
||||
base_port: 65431,
|
||||
pageserver: Arc::clone(pageserver),
|
||||
nodes: BTreeMap::new(),
|
||||
env: local_env.clone(),
|
||||
// FIXME: see also parse_point_in_time in branches.rs.
|
||||
fn parse_point_in_time(
|
||||
&self,
|
||||
tenantid: ZTenantId,
|
||||
s: &str,
|
||||
) -> Result<(ZTimelineId, Option<Lsn>)> {
|
||||
let mut strings = s.split('@');
|
||||
let name = strings.next().unwrap();
|
||||
|
||||
let lsn: Option<Lsn>;
|
||||
if let Some(lsnstr) = strings.next() {
|
||||
lsn = Some(
|
||||
Lsn::from_str(lsnstr)
|
||||
.with_context(|| "invalid LSN in point-in-time specification")?,
|
||||
);
|
||||
} else {
|
||||
lsn = None
|
||||
}
|
||||
|
||||
// Resolve the timeline ID, given the human-readable branch name
|
||||
let timeline_id = self
|
||||
.pageserver
|
||||
.branch_get_by_name(&tenantid, name)?
|
||||
.timeline_id;
|
||||
|
||||
Ok((timeline_id, lsn))
|
||||
}
|
||||
|
||||
pub fn new_node(
|
||||
&mut self,
|
||||
tenantid: ZTenantId,
|
||||
branch_name: &str,
|
||||
name: &str,
|
||||
timeline_spec: &str,
|
||||
port: Option<u16>,
|
||||
) -> Result<Arc<PostgresNode>> {
|
||||
let timeline_id = self
|
||||
.pageserver
|
||||
.branch_get_by_name(&tenantid, branch_name)?
|
||||
.timeline_id;
|
||||
// Resolve the human-readable timeline spec into timeline ID and LSN
|
||||
let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?;
|
||||
|
||||
let port = port.unwrap_or_else(|| self.get_port());
|
||||
let node = Arc::new(PostgresNode {
|
||||
name: branch_name.to_owned(),
|
||||
name: name.to_owned(),
|
||||
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
|
||||
env: self.env.clone(),
|
||||
pageserver: Arc::clone(&self.pageserver),
|
||||
is_test: false,
|
||||
timelineid: timeline_id,
|
||||
timelineid,
|
||||
lsn,
|
||||
tenantid,
|
||||
uses_wal_proposer: false,
|
||||
});
|
||||
|
||||
node.create_pgdata()?;
|
||||
node.setup_pg_conf(self.env.auth_type)?;
|
||||
node.setup_pg_conf(self.env.pageserver.auth_type)?;
|
||||
|
||||
self.nodes
|
||||
.insert((tenantid, node.name.clone()), Arc::clone(&node));
|
||||
@@ -127,6 +144,7 @@ pub struct PostgresNode {
|
||||
pageserver: Arc<PageServerNode>,
|
||||
is_test: bool,
|
||||
pub timelineid: ZTimelineId,
|
||||
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
|
||||
pub tenantid: ZTenantId,
|
||||
uses_wal_proposer: bool,
|
||||
}
|
||||
@@ -161,9 +179,12 @@ impl PostgresNode {
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
|
||||
let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
|
||||
|
||||
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
let recovery_target_lsn: Option<Lsn> =
|
||||
conf.parse_field_optional("recovery_target_lsn", &context)?;
|
||||
|
||||
// ok now
|
||||
Ok(PostgresNode {
|
||||
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
|
||||
@@ -172,12 +193,13 @@ impl PostgresNode {
|
||||
pageserver: Arc::clone(pageserver),
|
||||
is_test: false,
|
||||
timelineid,
|
||||
lsn: recovery_target_lsn,
|
||||
tenantid,
|
||||
uses_wal_proposer,
|
||||
})
|
||||
}
|
||||
|
||||
fn sync_walkeepers(&self) -> Result<Lsn> {
|
||||
fn sync_safekeepers(&self) -> Result<Lsn> {
|
||||
let pg_path = self.env.pg_bin_dir().join("postgres");
|
||||
let sync_handle = Command::new(pg_path)
|
||||
.arg("--sync-safekeepers")
|
||||
@@ -202,7 +224,7 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
|
||||
println!("Walkeepers synced on {}", lsn);
|
||||
println!("Safekeepers synced on {}", lsn);
|
||||
Ok(lsn)
|
||||
}
|
||||
|
||||
@@ -233,7 +255,7 @@ impl PostgresNode {
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
tar::Archive::new(copyreader)
|
||||
.unpack(&self.pgdata())
|
||||
.with_context(|| "extracting page backup failed")?;
|
||||
.with_context(|| "extracting base backup failed")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -301,11 +323,30 @@ impl PostgresNode {
|
||||
conf.append("zenith.page_server_connstring", &pageserver_connstr);
|
||||
conf.append("zenith.zenith_tenant", &self.tenantid.to_string());
|
||||
conf.append("zenith.zenith_timeline", &self.timelineid.to_string());
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
conf.append_line("");
|
||||
|
||||
// Configure the node to stream WAL directly to the pageserver
|
||||
conf.append("synchronous_standby_names", "pageserver"); // TODO: add a new function arg?
|
||||
conf.append("zenith.callmemaybe_connstring", &self.connstr());
|
||||
if !self.env.safekeepers.is_empty() {
|
||||
// Configure the node to connect to the safekeepers
|
||||
conf.append("synchronous_standby_names", "walproposer");
|
||||
|
||||
let wal_acceptors = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.map(|sk| format!("localhost:{}", sk.pg_port))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
conf.append("wal_acceptors", &wal_acceptors);
|
||||
} else {
|
||||
// Configure the node to stream WAL directly to the pageserver
|
||||
// This isn't really a supported configuration, but can be useful for
|
||||
// testing.
|
||||
conf.append("synchronous_standby_names", "pageserver");
|
||||
conf.append("zenith.callmemaybe_connstring", &self.connstr());
|
||||
}
|
||||
|
||||
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
|
||||
file.write_all(conf.to_string().as_bytes())?;
|
||||
@@ -314,12 +355,14 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn load_basebackup(&self) -> Result<()> {
|
||||
let lsn = if self.uses_wal_proposer {
|
||||
let backup_lsn = if let Some(lsn) = self.lsn {
|
||||
Some(lsn)
|
||||
} else if self.uses_wal_proposer {
|
||||
// LSN 0 means that it is bootstrap and we need to download just
|
||||
// latest data from the pageserver. That is a bit clumsy but whole bootstrap
|
||||
// procedure evolves quite actively right now, so let's think about it again
|
||||
// when things would be more stable (TODO).
|
||||
let lsn = self.sync_walkeepers()?;
|
||||
let lsn = self.sync_safekeepers()?;
|
||||
if lsn == Lsn(0) {
|
||||
None
|
||||
} else {
|
||||
@@ -329,7 +372,7 @@ impl PostgresNode {
|
||||
None
|
||||
};
|
||||
|
||||
self.do_basebackup(lsn)?;
|
||||
self.do_basebackup(backup_lsn)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -406,6 +449,10 @@ impl PostgresNode {
|
||||
// 3. Load basebackup
|
||||
self.load_basebackup()?;
|
||||
|
||||
if self.lsn.is_some() {
|
||||
File::create(self.pgdata().join("standby.signal"))?;
|
||||
}
|
||||
|
||||
// 4. Finally start the compute node postgres
|
||||
println!("Starting postgres node at '{}'", self.connstr());
|
||||
self.pg_ctl(&["start"], auth_token)
|
||||
|
||||
@@ -13,6 +13,7 @@ use std::path::Path;
|
||||
pub mod compute;
|
||||
pub mod local_env;
|
||||
pub mod postgresql_conf;
|
||||
pub mod safekeeper;
|
||||
pub mod storage;
|
||||
|
||||
/// Read a PID file
|
||||
|
||||
@@ -7,46 +7,102 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::env;
|
||||
use std::fmt::Write;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use zenith_utils::auth::{encode_from_key_path, Claims, Scope};
|
||||
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
//
|
||||
// This data structures represent deserialized zenith CLI config
|
||||
// This data structures represents zenith CLI config
|
||||
//
|
||||
// It is deserialized from the .zenith/config file, or the config file passed
|
||||
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Pageserver connection settings
|
||||
pub pageserver_pg_port: u16,
|
||||
pub pageserver_http_port: u16,
|
||||
|
||||
// Base directory for both pageserver and compute nodes
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
// compute nodes).
|
||||
//
|
||||
// This is not stored in the config file. Rather, this is the path where the
|
||||
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
|
||||
// '.zenith' if not given.
|
||||
#[serde(skip)]
|
||||
pub base_data_dir: PathBuf,
|
||||
|
||||
// Path to postgres distribution. It's expected that "bin", "include",
|
||||
// "lib", "share" from postgres distribution are there. If at some point
|
||||
// in time we will be able to run against vanilla postgres we may split that
|
||||
// to four separate paths and match OS-specific installation layout.
|
||||
#[serde(default)]
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
|
||||
// Path to pageserver binary.
|
||||
#[serde(default)]
|
||||
pub zenith_distrib_dir: PathBuf,
|
||||
|
||||
// keeping tenant id in config to reduce copy paste when running zenith locally with single tenant
|
||||
#[serde(with = "hex")]
|
||||
pub tenantid: ZTenantId,
|
||||
// Default tenant ID to use with the 'zenith' command line utility, when
|
||||
// --tenantid is not explicitly specified.
|
||||
#[serde(with = "opt_tenantid_serde")]
|
||||
#[serde(default)]
|
||||
pub default_tenantid: Option<ZTenantId>,
|
||||
|
||||
// jwt auth token used for communication with pageserver
|
||||
pub auth_token: String,
|
||||
// used to issue tokens during e.g pg start
|
||||
#[serde(default)]
|
||||
pub private_key_path: PathBuf,
|
||||
|
||||
pub pageserver: PageServerConf,
|
||||
|
||||
#[serde(default)]
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct PageServerConf {
|
||||
// Pageserver connection settings
|
||||
pub pg_port: u16,
|
||||
pub http_port: u16,
|
||||
|
||||
// used to determine which auth type is used
|
||||
pub auth_type: AuthType,
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
pub private_key_path: PathBuf,
|
||||
// jwt auth token used for communication with pageserver
|
||||
pub auth_token: String,
|
||||
}
|
||||
|
||||
impl Default for PageServerConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
pg_port: 0,
|
||||
http_port: 0,
|
||||
auth_type: AuthType::Trust,
|
||||
auth_token: "".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct SafekeeperConf {
|
||||
pub name: String,
|
||||
pub pg_port: u16,
|
||||
pub http_port: u16,
|
||||
pub sync: bool,
|
||||
}
|
||||
|
||||
impl Default for SafekeeperConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: "".to_string(),
|
||||
pg_port: 0,
|
||||
http_port: 0,
|
||||
sync: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LocalEnv {
|
||||
@@ -62,6 +118,10 @@ impl LocalEnv {
|
||||
Ok(self.zenith_distrib_dir.join("pageserver"))
|
||||
}
|
||||
|
||||
pub fn safekeeper_bin(&self) -> Result<PathBuf> {
|
||||
Ok(self.zenith_distrib_dir.join("safekeeper"))
|
||||
}
|
||||
|
||||
pub fn pg_data_dirs_path(&self) -> PathBuf {
|
||||
self.base_data_dir.join("pgdatadirs").join("tenants")
|
||||
}
|
||||
@@ -76,6 +136,187 @@ impl LocalEnv {
|
||||
pub fn pageserver_data_dir(&self) -> PathBuf {
|
||||
self.base_data_dir.clone()
|
||||
}
|
||||
|
||||
pub fn safekeeper_data_dir(&self, node_name: &str) -> PathBuf {
|
||||
self.base_data_dir.join("safekeepers").join(node_name)
|
||||
}
|
||||
|
||||
/// Create a LocalEnv from a config file.
|
||||
///
|
||||
/// Unlike 'load_config', this function fills in any defaults that are missing
|
||||
/// from the config file.
|
||||
pub fn create_config(toml: &str) -> Result<LocalEnv> {
|
||||
let mut env: LocalEnv = toml::from_str(toml)?;
|
||||
|
||||
// Find postgres binaries.
|
||||
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
|
||||
if env.pg_distrib_dir == Path::new("") {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
env.pg_distrib_dir = postgres_bin.into();
|
||||
} else {
|
||||
let cwd = env::current_dir()?;
|
||||
env.pg_distrib_dir = cwd.join("tmp_install")
|
||||
}
|
||||
}
|
||||
if !env.pg_distrib_dir.join("bin/postgres").exists() {
|
||||
anyhow::bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
env.pg_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
|
||||
// Find zenith binaries.
|
||||
if env.zenith_distrib_dir == Path::new("") {
|
||||
env.zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
|
||||
}
|
||||
if !env.zenith_distrib_dir.join("pageserver").exists() {
|
||||
anyhow::bail!("Can't find pageserver binary.");
|
||||
}
|
||||
if !env.zenith_distrib_dir.join("safekeeper").exists() {
|
||||
anyhow::bail!("Can't find safekeeper binary.");
|
||||
}
|
||||
|
||||
// If no initial tenant ID was given, generate it.
|
||||
if env.default_tenantid.is_none() {
|
||||
env.default_tenantid = Some(ZTenantId::generate());
|
||||
}
|
||||
|
||||
env.base_data_dir = base_path();
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
/// Locate and load config
|
||||
pub fn load_config() -> Result<LocalEnv> {
|
||||
let repopath = base_path();
|
||||
|
||||
if !repopath.exists() {
|
||||
anyhow::bail!(
|
||||
"Zenith config is not found in {}. You need to run 'zenith init' first",
|
||||
repopath.to_str().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: check that it looks like a zenith repository
|
||||
|
||||
// load and parse file
|
||||
let config = fs::read_to_string(repopath.join("config"))?;
|
||||
let mut env: LocalEnv = toml::from_str(config.as_str())?;
|
||||
|
||||
env.base_data_dir = repopath;
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
// this function is used only for testing purposes in CLI e g generate tokens during init
|
||||
pub fn generate_auth_token(&self, claims: &Claims) -> Result<String> {
|
||||
let private_key_path = if self.private_key_path.is_absolute() {
|
||||
self.private_key_path.to_path_buf()
|
||||
} else {
|
||||
self.base_data_dir.join(&self.private_key_path)
|
||||
};
|
||||
|
||||
let key_data = fs::read(private_key_path)?;
|
||||
encode_from_key_file(claims, &key_data)
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize a new Zenith repository
|
||||
//
|
||||
pub fn init(&mut self) -> Result<()> {
|
||||
// check if config already exists
|
||||
let base_path = &self.base_data_dir;
|
||||
if base_path == Path::new("") {
|
||||
anyhow::bail!("repository base path is missing");
|
||||
}
|
||||
if base_path.exists() {
|
||||
anyhow::bail!(
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.to_str().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fs::create_dir(&base_path)?;
|
||||
|
||||
// generate keys for jwt
|
||||
// openssl genrsa -out private_key.pem 2048
|
||||
let private_key_path;
|
||||
if self.private_key_path == PathBuf::new() {
|
||||
private_key_path = base_path.join("auth_private_key.pem");
|
||||
let keygen_output = Command::new("openssl")
|
||||
.arg("genrsa")
|
||||
.args(&["-out", private_key_path.to_str().unwrap()])
|
||||
.arg("2048")
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
self.private_key_path = Path::new("auth_private_key.pem").to_path_buf();
|
||||
|
||||
let public_key_path = base_path.join("auth_public_key.pem");
|
||||
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
|
||||
let keygen_output = Command::new("openssl")
|
||||
.arg("rsa")
|
||||
.args(&["-in", private_key_path.to_str().unwrap()])
|
||||
.arg("-pubout")
|
||||
.args(&["-outform", "PEM"])
|
||||
.args(&["-out", public_key_path.to_str().unwrap()])
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
self.pageserver.auth_token =
|
||||
self.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
|
||||
|
||||
fs::create_dir_all(self.pg_data_dirs_path())?;
|
||||
|
||||
for safekeeper in self.safekeepers.iter() {
|
||||
fs::create_dir_all(self.safekeeper_data_dir(&safekeeper.name))?;
|
||||
}
|
||||
|
||||
let mut conf_content = String::new();
|
||||
|
||||
// Currently, the user first passes a config file with 'zenith init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
write!(
|
||||
&mut conf_content,
|
||||
r#"# This file describes a locale deployment of the page server
|
||||
# and safekeeeper node. It is read by the 'zenith' command-line
|
||||
# utility.
|
||||
"#
|
||||
)?;
|
||||
|
||||
// Convert the LocalEnv to a toml file.
|
||||
//
|
||||
// This could be as simple as this:
|
||||
//
|
||||
// conf_content += &toml::to_string_pretty(env)?;
|
||||
//
|
||||
// But it results in a "values must be emitted before tables". I'm not sure
|
||||
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
|
||||
// Maybe rust reorders the fields to squeeze avoid padding or something?
|
||||
// In any case, converting to toml::Value first, and serializing that, works.
|
||||
// See https://github.com/alexcrichton/toml-rs/issues/142
|
||||
conf_content += &toml::to_string_pretty(&toml::Value::try_from(&self)?)?;
|
||||
|
||||
fs::write(base_path.join("config"), conf_content)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn base_path() -> PathBuf {
|
||||
@@ -85,118 +326,29 @@ fn base_path() -> PathBuf {
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize a new Zenith repository
|
||||
//
|
||||
pub fn init(
|
||||
pageserver_pg_port: u16,
|
||||
pageserver_http_port: u16,
|
||||
tenantid: ZTenantId,
|
||||
auth_type: AuthType,
|
||||
) -> Result<()> {
|
||||
// check if config already exists
|
||||
let base_path = base_path();
|
||||
if base_path.exists() {
|
||||
anyhow::bail!(
|
||||
"{} already exists. Perhaps already initialized?",
|
||||
base_path.to_str().unwrap()
|
||||
);
|
||||
/// Serde routines for Option<ZTenantId>. The serialized form is a hex string.
|
||||
mod opt_tenantid_serde {
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::str::FromStr;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
pub fn serialize<S>(tenantid: &Option<ZTenantId>, ser: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
tenantid.map(|t| t.to_string()).serialize(ser)
|
||||
}
|
||||
fs::create_dir(&base_path)?;
|
||||
|
||||
// ok, now check that expected binaries are present
|
||||
|
||||
// Find postgres binaries. Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
|
||||
let pg_distrib_dir: PathBuf = {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
postgres_bin.into()
|
||||
} else {
|
||||
let cwd = env::current_dir()?;
|
||||
cwd.join("tmp_install")
|
||||
pub fn deserialize<'de, D>(des: D) -> Result<Option<ZTenantId>, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s: Option<String> = Option::deserialize(des)?;
|
||||
if let Some(s) = s {
|
||||
return Ok(Some(
|
||||
ZTenantId::from_str(&s).map_err(serde::de::Error::custom)?,
|
||||
));
|
||||
}
|
||||
};
|
||||
if !pg_distrib_dir.join("bin/postgres").exists() {
|
||||
anyhow::bail!("Can't find postgres binary at {:?}", pg_distrib_dir);
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
// generate keys for jwt
|
||||
// openssl genrsa -out private_key.pem 2048
|
||||
let private_key_path = base_path.join("auth_private_key.pem");
|
||||
let keygen_output = Command::new("openssl")
|
||||
.arg("genrsa")
|
||||
.args(&["-out", private_key_path.to_str().unwrap()])
|
||||
.arg("2048")
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
let public_key_path = base_path.join("auth_public_key.pem");
|
||||
// openssl rsa -in private_key.pem -pubout -outform PEM -out public_key.pem
|
||||
let keygen_output = Command::new("openssl")
|
||||
.arg("rsa")
|
||||
.args(&["-in", private_key_path.to_str().unwrap()])
|
||||
.arg("-pubout")
|
||||
.args(&["-outform", "PEM"])
|
||||
.args(&["-out", public_key_path.to_str().unwrap()])
|
||||
.stdout(Stdio::null())
|
||||
.output()
|
||||
.with_context(|| "failed to generate auth private key")?;
|
||||
if !keygen_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"openssl failed: '{}'",
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
let auth_token =
|
||||
encode_from_key_path(&Claims::new(None, Scope::PageServerApi), &private_key_path)?;
|
||||
|
||||
// Find zenith binaries.
|
||||
let zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
|
||||
if !zenith_distrib_dir.join("pageserver").exists() {
|
||||
anyhow::bail!("Can't find pageserver binary.",);
|
||||
}
|
||||
|
||||
let conf = LocalEnv {
|
||||
pageserver_pg_port,
|
||||
pageserver_http_port,
|
||||
pg_distrib_dir,
|
||||
zenith_distrib_dir,
|
||||
base_data_dir: base_path,
|
||||
tenantid,
|
||||
auth_token,
|
||||
auth_type,
|
||||
private_key_path,
|
||||
};
|
||||
|
||||
fs::create_dir_all(conf.pg_data_dirs_path())?;
|
||||
|
||||
let toml = toml::to_string_pretty(&conf)?;
|
||||
fs::write(conf.base_data_dir.join("config"), toml)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Locate and load config
|
||||
pub fn load_config() -> Result<LocalEnv> {
|
||||
let repopath = base_path();
|
||||
|
||||
if !repopath.exists() {
|
||||
anyhow::bail!(
|
||||
"Zenith config is not found in {}. You need to run 'zenith init' first",
|
||||
repopath.to_str().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: check that it looks like a zenith repository
|
||||
|
||||
// load and parse file
|
||||
let config = fs::read_to_string(repopath.join("config"))?;
|
||||
toml::from_str(config.as_str()).map_err(|e| e.into())
|
||||
}
|
||||
|
||||
@@ -83,6 +83,22 @@ impl PostgresConf {
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
|
||||
}
|
||||
|
||||
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
if let Some(val) = self.get(field_name) {
|
||||
let result = val
|
||||
.parse::<T>()
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
|
||||
|
||||
Ok(Some(result))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Note: if you call this multiple times for the same option, the config
|
||||
/// file will a line for each call. It would be nice to have a function
|
||||
|
||||
277
control_plane/src/safekeeper.rs
Normal file
277
control_plane/src/safekeeper.rs
Normal file
@@ -0,0 +1,277 @@
|
||||
use std::io::Write;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::{io, result, thread};
|
||||
|
||||
use anyhow::bail;
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use postgres::Config;
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::read_pidfile;
|
||||
use crate::storage::PageServerNode;
|
||||
use zenith_utils::connstring::connection_address;
|
||||
use zenith_utils::connstring::connection_host_port;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SafekeeperHttpError {
|
||||
#[error("Reqwest error: {0}")]
|
||||
Transport(#[from] reqwest::Error),
|
||||
|
||||
#[error("Error: {0}")]
|
||||
Response(String),
|
||||
}
|
||||
|
||||
type Result<T> = result::Result<T, SafekeeperHttpError>;
|
||||
|
||||
pub trait ResponseErrorMessageExt: Sized {
|
||||
fn error_from_body(self) -> Result<Self>;
|
||||
}
|
||||
|
||||
impl ResponseErrorMessageExt for Response {
|
||||
fn error_from_body(self) -> Result<Self> {
|
||||
let status = self.status();
|
||||
if !(status.is_client_error() || status.is_server_error()) {
|
||||
return Ok(self);
|
||||
}
|
||||
|
||||
// reqwest do not export it's error construction utility functions, so lets craft the message ourselves
|
||||
let url = self.url().to_owned();
|
||||
Err(SafekeeperHttpError::Response(
|
||||
match self.json::<HttpErrorBody>() {
|
||||
Ok(err_body) => format!("Error: {}", err_body.msg),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Control routines for safekeeper.
|
||||
//
|
||||
// Used in CLI and tests.
|
||||
//
|
||||
#[derive(Debug)]
|
||||
pub struct SafekeeperNode {
|
||||
pub name: String,
|
||||
|
||||
pub conf: SafekeeperConf,
|
||||
|
||||
pub pg_connection_config: Config,
|
||||
pub env: LocalEnv,
|
||||
pub http_client: Client,
|
||||
pub http_base_url: String,
|
||||
|
||||
pub pageserver: Arc<PageServerNode>,
|
||||
}
|
||||
|
||||
impl SafekeeperNode {
|
||||
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
|
||||
let pageserver = Arc::new(PageServerNode::from_env(env));
|
||||
|
||||
println!("initializing for {} for {}", conf.name, conf.http_port);
|
||||
|
||||
SafekeeperNode {
|
||||
name: conf.name.clone(),
|
||||
conf: conf.clone(),
|
||||
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
|
||||
env: env.clone(),
|
||||
http_client: Client::new(),
|
||||
http_base_url: format!("http://localhost:{}/v1", conf.http_port),
|
||||
pageserver,
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct libpq connection string for connecting to this safekeeper.
|
||||
fn safekeeper_connection_config(port: u16) -> Config {
|
||||
// TODO safekeeper authentication not implemented yet
|
||||
format!("postgresql://no_user@localhost:{}/no_db", port)
|
||||
.parse()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn datadir_path(&self) -> PathBuf {
|
||||
self.env.safekeeper_data_dir(&self.name)
|
||||
}
|
||||
|
||||
pub fn pid_file(&self) -> PathBuf {
|
||||
self.datadir_path().join("safekeeper.pid")
|
||||
}
|
||||
|
||||
pub fn start(&self) -> anyhow::Result<()> {
|
||||
print!(
|
||||
"Starting safekeeper at '{}' in '{}'",
|
||||
connection_address(&self.pg_connection_config),
|
||||
self.datadir_path().display()
|
||||
);
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
// Configure connection to page server
|
||||
//
|
||||
// FIXME: We extract the host and port from the connection string instead of using
|
||||
// the connection string directly, because the 'safekeeper' binary expects
|
||||
// host:port format. That's a bit silly when we already have a full libpq connection
|
||||
// string at hand.
|
||||
let pageserver_conn = {
|
||||
let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
|
||||
format!("{}:{}", host, port)
|
||||
};
|
||||
|
||||
let listen_pg = format!("localhost:{}", self.conf.pg_port);
|
||||
let listen_http = format!("localhost:{}", self.conf.http_port);
|
||||
|
||||
let mut cmd: &mut Command = &mut Command::new(self.env.safekeeper_bin()?);
|
||||
cmd = cmd
|
||||
.args(&["-D", self.datadir_path().to_str().unwrap()])
|
||||
.args(&["--listen-pg", &listen_pg])
|
||||
.args(&["--listen-http", &listen_http])
|
||||
.args(&["--pageserver", &pageserver_conn])
|
||||
.args(&["--recall", "1 second"])
|
||||
.arg("--daemonize")
|
||||
.env_clear()
|
||||
.env("RUST_BACKTRACE", "1");
|
||||
if !self.conf.sync {
|
||||
cmd = cmd.arg("--no-sync");
|
||||
}
|
||||
|
||||
if self.env.pageserver.auth_type == AuthType::ZenithJWT {
|
||||
cmd.env("PAGESERVER_AUTH_TOKEN", &self.env.pageserver.auth_token);
|
||||
}
|
||||
|
||||
if !cmd.status()?.success() {
|
||||
bail!(
|
||||
"Safekeeper failed to start. See '{}' for details.",
|
||||
self.datadir_path().join("safekeeper.log").display()
|
||||
);
|
||||
}
|
||||
|
||||
// It takes a while for the safekeeper to start up. Wait until it is
|
||||
// open for business.
|
||||
const RETRIES: i8 = 15;
|
||||
for retries in 1..RETRIES {
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
println!("\nSafekeeper started");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
match err {
|
||||
SafekeeperHttpError::Transport(err) => {
|
||||
if err.is_connect() && retries < 5 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
} else {
|
||||
if retries == 5 {
|
||||
println!() // put a line break after dots for second message
|
||||
}
|
||||
println!(
|
||||
"Safekeeper not responding yet, err {} retrying ({})...",
|
||||
err, retries
|
||||
);
|
||||
}
|
||||
}
|
||||
SafekeeperHttpError::Response(msg) => {
|
||||
bail!("safekeeper failed to start: {} ", msg)
|
||||
}
|
||||
}
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
bail!("safekeeper failed to start in {} seconds", RETRIES);
|
||||
}
|
||||
|
||||
///
|
||||
/// Stop the server.
|
||||
///
|
||||
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
|
||||
/// Otherwise we use SIGTERM, triggering a clean shutdown
|
||||
///
|
||||
/// If the server is not running, returns success
|
||||
///
|
||||
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
let pid_file = self.pid_file();
|
||||
if !pid_file.exists() {
|
||||
println!("Safekeeper {} is already stopped", self.name);
|
||||
return Ok(());
|
||||
}
|
||||
let pid = read_pidfile(&pid_file)?;
|
||||
let pid = Pid::from_raw(pid);
|
||||
|
||||
let sig = if immediate {
|
||||
println!("Stop safekeeper immediately");
|
||||
Signal::SIGQUIT
|
||||
} else {
|
||||
println!("Stop safekeeper gracefully");
|
||||
Signal::SIGTERM
|
||||
};
|
||||
match kill(pid, sig) {
|
||||
Ok(_) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
println!(
|
||||
"Safekeeper with pid {} does not exist, but a PID file was found",
|
||||
pid
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to safekeeper with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
}
|
||||
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if safekeeper flushes a lot of data
|
||||
for _ in 0..100 {
|
||||
if let Err(_e) = TcpStream::connect(&address) {
|
||||
println!("Safekeeper stopped receiving connections");
|
||||
|
||||
//Now check status
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
println!("Safekeeper status is OK. Wait a bit.");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Safekeeper status is: {}", err);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("Safekeeper still receives connections");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
|
||||
bail!("Failed to stop safekeeper with pid {}", pid);
|
||||
}
|
||||
|
||||
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
|
||||
// TODO: authentication
|
||||
//if self.env.auth_type == AuthType::ZenithJWT {
|
||||
// builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
|
||||
//}
|
||||
self.http_client.request(method, url)
|
||||
}
|
||||
|
||||
pub fn check_status(&self) -> Result<()> {
|
||||
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
|
||||
.send()?
|
||||
.error_from_body()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ use std::time::Duration;
|
||||
use std::{io, result, thread};
|
||||
|
||||
use anyhow::{anyhow, bail};
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver::http::models::{BranchCreateRequest, TenantCreateRequest};
|
||||
@@ -20,6 +21,7 @@ use zenith_utils::zid::ZTenantId;
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::read_pidfile;
|
||||
use pageserver::branches::BranchInfo;
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use zenith_utils::connstring::connection_address;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@@ -62,7 +64,6 @@ impl ResponseErrorMessageExt for Response {
|
||||
//
|
||||
#[derive(Debug)]
|
||||
pub struct PageServerNode {
|
||||
pub kill_on_exit: bool,
|
||||
pub pg_connection_config: Config,
|
||||
pub env: LocalEnv,
|
||||
pub http_client: Client,
|
||||
@@ -71,34 +72,34 @@ pub struct PageServerNode {
|
||||
|
||||
impl PageServerNode {
|
||||
pub fn from_env(env: &LocalEnv) -> PageServerNode {
|
||||
let password = if env.auth_type == AuthType::ZenithJWT {
|
||||
&env.auth_token
|
||||
let password = if env.pageserver.auth_type == AuthType::ZenithJWT {
|
||||
&env.pageserver.auth_token
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
PageServerNode {
|
||||
kill_on_exit: false,
|
||||
pg_connection_config: Self::pageserver_connection_config(
|
||||
password,
|
||||
env.pageserver_pg_port,
|
||||
env.pageserver.pg_port,
|
||||
),
|
||||
env: env.clone(),
|
||||
http_client: Client::new(),
|
||||
http_base_url: format!("http://127.0.0.1:{}/v1", env.pageserver_http_port),
|
||||
http_base_url: format!("http://localhost:{}/v1", env.pageserver.http_port),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct libpq connection string for connecting to the pageserver.
|
||||
fn pageserver_connection_config(password: &str, port: u16) -> Config {
|
||||
format!("postgresql://no_user:{}@127.0.0.1:{}/no_db", password, port)
|
||||
format!("postgresql://no_user:{}@localhost:{}/no_db", password, port)
|
||||
.parse()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn init(&self, create_tenant: Option<&str>, enable_auth: bool) -> anyhow::Result<()> {
|
||||
pub fn init(&self, create_tenant: Option<&str>) -> anyhow::Result<()> {
|
||||
let mut cmd = Command::new(self.env.pageserver_bin()?);
|
||||
let listen_pg = format!("127.0.0.1:{}", self.env.pageserver_pg_port);
|
||||
let listen_http = format!("127.0.0.1:{}", self.env.pageserver_http_port);
|
||||
let listen_pg = format!("localhost:{}", self.env.pageserver.pg_port);
|
||||
let listen_http = format!("localhost:{}", self.env.pageserver.http_port);
|
||||
let mut args = vec![
|
||||
"--init",
|
||||
"-D",
|
||||
@@ -111,10 +112,11 @@ impl PageServerNode {
|
||||
&listen_http,
|
||||
];
|
||||
|
||||
if enable_auth {
|
||||
let auth_type_str = &self.env.pageserver.auth_type.to_string();
|
||||
if self.env.pageserver.auth_type != AuthType::Trust {
|
||||
args.extend(&["--auth-validation-public-key-path", "auth_public_key.pem"]);
|
||||
args.extend(&["--auth-type", "ZenithJWT"]);
|
||||
}
|
||||
args.extend(&["--auth-type", auth_type_str]);
|
||||
|
||||
if let Some(tenantid) = create_tenant {
|
||||
args.extend(&["--create-tenant", tenantid])
|
||||
@@ -152,7 +154,7 @@ impl PageServerNode {
|
||||
|
||||
let mut cmd = Command::new(self.env.pageserver_bin()?);
|
||||
cmd.args(&["-D", self.repo_path().to_str().unwrap()])
|
||||
.arg("-d")
|
||||
.arg("--daemonize")
|
||||
.env_clear()
|
||||
.env("RUST_BACKTRACE", "1");
|
||||
|
||||
@@ -199,19 +201,43 @@ impl PageServerNode {
|
||||
bail!("pageserver failed to start in {} seconds", RETRIES);
|
||||
}
|
||||
|
||||
///
|
||||
/// Stop the server.
|
||||
///
|
||||
/// If 'immediate' is true, we use SIGQUIT, killing the process immediately.
|
||||
/// Otherwise we use SIGTERM, triggering a clean shutdown
|
||||
///
|
||||
/// If the server is not running, returns success
|
||||
///
|
||||
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
let pid = read_pidfile(&self.pid_file())?;
|
||||
let pid = Pid::from_raw(pid);
|
||||
if immediate {
|
||||
let pid_file = self.pid_file();
|
||||
if !pid_file.exists() {
|
||||
println!("Pageserver is already stopped");
|
||||
return Ok(());
|
||||
}
|
||||
let pid = Pid::from_raw(read_pidfile(&pid_file)?);
|
||||
|
||||
let sig = if immediate {
|
||||
println!("Stop pageserver immediately");
|
||||
if kill(pid, Signal::SIGQUIT).is_err() {
|
||||
bail!("Failed to kill pageserver with pid {}", pid);
|
||||
}
|
||||
Signal::SIGQUIT
|
||||
} else {
|
||||
println!("Stop pageserver gracefully");
|
||||
if kill(pid, Signal::SIGTERM).is_err() {
|
||||
bail!("Failed to stop pageserver with pid {}", pid);
|
||||
Signal::SIGTERM
|
||||
};
|
||||
match kill(pid, sig) {
|
||||
Ok(_) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
println!(
|
||||
"Pageserver with pid {} does not exist, but a PID file was found",
|
||||
pid
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
}
|
||||
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
@@ -256,8 +282,8 @@ impl PageServerNode {
|
||||
|
||||
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
|
||||
let mut builder = self.http_client.request(method, url);
|
||||
if self.env.auth_type == AuthType::ZenithJWT {
|
||||
builder = builder.bearer_auth(&self.env.auth_token)
|
||||
if self.env.pageserver.auth_type == AuthType::ZenithJWT {
|
||||
builder = builder.bearer_auth(&self.env.pageserver.auth_token)
|
||||
}
|
||||
builder
|
||||
}
|
||||
@@ -269,7 +295,7 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn tenant_list(&self) -> Result<Vec<String>> {
|
||||
pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
|
||||
Ok(self
|
||||
.http_request(Method::GET, format!("{}/{}", self.http_base_url, "tenant"))
|
||||
.send()?
|
||||
@@ -332,12 +358,3 @@ impl PageServerNode {
|
||||
.json()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PageServerNode {
|
||||
fn drop(&mut self) {
|
||||
// TODO Looks like this flag is never set
|
||||
if self.kill_on_exit {
|
||||
let _ = self.stop(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
clap = "2.33.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.11", features = ["process", "macros", "fs", "rt"] }
|
||||
tokio = { version = "1.11", features = ["process", "macros", "fs", "rt", "io-util"] }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
@@ -37,6 +37,7 @@ async-trait = "0.1"
|
||||
const_format = "0.2.21"
|
||||
tracing = "0.1.27"
|
||||
signal-hook = {version = "0.3.10", features = ["extended-siginfo"] }
|
||||
url = "2"
|
||||
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
@@ -45,3 +46,4 @@ workspace_hack = { path = "../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
hex-literal = "0.3"
|
||||
tempfile = "3.2"
|
||||
|
||||
@@ -41,7 +41,7 @@ Legend:
|
||||
+--+
|
||||
|
||||
....
|
||||
. . Component that we will need, but doesn't exist at the moment. A TODO.
|
||||
. . Component at its early development phase.
|
||||
....
|
||||
|
||||
---> Data flow
|
||||
@@ -116,13 +116,49 @@ Remove old on-disk layer files that are no longer needed according to the
|
||||
PITR retention policy
|
||||
|
||||
|
||||
TODO: Backup service
|
||||
--------------------
|
||||
### Backup service
|
||||
|
||||
The backup service is responsible for periodically pushing the chunks to S3.
|
||||
The backup service, responsible for storing pageserver recovery data externally.
|
||||
|
||||
TODO: How/when do restore from S3? Whenever we get a GetPage@LSN request for
|
||||
a chunk we don't currently have? Or when an external Control Plane tells us?
|
||||
Currently, pageserver stores its files in a filesystem directory it's pointed to.
|
||||
That working directory could be rather ephemeral for such cases as "a pageserver pod running in k8s with no persistent volumes attached".
|
||||
Therefore, the server interacts with external, more reliable storage to back up and restore its state.
|
||||
|
||||
The code for storage support is extensible and can support arbitrary ones as long as they implement a certain Rust trait.
|
||||
There are the following implementations present:
|
||||
* local filesystem — to use in tests mainly
|
||||
* AWS S3 - to use in production
|
||||
|
||||
Implementation details are covered in the [storage readme](./src/relish_storage/README.md) and corresponding Rust file docs.
|
||||
|
||||
The backup service is disabled by default and can be enabled to interact with a single remote storage.
|
||||
|
||||
CLI examples:
|
||||
* Local FS: `${PAGESERVER_BIN} --relish-storage-local-path="/some/local/path/"`
|
||||
* AWS S3 : `${PAGESERVER_BIN} --relish-storage-s3-bucket="some-sample-bucket" --relish-storage-region="eu-north-1" --relish-storage-access-key="SOMEKEYAAAAASADSAH*#" --relish-storage-secret-access-key="SOMEsEcReTsd292v"`
|
||||
|
||||
For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
|
||||
For local S3 installations, refer to the their documentation for name format and credentials.
|
||||
|
||||
Similar to other pageserver settings, toml config file can be used to configure either of the storages as backup backup targets.
|
||||
Required sections are:
|
||||
|
||||
```toml
|
||||
[relish_storage]
|
||||
local_path = '/Users/someonetoignore/Downloads/tmp_dir/'
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```toml
|
||||
[relish_storage]
|
||||
bucket_name = 'some-sample-bucket'
|
||||
bucket_region = 'eu-north-1'
|
||||
access_key_id = 'SOMEKEYAAAAASADSAH*#'
|
||||
secret_access_key = 'SOMEsEcReTsd292v'
|
||||
```
|
||||
|
||||
Also, `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` variables can be used to specify the credentials instead of any of the ways above.
|
||||
|
||||
TODO: Sharding
|
||||
--------------------
|
||||
@@ -13,6 +13,7 @@
|
||||
use anyhow::Result;
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use log::*;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::sync::Arc;
|
||||
@@ -83,7 +84,7 @@ impl<'a> Basebackup<'a> {
|
||||
|
||||
info!(
|
||||
"taking basebackup lsn={}, prev_lsn={}",
|
||||
backup_prev, backup_lsn
|
||||
backup_lsn, backup_prev
|
||||
);
|
||||
|
||||
Ok(Basebackup {
|
||||
@@ -248,13 +249,7 @@ impl<'a> Basebackup<'a> {
|
||||
let mut pg_control = ControlFileData::decode(&pg_control_bytes)?;
|
||||
let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
|
||||
// Generate new pg_control and WAL needed for bootstrap
|
||||
let checkpoint_segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let checkpoint_lsn = XLogSegNoOffsetToRecPtr(
|
||||
checkpoint_segno,
|
||||
XLOG_SIZE_OF_XLOG_LONG_PHD as u32,
|
||||
pg_constants::WAL_SEGMENT_SIZE,
|
||||
);
|
||||
// Generate new pg_control needed for bootstrap
|
||||
checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
|
||||
|
||||
//reset some fields we don't want to preserve
|
||||
@@ -263,19 +258,24 @@ impl<'a> Basebackup<'a> {
|
||||
checkpoint.oldestActiveXid = 0;
|
||||
|
||||
//save new values in pg_control
|
||||
pg_control.checkPoint = checkpoint_lsn;
|
||||
pg_control.checkPoint = 0;
|
||||
pg_control.checkPointCopy = checkpoint;
|
||||
pg_control.state = pg_constants::DB_SHUTDOWNED;
|
||||
|
||||
// add zenith.signal file
|
||||
let xl_prev = if self.prev_record_lsn == Lsn(0) {
|
||||
0xBAD0 // magic value to indicate that we don't know prev_lsn
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
if self.lsn == self.timeline.get_ancestor_lsn() {
|
||||
write!(zenith_signal, "PREV LSN: none")?;
|
||||
} else {
|
||||
write!(zenith_signal, "PREV LSN: invalid")?;
|
||||
}
|
||||
} else {
|
||||
self.prev_record_lsn.0
|
||||
};
|
||||
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
|
||||
}
|
||||
self.ar.append(
|
||||
&new_tar_header("zenith.signal", 8)?,
|
||||
&xl_prev.to_le_bytes()[..],
|
||||
&new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
|
||||
zenith_signal.as_bytes(),
|
||||
)?;
|
||||
|
||||
//send pg_control
|
||||
@@ -284,14 +284,15 @@ impl<'a> Basebackup<'a> {
|
||||
self.ar.append(&header, &pg_control_bytes[..])?;
|
||||
|
||||
//send wal segment
|
||||
let segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let wal_file_name = XLogFileName(
|
||||
1, // FIXME: always use Postgres timeline 1
|
||||
checkpoint_segno,
|
||||
segno,
|
||||
pg_constants::WAL_SEGMENT_SIZE,
|
||||
);
|
||||
let wal_file_path = format!("pg_wal/{}", wal_file_name);
|
||||
let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?;
|
||||
let wal_seg = generate_wal_segment(&pg_control);
|
||||
let wal_seg = generate_wal_segment(segno, pg_control.system_identifier);
|
||||
assert!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE);
|
||||
self.ar.append(&header, &wal_seg[..])?;
|
||||
Ok(())
|
||||
|
||||
@@ -2,17 +2,15 @@
|
||||
// Main entry point for the Page Server executable
|
||||
//
|
||||
|
||||
use pageserver::defaults::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
env,
|
||||
net::TcpListener,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
thread,
|
||||
};
|
||||
use tracing::*;
|
||||
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType};
|
||||
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType, tcp_listener};
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use signal_hook::consts::signal::*;
|
||||
@@ -28,13 +26,8 @@ use clap::{App, Arg, ArgMatches};
|
||||
use daemonize::Daemonize;
|
||||
|
||||
use pageserver::{
|
||||
branches,
|
||||
defaults::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR,
|
||||
DEFAULT_RELISH_STORAGE_MAX_CONCURRENT_SYNC_LIMITS,
|
||||
},
|
||||
http, page_service, relish_storage, tenant_mgr, PageServerConf, RelishStorageConfig,
|
||||
RelishStorageKind, S3Config, LOG_FILE_NAME,
|
||||
branches, defaults::*, http, page_service, relish_storage, tenant_mgr, PageServerConf,
|
||||
RelishStorageConfig, RelishStorageKind, S3Config, LOG_FILE_NAME,
|
||||
};
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::postgres_backend;
|
||||
@@ -42,7 +35,7 @@ use zenith_utils::postgres_backend;
|
||||
use const_format::formatcp;
|
||||
|
||||
/// String arguments that can be declared via CLI or config file
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
struct CfgFileParams {
|
||||
listen_pg_addr: Option<String>,
|
||||
listen_http_addr: Option<String>,
|
||||
@@ -53,12 +46,21 @@ struct CfgFileParams {
|
||||
pg_distrib_dir: Option<String>,
|
||||
auth_validation_public_key_path: Option<String>,
|
||||
auth_type: Option<String>,
|
||||
// see https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for enum deserialisation examples
|
||||
relish_storage: Option<RelishStorage>,
|
||||
relish_storage_max_concurrent_sync: Option<String>,
|
||||
/////////////////////////////////
|
||||
//// Don't put `Option<String>` and other "simple" values below.
|
||||
////
|
||||
/// `Option<RelishStorage>` is a <a href='https://toml.io/en/v1.0.0#table'>table</a> in TOML.
|
||||
/// Values in TOML cannot be defined after tables (other tables can),
|
||||
/// and [`toml`] crate serializes all fields in the order of their appearance.
|
||||
////////////////////////////////
|
||||
relish_storage: Option<RelishStorage>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
// Without this attribute, enums with values won't be serialized by the `toml` library (but can be deserialized nonetheless!).
|
||||
// See https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for the examples
|
||||
#[serde(untagged)]
|
||||
enum RelishStorage {
|
||||
Local {
|
||||
local_path: String,
|
||||
@@ -477,13 +479,13 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
"Starting pageserver http handler on {}",
|
||||
conf.listen_http_addr
|
||||
);
|
||||
let http_listener = TcpListener::bind(conf.listen_http_addr.clone())?;
|
||||
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone())?;
|
||||
|
||||
info!(
|
||||
"Starting pageserver pg protocol handler on {}",
|
||||
conf.listen_pg_addr
|
||||
);
|
||||
let pageserver_listener = TcpListener::bind(conf.listen_pg_addr.clone())?;
|
||||
let pageserver_listener = tcp_listener::bind(conf.listen_pg_addr.clone())?;
|
||||
|
||||
if conf.daemonize {
|
||||
info!("daemonizing...");
|
||||
@@ -552,7 +554,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
info!("Got SIGQUIT. Terminate pageserver in immediate shutdown mode");
|
||||
exit(111);
|
||||
}
|
||||
SIGTERM => {
|
||||
SIGINT | SIGTERM => {
|
||||
info!("Got SIGINT/SIGTERM. Terminate gracefully in fast shutdown mode");
|
||||
// Terminate postgres backends
|
||||
postgres_backend::set_pgbackend_shutdown_requested();
|
||||
@@ -577,11 +579,142 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
info!("Pageserver shut down successfully completed");
|
||||
exit(0);
|
||||
}
|
||||
_ => {
|
||||
debug!("Unknown signal.");
|
||||
unknown_signal => {
|
||||
debug!("Unknown signal {}", unknown_signal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn page_server_conf_toml_serde() {
|
||||
let params = CfgFileParams {
|
||||
listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
|
||||
listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
|
||||
checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
|
||||
checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
|
||||
gc_horizon: Some("gc_horizon_VALUE".to_string()),
|
||||
gc_period: Some("gc_period_VALUE".to_string()),
|
||||
pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
|
||||
auth_validation_public_key_path: Some(
|
||||
"auth_validation_public_key_path_VALUE".to_string(),
|
||||
),
|
||||
auth_type: Some("auth_type_VALUE".to_string()),
|
||||
relish_storage: Some(RelishStorage::Local {
|
||||
local_path: "relish_storage_local_VALUE".to_string(),
|
||||
}),
|
||||
relish_storage_max_concurrent_sync: Some(
|
||||
"relish_storage_max_concurrent_sync_VALUE".to_string(),
|
||||
),
|
||||
};
|
||||
|
||||
let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config");
|
||||
let toml_pretty_string =
|
||||
toml::to_string_pretty(¶ms).expect("Failed to serialize correct config");
|
||||
assert_eq!(
|
||||
r#"listen_pg_addr = 'listen_pg_addr_VALUE'
|
||||
listen_http_addr = 'listen_http_addr_VALUE'
|
||||
checkpoint_distance = 'checkpoint_distance_VALUE'
|
||||
checkpoint_period = 'checkpoint_period_VALUE'
|
||||
gc_horizon = 'gc_horizon_VALUE'
|
||||
gc_period = 'gc_period_VALUE'
|
||||
pg_distrib_dir = 'pg_distrib_dir_VALUE'
|
||||
auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
|
||||
auth_type = 'auth_type_VALUE'
|
||||
relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
|
||||
|
||||
[relish_storage]
|
||||
local_path = 'relish_storage_local_VALUE'
|
||||
"#,
|
||||
toml_pretty_string
|
||||
);
|
||||
|
||||
let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
|
||||
.expect("Failed to deserialize the serialization result of the config");
|
||||
let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
|
||||
.expect("Failed to deserialize the prettified serialization result of the config");
|
||||
assert!(
|
||||
params_from_serialized == params,
|
||||
"Expected the same config in the end of config -> serialize -> deserialize chain"
|
||||
);
|
||||
assert!(
|
||||
params_from_serialized_pretty == params,
|
||||
"Expected the same config in the end of config -> serialize pretty -> deserialize chain"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn credentials_omitted_during_serialization() {
|
||||
let params = CfgFileParams {
|
||||
listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
|
||||
listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
|
||||
checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
|
||||
checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
|
||||
gc_horizon: Some("gc_horizon_VALUE".to_string()),
|
||||
gc_period: Some("gc_period_VALUE".to_string()),
|
||||
pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
|
||||
auth_validation_public_key_path: Some(
|
||||
"auth_validation_public_key_path_VALUE".to_string(),
|
||||
),
|
||||
auth_type: Some("auth_type_VALUE".to_string()),
|
||||
relish_storage: Some(RelishStorage::AwsS3 {
|
||||
bucket_name: "bucket_name_VALUE".to_string(),
|
||||
bucket_region: "bucket_region_VALUE".to_string(),
|
||||
access_key_id: Some("access_key_id_VALUE".to_string()),
|
||||
secret_access_key: Some("secret_access_key_VALUE".to_string()),
|
||||
}),
|
||||
relish_storage_max_concurrent_sync: Some(
|
||||
"relish_storage_max_concurrent_sync_VALUE".to_string(),
|
||||
),
|
||||
};
|
||||
|
||||
let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config");
|
||||
let toml_pretty_string =
|
||||
toml::to_string_pretty(¶ms).expect("Failed to serialize correct config");
|
||||
assert_eq!(
|
||||
r#"listen_pg_addr = 'listen_pg_addr_VALUE'
|
||||
listen_http_addr = 'listen_http_addr_VALUE'
|
||||
checkpoint_distance = 'checkpoint_distance_VALUE'
|
||||
checkpoint_period = 'checkpoint_period_VALUE'
|
||||
gc_horizon = 'gc_horizon_VALUE'
|
||||
gc_period = 'gc_period_VALUE'
|
||||
pg_distrib_dir = 'pg_distrib_dir_VALUE'
|
||||
auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
|
||||
auth_type = 'auth_type_VALUE'
|
||||
relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
|
||||
|
||||
[relish_storage]
|
||||
bucket_name = 'bucket_name_VALUE'
|
||||
bucket_region = 'bucket_region_VALUE'
|
||||
"#,
|
||||
toml_pretty_string
|
||||
);
|
||||
|
||||
let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
|
||||
.expect("Failed to deserialize the serialization result of the config");
|
||||
let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
|
||||
.expect("Failed to deserialize the prettified serialization result of the config");
|
||||
|
||||
let mut expected_params = params;
|
||||
expected_params.relish_storage = Some(RelishStorage::AwsS3 {
|
||||
bucket_name: "bucket_name_VALUE".to_string(),
|
||||
bucket_region: "bucket_region_VALUE".to_string(),
|
||||
access_key_id: None,
|
||||
secret_access_key: None,
|
||||
});
|
||||
assert!(
|
||||
params_from_serialized == expected_params,
|
||||
"Expected the config without credentials in the end of a 'config -> serialize -> deserialize' chain"
|
||||
);
|
||||
assert!(
|
||||
params_from_serialized_pretty == expected_params,
|
||||
"Expected the config without credentials in the end of a 'config -> serialize pretty -> deserialize' chain"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
// TODO: move all paths construction to conf impl
|
||||
//
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use postgres_ffi::ControlFileData;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
@@ -23,6 +23,7 @@ use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use crate::tenant_mgr;
|
||||
use crate::walredo::WalRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::{repository::Repository, PageServerConf};
|
||||
use crate::{restore_local_repo, LOG_FILE_NAME};
|
||||
|
||||
@@ -35,7 +36,7 @@ pub struct BranchInfo {
|
||||
pub ancestor_id: Option<String>,
|
||||
pub ancestor_lsn: Option<String>,
|
||||
pub current_logical_size: usize,
|
||||
pub current_logical_size_non_incremental: usize,
|
||||
pub current_logical_size_non_incremental: Option<usize>,
|
||||
}
|
||||
|
||||
impl BranchInfo {
|
||||
@@ -44,6 +45,7 @@ impl BranchInfo {
|
||||
conf: &PageServerConf,
|
||||
tenantid: &ZTenantId,
|
||||
repo: &Arc<dyn Repository>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Result<Self> {
|
||||
let name = path
|
||||
.as_ref()
|
||||
@@ -78,6 +80,14 @@ impl BranchInfo {
|
||||
);
|
||||
}
|
||||
|
||||
// non incremental size calculation can be heavy, so let it be optional
|
||||
// needed for tests to check size calculation
|
||||
let current_logical_size_non_incremental = include_non_incremental_logical_size
|
||||
.then(|| {
|
||||
timeline.get_current_logical_size_non_incremental(timeline.get_last_record_lsn())
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
Ok(BranchInfo {
|
||||
name,
|
||||
timeline_id,
|
||||
@@ -85,8 +95,7 @@ impl BranchInfo {
|
||||
ancestor_id,
|
||||
ancestor_lsn,
|
||||
current_logical_size: timeline.get_current_logical_size(),
|
||||
current_logical_size_non_incremental: timeline
|
||||
.get_current_logical_size_non_incremental(timeline.get_last_record_lsn())?,
|
||||
current_logical_size_non_incremental,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -230,7 +239,7 @@ fn bootstrap_timeline(
|
||||
timeline.writer().as_ref(),
|
||||
lsn,
|
||||
)?;
|
||||
timeline.checkpoint()?;
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
println!(
|
||||
"created initial timeline {} timeline.lsn {}",
|
||||
@@ -248,19 +257,11 @@ fn bootstrap_timeline(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_tenants(conf: &PageServerConf) -> Result<Vec<String>> {
|
||||
let tenants_dir = conf.tenants_path();
|
||||
|
||||
std::fs::read_dir(&tenants_dir)?
|
||||
.map(|dir_entry_res| {
|
||||
let dir_entry = dir_entry_res?;
|
||||
ensure!(dir_entry.file_type()?.is_dir());
|
||||
Ok(dir_entry.file_name().to_str().unwrap().to_owned())
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn get_branches(conf: &PageServerConf, tenantid: &ZTenantId) -> Result<Vec<BranchInfo>> {
|
||||
pub(crate) fn get_branches(
|
||||
conf: &PageServerConf,
|
||||
tenantid: &ZTenantId,
|
||||
include_non_incremental_logical_size: bool,
|
||||
) -> Result<Vec<BranchInfo>> {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(*tenantid)?;
|
||||
|
||||
// Each branch has a corresponding record (text file) in the refs/branches
|
||||
@@ -270,7 +271,13 @@ pub(crate) fn get_branches(conf: &PageServerConf, tenantid: &ZTenantId) -> Resul
|
||||
std::fs::read_dir(&branches_dir)?
|
||||
.map(|dir_entry_res| {
|
||||
let dir_entry = dir_entry_res?;
|
||||
BranchInfo::from_path(dir_entry.path(), conf, tenantid, &repo)
|
||||
BranchInfo::from_path(
|
||||
dir_entry.path(),
|
||||
conf,
|
||||
tenantid,
|
||||
&repo,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
@@ -332,7 +339,7 @@ pub(crate) fn create_branch(
|
||||
ancestor_id: None,
|
||||
ancestor_lsn: None,
|
||||
current_logical_size: 0,
|
||||
current_logical_size_non_incremental: 0,
|
||||
current_logical_size_non_incremental: Some(0),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,11 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get branches for tenant
|
||||
responses:
|
||||
@@ -73,6 +78,11 @@ paths:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: include-non-incremental-logical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
get:
|
||||
description: Get branches for tenant
|
||||
responses:
|
||||
@@ -164,13 +174,13 @@ paths:
|
||||
description: Get tenants list
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
description: TenantInfo
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
$ref: "#/components/schemas/TenantInfo"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
@@ -243,6 +253,16 @@ components:
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
schemas:
|
||||
TenantInfo:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- state
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
state:
|
||||
type: string
|
||||
BranchInfo:
|
||||
type: object
|
||||
required:
|
||||
@@ -250,7 +270,6 @@ components:
|
||||
- timeline_id
|
||||
- latest_valid_lsn
|
||||
- current_logical_size
|
||||
- current_logical_size_non_incremental
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
||||
@@ -86,31 +86,59 @@ async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
Ok(json_response(StatusCode::CREATED, response_data)?)
|
||||
}
|
||||
|
||||
// Gate non incremental logical size calculation behind a flag
|
||||
// after pgbench -i -s100 calculation took 28ms so if multiplied by the number of timelines
|
||||
// and tenants it can take noticeable amount of time. Also the value currently used only in tests
|
||||
fn get_include_non_incremental_logical_size(request: &Request<Body>) -> bool {
|
||||
request
|
||||
.uri()
|
||||
.query()
|
||||
.map(|v| {
|
||||
url::form_urlencoded::parse(v.as_bytes())
|
||||
.into_owned()
|
||||
.any(|(param, _)| param == "include-non-incremental-logical-size")
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
|
||||
check_permission(&request, Some(tenantid))?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_list", tenant = %tenantid).entered();
|
||||
crate::branches::get_branches(get_config(&request), &tenantid)
|
||||
crate::branches::get_branches(
|
||||
get_config(&request),
|
||||
&tenantid,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
|
||||
// TODO add to swagger
|
||||
async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
|
||||
let conf = get_state(&request).conf;
|
||||
let path = conf.branch_path(&branch_name, &tenantid);
|
||||
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
BranchInfo::from_path(path, conf, &tenantid, &repo)
|
||||
BranchInfo::from_path(
|
||||
path,
|
||||
conf,
|
||||
&tenantid,
|
||||
&repo,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
@@ -124,7 +152,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_list").entered();
|
||||
crate::branches::get_tenants(get_config(&request))
|
||||
crate::tenant_mgr::list_tenants()
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
@@ -16,13 +16,11 @@ use bookfile::Book;
|
||||
use bytes::Bytes;
|
||||
use lazy_static::lazy_static;
|
||||
use postgres_ffi::pg_constants::BLCKSZ;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::*;
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
@@ -30,9 +28,9 @@ use std::ops::{Bound::Included, Deref};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::thread::JoinHandle;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use self::metadata::{metadata_path, TimelineMetadata};
|
||||
use crate::relish::*;
|
||||
use crate::relish_storage::schedule_timeline_upload;
|
||||
use crate::repository::{GcResult, Repository, Timeline, TimelineWriter, WALRecord};
|
||||
@@ -40,6 +38,7 @@ use crate::tenant_mgr;
|
||||
use crate::walreceiver;
|
||||
use crate::walreceiver::IS_WAL_RECEIVER;
|
||||
use crate::walredo::WalRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::PageServerConf;
|
||||
use crate::{ZTenantId, ZTimelineId};
|
||||
|
||||
@@ -47,7 +46,6 @@ use zenith_metrics::{
|
||||
register_histogram, register_int_gauge_vec, Histogram, IntGauge, IntGaugeVec,
|
||||
};
|
||||
use zenith_metrics::{register_histogram_vec, HistogramVec};
|
||||
use zenith_utils::bin_ser::BeSer;
|
||||
use zenith_utils::crashsafe_dir;
|
||||
use zenith_utils::lsn::{AtomicLsn, Lsn, RecordLsn};
|
||||
use zenith_utils::seqwait::SeqWait;
|
||||
@@ -59,6 +57,7 @@ mod image_layer;
|
||||
mod inmemory_layer;
|
||||
mod interval_tree;
|
||||
mod layer_map;
|
||||
pub mod metadata;
|
||||
mod page_versions;
|
||||
mod storage_layer;
|
||||
|
||||
@@ -111,8 +110,9 @@ lazy_static! {
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
|
||||
/// The name of the metadata file pageserver creates per timeline.
|
||||
pub const METADATA_FILE_NAME: &str = "metadata";
|
||||
/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
|
||||
pub const TENANTS_SEGMENT_NAME: &str = "tenants";
|
||||
pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
|
||||
|
||||
///
|
||||
/// Repository consists of multiple timelines. Keep them in a hash table.
|
||||
@@ -142,12 +142,7 @@ impl Repository for LayeredRepository {
|
||||
// Create the timeline directory, and write initial metadata to file.
|
||||
crashsafe_dir::create_dir_all(self.conf.timeline_path(&timelineid, &self.tenantid))?;
|
||||
|
||||
let metadata = TimelineMetadata {
|
||||
disk_consistent_lsn: Lsn(0),
|
||||
prev_record_lsn: None,
|
||||
ancestor_timeline: None,
|
||||
ancestor_lsn: Lsn(0),
|
||||
};
|
||||
let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0));
|
||||
Self::save_metadata(self.conf, timelineid, self.tenantid, &metadata, true)?;
|
||||
|
||||
let timeline = LayeredTimeline::new(
|
||||
@@ -186,12 +181,7 @@ impl Repository for LayeredRepository {
|
||||
// Create the metadata file, noting the ancestor of the new timeline.
|
||||
// There is initially no data in it, but all the read-calls know to look
|
||||
// into the ancestor.
|
||||
let metadata = TimelineMetadata {
|
||||
disk_consistent_lsn: start_lsn,
|
||||
prev_record_lsn: dst_prev,
|
||||
ancestor_timeline: Some(src),
|
||||
ancestor_lsn: start_lsn,
|
||||
};
|
||||
let metadata = TimelineMetadata::new(start_lsn, dst_prev, Some(src), start_lsn);
|
||||
crashsafe_dir::create_dir_all(self.conf.timeline_path(&dst, &self.tenantid))?;
|
||||
Self::save_metadata(self.conf, dst, self.tenantid, &metadata, true)?;
|
||||
|
||||
@@ -216,6 +206,22 @@ impl Repository for LayeredRepository {
|
||||
})
|
||||
}
|
||||
|
||||
fn checkpoint_iteration(&self, cconf: CheckpointConfig) -> Result<()> {
|
||||
{
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
|
||||
for (timelineid, timeline) in timelines.iter() {
|
||||
let _entered =
|
||||
info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid)
|
||||
.entered();
|
||||
|
||||
timeline.checkpoint(cconf)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Wait for all threads to complete and persist repository data before pageserver shutdown.
|
||||
fn shutdown(&self) -> Result<()> {
|
||||
trace!("LayeredRepository shutdown for tenant {}", self.tenantid);
|
||||
@@ -225,7 +231,7 @@ impl Repository for LayeredRepository {
|
||||
walreceiver::stop_wal_receiver(*timelineid);
|
||||
// Wait for syncing data to disk
|
||||
trace!("repo shutdown. checkpoint timeline {}", timelineid);
|
||||
timeline.checkpoint()?;
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
//TODO Wait for walredo process to shutdown too
|
||||
}
|
||||
@@ -247,14 +253,14 @@ impl LayeredRepository {
|
||||
Some(timeline) => Ok(timeline.clone()),
|
||||
None => {
|
||||
let metadata = Self::load_metadata(self.conf, timelineid, self.tenantid)?;
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn;
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn();
|
||||
|
||||
// Recurse to look up the ancestor timeline.
|
||||
//
|
||||
// TODO: If you have a very deep timeline history, this could become
|
||||
// expensive. Perhaps delay this until we need to look up a page in
|
||||
// ancestor.
|
||||
let ancestor = if let Some(ancestor_timelineid) = metadata.ancestor_timeline {
|
||||
let ancestor = if let Some(ancestor_timelineid) = metadata.ancestor_timeline() {
|
||||
Some(self.get_timeline_locked(ancestor_timelineid, timelines)?)
|
||||
} else {
|
||||
None
|
||||
@@ -266,7 +272,7 @@ impl LayeredRepository {
|
||||
|
||||
let mut timeline = LayeredTimeline::new(
|
||||
self.conf,
|
||||
metadata,
|
||||
metadata.clone(),
|
||||
ancestor,
|
||||
timelineid,
|
||||
self.tenantid,
|
||||
@@ -276,15 +282,9 @@ impl LayeredRepository {
|
||||
)?;
|
||||
|
||||
// List the layers on disk, and load them into the layer map
|
||||
let _loaded_layers = timeline.load_layer_map(disk_consistent_lsn)?;
|
||||
let loaded_layers = timeline.load_layer_map(disk_consistent_lsn)?;
|
||||
if self.upload_relishes {
|
||||
schedule_timeline_upload(());
|
||||
// schedule_timeline_upload(
|
||||
// self.tenantid,
|
||||
// timelineid,
|
||||
// loaded_layers,
|
||||
// disk_consistent_lsn,
|
||||
// );
|
||||
schedule_timeline_upload(self.tenantid, timelineid, loaded_layers, metadata);
|
||||
}
|
||||
|
||||
// needs to be after load_layer_map
|
||||
@@ -312,90 +312,6 @@ impl LayeredRepository {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Launch the checkpointer thread in given repository.
|
||||
///
|
||||
pub fn launch_checkpointer_thread(
|
||||
conf: &'static PageServerConf,
|
||||
rc: Arc<LayeredRepository>,
|
||||
) -> JoinHandle<()> {
|
||||
std::thread::Builder::new()
|
||||
.name("Checkpointer thread".into())
|
||||
.spawn(move || {
|
||||
// FIXME: relaunch it? Panic is not good.
|
||||
rc.checkpoint_loop(conf).expect("Checkpointer thread died");
|
||||
})
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
///
|
||||
/// Checkpointer thread's main loop
|
||||
///
|
||||
fn checkpoint_loop(&self, conf: &'static PageServerConf) -> Result<()> {
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
std::thread::sleep(conf.checkpoint_period);
|
||||
info!("checkpointer thread for tenant {} waking up", self.tenantid);
|
||||
|
||||
// checkpoint timelines that have accumulated more than CHECKPOINT_DISTANCE
|
||||
// bytes of WAL since last checkpoint.
|
||||
{
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
for (timelineid, timeline) in timelines.iter() {
|
||||
let _entered =
|
||||
info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid)
|
||||
.entered();
|
||||
|
||||
STORAGE_TIME
|
||||
.with_label_values(&["checkpoint_timed"])
|
||||
.observe_closure_duration(|| {
|
||||
timeline.checkpoint_internal(conf.checkpoint_distance, false)
|
||||
})?
|
||||
}
|
||||
// release lock on 'timelines'
|
||||
}
|
||||
}
|
||||
trace!("Checkpointer thread shut down");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Launch the GC thread in given repository.
|
||||
///
|
||||
pub fn launch_gc_thread(
|
||||
conf: &'static PageServerConf,
|
||||
rc: Arc<LayeredRepository>,
|
||||
) -> JoinHandle<()> {
|
||||
std::thread::Builder::new()
|
||||
.name("GC thread".into())
|
||||
.spawn(move || {
|
||||
// FIXME: relaunch it? Panic is not good.
|
||||
rc.gc_loop(conf).expect("GC thread died");
|
||||
})
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
///
|
||||
/// GC thread's main loop
|
||||
///
|
||||
fn gc_loop(&self, conf: &'static PageServerConf) -> Result<()> {
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
// Garbage collect old files that are not needed for PITR anymore
|
||||
if conf.gc_horizon > 0 {
|
||||
self.gc_iteration(None, conf.gc_horizon, false).unwrap();
|
||||
}
|
||||
|
||||
// TODO Write it in more adequate way using
|
||||
// condvar.wait_timeout() or something
|
||||
let mut sleep_time = conf.gc_period.as_secs();
|
||||
while sleep_time > 0 && !tenant_mgr::shutdown_requested() {
|
||||
sleep_time -= 1;
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
info!("gc thread for tenant {} waking up", self.tenantid);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Save timeline metadata to file
|
||||
fn save_metadata(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -412,13 +328,7 @@ impl LayeredRepository {
|
||||
.create_new(first_save)
|
||||
.open(&path)?;
|
||||
|
||||
let mut metadata_bytes = TimelineMetadata::ser(data)?;
|
||||
|
||||
assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
|
||||
metadata_bytes.resize(METADATA_MAX_SAFE_SIZE, 0u8);
|
||||
|
||||
let checksum = crc32c::crc32c(&metadata_bytes[..METADATA_MAX_DATA_SIZE]);
|
||||
metadata_bytes[METADATA_MAX_DATA_SIZE..].copy_from_slice(&u32::to_le_bytes(checksum));
|
||||
let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?;
|
||||
|
||||
if file.write(&metadata_bytes)? != metadata_bytes.len() {
|
||||
bail!("Could not write all the metadata bytes in a single call");
|
||||
@@ -445,20 +355,7 @@ impl LayeredRepository {
|
||||
) -> Result<TimelineMetadata> {
|
||||
let path = metadata_path(conf, timelineid, tenantid);
|
||||
let metadata_bytes = std::fs::read(&path)?;
|
||||
ensure!(metadata_bytes.len() == METADATA_MAX_SAFE_SIZE);
|
||||
|
||||
let data = &metadata_bytes[..METADATA_MAX_DATA_SIZE];
|
||||
let calculated_checksum = crc32c::crc32c(data);
|
||||
|
||||
let checksum_bytes: &[u8; METADATA_CHECKSUM_SIZE] =
|
||||
metadata_bytes[METADATA_MAX_DATA_SIZE..].try_into()?;
|
||||
let expected_checksum = u32::from_le_bytes(*checksum_bytes);
|
||||
ensure!(calculated_checksum == expected_checksum);
|
||||
|
||||
let data = TimelineMetadata::des_prefix(data)?;
|
||||
assert!(data.disk_consistent_lsn.is_aligned());
|
||||
|
||||
Ok(data)
|
||||
TimelineMetadata::from_bytes(&metadata_bytes)
|
||||
}
|
||||
|
||||
//
|
||||
@@ -568,7 +465,7 @@ impl LayeredRepository {
|
||||
// so that they too can be garbage collected. That's
|
||||
// used in tests, so we want as deterministic results as possible.
|
||||
if checkpoint_before_gc {
|
||||
timeline.checkpoint()?;
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
info!("timeline {} checkpoint_before_gc done", timelineid);
|
||||
}
|
||||
|
||||
@@ -583,29 +480,6 @@ impl LayeredRepository {
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata stored on disk for each timeline
|
||||
///
|
||||
/// The fields correspond to the values we hold in memory, in LayeredTimeline.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TimelineMetadata {
|
||||
disk_consistent_lsn: Lsn,
|
||||
|
||||
// This is only set if we know it. We track it in memory when the page
|
||||
// server is running, but we only track the value corresponding to
|
||||
// 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
|
||||
// lot. We only store it in the metadata file when we flush *all* the
|
||||
// in-memory data so that 'last_record_lsn' is the same as
|
||||
// 'disk_consistent_lsn'. That's OK, because after page server restart, as
|
||||
// soon as we reprocess at least one record, we will have a valid
|
||||
// 'prev_record_lsn' value in memory again. This is only really needed when
|
||||
// doing a clean shutdown, so that there is no more WAL beyond
|
||||
// 'disk_consistent_lsn'
|
||||
prev_record_lsn: Option<Lsn>,
|
||||
|
||||
ancestor_timeline: Option<ZTimelineId>,
|
||||
ancestor_lsn: Lsn,
|
||||
}
|
||||
|
||||
pub struct LayeredTimeline {
|
||||
conf: &'static PageServerConf,
|
||||
|
||||
@@ -678,6 +552,10 @@ pub struct LayeredTimeline {
|
||||
|
||||
/// Public interface functions
|
||||
impl Timeline for LayeredTimeline {
|
||||
fn get_ancestor_lsn(&self) -> Lsn {
|
||||
self.ancestor_lsn
|
||||
}
|
||||
|
||||
/// Wait until WAL has been received up to the given LSN.
|
||||
fn wait_lsn(&self, lsn: Lsn) -> Result<()> {
|
||||
// This should never be called from the WAL receiver thread, because that could lead
|
||||
@@ -691,8 +569,8 @@ impl Timeline for LayeredTimeline {
|
||||
.wait_for_timeout(lsn, TIMEOUT)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive",
|
||||
lsn
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive, disk consistent LSN={}",
|
||||
lsn, self.get_disk_consistent_lsn()
|
||||
)
|
||||
})?;
|
||||
|
||||
@@ -848,11 +726,15 @@ impl Timeline for LayeredTimeline {
|
||||
/// Public entry point for checkpoint(). All the logic is in the private
|
||||
/// checkpoint_internal function, this public facade just wraps it for
|
||||
/// metrics collection.
|
||||
fn checkpoint(&self) -> Result<()> {
|
||||
STORAGE_TIME
|
||||
.with_label_values(&["checkpoint_force"])
|
||||
//pass checkpoint_distance=0 to force checkpoint
|
||||
.observe_closure_duration(|| self.checkpoint_internal(0, true))
|
||||
fn checkpoint(&self, cconf: CheckpointConfig) -> Result<()> {
|
||||
match cconf {
|
||||
CheckpointConfig::Forced => STORAGE_TIME
|
||||
.with_label_values(&["forced checkpoint"])
|
||||
.observe_closure_duration(|| self.checkpoint_internal(0)),
|
||||
CheckpointConfig::Distance(distance) => STORAGE_TIME
|
||||
.with_label_values(&["checkpoint"])
|
||||
.observe_closure_duration(|| self.checkpoint_internal(distance)),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_last_record_lsn(&self) -> Lsn {
|
||||
@@ -906,6 +788,10 @@ impl Timeline for LayeredTimeline {
|
||||
Ok(total_blocks * BLCKSZ as usize)
|
||||
}
|
||||
|
||||
fn get_disk_consistent_lsn(&self) -> Lsn {
|
||||
self.disk_consistent_lsn.load()
|
||||
}
|
||||
|
||||
fn writer<'a>(&'a self) -> Box<dyn TimelineWriter + 'a> {
|
||||
Box::new(LayeredTimelineWriter {
|
||||
tl: self,
|
||||
@@ -942,13 +828,13 @@ impl LayeredTimeline {
|
||||
|
||||
// initialize in-memory 'last_record_lsn' from 'disk_consistent_lsn'.
|
||||
last_record_lsn: SeqWait::new(RecordLsn {
|
||||
last: metadata.disk_consistent_lsn,
|
||||
prev: metadata.prev_record_lsn.unwrap_or(Lsn(0)),
|
||||
last: metadata.disk_consistent_lsn(),
|
||||
prev: metadata.prev_record_lsn().unwrap_or(Lsn(0)),
|
||||
}),
|
||||
disk_consistent_lsn: AtomicLsn::new(metadata.disk_consistent_lsn.0),
|
||||
disk_consistent_lsn: AtomicLsn::new(metadata.disk_consistent_lsn().0),
|
||||
|
||||
ancestor_timeline: ancestor,
|
||||
ancestor_lsn: metadata.ancestor_lsn,
|
||||
ancestor_lsn: metadata.ancestor_lsn(),
|
||||
current_logical_size: AtomicUsize::new(current_logical_size),
|
||||
current_logical_size_gauge,
|
||||
upload_relishes,
|
||||
@@ -1227,7 +1113,7 @@ impl LayeredTimeline {
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
///
|
||||
/// NOTE: This has nothing to do with checkpoint in PostgreSQL.
|
||||
fn checkpoint_internal(&self, checkpoint_distance: u64, forced: bool) -> Result<()> {
|
||||
fn checkpoint_internal(&self, checkpoint_distance: u64) -> Result<()> {
|
||||
let mut write_guard = self.write_lock.lock().unwrap();
|
||||
let mut layers = self.layers.lock().unwrap();
|
||||
|
||||
@@ -1258,10 +1144,6 @@ impl LayeredTimeline {
|
||||
while let Some((oldest_layer, oldest_generation)) = layers.peek_oldest_open() {
|
||||
let oldest_pending_lsn = oldest_layer.get_oldest_pending_lsn();
|
||||
|
||||
if tenant_mgr::shutdown_requested() && !forced {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Does this layer need freezing?
|
||||
//
|
||||
// Write out all in-memory layers that contain WAL older than CHECKPOINT_DISTANCE.
|
||||
@@ -1340,50 +1222,48 @@ impl LayeredTimeline {
|
||||
timeline_dir.sync_all()?;
|
||||
}
|
||||
|
||||
// Save the metadata, with updated 'disk_consistent_lsn', to a
|
||||
// file in the timeline dir. After crash, we will restart WAL
|
||||
// streaming and processing from that point.
|
||||
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
||||
// After crash, we will restart WAL streaming and processing from that point.
|
||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
|
||||
// We can only save a valid 'prev_record_lsn' value on disk if we
|
||||
// flushed *all* in-memory changes to disk. We only track
|
||||
// 'prev_record_lsn' in memory for the latest processed record, so we
|
||||
// don't remember what the correct value that corresponds to some old
|
||||
// LSN is. But if we flush everything, then the value corresponding
|
||||
// current 'last_record_lsn' is correct and we can store it on disk.
|
||||
let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
|
||||
Some(prev_record_lsn)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// We can only save a valid 'prev_record_lsn' value on disk if we
|
||||
// flushed *all* in-memory changes to disk. We only track
|
||||
// 'prev_record_lsn' in memory for the latest processed record, so we
|
||||
// don't remember what the correct value that corresponds to some old
|
||||
// LSN is. But if we flush everything, then the value corresponding
|
||||
// current 'last_record_lsn' is correct and we can store it on disk.
|
||||
let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
|
||||
Some(prev_record_lsn)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid);
|
||||
let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid);
|
||||
|
||||
let metadata = TimelineMetadata {
|
||||
disk_consistent_lsn,
|
||||
prev_record_lsn: ondisk_prev_record_lsn,
|
||||
ancestor_timeline: ancestor_timelineid,
|
||||
ancestor_lsn: self.ancestor_lsn,
|
||||
};
|
||||
LayeredRepository::save_metadata(
|
||||
self.conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&metadata,
|
||||
false,
|
||||
)?;
|
||||
if self.upload_relishes {
|
||||
schedule_timeline_upload(())
|
||||
// schedule_timeline_upload(
|
||||
// self.tenantid,
|
||||
// self.timelineid,
|
||||
// layer_uploads,
|
||||
// disk_consistent_lsn,
|
||||
// });
|
||||
let metadata = TimelineMetadata::new(
|
||||
disk_consistent_lsn,
|
||||
ondisk_prev_record_lsn,
|
||||
ancestor_timelineid,
|
||||
self.ancestor_lsn,
|
||||
);
|
||||
|
||||
LayeredRepository::save_metadata(
|
||||
self.conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&metadata,
|
||||
false,
|
||||
)?;
|
||||
if self.upload_relishes {
|
||||
schedule_timeline_upload(self.tenantid, self.timelineid, layer_uploads, metadata);
|
||||
}
|
||||
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
}
|
||||
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1935,15 +1815,6 @@ pub fn dump_layerfile_from_path(path: &Path) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn metadata_path(
|
||||
conf: &'static PageServerConf,
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
) -> PathBuf {
|
||||
conf.timeline_path(&timelineid, &tenantid)
|
||||
.join(METADATA_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Add a suffix to a layer file's name: .{num}.old
|
||||
/// Uses the first available num (starts at 0)
|
||||
fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
|
||||
|
||||
@@ -442,12 +442,7 @@ impl DeltaLayer {
|
||||
}
|
||||
|
||||
fn open_book(&self) -> Result<(PathBuf, Book<File>)> {
|
||||
let path = Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&self.layer_name(),
|
||||
);
|
||||
let path = self.path();
|
||||
|
||||
let file = File::open(&path)?;
|
||||
let book = Book::new(file)?;
|
||||
|
||||
@@ -13,7 +13,7 @@ use anyhow::Result;
|
||||
use log::*;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use super::METADATA_FILE_NAME;
|
||||
use super::metadata::METADATA_FILE_NAME;
|
||||
|
||||
// Note: LayeredTimeline::load_layer_map() relies on this sort order
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
|
||||
@@ -17,6 +17,7 @@ use anyhow::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::sync::atomic::{self, AtomicU64};
|
||||
use std::sync::Arc;
|
||||
use zenith_metrics::{register_int_gauge, IntGauge};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
@@ -30,6 +31,17 @@ lazy_static! {
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
|
||||
static NEXT_LAYER_ID: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
|
||||
pub struct LayerId(u64);
|
||||
|
||||
impl LayerId {
|
||||
fn next() -> LayerId {
|
||||
Self(NEXT_LAYER_ID.fetch_add(1, atomic::Ordering::Relaxed))
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// LayerMap tracks what layers exist on a timeline.
|
||||
///
|
||||
@@ -43,6 +55,8 @@ pub struct LayerMap {
|
||||
/// contains the oldest WAL record.
|
||||
open_layers: BinaryHeap<OpenLayerEntry>,
|
||||
|
||||
open_layers_by_id: HashMap<LayerId, Arc<InMemoryLayer>>,
|
||||
|
||||
/// Generation number, used to distinguish newly inserted entries in the
|
||||
/// binary heap from older entries during checkpoint.
|
||||
current_generation: u64,
|
||||
@@ -71,10 +85,15 @@ impl LayerMap {
|
||||
segentry.open.as_ref().map(Arc::clone)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_open_by_id(&self, layer_id: &LayerId) -> Option<Arc<InMemoryLayer>> {
|
||||
self.open_layers_by_id.get(layer_id).cloned()
|
||||
}
|
||||
|
||||
///
|
||||
/// Insert an open in-memory layer
|
||||
///
|
||||
pub fn insert_open(&mut self, layer: Arc<InMemoryLayer>) {
|
||||
pub fn insert_open(&mut self, layer: Arc<InMemoryLayer>) -> LayerId {
|
||||
let segentry = self.segs.entry(layer.get_seg_tag()).or_default();
|
||||
|
||||
segentry.update_open(Arc::clone(&layer));
|
||||
@@ -86,15 +105,23 @@ impl LayerMap {
|
||||
// in the middle of a WAL record.
|
||||
assert!(oldest_pending_lsn.is_aligned());
|
||||
|
||||
let id = LayerId::next();
|
||||
|
||||
// Also add it to the binary heap
|
||||
let open_layer_entry = OpenLayerEntry {
|
||||
oldest_pending_lsn: layer.get_oldest_pending_lsn(),
|
||||
layer,
|
||||
layer: Arc::clone(&layer),
|
||||
generation: self.current_generation,
|
||||
id,
|
||||
};
|
||||
self.open_layers.push(open_layer_entry);
|
||||
|
||||
let old_layer = self.open_layers_by_id.insert(id, layer);
|
||||
assert!(old_layer.is_none());
|
||||
|
||||
NUM_INMEMORY_LAYERS.inc();
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
/// Remove the oldest in-memory layer
|
||||
@@ -114,6 +141,8 @@ impl LayerMap {
|
||||
assert!(oldest_entry.layer.is_dropped());
|
||||
}
|
||||
|
||||
self.open_layers_by_id.remove(&oldest_entry.id).unwrap();
|
||||
|
||||
NUM_INMEMORY_LAYERS.dec();
|
||||
}
|
||||
|
||||
@@ -319,6 +348,7 @@ struct OpenLayerEntry {
|
||||
pub oldest_pending_lsn: Lsn, // copy of layer.get_oldest_pending_lsn()
|
||||
pub generation: u64,
|
||||
pub layer: Arc<InMemoryLayer>,
|
||||
id: LayerId,
|
||||
}
|
||||
impl Ord for OpenLayerEntry {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
|
||||
202
pageserver/src/layered_repository/metadata.rs
Normal file
202
pageserver/src/layered_repository/metadata.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
//! Every image of a certain timeline from [`crate::layered_repository::LayeredRepository`]
|
||||
//! has a metadata that needs to be stored persistently.
|
||||
//!
|
||||
//! Later, the file gets is used in [`crate::relish_storage::storage_sync`] as a part of
|
||||
//! external storage import and export operations.
|
||||
//!
|
||||
//! The module contains all structs and related helper methods related to timeline metadata.
|
||||
|
||||
use std::{convert::TryInto, path::PathBuf};
|
||||
|
||||
use anyhow::ensure;
|
||||
use zenith_utils::{
|
||||
bin_ser::BeSer,
|
||||
lsn::Lsn,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
layered_repository::{METADATA_CHECKSUM_SIZE, METADATA_MAX_DATA_SIZE, METADATA_MAX_SAFE_SIZE},
|
||||
PageServerConf,
|
||||
};
|
||||
|
||||
/// The name of the metadata file pageserver creates per timeline.
|
||||
pub const METADATA_FILE_NAME: &str = "metadata";
|
||||
|
||||
/// Metadata stored on disk for each timeline
|
||||
///
|
||||
/// The fields correspond to the values we hold in memory, in LayeredTimeline.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct TimelineMetadata {
|
||||
disk_consistent_lsn: Lsn,
|
||||
// This is only set if we know it. We track it in memory when the page
|
||||
// server is running, but we only track the value corresponding to
|
||||
// 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
|
||||
// lot. We only store it in the metadata file when we flush *all* the
|
||||
// in-memory data so that 'last_record_lsn' is the same as
|
||||
// 'disk_consistent_lsn'. That's OK, because after page server restart, as
|
||||
// soon as we reprocess at least one record, we will have a valid
|
||||
// 'prev_record_lsn' value in memory again. This is only really needed when
|
||||
// doing a clean shutdown, so that there is no more WAL beyond
|
||||
// 'disk_consistent_lsn'
|
||||
prev_record_lsn: Option<Lsn>,
|
||||
ancestor_timeline: Option<ZTimelineId>,
|
||||
ancestor_lsn: Lsn,
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain timeline's metadata file should be located.
|
||||
pub fn metadata_path(
|
||||
conf: &'static PageServerConf,
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
) -> PathBuf {
|
||||
conf.timeline_path(&timelineid, &tenantid)
|
||||
.join(METADATA_FILE_NAME)
|
||||
}
|
||||
|
||||
impl TimelineMetadata {
|
||||
pub fn new(
|
||||
disk_consistent_lsn: Lsn,
|
||||
prev_record_lsn: Option<Lsn>,
|
||||
ancestor_timeline: Option<ZTimelineId>,
|
||||
ancestor_lsn: Lsn,
|
||||
) -> Self {
|
||||
Self {
|
||||
disk_consistent_lsn,
|
||||
prev_record_lsn,
|
||||
ancestor_timeline,
|
||||
ancestor_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
|
||||
ensure!(
|
||||
metadata_bytes.len() == METADATA_MAX_SAFE_SIZE,
|
||||
"metadata bytes size is wrong"
|
||||
);
|
||||
|
||||
let data = &metadata_bytes[..METADATA_MAX_DATA_SIZE];
|
||||
let calculated_checksum = crc32c::crc32c(data);
|
||||
|
||||
let checksum_bytes: &[u8; METADATA_CHECKSUM_SIZE] =
|
||||
metadata_bytes[METADATA_MAX_DATA_SIZE..].try_into()?;
|
||||
let expected_checksum = u32::from_le_bytes(*checksum_bytes);
|
||||
ensure!(
|
||||
calculated_checksum == expected_checksum,
|
||||
"metadata checksum mismatch"
|
||||
);
|
||||
|
||||
let data = TimelineMetadata::from(serialize::DeTimelineMetadata::des_prefix(data)?);
|
||||
assert!(data.disk_consistent_lsn.is_aligned());
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
|
||||
let serializeable_metadata = serialize::SeTimelineMetadata::from(self);
|
||||
let mut metadata_bytes = serialize::SeTimelineMetadata::ser(&serializeable_metadata)?;
|
||||
assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
|
||||
metadata_bytes.resize(METADATA_MAX_SAFE_SIZE, 0u8);
|
||||
|
||||
let checksum = crc32c::crc32c(&metadata_bytes[..METADATA_MAX_DATA_SIZE]);
|
||||
metadata_bytes[METADATA_MAX_DATA_SIZE..].copy_from_slice(&u32::to_le_bytes(checksum));
|
||||
Ok(metadata_bytes)
|
||||
}
|
||||
|
||||
/// [`Lsn`] that corresponds to the corresponding timeline directory
|
||||
/// contents, stored locally in the pageserver workdir.
|
||||
pub fn disk_consistent_lsn(&self) -> Lsn {
|
||||
self.disk_consistent_lsn
|
||||
}
|
||||
|
||||
pub fn prev_record_lsn(&self) -> Option<Lsn> {
|
||||
self.prev_record_lsn
|
||||
}
|
||||
|
||||
pub fn ancestor_timeline(&self) -> Option<ZTimelineId> {
|
||||
self.ancestor_timeline
|
||||
}
|
||||
|
||||
pub fn ancestor_lsn(&self) -> Lsn {
|
||||
self.ancestor_lsn
|
||||
}
|
||||
}
|
||||
|
||||
/// This module is for direct conversion of metadata to bytes and back.
|
||||
/// For a certain metadata, besides the conversion a few verification steps has to
|
||||
/// be done, so all serde derives are hidden from the user, to avoid accidental
|
||||
/// verification-less metadata creation.
|
||||
mod serialize {
|
||||
use serde::{Deserialize, Serialize};
|
||||
use zenith_utils::{lsn::Lsn, zid::ZTimelineId};
|
||||
|
||||
use super::TimelineMetadata;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct SeTimelineMetadata<'a> {
|
||||
disk_consistent_lsn: &'a Lsn,
|
||||
prev_record_lsn: &'a Option<Lsn>,
|
||||
ancestor_timeline: &'a Option<ZTimelineId>,
|
||||
ancestor_lsn: &'a Lsn,
|
||||
}
|
||||
|
||||
impl<'a> From<&'a TimelineMetadata> for SeTimelineMetadata<'a> {
|
||||
fn from(other: &'a TimelineMetadata) -> Self {
|
||||
Self {
|
||||
disk_consistent_lsn: &other.disk_consistent_lsn,
|
||||
prev_record_lsn: &other.prev_record_lsn,
|
||||
ancestor_timeline: &other.ancestor_timeline,
|
||||
ancestor_lsn: &other.ancestor_lsn,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub(super) struct DeTimelineMetadata {
|
||||
disk_consistent_lsn: Lsn,
|
||||
prev_record_lsn: Option<Lsn>,
|
||||
ancestor_timeline: Option<ZTimelineId>,
|
||||
ancestor_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl From<DeTimelineMetadata> for TimelineMetadata {
|
||||
fn from(other: DeTimelineMetadata) -> Self {
|
||||
Self {
|
||||
disk_consistent_lsn: other.disk_consistent_lsn,
|
||||
prev_record_lsn: other.prev_record_lsn,
|
||||
ancestor_timeline: other.ancestor_timeline,
|
||||
ancestor_lsn: other.ancestor_lsn,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::repository::repo_harness::TIMELINE_ID;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn metadata_serializes_correctly() {
|
||||
let original_metadata = TimelineMetadata {
|
||||
disk_consistent_lsn: Lsn(0x200),
|
||||
prev_record_lsn: Some(Lsn(0x100)),
|
||||
ancestor_timeline: Some(TIMELINE_ID),
|
||||
ancestor_lsn: Lsn(0),
|
||||
};
|
||||
|
||||
let metadata_bytes = original_metadata
|
||||
.to_bytes()
|
||||
.expect("Should serialize correct metadata to bytes");
|
||||
|
||||
let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes)
|
||||
.expect("Should deserialize its own bytes");
|
||||
|
||||
assert_eq!(
|
||||
deserialized_metadata, original_metadata,
|
||||
"Metadata that was serialized to bytes and deserialized back should not change"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
@@ -17,6 +18,7 @@ pub mod relish_storage;
|
||||
pub mod repository;
|
||||
pub mod restore_local_repo;
|
||||
pub mod tenant_mgr;
|
||||
pub mod tenant_threads;
|
||||
pub mod waldecoder;
|
||||
pub mod walreceiver;
|
||||
pub mod walredo;
|
||||
@@ -91,7 +93,7 @@ impl PageServerConf {
|
||||
//
|
||||
|
||||
fn tenants_path(&self) -> PathBuf {
|
||||
self.workdir.join("tenants")
|
||||
self.workdir.join(TENANTS_SEGMENT_NAME)
|
||||
}
|
||||
|
||||
fn tenant_path(&self, tenantid: &ZTenantId) -> PathBuf {
|
||||
@@ -115,7 +117,7 @@ impl PageServerConf {
|
||||
}
|
||||
|
||||
fn timelines_path(&self, tenantid: &ZTenantId) -> PathBuf {
|
||||
self.tenant_path(tenantid).join("timelines")
|
||||
self.tenant_path(tenantid).join(TIMELINES_SEGMENT_NAME)
|
||||
}
|
||||
|
||||
fn timeline_path(&self, timelineid: &ZTimelineId, tenantid: &ZTenantId) -> PathBuf {
|
||||
@@ -163,6 +165,15 @@ impl PageServerConf {
|
||||
}
|
||||
}
|
||||
|
||||
/// Config for the Repository checkpointer
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum CheckpointConfig {
|
||||
// Flush in-memory data that is older than this
|
||||
Distance(u64),
|
||||
// Flush all in-memory data
|
||||
Forced,
|
||||
}
|
||||
|
||||
/// External relish storage configuration, enough for creating a client for that storage.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RelishStorageConfig {
|
||||
|
||||
@@ -630,14 +630,16 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
|
||||
let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
|
||||
|
||||
let branches = crate::branches::get_branches(self.conf, &tenantid)?;
|
||||
// since these handlers for tenant/branch commands are deprecated (in favor of http based ones)
|
||||
// just use false in place of include non incremental logical size
|
||||
let branches = crate::branches::get_branches(self.conf, &tenantid, false)?;
|
||||
let branches_buf = serde_json::to_vec(&branches)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[Some(&branches_buf)]))?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
} else if query_string.starts_with("tenant_list") {
|
||||
let tenants = crate::branches::get_tenants(self.conf)?;
|
||||
let tenants = crate::tenant_mgr::list_tenants()?;
|
||||
let tenants_buf = serde_json::to_vec(&tenants)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
|
||||
@@ -1,60 +1,138 @@
|
||||
//! Abstractions for the page server to store its relish layer data in the external storage.
|
||||
//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.
|
||||
//! This particular module serves as a public API border between pageserver and the internal storage machinery.
|
||||
//! No other modules from this tree are supposed to be used directly by the external code.
|
||||
//!
|
||||
//! Main purpose of this module subtree is to provide a set of abstractions to manage the storage state
|
||||
//! in a way, optimal for page server.
|
||||
//! There are a few components the storage machinery consists of:
|
||||
//! * [`RelishStorage`] trait a CRUD-like generic abstraction to use for adapting external storages with a few implementations:
|
||||
//! * [`local_fs`] allows to use local file system as an external storage
|
||||
//! * [`rust_s3`] uses AWS S3 bucket entirely as an external storage
|
||||
//!
|
||||
//! The abstractions hide multiple custom external storage API implementations,
|
||||
//! such as AWS S3, local filesystem, etc., located in the submodules.
|
||||
//! * synchronization logic at [`storage_sync`] module that keeps pageserver state (both runtime one and the workdir files) and storage state in sync.
|
||||
//!
|
||||
//! * public API via to interact with the external world: [`run_storage_sync_thread`] and [`schedule_timeline_upload`]
|
||||
//!
|
||||
//! Here's a schematic overview of all interactions relish storage and the rest of the pageserver perform:
|
||||
//!
|
||||
//! +------------------------+ +--------->-------+
|
||||
//! | | - - - (init async loop) - - - -> | |
|
||||
//! | | | |
|
||||
//! | | -------------------------------> | async |
|
||||
//! | pageserver | (schedule frozen layer upload) | upload/download |
|
||||
//! | | | loop |
|
||||
//! | | <------------------------------- | |
|
||||
//! | | (register downloaded layers) | |
|
||||
//! +------------------------+ +---------<-------+
|
||||
//! |
|
||||
//! |
|
||||
//! CRUD layer file operations |
|
||||
//! (upload/download/delete/list, etc.) |
|
||||
//! V
|
||||
//! +------------------------+
|
||||
//! | |
|
||||
//! | [`RelishStorage`] impl |
|
||||
//! | |
|
||||
//! | pageserver assumes it |
|
||||
//! | owns exclusive write |
|
||||
//! | access to this storage |
|
||||
//! +------------------------+
|
||||
//!
|
||||
//! First, during startup, the pageserver inits the storage sync thread with the async loop, or leaves the loop unitialised, if configured so.
|
||||
//! Some time later, during pageserver checkpoints, in-memory data is flushed onto disk along with its metadata.
|
||||
//! If the storage sync loop was successfully started before, pageserver schedules the new image uploads after every checkpoint.
|
||||
//! See [`crate::layered_repository`] for the upload calls and the adjacent logic.
|
||||
//!
|
||||
//! The storage logic considers `image` as a set of local files, fully representing a certain timeline at given moment (identified with `disk_consistent_lsn`).
|
||||
//! Timeline can change its state, by adding more files on disk and advancing its `disk_consistent_lsn`: this happens after pageserver checkpointing and is followed
|
||||
//! by the storage upload, if enabled.
|
||||
//! When a certain image gets uploaded, the sync loop remembers the fact, preventing further reuploads of the same image state.
|
||||
//! No files are deleted from either local or remote storage, only the missing ones locally/remotely get downloaded/uploaded, local metadata file will be overwritten
|
||||
//! when the newer timeline is downloaded.
|
||||
//!
|
||||
//! Meanwhile, the loop inits the storage connection and checks the remote files stored.
|
||||
//! This is done once at startup only, relying on the fact that pageserver uses the storage alone (ergo, nobody else uploads the files to the storage but this server).
|
||||
//! Based on the remote image data, the storage sync logic queues image downloads, while accepting any potential upload tasks from pageserver and managing the tasks by their priority.
|
||||
//! On the image download, a [`crate::tenant_mgr::register_relish_download`] function is called to register the new image in pageserver, initializing all related threads and internal state.
|
||||
//!
|
||||
//! When the pageserver terminates, the upload loop finishes a current image sync task (if any) and exits.
|
||||
//!
|
||||
//! NOTES:
|
||||
//! * pageserver assumes it has exclusive write access to the relish storage. If supported, the way multiple pageservers can be separated in the same storage
|
||||
//! (i.e. using different directories in the local filesystem external storage), but totally up to the storage implementation and not covered with the trait API.
|
||||
//!
|
||||
//! * the uploads do not happen right after pageserver startup, they are registered when
|
||||
//! 1. pageserver does the checkpoint, which happens further in the future after the server start
|
||||
//! 2. pageserver loads the timeline from disk for the first time
|
||||
//!
|
||||
//! * the uploads do not happen right after the upload registration: the sync loop might be occupied with other tasks, or tasks with bigger priority could be waiting already
|
||||
//!
|
||||
//! * all synchronization tasks (including the public API to register uploads and downloads and the sync queue management) happens on an image scale: a big set of relish files,
|
||||
//! enough to represent (and recover, if needed) a certain timeline state. On the contrary, all internal storage CRUD calls are made per reilsh file from those images.
|
||||
//! This way, the synchronization is able to download the image partially, if some state was synced before, but exposes correctly synced images only.
|
||||
|
||||
mod local_fs;
|
||||
mod rust_s3;
|
||||
/// A queue-based storage with the background machinery behind it to synchronize
|
||||
/// local page server layer files with external storage.
|
||||
mod synced_storage;
|
||||
mod storage_sync;
|
||||
|
||||
use std::{path::Path, thread};
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
thread,
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{anyhow, ensure, Context};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
pub use self::synced_storage::schedule_timeline_upload;
|
||||
use self::{local_fs::LocalFs, rust_s3::RustS3};
|
||||
use crate::{PageServerConf, RelishStorageKind};
|
||||
pub use self::storage_sync::schedule_timeline_upload;
|
||||
use self::{local_fs::LocalFs, rust_s3::S3};
|
||||
use crate::{
|
||||
layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME},
|
||||
PageServerConf, RelishStorageKind,
|
||||
};
|
||||
|
||||
/// Based on the config, initiates the remote storage connection and starts a separate thread
|
||||
/// that ensures that pageserver and the remote storage are in sync with each other.
|
||||
/// If no external configuraion connection given, no thread or storage initialization is done.
|
||||
pub fn run_storage_sync_thread(
|
||||
config: &'static PageServerConf,
|
||||
) -> anyhow::Result<Option<thread::JoinHandle<anyhow::Result<()>>>> {
|
||||
match &config.relish_storage_config {
|
||||
Some(relish_storage_config) => {
|
||||
let max_concurrent_sync = relish_storage_config.max_concurrent_sync;
|
||||
match &relish_storage_config.storage {
|
||||
RelishStorageKind::LocalFs(root) => synced_storage::run_storage_sync_thread(
|
||||
let handle = match &relish_storage_config.storage {
|
||||
RelishStorageKind::LocalFs(root) => storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
LocalFs::new(root.clone())?,
|
||||
LocalFs::new(root.clone(), &config.workdir)?,
|
||||
max_concurrent_sync,
|
||||
),
|
||||
RelishStorageKind::AwsS3(s3_config) => synced_storage::run_storage_sync_thread(
|
||||
RelishStorageKind::AwsS3(s3_config) => storage_sync::spawn_storage_sync_thread(
|
||||
config,
|
||||
RustS3::new(s3_config)?,
|
||||
S3::new(s3_config, &config.workdir)?,
|
||||
max_concurrent_sync,
|
||||
),
|
||||
}
|
||||
};
|
||||
handle.map(Some)
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage (potentially remote) API to manage its state.
|
||||
/// This storage tries to be unaware of any layered repository context,
|
||||
/// providing basic CRUD operations with storage files.
|
||||
#[async_trait::async_trait]
|
||||
pub trait RelishStorage: Send + Sync {
|
||||
trait RelishStorage: Send + Sync {
|
||||
/// A way to uniquely reference relish in the remote storage.
|
||||
type RelishStoragePath;
|
||||
|
||||
fn derive_destination(
|
||||
page_server_workdir: &Path,
|
||||
relish_local_path: &Path,
|
||||
) -> anyhow::Result<Self::RelishStoragePath>;
|
||||
/// Attempts to derive the storage path out of the local path, if the latter is correct.
|
||||
fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::RelishStoragePath>;
|
||||
|
||||
/// Gets the layered storage information about the given entry.
|
||||
fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result<RemoteRelishInfo>;
|
||||
|
||||
/// Lists all items the storage has right now.
|
||||
async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>>;
|
||||
|
||||
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
|
||||
async fn download_relish<W: 'static + std::io::Write + Send>(
|
||||
&self,
|
||||
from: &Self::RelishStoragePath,
|
||||
@@ -65,6 +143,7 @@ pub trait RelishStorage: Send + Sync {
|
||||
|
||||
async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()>;
|
||||
|
||||
/// Streams the local file contents into remote into the remote storage entry.
|
||||
async fn upload_relish<R: tokio::io::AsyncRead + std::marker::Unpin + Send>(
|
||||
&self,
|
||||
from: &mut tokio::io::BufReader<R>,
|
||||
@@ -72,16 +151,173 @@ pub trait RelishStorage: Send + Sync {
|
||||
) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
fn strip_workspace_prefix<'a>(
|
||||
page_server_workdir: &'a Path,
|
||||
relish_local_path: &'a Path,
|
||||
) -> anyhow::Result<&'a Path> {
|
||||
relish_local_path
|
||||
.strip_prefix(page_server_workdir)
|
||||
.with_context(|| {
|
||||
/// Information about a certain remote storage entry.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct RemoteRelishInfo {
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
/// Path in the pageserver workdir where the file should go to.
|
||||
download_destination: PathBuf,
|
||||
is_metadata: bool,
|
||||
}
|
||||
|
||||
fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
|
||||
if prefix == path {
|
||||
anyhow::bail!(
|
||||
"Prefix and the path are equal, cannot strip: '{}'",
|
||||
prefix.display()
|
||||
)
|
||||
} else {
|
||||
path.strip_prefix(prefix).with_context(|| {
|
||||
format!(
|
||||
"Unexpected: relish local path '{}' is not relevant to server workdir",
|
||||
relish_local_path.display(),
|
||||
"Path '{}' is not prefixed with '{}'",
|
||||
path.display(),
|
||||
prefix.display(),
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_ids_from_path<'a, R: std::fmt::Display>(
|
||||
path_segments: impl Iterator<Item = &'a str>,
|
||||
path_log_representation: &R,
|
||||
) -> anyhow::Result<(ZTenantId, ZTimelineId)> {
|
||||
let mut segments = path_segments.skip_while(|&segment| segment != TENANTS_SEGMENT_NAME);
|
||||
let tenants_segment = segments.next().ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no '{}' segment in the storage path '{}'",
|
||||
TENANTS_SEGMENT_NAME,
|
||||
path_log_representation
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
tenants_segment == TENANTS_SEGMENT_NAME,
|
||||
"Failed to extract '{}' segment from storage path '{}'",
|
||||
TENANTS_SEGMENT_NAME,
|
||||
path_log_representation
|
||||
);
|
||||
let tenant_id = segments
|
||||
.next()
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no tenant id in the storage path '{}'",
|
||||
path_log_representation
|
||||
)
|
||||
})?
|
||||
.parse::<ZTenantId>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to parse tenant id from storage path '{}'",
|
||||
path_log_representation
|
||||
)
|
||||
})?;
|
||||
|
||||
let timelines_segment = segments.next().ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no '{}' segment in the storage path '{}'",
|
||||
TIMELINES_SEGMENT_NAME,
|
||||
path_log_representation
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
timelines_segment == TIMELINES_SEGMENT_NAME,
|
||||
"Failed to extract '{}' segment from storage path '{}'",
|
||||
TIMELINES_SEGMENT_NAME,
|
||||
path_log_representation
|
||||
);
|
||||
let timeline_id = segments
|
||||
.next()
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no timeline id in the storage path '{}'",
|
||||
path_log_representation
|
||||
)
|
||||
})?
|
||||
.parse::<ZTimelineId>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to parse timeline id from storage path '{}'",
|
||||
path_log_representation
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok((tenant_id, timeline_id))
|
||||
}
|
||||
|
||||
/// A set of common test utils to share in unit tests inside the module tree.
|
||||
#[cfg(test)]
|
||||
mod test_utils {
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::ensure;
|
||||
|
||||
use crate::{
|
||||
layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME},
|
||||
repository::repo_harness::{RepoHarness, TIMELINE_ID},
|
||||
};
|
||||
|
||||
/// Gives a timeline path with pageserver workdir stripped off.
|
||||
pub fn relative_timeline_path(harness: &RepoHarness) -> anyhow::Result<PathBuf> {
|
||||
let timeline_path = harness.timeline_path(&TIMELINE_ID);
|
||||
Ok(timeline_path
|
||||
.strip_prefix(&harness.conf.workdir)?
|
||||
.to_path_buf())
|
||||
}
|
||||
|
||||
/// Creates a path with custom tenant id in one of its segments.
|
||||
/// Useful for emulating paths with wrong ids.
|
||||
pub fn custom_tenant_id_path(
|
||||
path_with_tenant_id: &Path,
|
||||
new_tenant_id: &str,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let mut new_path = PathBuf::new();
|
||||
let mut is_tenant_id = false;
|
||||
let mut tenant_id_replaced = false;
|
||||
for segment in path_with_tenant_id {
|
||||
match segment.to_str() {
|
||||
Some(TENANTS_SEGMENT_NAME) => is_tenant_id = true,
|
||||
Some(_tenant_id_str) if is_tenant_id => {
|
||||
is_tenant_id = false;
|
||||
new_path.push(new_tenant_id);
|
||||
tenant_id_replaced = true;
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
new_path.push(segment)
|
||||
}
|
||||
|
||||
ensure!(tenant_id_replaced, "Found no tenant id segment to replace");
|
||||
Ok(new_path)
|
||||
}
|
||||
|
||||
/// Creates a path with custom timeline id in one of its segments.
|
||||
/// Useful for emulating paths with wrong ids.
|
||||
pub fn custom_timeline_id_path(
|
||||
path_with_timeline_id: &Path,
|
||||
new_timeline_id: &str,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let mut new_path = PathBuf::new();
|
||||
let mut is_timeline_id = false;
|
||||
let mut timeline_id_replaced = false;
|
||||
for segment in path_with_timeline_id {
|
||||
match segment.to_str() {
|
||||
Some(TIMELINES_SEGMENT_NAME) => is_timeline_id = true,
|
||||
Some(_timeline_id_str) if is_timeline_id => {
|
||||
is_timeline_id = false;
|
||||
new_path.push(new_timeline_id);
|
||||
timeline_id_replaced = true;
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
new_path.push(segment)
|
||||
}
|
||||
|
||||
ensure!(
|
||||
timeline_id_replaced,
|
||||
"Found no timeline id segment to replace"
|
||||
);
|
||||
Ok(new_path)
|
||||
}
|
||||
}
|
||||
|
||||
82
pageserver/src/relish_storage/README.md
Normal file
82
pageserver/src/relish_storage/README.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# Non-implementation details
|
||||
|
||||
This document describes the current state of the backup system in pageserver, existing limitations and concerns, why some things are done the way they are the future development plans.
|
||||
Detailed description on how the synchronization works and how it fits into the rest of the pageserver can be found in the [storage module](./../relish_storage.rs) and its submodules.
|
||||
Ideally, this document should disappear after current implementation concerns are mitigated, with the remaining useful knowledge bits moved into rustdocs.
|
||||
|
||||
## Approach
|
||||
|
||||
Backup functionality is a new component, appeared way after the core DB functionality was implemented.
|
||||
Pageserver layer functionality is also quite volatile at the moment, there's a risk its local file management changes over time.
|
||||
|
||||
To avoid adding more chaos into that, backup functionality is currently designed as a relatively standalone component, with the majority of its logic placed in a standalone async loop.
|
||||
This way, the backups are managed in background, not affecting directly other pageserver parts: this way the backup and restoration process may lag behind, but eventually keep up with the reality. To track that, a set of prometheus metrics is exposed from pageserver.
|
||||
|
||||
## What's done
|
||||
|
||||
Current implementation
|
||||
* provides remote storage wrappers for AWS S3 and local FS
|
||||
* uploads layers, frozen by pageserver checkpoint thread
|
||||
* downloads and registers layers, found on the remote storage, but missing locally
|
||||
|
||||
No good optimisations or performance testing is done, the feature is disabled by default and gets polished over time.
|
||||
It's planned to deal with all questions that are currently on and prepare the feature to be enabled by default in cloud environments.
|
||||
|
||||
### Peculiarities
|
||||
|
||||
As mentioned, the backup component is rather new and under development currently, so not all things are done properly from the start.
|
||||
Here's the list of known compromises with comments:
|
||||
|
||||
* Remote storage model is the same as the `tenants/` directory contents of the pageserver's local workdir storage.
|
||||
This is relatively simple to implement, but may be costly to use in AWS S3: an initial data image contains ~782 relish file and a metadata file, ~31 MB combined.
|
||||
AWS charges per API call and for traffic either, layers are expected to be updated frequently, so this model most probably is ineffective.
|
||||
Additionally, pageservers might need to migrate images between tenants, which does not improve the situation.
|
||||
|
||||
Storage sync API operates images when backing up or restoring a backup, so we're fluent to repack the layer contents the way we want to, which most probably will be done later.
|
||||
|
||||
* no proper file comparison
|
||||
|
||||
Currently, every layer contains `Lsn` in their name, to map the data it holds against a certain DB state.
|
||||
Then the images with same ids and different `Lsn`'s are compared, files are considered equal if their local file paths are equal (for remote files, "local file path" is their download destination).
|
||||
No file contents assertion is done currently, but should be.
|
||||
AWS S3 returns file checksums during the `list` operation, so that can be used to ensure the backup consistency, but that needs further research and, since current pageserver impl also needs to deal with layer file checksums.
|
||||
|
||||
For now, due to this, we consider local workdir files as source of truth, not removing them ever and adjusting remote files instead, if image files mismatch.
|
||||
|
||||
* no proper retry management
|
||||
|
||||
Now, the storage sync attempts to redo the upload/download operation for the image files that failed.
|
||||
No proper task eviction or backpressure is implemented currently: the tasks will stay in the queue forever, reattempting the downloads.
|
||||
|
||||
This will be fixed when more details on the file consistency model will be agreed on.
|
||||
|
||||
* sad rust-s3 api
|
||||
|
||||
rust-s3 is not very pleasant to use:
|
||||
1. it returns `anyhow::Result` and it's hard to distinguish "missing file" cases from "no connection" one, for instance
|
||||
2. at least one function it its API that we need (`get_object_stream`) has `async` keyword and blocks (!), see details [here](https://github.com/zenithdb/zenith/pull/752#discussion_r728373091)
|
||||
3. it's a prerelease library with unclear maintenance status
|
||||
4. noisy on debug level
|
||||
|
||||
But it's already used in the project, so for now it's reused to avoid bloating the dependency tree.
|
||||
Based on previous evaluation, even `rusoto-s3` could be a better choice over this library, but needs further benchmarking.
|
||||
|
||||
|
||||
* gc and branches are ignored
|
||||
|
||||
So far, we don't consider non-main images and don't adjust the remote storage based on GC thread loop results.
|
||||
Only checkpointer loop affects the remote storage.
|
||||
|
||||
* more layers should be downloaded on demand
|
||||
|
||||
Since we download and load remote layers into pageserver, there's a possibility a need for those layers' ancestors arise.
|
||||
Most probably, every downloaded image's ancestor is not present in locally too, but currently there's no logic for downloading such ancestors and their metadata,
|
||||
so the pageserver is unable to respond property on requests to such ancestors.
|
||||
|
||||
To implement the downloading, more `tenant_mgr` refactoring is needed to properly handle web requests for layers and handle the state changes.
|
||||
[Here](https://github.com/zenithdb/zenith/pull/689#issuecomment-931216193) are the details about initial state management updates needed.
|
||||
|
||||
* no IT tests
|
||||
|
||||
Automated S3 testing is lacking currently, due to no convenient way to enable backups during the tests.
|
||||
After it's fixed, benchmark runs should also be carried out to find bottlenecks.
|
||||
@@ -1,13 +1,11 @@
|
||||
//! Local filesystem relish storage.
|
||||
//! Multiple pageservers can use the same "storage" of this kind by using different storage roots.
|
||||
//!
|
||||
//! Page server already stores layer data on the server, when freezing it.
|
||||
//! This storage serves a way to
|
||||
//!
|
||||
//! * test things locally simply
|
||||
//! * allow to compabre both binary sets
|
||||
//! * help validating the relish storage API
|
||||
//! This storage used in pageserver tests, but can also be used in cases when a certain persistent
|
||||
//! volume is mounted to the local FS.
|
||||
|
||||
use std::{
|
||||
ffi::OsStr,
|
||||
future::Future,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
@@ -16,25 +14,32 @@ use std::{
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use tokio::{fs, io};
|
||||
use tracing::*;
|
||||
|
||||
use super::{strip_workspace_prefix, RelishStorage};
|
||||
use crate::layered_repository::metadata::METADATA_FILE_NAME;
|
||||
|
||||
use super::{parse_ids_from_path, strip_path_prefix, RelishStorage, RemoteRelishInfo};
|
||||
|
||||
pub struct LocalFs {
|
||||
pageserver_workdir: &'static Path,
|
||||
root: PathBuf,
|
||||
}
|
||||
|
||||
impl LocalFs {
|
||||
/// Atetmpts to create local FS relish storage, also creates the directory provided, if not exists.
|
||||
pub fn new(root: PathBuf) -> anyhow::Result<Self> {
|
||||
/// Attempts to create local FS relish storage, along with the storage root directory.
|
||||
pub fn new(root: PathBuf, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
|
||||
if !root.exists() {
|
||||
std::fs::create_dir_all(&root).with_context(|| {
|
||||
format!(
|
||||
"Failed to create all directories in the given root path {}",
|
||||
"Failed to create all directories in the given root path '{}'",
|
||||
root.display(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Ok(Self { root })
|
||||
Ok(Self {
|
||||
pageserver_workdir,
|
||||
root,
|
||||
})
|
||||
}
|
||||
|
||||
fn resolve_in_storage(&self, path: &Path) -> anyhow::Result<PathBuf> {
|
||||
@@ -55,11 +60,29 @@ impl LocalFs {
|
||||
impl RelishStorage for LocalFs {
|
||||
type RelishStoragePath = PathBuf;
|
||||
|
||||
fn derive_destination(
|
||||
page_server_workdir: &Path,
|
||||
relish_local_path: &Path,
|
||||
) -> anyhow::Result<Self::RelishStoragePath> {
|
||||
Ok(strip_workspace_prefix(page_server_workdir, relish_local_path)?.to_path_buf())
|
||||
fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::RelishStoragePath> {
|
||||
Ok(self.root.join(
|
||||
strip_path_prefix(self.pageserver_workdir, local_path)
|
||||
.context("local path does not belong to this storage")?,
|
||||
))
|
||||
}
|
||||
|
||||
fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result<RemoteRelishInfo> {
|
||||
let is_metadata =
|
||||
storage_path.file_name().and_then(OsStr::to_str) == Some(METADATA_FILE_NAME);
|
||||
let relative_path = strip_path_prefix(&self.root, storage_path)
|
||||
.context("local path does not belong to this storage")?;
|
||||
let download_destination = self.pageserver_workdir.join(relative_path);
|
||||
let (tenant_id, timeline_id) = parse_ids_from_path(
|
||||
relative_path.iter().filter_map(|segment| segment.to_str()),
|
||||
&relative_path.display(),
|
||||
)?;
|
||||
Ok(RemoteRelishInfo {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
download_destination,
|
||||
is_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>> {
|
||||
@@ -72,6 +95,7 @@ impl RelishStorage for LocalFs {
|
||||
mut to: std::io::BufWriter<W>,
|
||||
) -> anyhow::Result<std::io::BufWriter<W>> {
|
||||
let file_path = self.resolve_in_storage(from)?;
|
||||
|
||||
if file_path.exists() && file_path.is_file() {
|
||||
let updated_buffer = tokio::task::spawn_blocking(move || {
|
||||
let mut source = std::io::BufReader::new(
|
||||
@@ -104,7 +128,7 @@ impl RelishStorage for LocalFs {
|
||||
async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()> {
|
||||
let file_path = self.resolve_in_storage(path)?;
|
||||
if file_path.exists() && file_path.is_file() {
|
||||
Ok(tokio::fs::remove_file(file_path).await?)
|
||||
Ok(fs::remove_file(file_path).await?)
|
||||
} else {
|
||||
bail!(
|
||||
"File '{}' either does not exist or is not a file",
|
||||
@@ -152,12 +176,12 @@ where
|
||||
if directory_path.exists() {
|
||||
if directory_path.is_dir() {
|
||||
let mut paths = Vec::new();
|
||||
let mut dir_contents = tokio::fs::read_dir(directory_path).await?;
|
||||
let mut dir_contents = fs::read_dir(directory_path).await?;
|
||||
while let Some(dir_entry) = dir_contents.next_entry().await? {
|
||||
let file_type = dir_entry.file_type().await?;
|
||||
let entry_path = dir_entry.path();
|
||||
if file_type.is_symlink() {
|
||||
log::debug!("{:?} us a symlink, skipping", entry_path)
|
||||
debug!("{:?} us a symlink, skipping", entry_path)
|
||||
} else if file_type.is_dir() {
|
||||
paths.extend(get_all_files(entry_path).await?.into_iter())
|
||||
} else {
|
||||
@@ -183,7 +207,370 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>
|
||||
),
|
||||
};
|
||||
if !target_dir.exists() {
|
||||
tokio::fs::create_dir_all(target_dir).await?;
|
||||
fs::create_dir_all(target_dir).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod pure_tests {
|
||||
use crate::{
|
||||
layered_repository::metadata::METADATA_FILE_NAME,
|
||||
relish_storage::test_utils::{
|
||||
custom_tenant_id_path, custom_timeline_id_path, relative_timeline_path,
|
||||
},
|
||||
repository::repo_harness::{RepoHarness, TIMELINE_ID},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("storage_path_positive")?;
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
pageserver_workdir: &repo_harness.conf.workdir,
|
||||
root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("relish_name");
|
||||
let expected_path = storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?);
|
||||
|
||||
assert_eq!(
|
||||
expected_path,
|
||||
storage.storage_path(&local_path).expect("Matching path should map to storage path normally"),
|
||||
"Relish paths from pageserver workdir should be stored in local fs storage with the same path they have relative to the workdir"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
|
||||
match storage.storage_path(mismatching_path) {
|
||||
Ok(wrong_path) => panic!(
|
||||
"Expected path '{}' to error, but got storage path: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_path,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let repo_harness = RepoHarness::create("storage_path_negatives")?;
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
pageserver_workdir: &repo_harness.conf.workdir,
|
||||
root: storage_root,
|
||||
};
|
||||
|
||||
let error_string = storage_path_error(&storage, &repo_harness.conf.workdir);
|
||||
assert!(error_string.contains("does not belong to this storage"));
|
||||
assert!(error_string.contains(repo_harness.conf.workdir.to_str().unwrap()));
|
||||
|
||||
let mismatching_path_str = "/something/else";
|
||||
let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
|
||||
assert!(
|
||||
error_message.contains(mismatching_path_str),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(repo_harness.conf.workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(error_message.contains("does not belong to this storage"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn info_positive() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("info_positive")?;
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
pageserver_workdir: &repo_harness.conf.workdir,
|
||||
root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let name = "not a metadata";
|
||||
let local_path = repo_harness.timeline_path(&TIMELINE_ID).join(name);
|
||||
assert_eq!(
|
||||
RemoteRelishInfo {
|
||||
tenant_id: repo_harness.tenant_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
download_destination: local_path.clone(),
|
||||
is_metadata: false,
|
||||
},
|
||||
storage
|
||||
.info(&storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?))
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta relish"
|
||||
);
|
||||
|
||||
let local_metadata_path = repo_harness
|
||||
.timeline_path(&TIMELINE_ID)
|
||||
.join(METADATA_FILE_NAME);
|
||||
let remote_metadata_path = storage.storage_path(&local_metadata_path)?;
|
||||
assert_eq!(
|
||||
RemoteRelishInfo {
|
||||
tenant_id: repo_harness.tenant_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
download_destination: local_metadata_path,
|
||||
is_metadata: true,
|
||||
},
|
||||
storage
|
||||
.info(&remote_metadata_path)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn info_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
#[allow(clippy::ptr_arg)] // have to use &PathBuf due to `storage.info` parameter requirements
|
||||
fn storage_info_error(storage: &LocalFs, storage_path: &PathBuf) -> String {
|
||||
match storage.info(storage_path) {
|
||||
Ok(wrong_info) => panic!(
|
||||
"Expected storage path input {:?} to cause an error, but got relish info: {:?}",
|
||||
storage_path, wrong_info,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let repo_harness = RepoHarness::create("info_negatives")?;
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
pageserver_workdir: &repo_harness.conf.workdir,
|
||||
root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let totally_wrong_path = "wrong_wrong_wrong";
|
||||
let error_message = storage_info_error(&storage, &PathBuf::from(totally_wrong_path));
|
||||
assert!(error_message.contains(totally_wrong_path));
|
||||
|
||||
let relative_timeline_path = relative_timeline_path(&repo_harness)?;
|
||||
|
||||
let relative_relish_path =
|
||||
custom_tenant_id_path(&relative_timeline_path, "wrong_tenant_id")?
|
||||
.join("wrong_tenant_id_name");
|
||||
let wrong_tenant_id_path = storage_root.join(&relative_relish_path);
|
||||
let error_message = storage_info_error(&storage, &wrong_tenant_id_path);
|
||||
assert!(
|
||||
error_message.contains(relative_relish_path.to_str().unwrap()),
|
||||
"Error message '{}' does not contain the expected substring",
|
||||
error_message
|
||||
);
|
||||
|
||||
let relative_relish_path =
|
||||
custom_timeline_id_path(&relative_timeline_path, "wrong_timeline_id")?
|
||||
.join("wrong_timeline_id_name");
|
||||
let wrong_timeline_id_path = storage_root.join(&relative_relish_path);
|
||||
let error_message = storage_info_error(&storage, &wrong_timeline_id_path);
|
||||
assert!(
|
||||
error_message.contains(relative_relish_path.to_str().unwrap()),
|
||||
"Error message '{}' does not contain the expected substring",
|
||||
error_message
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("download_destination_matches_original_path")?;
|
||||
let original_path = repo_harness.timeline_path(&TIMELINE_ID).join("some name");
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let dummy_storage = LocalFs {
|
||||
pageserver_workdir: &repo_harness.conf.workdir,
|
||||
root: storage_root,
|
||||
};
|
||||
|
||||
let storage_path = dummy_storage.storage_path(&original_path)?;
|
||||
let download_destination = dummy_storage.info(&storage_path)?.download_destination;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod fs_tests {
|
||||
use crate::{
|
||||
relish_storage::test_utils::relative_timeline_path, repository::repo_harness::RepoHarness,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[tokio::test]
|
||||
async fn upload_relish() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("upload_relish")?;
|
||||
let storage = create_storage()?;
|
||||
|
||||
let mut source = create_file_for_upload(
|
||||
&storage.pageserver_workdir.join("whatever"),
|
||||
"whatever_contents",
|
||||
)
|
||||
.await?;
|
||||
let target_path = PathBuf::from("/").join("somewhere").join("else");
|
||||
match storage.upload_relish(&mut source, &target_path).await {
|
||||
Ok(()) => panic!("Should not allow storing files with wrong target path"),
|
||||
Err(e) => {
|
||||
let message = format!("{:?}", e);
|
||||
assert!(message.contains(&target_path.display().to_string()));
|
||||
assert!(message.contains("does not belong to the current storage"));
|
||||
}
|
||||
}
|
||||
assert!(storage.list_relishes().await?.is_empty());
|
||||
|
||||
let target_path_1 = upload_dummy_file(&repo_harness, &storage, "upload_1").await?;
|
||||
assert_eq!(
|
||||
storage.list_relishes().await?,
|
||||
vec![target_path_1.clone()],
|
||||
"Should list a single file after first upload"
|
||||
);
|
||||
|
||||
let target_path_2 = upload_dummy_file(&repo_harness, &storage, "upload_2").await?;
|
||||
assert_eq!(
|
||||
list_relishes_sorted(&storage).await?,
|
||||
vec![target_path_1.clone(), target_path_2.clone()],
|
||||
"Should list a two different files after second upload"
|
||||
);
|
||||
|
||||
// match storage.upload_relish(&mut source, &target_path_1).await {
|
||||
// Ok(()) => panic!("Should not allow reuploading storage files"),
|
||||
// Err(e) => {
|
||||
// let message = format!("{:?}", e);
|
||||
// assert!(message.contains(&target_path_1.display().to_string()));
|
||||
// assert!(message.contains("File exists"));
|
||||
// }
|
||||
// }
|
||||
assert_eq!(
|
||||
list_relishes_sorted(&storage).await?,
|
||||
vec![target_path_1, target_path_2],
|
||||
"Should list a two different files after all upload attempts"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_storage() -> anyhow::Result<LocalFs> {
|
||||
let pageserver_workdir = Box::leak(Box::new(tempdir()?.path().to_owned()));
|
||||
let storage = LocalFs::new(tempdir()?.path().to_owned(), pageserver_workdir)?;
|
||||
Ok(storage)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_relish() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("download_relish")?;
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
|
||||
|
||||
let contents_bytes = storage
|
||||
.download_relish(&upload_target, std::io::BufWriter::new(Vec::new()))
|
||||
.await?
|
||||
.into_inner()?;
|
||||
let contents = String::from_utf8(contents_bytes)?;
|
||||
assert_eq!(
|
||||
dummy_contents(upload_name),
|
||||
contents,
|
||||
"We should upload and download the same contents"
|
||||
);
|
||||
|
||||
let non_existing_path = PathBuf::from("somewhere").join("else");
|
||||
match storage
|
||||
.download_relish(&non_existing_path, std::io::BufWriter::new(Vec::new()))
|
||||
.await
|
||||
{
|
||||
Ok(_) => panic!("Should not allow downloading non-existing storage files"),
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("does not exist"));
|
||||
assert!(error_string.contains(&non_existing_path.display().to_string()));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn delete_relish() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("delete_relish")?;
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?;
|
||||
|
||||
storage.delete_relish(&upload_target).await?;
|
||||
assert!(storage.list_relishes().await?.is_empty());
|
||||
|
||||
match storage.delete_relish(&upload_target).await {
|
||||
Ok(()) => panic!("Should not allow deleting non-existing storage files"),
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("does not exist"));
|
||||
assert!(error_string.contains(&upload_target.display().to_string()));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn upload_dummy_file(
|
||||
harness: &RepoHarness,
|
||||
storage: &LocalFs,
|
||||
name: &str,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let storage_path = storage
|
||||
.root
|
||||
.join(relative_timeline_path(harness)?)
|
||||
.join(name);
|
||||
storage
|
||||
.upload_relish(
|
||||
&mut create_file_for_upload(
|
||||
&storage.pageserver_workdir.join(name),
|
||||
&dummy_contents(name),
|
||||
)
|
||||
.await?,
|
||||
&storage_path,
|
||||
)
|
||||
.await?;
|
||||
Ok(storage_path)
|
||||
}
|
||||
|
||||
async fn create_file_for_upload(
|
||||
path: &Path,
|
||||
contents: &str,
|
||||
) -> anyhow::Result<io::BufReader<fs::File>> {
|
||||
std::fs::create_dir_all(path.parent().unwrap())?;
|
||||
let mut file_for_writing = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(path)?;
|
||||
write!(file_for_writing, "{}", contents)?;
|
||||
drop(file_for_writing);
|
||||
Ok(io::BufReader::new(
|
||||
fs::OpenOptions::new().read(true).open(&path).await?,
|
||||
))
|
||||
}
|
||||
|
||||
fn dummy_contents(name: &str) -> String {
|
||||
format!("contents for {}", name)
|
||||
}
|
||||
|
||||
async fn list_relishes_sorted(storage: &LocalFs) -> anyhow::Result<Vec<PathBuf>> {
|
||||
let mut relishes = storage.list_relishes().await?;
|
||||
relishes.sort();
|
||||
Ok(relishes)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,45 @@
|
||||
//! A wrapper around AWS S3 client library `rust_s3` to be used a relish storage.
|
||||
//! AWS S3 relish storage wrapper around `rust_s3` library.
|
||||
//! Currently does not allow multiple pageservers to use the same bucket concurrently: relishes are
|
||||
//! placed in the root of the bucket.
|
||||
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use s3::{bucket::Bucket, creds::Credentials, region::Region};
|
||||
|
||||
use crate::{
|
||||
relish_storage::{strip_workspace_prefix, RelishStorage},
|
||||
layered_repository::metadata::METADATA_FILE_NAME,
|
||||
relish_storage::{parse_ids_from_path, strip_path_prefix, RelishStorage, RemoteRelishInfo},
|
||||
S3Config,
|
||||
};
|
||||
|
||||
const S3_FILE_SEPARATOR: char = '/';
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct S3ObjectKey(String);
|
||||
|
||||
impl S3ObjectKey {
|
||||
fn key(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
fn download_destination(&self, pageserver_workdir: &Path) -> PathBuf {
|
||||
pageserver_workdir.join(self.0.split(S3_FILE_SEPARATOR).collect::<PathBuf>())
|
||||
}
|
||||
}
|
||||
|
||||
/// AWS S3 relish storage.
|
||||
pub struct RustS3 {
|
||||
pub struct S3 {
|
||||
pageserver_workdir: &'static Path,
|
||||
bucket: Bucket,
|
||||
}
|
||||
|
||||
impl RustS3 {
|
||||
impl S3 {
|
||||
/// Creates the relish storage, errors if incorrect AWS S3 configuration provided.
|
||||
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
|
||||
pub fn new(aws_config: &S3Config, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
|
||||
let region = aws_config
|
||||
.bucket_region
|
||||
.parse::<Region>()
|
||||
@@ -49,19 +59,17 @@ impl RustS3 {
|
||||
credentials,
|
||||
)
|
||||
.context("Failed to create the s3 bucket")?,
|
||||
pageserver_workdir,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RelishStorage for RustS3 {
|
||||
impl RelishStorage for S3 {
|
||||
type RelishStoragePath = S3ObjectKey;
|
||||
|
||||
fn derive_destination(
|
||||
page_server_workdir: &Path,
|
||||
relish_local_path: &Path,
|
||||
) -> anyhow::Result<Self::RelishStoragePath> {
|
||||
let relative_path = strip_workspace_prefix(page_server_workdir, relish_local_path)?;
|
||||
fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::RelishStoragePath> {
|
||||
let relative_path = strip_path_prefix(self.pageserver_workdir, local_path)?;
|
||||
let mut key = String::new();
|
||||
for segment in relative_path {
|
||||
key.push(S3_FILE_SEPARATOR);
|
||||
@@ -70,6 +78,21 @@ impl RelishStorage for RustS3 {
|
||||
Ok(S3ObjectKey(key))
|
||||
}
|
||||
|
||||
fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result<RemoteRelishInfo> {
|
||||
let storage_path_key = &storage_path.0;
|
||||
let is_metadata =
|
||||
storage_path_key.ends_with(&format!("{}{}", S3_FILE_SEPARATOR, METADATA_FILE_NAME));
|
||||
let download_destination = storage_path.download_destination(self.pageserver_workdir);
|
||||
let (tenant_id, timeline_id) =
|
||||
parse_ids_from_path(storage_path_key.split(S3_FILE_SEPARATOR), storage_path_key)?;
|
||||
Ok(RemoteRelishInfo {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
download_destination,
|
||||
is_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>> {
|
||||
let list_response = self
|
||||
.bucket
|
||||
@@ -101,11 +124,11 @@ impl RelishStorage for RustS3 {
|
||||
))
|
||||
} else {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
to.flush().context("Failed to fluch the downoad buffer")?;
|
||||
to.flush().context("Failed to flush the download buffer")?;
|
||||
Ok::<_, anyhow::Error>(to)
|
||||
})
|
||||
.await
|
||||
.context("Failed to joim the download buffer flush task")?
|
||||
.context("Failed to join the download buffer flush task")?
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,9 +138,9 @@ impl RelishStorage for RustS3 {
|
||||
.delete_object(path.key())
|
||||
.await
|
||||
.with_context(|| format!("Failed to delete s3 object with key {}", path.key()))?;
|
||||
if code != 200 {
|
||||
if code != 204 {
|
||||
Err(anyhow::format_err!(
|
||||
"Received non-200 exit code during deleting object with key '{}', code: {}",
|
||||
"Received non-204 exit code during deleting object with key '{}', code: {}",
|
||||
path.key(),
|
||||
code
|
||||
))
|
||||
@@ -147,3 +170,226 @@ impl RelishStorage for RustS3 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
relish_storage::test_utils::{
|
||||
custom_tenant_id_path, custom_timeline_id_path, relative_timeline_path,
|
||||
},
|
||||
repository::repo_harness::{RepoHarness, TIMELINE_ID},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn download_destination() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("download_destination")?;
|
||||
|
||||
let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("test_name");
|
||||
let relative_path = local_path.strip_prefix(&repo_harness.conf.workdir)?;
|
||||
|
||||
let key = S3ObjectKey(format!(
|
||||
"{}{}",
|
||||
S3_FILE_SEPARATOR,
|
||||
relative_path
|
||||
.iter()
|
||||
.map(|segment| segment.to_str().unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
.join(&S3_FILE_SEPARATOR.to_string()),
|
||||
));
|
||||
|
||||
assert_eq!(
|
||||
local_path,
|
||||
key.download_destination(&repo_harness.conf.workdir),
|
||||
"Download destination should consist of s3 path joined with the pageserver workdir prefix"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("storage_path_positive")?;
|
||||
|
||||
let segment_1 = "matching";
|
||||
let segment_2 = "relish";
|
||||
let local_path = &repo_harness.conf.workdir.join(segment_1).join(segment_2);
|
||||
let expected_key = S3ObjectKey(format!(
|
||||
"{SEPARATOR}{}{SEPARATOR}{}",
|
||||
segment_1,
|
||||
segment_2,
|
||||
SEPARATOR = S3_FILE_SEPARATOR,
|
||||
));
|
||||
|
||||
let actual_key = dummy_storage(&repo_harness.conf.workdir)
|
||||
.storage_path(local_path)
|
||||
.expect("Matching path should map to S3 path normally");
|
||||
assert_eq!(
|
||||
expected_key,
|
||||
actual_key,
|
||||
"S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &S3, mismatching_path: &Path) -> String {
|
||||
match storage.storage_path(mismatching_path) {
|
||||
Ok(wrong_key) => panic!(
|
||||
"Expected path '{}' to error, but got S3 key: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_key,
|
||||
),
|
||||
Err(e) => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
let repo_harness = RepoHarness::create("storage_path_negatives")?;
|
||||
let storage = dummy_storage(&repo_harness.conf.workdir);
|
||||
|
||||
let error_message = storage_path_error(&storage, &repo_harness.conf.workdir);
|
||||
assert!(
|
||||
error_message.contains("Prefix and the path are equal"),
|
||||
"Message '{}' does not contain the required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
let mismatching_path = PathBuf::from("somewhere").join("else");
|
||||
let error_message = storage_path_error(&storage, &mismatching_path);
|
||||
assert!(
|
||||
error_message.contains(mismatching_path.to_str().unwrap()),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(repo_harness.conf.workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("is not prefixed with"),
|
||||
"Message '{}' does not contain a required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn info_positive() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("info_positive")?;
|
||||
let storage = dummy_storage(&repo_harness.conf.workdir);
|
||||
let relative_timeline_path = relative_timeline_path(&repo_harness)?;
|
||||
|
||||
let s3_key = create_s3_key(&relative_timeline_path.join("not a metadata"));
|
||||
assert_eq!(
|
||||
RemoteRelishInfo {
|
||||
tenant_id: repo_harness.tenant_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
download_destination: s3_key.download_destination(&repo_harness.conf.workdir),
|
||||
is_metadata: false,
|
||||
},
|
||||
storage
|
||||
.info(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta relish"
|
||||
);
|
||||
|
||||
let s3_key = create_s3_key(&relative_timeline_path.join(METADATA_FILE_NAME));
|
||||
assert_eq!(
|
||||
RemoteRelishInfo {
|
||||
tenant_id: repo_harness.tenant_id,
|
||||
timeline_id: TIMELINE_ID,
|
||||
download_destination: s3_key.download_destination(&repo_harness.conf.workdir),
|
||||
is_metadata: true,
|
||||
},
|
||||
storage
|
||||
.info(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn info_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_info_error(storage: &S3, s3_key: &S3ObjectKey) -> String {
|
||||
match storage.info(s3_key) {
|
||||
Ok(wrong_info) => panic!(
|
||||
"Expected key {:?} to error, but got relish info: {:?}",
|
||||
s3_key, wrong_info,
|
||||
),
|
||||
Err(e) => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
let repo_harness = RepoHarness::create("info_negatives")?;
|
||||
let storage = dummy_storage(&repo_harness.conf.workdir);
|
||||
let relative_timeline_path = relative_timeline_path(&repo_harness)?;
|
||||
|
||||
let totally_wrong_path = "wrong_wrong_wrong";
|
||||
let error_message =
|
||||
storage_info_error(&storage, &S3ObjectKey(totally_wrong_path.to_string()));
|
||||
assert!(error_message.contains(totally_wrong_path));
|
||||
|
||||
let wrong_tenant_id = create_s3_key(
|
||||
&custom_tenant_id_path(&relative_timeline_path, "wrong_tenant_id")?.join("name"),
|
||||
);
|
||||
let error_message = storage_info_error(&storage, &wrong_tenant_id);
|
||||
assert!(error_message.contains(&wrong_tenant_id.0));
|
||||
|
||||
let wrong_timeline_id = create_s3_key(
|
||||
&custom_timeline_id_path(&relative_timeline_path, "wrong_timeline_id")?.join("name"),
|
||||
);
|
||||
let error_message = storage_info_error(&storage, &wrong_timeline_id);
|
||||
assert!(error_message.contains(&wrong_timeline_id.0));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let repo_harness = RepoHarness::create("download_destination_matches_original_path")?;
|
||||
let original_path = repo_harness.timeline_path(&TIMELINE_ID).join("some name");
|
||||
|
||||
let dummy_storage = dummy_storage(&repo_harness.conf.workdir);
|
||||
|
||||
let key = dummy_storage.storage_path(&original_path)?;
|
||||
let download_destination = dummy_storage.info(&key)?.download_destination;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dummy_storage(pageserver_workdir: &'static Path) -> S3 {
|
||||
S3 {
|
||||
pageserver_workdir,
|
||||
bucket: Bucket::new(
|
||||
"dummy-bucket",
|
||||
"us-east-1".parse().unwrap(),
|
||||
Credentials::anonymous().unwrap(),
|
||||
)
|
||||
.unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_s3_key(relative_relish_path: &Path) -> S3ObjectKey {
|
||||
S3ObjectKey(
|
||||
relative_relish_path
|
||||
.iter()
|
||||
.fold(String::new(), |mut path_string, segment| {
|
||||
path_string.push(S3_FILE_SEPARATOR);
|
||||
path_string.push_str(segment.to_str().unwrap());
|
||||
path_string
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
1558
pageserver/src/relish_storage/storage_sync.rs
Normal file
1558
pageserver/src/relish_storage/storage_sync.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,57 +0,0 @@
|
||||
use std::time::Duration;
|
||||
use std::{collections::BinaryHeap, sync::Mutex, thread};
|
||||
|
||||
use crate::tenant_mgr;
|
||||
use crate::{relish_storage::RelishStorage, PageServerConf};
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref UPLOAD_QUEUE: Mutex<BinaryHeap<SyncTask>> = Mutex::new(BinaryHeap::new());
|
||||
}
|
||||
|
||||
pub fn schedule_timeline_upload(_local_timeline: ()) {
|
||||
// UPLOAD_QUEUE
|
||||
// .lock()
|
||||
// .unwrap()
|
||||
// .push(SyncTask::Upload(local_timeline))
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
enum SyncTask {}
|
||||
|
||||
pub fn run_storage_sync_thread<
|
||||
P: std::fmt::Debug,
|
||||
S: 'static + RelishStorage<RelishStoragePath = P>,
|
||||
>(
|
||||
config: &'static PageServerConf,
|
||||
relish_storage: S,
|
||||
max_concurrent_sync: usize,
|
||||
) -> anyhow::Result<Option<thread::JoinHandle<anyhow::Result<()>>>> {
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
let handle = thread::Builder::new()
|
||||
.name("Queue based relish storage sync".to_string())
|
||||
.spawn(move || {
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
|
||||
log::debug!("Upload queue length: {}", queue_accessor.len());
|
||||
let next_task = queue_accessor.pop();
|
||||
drop(queue_accessor);
|
||||
match next_task {
|
||||
Some(task) => runtime.block_on(async {
|
||||
// suppress warnings
|
||||
let _ = (config, task, &relish_storage, max_concurrent_sync);
|
||||
todo!("omitted for brevity")
|
||||
}),
|
||||
None => {
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
log::debug!("Queue based relish storage sync thread shut down");
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(Some(handle))
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::relish::*;
|
||||
use crate::CheckpointConfig;
|
||||
use anyhow::Result;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -24,9 +25,9 @@ pub trait Repository: Send + Sync {
|
||||
/// Branch a timeline
|
||||
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()>;
|
||||
|
||||
/// perform one garbage collection iteration.
|
||||
/// garbage collection is periodically performed by gc thread,
|
||||
/// but it can be explicitly requested through page server api.
|
||||
/// perform one garbage collection iteration, removing old data files from disk.
|
||||
/// this funtion is periodically called by gc thread.
|
||||
/// also it can be explicitly requested through page server api 'do_gc' command.
|
||||
///
|
||||
/// 'timelineid' specifies the timeline to GC, or None for all.
|
||||
/// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
|
||||
@@ -39,6 +40,10 @@ pub trait Repository: Send + Sync {
|
||||
horizon: u64,
|
||||
checkpoint_before_gc: bool,
|
||||
) -> Result<GcResult>;
|
||||
|
||||
/// perform one checkpoint iteration, flushing in-memory data on disk.
|
||||
/// this function is periodically called by checkponter thread.
|
||||
fn checkpoint_iteration(&self, cconf: CheckpointConfig) -> Result<()>;
|
||||
}
|
||||
|
||||
///
|
||||
@@ -119,6 +124,9 @@ pub trait Timeline: Send + Sync {
|
||||
/// Get a list of all existing non-relational objects
|
||||
fn list_nonrels(&self, lsn: Lsn) -> Result<HashSet<RelishTag>>;
|
||||
|
||||
/// Get the LSN where this branch was created
|
||||
fn get_ancestor_lsn(&self) -> Lsn;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Public PUT functions, to update the repository with new page versions.
|
||||
//
|
||||
@@ -131,6 +139,7 @@ pub trait Timeline: Send + Sync {
|
||||
fn get_last_record_lsn(&self) -> Lsn;
|
||||
fn get_prev_record_lsn(&self) -> Lsn;
|
||||
fn get_start_lsn(&self) -> Lsn;
|
||||
fn get_disk_consistent_lsn(&self) -> Lsn;
|
||||
|
||||
/// Mutate the timeline with a [`TimelineWriter`].
|
||||
fn writer<'a>(&'a self) -> Box<dyn TimelineWriter + 'a>;
|
||||
@@ -140,7 +149,7 @@ pub trait Timeline: Send + Sync {
|
||||
///
|
||||
/// NOTE: This has nothing to do with checkpoint in PostgreSQL. We don't
|
||||
/// know anything about them here in the repository.
|
||||
fn checkpoint(&self) -> Result<()>;
|
||||
fn checkpoint(&self, cconf: CheckpointConfig) -> Result<()>;
|
||||
|
||||
/// Retrieve current logical size of the timeline
|
||||
///
|
||||
@@ -210,27 +219,115 @@ impl WALRecord {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod repo_harness {
|
||||
use std::{fs, path::PathBuf};
|
||||
|
||||
use crate::{
|
||||
layered_repository::{LayeredRepository, TIMELINES_SEGMENT_NAME},
|
||||
walredo::{WalRedoError, WalRedoManager},
|
||||
PageServerConf,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
use hex_literal::hex;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
pub const TIMELINE_ID: ZTimelineId =
|
||||
ZTimelineId::from_array(hex!("11223344556677881122334455667788"));
|
||||
pub const NEW_TIMELINE_ID: ZTimelineId =
|
||||
ZTimelineId::from_array(hex!("AA223344556677881122334455667788"));
|
||||
|
||||
/// Convenience function to create a page image with given string as the only content
|
||||
#[allow(non_snake_case)]
|
||||
pub fn TEST_IMG(s: &str) -> Bytes {
|
||||
let mut buf = BytesMut::new();
|
||||
buf.extend_from_slice(s.as_bytes());
|
||||
buf.resize(8192, 0);
|
||||
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
pub struct RepoHarness {
|
||||
pub conf: &'static PageServerConf,
|
||||
pub tenant_id: ZTenantId,
|
||||
}
|
||||
|
||||
impl RepoHarness {
|
||||
pub fn create(test_name: &'static str) -> Result<Self> {
|
||||
let repo_dir = PageServerConf::test_repo_dir(test_name);
|
||||
let _ = fs::remove_dir_all(&repo_dir);
|
||||
fs::create_dir_all(&repo_dir)?;
|
||||
fs::create_dir_all(&repo_dir.join(TIMELINES_SEGMENT_NAME))?;
|
||||
|
||||
let conf = PageServerConf::dummy_conf(repo_dir);
|
||||
// Make a static copy of the config. This can never be free'd, but that's
|
||||
// OK in a test.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
|
||||
let tenant_id = ZTenantId::generate();
|
||||
fs::create_dir_all(conf.tenant_path(&tenant_id))?;
|
||||
|
||||
Ok(Self { conf, tenant_id })
|
||||
}
|
||||
|
||||
pub fn load(&self) -> Box<dyn Repository> {
|
||||
let walredo_mgr = Arc::new(TestRedoManager);
|
||||
|
||||
Box::new(LayeredRepository::new(
|
||||
self.conf,
|
||||
walredo_mgr,
|
||||
self.tenant_id,
|
||||
false,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf {
|
||||
self.conf.timeline_path(timeline_id, &self.tenant_id)
|
||||
}
|
||||
}
|
||||
|
||||
// Mock WAL redo manager that doesn't do much
|
||||
struct TestRedoManager;
|
||||
|
||||
impl WalRedoManager for TestRedoManager {
|
||||
fn request_redo(
|
||||
&self,
|
||||
rel: RelishTag,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let s = format!(
|
||||
"redo for {} blk {} to get to {}, with {} and {} records",
|
||||
rel,
|
||||
blknum,
|
||||
lsn,
|
||||
if base_img.is_some() {
|
||||
"base image"
|
||||
} else {
|
||||
"no base image"
|
||||
},
|
||||
records.len()
|
||||
);
|
||||
println!("{}", s);
|
||||
Ok(TEST_IMG(&s))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Tests that should work the same with any Repository/Timeline implementation.
|
||||
///
|
||||
#[allow(clippy::bool_assert_comparison)]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::layered_repository::{LayeredRepository, METADATA_FILE_NAME};
|
||||
use crate::walredo::{WalRedoError, WalRedoManager};
|
||||
use crate::PageServerConf;
|
||||
use hex_literal::hex;
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::xlog_utils::SIZEOF_CHECKPOINT;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
use crate::layered_repository::metadata::METADATA_FILE_NAME;
|
||||
|
||||
const TIMELINE_ID: ZTimelineId =
|
||||
ZTimelineId::from_array(hex!("11223344556677881122334455667788"));
|
||||
const NEW_TIMELINE_ID: ZTimelineId =
|
||||
ZTimelineId::from_array(hex!("AA223344556677881122334455667788"));
|
||||
use super::repo_harness::*;
|
||||
use super::*;
|
||||
use postgres_ffi::{pg_constants, xlog_utils::SIZEOF_CHECKPOINT};
|
||||
|
||||
/// Arbitrary relation tag, for testing.
|
||||
const TESTREL_A: RelishTag = RelishTag::Relation(RelTag {
|
||||
@@ -246,16 +343,6 @@ mod tests {
|
||||
forknum: 0,
|
||||
});
|
||||
|
||||
/// Convenience function to create a page image with given string as the only content
|
||||
#[allow(non_snake_case)]
|
||||
fn TEST_IMG(s: &str) -> Bytes {
|
||||
let mut buf = BytesMut::new();
|
||||
buf.extend_from_slice(s.as_bytes());
|
||||
buf.resize(8192, 0);
|
||||
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
fn assert_current_logical_size(timeline: &Arc<dyn Timeline>, lsn: Lsn) {
|
||||
let incremental = timeline.get_current_logical_size();
|
||||
let non_incremental = timeline
|
||||
@@ -267,45 +354,6 @@ mod tests {
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
|
||||
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
|
||||
|
||||
struct RepoHarness {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: ZTenantId,
|
||||
}
|
||||
|
||||
impl RepoHarness {
|
||||
fn create(test_name: &'static str) -> Result<Self> {
|
||||
let repo_dir = PageServerConf::test_repo_dir(test_name);
|
||||
let _ = fs::remove_dir_all(&repo_dir);
|
||||
fs::create_dir_all(&repo_dir)?;
|
||||
fs::create_dir_all(&repo_dir.join("timelines"))?;
|
||||
|
||||
let conf = PageServerConf::dummy_conf(repo_dir);
|
||||
// Make a static copy of the config. This can never be free'd, but that's
|
||||
// OK in a test.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
|
||||
let tenant_id = ZTenantId::generate();
|
||||
fs::create_dir_all(conf.tenant_path(&tenant_id))?;
|
||||
|
||||
Ok(Self { conf, tenant_id })
|
||||
}
|
||||
|
||||
fn load(&self) -> Box<dyn Repository> {
|
||||
let walredo_mgr = Arc::new(TestRedoManager);
|
||||
|
||||
Box::new(LayeredRepository::new(
|
||||
self.conf,
|
||||
walredo_mgr,
|
||||
self.tenant_id,
|
||||
false,
|
||||
))
|
||||
}
|
||||
|
||||
fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf {
|
||||
self.conf.timeline_path(timeline_id, &self.tenant_id)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relsize() -> Result<()> {
|
||||
let repo = RepoHarness::create("test_relsize")?.load();
|
||||
@@ -672,7 +720,7 @@ mod tests {
|
||||
.contains(&TESTREL_A));
|
||||
|
||||
// Run checkpoint and garbage collection and check that it's still not visible
|
||||
newtline.checkpoint()?;
|
||||
newtline.checkpoint(CheckpointConfig::Forced)?;
|
||||
repo.gc_iteration(Some(NEW_TIMELINE_ID), 0, true)?;
|
||||
|
||||
assert!(!newtline
|
||||
@@ -817,33 +865,4 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Mock WAL redo manager that doesn't do much
|
||||
struct TestRedoManager;
|
||||
|
||||
impl WalRedoManager for TestRedoManager {
|
||||
fn request_redo(
|
||||
&self,
|
||||
rel: RelishTag,
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let s = format!(
|
||||
"redo for {} blk {} to get to {}, with {} and {} records",
|
||||
rel,
|
||||
blknum,
|
||||
lsn,
|
||||
if base_img.is_some() {
|
||||
"base image"
|
||||
} else {
|
||||
"no base image"
|
||||
},
|
||||
records.len()
|
||||
);
|
||||
println!("{}", s);
|
||||
Ok(TEST_IMG(&s))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,10 +7,10 @@ use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||
use std::cmp::min;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use bytes::{Buf, Bytes};
|
||||
use tracing::*;
|
||||
|
||||
@@ -37,6 +37,8 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
writer: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
let mut pg_control: Option<ControlFileData> = None;
|
||||
|
||||
// Scan 'global'
|
||||
for direntry in fs::read_dir(path.join("global"))? {
|
||||
let direntry = direntry?;
|
||||
@@ -44,7 +46,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
None => continue,
|
||||
|
||||
Some("pg_control") => {
|
||||
import_control_file(writer, lsn, &direntry.path())?;
|
||||
pg_control = Some(import_control_file(writer, lsn, &direntry.path())?);
|
||||
}
|
||||
Some("pg_filenode.map") => import_nonrel_file(
|
||||
writer,
|
||||
@@ -127,6 +129,18 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
|
||||
writer.advance_last_record_lsn(lsn);
|
||||
|
||||
// Import WAL. This is needed even when starting from a shutdown checkpoint, because
|
||||
// this reads the checkpoint record itself, advancing the tip of the timeline to
|
||||
// *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'
|
||||
let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?;
|
||||
import_wal(
|
||||
&path.join("pg_wal"),
|
||||
writer,
|
||||
Lsn(pg_control.checkPointCopy.redo),
|
||||
lsn,
|
||||
&mut pg_control.checkPointCopy.clone(),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -212,7 +226,11 @@ fn import_nonrel_file(
|
||||
///
|
||||
/// The control file is imported as is, but we also extract the checkpoint record
|
||||
/// from it and store it separated.
|
||||
fn import_control_file(timeline: &dyn TimelineWriter, lsn: Lsn, path: &Path) -> Result<()> {
|
||||
fn import_control_file(
|
||||
timeline: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
path: &Path,
|
||||
) -> Result<ControlFileData> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = Vec::new();
|
||||
// read the whole file
|
||||
@@ -233,7 +251,7 @@ fn import_control_file(timeline: &dyn TimelineWriter, lsn: Lsn, path: &Path) ->
|
||||
let checkpoint_bytes = pg_control.checkPointCopy.encode();
|
||||
timeline.put_page_image(RelishTag::Checkpoint, 0, lsn, checkpoint_bytes)?;
|
||||
|
||||
Ok(())
|
||||
Ok(pg_control)
|
||||
}
|
||||
|
||||
///
|
||||
@@ -285,6 +303,104 @@ fn import_slru_file(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Scan PostgreSQL WAL files in given directory and load all records between
|
||||
/// 'startpoint' and 'endpoint' into the repository.
|
||||
fn import_wal(
|
||||
walpath: &Path,
|
||||
timeline: &dyn TimelineWriter,
|
||||
startpoint: Lsn,
|
||||
endpoint: Lsn,
|
||||
checkpoint: &mut CheckPoint,
|
||||
) -> Result<()> {
|
||||
let mut waldecoder = WalStreamDecoder::new(startpoint);
|
||||
|
||||
let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut last_lsn = startpoint;
|
||||
|
||||
while last_lsn <= endpoint {
|
||||
// FIXME: assume postgresql tli 1 for now
|
||||
let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut buf = Vec::new();
|
||||
|
||||
// Read local file
|
||||
let mut path = walpath.join(&filename);
|
||||
|
||||
// It could be as .partial
|
||||
if !PathBuf::from(&path).exists() {
|
||||
path = walpath.join(filename + ".partial");
|
||||
}
|
||||
|
||||
// Slurp the WAL file
|
||||
let mut file = File::open(&path)?;
|
||||
|
||||
if offset > 0 {
|
||||
file.seek(SeekFrom::Start(offset as u64))?;
|
||||
}
|
||||
|
||||
let nread = file.read_to_end(&mut buf)?;
|
||||
if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize {
|
||||
// Maybe allow this for .partial files?
|
||||
error!("read only {} bytes from WAL file", nread);
|
||||
}
|
||||
|
||||
waldecoder.feed_bytes(&buf);
|
||||
|
||||
let mut nrecords = 0;
|
||||
while last_lsn <= endpoint {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
let mut checkpoint_modified = false;
|
||||
|
||||
let decoded = decode_wal_record(recdata.clone());
|
||||
save_decoded_record(
|
||||
checkpoint,
|
||||
&mut checkpoint_modified,
|
||||
timeline,
|
||||
&decoded,
|
||||
recdata,
|
||||
lsn,
|
||||
)?;
|
||||
last_lsn = lsn;
|
||||
|
||||
if checkpoint_modified {
|
||||
let checkpoint_bytes = checkpoint.encode();
|
||||
timeline.put_page_image(
|
||||
RelishTag::Checkpoint,
|
||||
0,
|
||||
last_lsn,
|
||||
checkpoint_bytes,
|
||||
)?;
|
||||
}
|
||||
|
||||
// Now that this record has been fully handled, including updating the
|
||||
// checkpoint data, let the repository know that it is up-to-date to this LSN
|
||||
timeline.advance_last_record_lsn(last_lsn);
|
||||
nrecords += 1;
|
||||
|
||||
trace!("imported record at {} (end {})", lsn, endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("imported {} records up to {}", nrecords, last_lsn);
|
||||
|
||||
segno += 1;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
if last_lsn != startpoint {
|
||||
debug!(
|
||||
"reached end of WAL at {}, updating checkpoint info",
|
||||
last_lsn
|
||||
);
|
||||
|
||||
timeline.advance_last_record_lsn(last_lsn);
|
||||
} else {
|
||||
info!("no WAL to import at {}", last_lsn);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
|
||||
/// relations/pages that the record affects.
|
||||
|
||||
@@ -4,54 +4,89 @@
|
||||
use crate::branches;
|
||||
use crate::layered_repository::LayeredRepository;
|
||||
use crate::repository::{Repository, Timeline};
|
||||
use crate::tenant_threads;
|
||||
use crate::walredo::PostgresRedoManager;
|
||||
use crate::PageServerConf;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use log::{debug, info};
|
||||
use std::collections::hash_map::Entry;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::fs;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::thread::JoinHandle;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
lazy_static! {
|
||||
static ref REPOSITORY: Mutex<HashMap<ZTenantId, Arc<dyn Repository>>> =
|
||||
Mutex::new(HashMap::new());
|
||||
static ref TENANTS: Mutex<HashMap<ZTenantId, Tenant>> = Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
fn access_repository() -> MutexGuard<'static, HashMap<ZTenantId, Arc<dyn Repository>>> {
|
||||
REPOSITORY.lock().unwrap()
|
||||
}
|
||||
struct TenantHandleEntry {
|
||||
checkpointer_handle: Option<JoinHandle<()>>,
|
||||
gc_handle: Option<JoinHandle<()>>,
|
||||
struct Tenant {
|
||||
state: TenantState,
|
||||
repo: Option<Arc<dyn Repository>>,
|
||||
}
|
||||
|
||||
// Logically these handles belong to Repository,
|
||||
// but it's just simpler to store them separately
|
||||
lazy_static! {
|
||||
static ref TENANT_HANDLES: Mutex<HashMap<ZTenantId, TenantHandleEntry>> =
|
||||
Mutex::new(HashMap::new());
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum TenantState {
|
||||
// This tenant only exists in cloud storage. It cannot be accessed.
|
||||
CloudOnly,
|
||||
// This tenant exists in cloud storage, and we are currently downloading it to local disk.
|
||||
// It cannot be accessed yet, not until it's been fully downloaded to local disk.
|
||||
Downloading,
|
||||
// All data for this tenant is complete on local disk, but we haven't loaded the Repository,
|
||||
// Timeline and Layer structs into memory yet, so it cannot be accessed yet.
|
||||
//Ready,
|
||||
// This tenant exists on local disk, and the layer map has been loaded into memory.
|
||||
// The local disk might have some newer files that don't exist in cloud storage yet.
|
||||
Active,
|
||||
// Tenant is active, but there is no walreceiver connection.
|
||||
Idle,
|
||||
// This tenant exists on local disk, and the layer map has been loaded into memory.
|
||||
// The local disk might have some newer files that don't exist in cloud storage yet.
|
||||
// The tenant cannot be accessed anymore for any reason, but graceful shutdown.
|
||||
Stopping,
|
||||
}
|
||||
|
||||
impl fmt::Display for TenantState {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
TenantState::CloudOnly => f.write_str("CloudOnly"),
|
||||
TenantState::Downloading => f.write_str("Downloading"),
|
||||
TenantState::Active => f.write_str("Active"),
|
||||
TenantState::Idle => f.write_str("Idle"),
|
||||
TenantState::Stopping => f.write_str("Stopping"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn access_tenants() -> MutexGuard<'static, HashMap<ZTenantId, Tenant>> {
|
||||
TENANTS.lock().unwrap()
|
||||
}
|
||||
|
||||
static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub fn init(conf: &'static PageServerConf) {
|
||||
let mut m = access_repository();
|
||||
for dir_entry in fs::read_dir(conf.tenants_path()).unwrap() {
|
||||
let tenantid =
|
||||
ZTenantId::from_str(dir_entry.unwrap().file_name().to_str().unwrap()).unwrap();
|
||||
let repo = init_repo(conf, tenantid);
|
||||
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
let tenant = Tenant {
|
||||
state: TenantState::CloudOnly,
|
||||
repo: None,
|
||||
};
|
||||
m.insert(tenantid, tenant);
|
||||
}
|
||||
|
||||
init_repo(conf, tenantid);
|
||||
info!("initialized storage for tenant: {}", &tenantid);
|
||||
m.insert(tenantid, repo);
|
||||
}
|
||||
}
|
||||
|
||||
fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc<LayeredRepository> {
|
||||
fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
|
||||
// Set up a WAL redo manager, for applying WAL records.
|
||||
let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);
|
||||
|
||||
@@ -63,22 +98,15 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc<Layered
|
||||
true,
|
||||
));
|
||||
|
||||
let checkpointer_handle = LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
|
||||
let gc_handle = LayeredRepository::launch_gc_thread(conf, repo.clone());
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.get_mut(&tenant_id).unwrap();
|
||||
tenant.repo = Some(repo);
|
||||
tenant.state = TenantState::Active;
|
||||
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
let h = TenantHandleEntry {
|
||||
checkpointer_handle: Some(checkpointer_handle),
|
||||
gc_handle: Some(gc_handle),
|
||||
};
|
||||
|
||||
handles.insert(tenant_id, h);
|
||||
|
||||
repo
|
||||
// TODO Start these threads only if tenant actively receives some WAL
|
||||
tenant_threads::start_tenant_threads(conf, tenant_id);
|
||||
}
|
||||
|
||||
// TODO kb Currently unused function, will later be used when the relish storage downloads a new layer.
|
||||
// Relevant PR: https://github.com/zenithdb/zenith/pull/686
|
||||
pub fn register_relish_download(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: ZTenantId,
|
||||
@@ -89,15 +117,29 @@ pub fn register_relish_download(
|
||||
tenant_id,
|
||||
timeline_id
|
||||
);
|
||||
match access_repository().entry(tenant_id) {
|
||||
Entry::Occupied(o) => init_timeline(o.get().as_ref(), timeline_id),
|
||||
Entry::Vacant(v) => {
|
||||
log::info!("New repo initialized");
|
||||
let new_repo = init_repo(conf, tenant_id);
|
||||
init_timeline(new_repo.as_ref(), timeline_id);
|
||||
v.insert(new_repo);
|
||||
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.entry(tenant_id).or_insert_with(|| Tenant {
|
||||
state: TenantState::Downloading,
|
||||
repo: None,
|
||||
});
|
||||
tenant.state = TenantState::Downloading;
|
||||
match &tenant.repo {
|
||||
Some(repo) => {
|
||||
init_timeline(repo.as_ref(), timeline_id);
|
||||
tenant.state = TenantState::Active;
|
||||
return;
|
||||
}
|
||||
None => log::warn!("Initialize new repo"),
|
||||
}
|
||||
tenant.state = TenantState::Active;
|
||||
}
|
||||
|
||||
// init repo updates Tenant state
|
||||
init_repo(conf, tenant_id);
|
||||
let new_repo = get_repository_for_tenant(tenant_id).unwrap();
|
||||
init_timeline(new_repo.as_ref(), timeline_id);
|
||||
}
|
||||
|
||||
fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) {
|
||||
@@ -112,27 +154,23 @@ pub fn shutdown_requested() -> bool {
|
||||
SHUTDOWN_REQUESTED.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn stop_tenant_threads(tenantid: ZTenantId) {
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
if let Some(h) = handles.get_mut(&tenantid) {
|
||||
h.checkpointer_handle.take().map(JoinHandle::join);
|
||||
debug!("checkpointer for tenant {} has stopped", tenantid);
|
||||
h.gc_handle.take().map(JoinHandle::join);
|
||||
debug!("gc for tenant {} has stopped", tenantid);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn shutdown_all_tenants() -> Result<()> {
|
||||
SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
|
||||
|
||||
let tenants = list_tenants()?;
|
||||
for tenantid in tenants {
|
||||
stop_tenant_threads(tenantid);
|
||||
let tenantids = list_tenantids()?;
|
||||
|
||||
for tenantid in &tenantids {
|
||||
set_tenant_state(*tenantid, TenantState::Stopping)?;
|
||||
}
|
||||
|
||||
for tenantid in tenantids {
|
||||
// Wait for checkpointer and GC to finish their job
|
||||
tenant_threads::wait_for_tenant_threads_to_stop(tenantid);
|
||||
|
||||
let repo = get_repository_for_tenant(tenantid)?;
|
||||
debug!("shutdown tenant {}", tenantid);
|
||||
repo.shutdown()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -140,25 +178,67 @@ pub fn create_repository_for_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
) -> Result<()> {
|
||||
let mut m = access_repository();
|
||||
|
||||
// First check that the tenant doesn't exist already
|
||||
if m.get(&tenantid).is_some() {
|
||||
bail!("tenant {} already exists", tenantid);
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
// First check that the tenant doesn't exist already
|
||||
if m.get(&tenantid).is_some() {
|
||||
bail!("tenant {} already exists", tenantid);
|
||||
}
|
||||
let tenant = Tenant {
|
||||
state: TenantState::CloudOnly,
|
||||
repo: None,
|
||||
};
|
||||
m.insert(tenantid, tenant);
|
||||
}
|
||||
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenantid));
|
||||
let repo = branches::create_repo(conf, tenantid, wal_redo_manager)?;
|
||||
|
||||
m.insert(tenantid, repo);
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.get_mut(&tenantid).unwrap();
|
||||
tenant.repo = Some(repo);
|
||||
tenant.state = TenantState::Active;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// If tenant is not found in the repository, return CloudOnly state
|
||||
pub fn get_tenant_state(tenantid: ZTenantId) -> TenantState {
|
||||
let m = access_tenants();
|
||||
match m.get(&tenantid) {
|
||||
Some(tenant) => tenant.state,
|
||||
None => TenantState::CloudOnly,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_tenant_state(tenantid: ZTenantId, state: TenantState) -> Result<TenantState> {
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.get_mut(&tenantid);
|
||||
|
||||
match tenant {
|
||||
Some(tenant) => {
|
||||
if state == TenantState::Idle && tenant.state != TenantState::Active {
|
||||
// Only Active tenant can become Idle
|
||||
return Ok(tenant.state);
|
||||
}
|
||||
info!("set_tenant_state: {} -> {}", tenant.state, state);
|
||||
tenant.state = state;
|
||||
Ok(tenant.state)
|
||||
}
|
||||
None => bail!("Tenant not found for tenant {}", tenantid),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Repository>> {
|
||||
access_repository()
|
||||
let m = access_tenants();
|
||||
let tenant = m
|
||||
.get(&tenantid)
|
||||
.map(Arc::clone)
|
||||
.ok_or_else(|| anyhow!("repository not found for tenant name {}", tenantid))
|
||||
.ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid))?;
|
||||
|
||||
match &tenant.repo {
|
||||
Some(repo) => Ok(Arc::clone(repo)),
|
||||
None => anyhow::bail!("Repository for tenant {} is not yet valid", tenantid),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_timeline_for_tenant(
|
||||
@@ -170,13 +250,32 @@ pub fn get_timeline_for_tenant(
|
||||
.with_context(|| format!("cannot fetch timeline {}", timelineid))
|
||||
}
|
||||
|
||||
fn list_tenants() -> Result<Vec<ZTenantId>> {
|
||||
let o = &mut REPOSITORY.lock().unwrap();
|
||||
|
||||
o.iter()
|
||||
.map(|tenant| {
|
||||
let (tenantid, _) = tenant;
|
||||
fn list_tenantids() -> Result<Vec<ZTenantId>> {
|
||||
let m = access_tenants();
|
||||
m.iter()
|
||||
.map(|v| {
|
||||
let (tenantid, _) = v;
|
||||
Ok(*tenantid)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde(with = "hex")]
|
||||
pub id: ZTenantId,
|
||||
pub state: TenantState,
|
||||
}
|
||||
|
||||
pub fn list_tenants() -> Result<Vec<TenantInfo>> {
|
||||
let m = access_tenants();
|
||||
m.iter()
|
||||
.map(|v| {
|
||||
let (id, tenant) = v;
|
||||
Ok(TenantInfo {
|
||||
id: *id,
|
||||
state: tenant.state,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
125
pageserver/src/tenant_threads.rs
Normal file
125
pageserver/src/tenant_threads.rs
Normal file
@@ -0,0 +1,125 @@
|
||||
//! This module contains functions to serve per-tenant background processes,
|
||||
//! such as checkpointer and GC
|
||||
use crate::tenant_mgr;
|
||||
use crate::tenant_mgr::TenantState;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::PageServerConf;
|
||||
use anyhow::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
use std::thread::JoinHandle;
|
||||
use std::time::Duration;
|
||||
use tracing::*;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
|
||||
struct TenantHandleEntry {
|
||||
checkpointer_handle: Option<JoinHandle<()>>,
|
||||
gc_handle: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
// Preserve handles to wait for thread completion
|
||||
// at shutdown
|
||||
lazy_static! {
|
||||
static ref TENANT_HANDLES: Mutex<HashMap<ZTenantId, TenantHandleEntry>> =
|
||||
Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
pub fn start_tenant_threads(conf: &'static PageServerConf, tenantid: ZTenantId) {
|
||||
//ensure that old threads are stopeed
|
||||
wait_for_tenant_threads_to_stop(tenantid);
|
||||
|
||||
let checkpointer_handle = std::thread::Builder::new()
|
||||
.name("Checkpointer thread".into())
|
||||
.spawn(move || {
|
||||
checkpoint_loop(tenantid, conf).expect("Checkpointer thread died");
|
||||
})
|
||||
.ok();
|
||||
|
||||
let gc_handle = std::thread::Builder::new()
|
||||
.name("GC thread".into())
|
||||
.spawn(move || {
|
||||
gc_loop(tenantid, conf).expect("GC thread died");
|
||||
})
|
||||
.ok();
|
||||
|
||||
// TODO handle thread errors if any
|
||||
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
let h = TenantHandleEntry {
|
||||
checkpointer_handle,
|
||||
gc_handle,
|
||||
};
|
||||
|
||||
handles.insert(tenantid, h);
|
||||
}
|
||||
|
||||
pub fn wait_for_tenant_threads_to_stop(tenantid: ZTenantId) {
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
if let Some(h) = handles.get_mut(&tenantid) {
|
||||
h.checkpointer_handle.take().map(JoinHandle::join);
|
||||
trace!("checkpointer for tenant {} has stopped", tenantid);
|
||||
h.gc_handle.take().map(JoinHandle::join);
|
||||
trace!("gc for tenant {} has stopped", tenantid);
|
||||
}
|
||||
handles.remove(&tenantid);
|
||||
}
|
||||
|
||||
///
|
||||
/// Checkpointer thread's main loop
|
||||
///
|
||||
fn checkpoint_loop(tenantid: ZTenantId, conf: &'static PageServerConf) -> Result<()> {
|
||||
loop {
|
||||
if tenant_mgr::get_tenant_state(tenantid) != TenantState::Active {
|
||||
break;
|
||||
}
|
||||
|
||||
std::thread::sleep(conf.checkpoint_period);
|
||||
trace!("checkpointer thread for tenant {} waking up", tenantid);
|
||||
|
||||
// checkpoint timelines that have accumulated more than CHECKPOINT_DISTANCE
|
||||
// bytes of WAL since last checkpoint.
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
repo.checkpoint_iteration(CheckpointConfig::Distance(conf.checkpoint_distance))?;
|
||||
}
|
||||
|
||||
trace!(
|
||||
"checkpointer thread stopped for tenant {} state is {}",
|
||||
tenantid,
|
||||
tenant_mgr::get_tenant_state(tenantid)
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// GC thread's main loop
|
||||
///
|
||||
fn gc_loop(tenantid: ZTenantId, conf: &'static PageServerConf) -> Result<()> {
|
||||
loop {
|
||||
if tenant_mgr::get_tenant_state(tenantid) != TenantState::Active {
|
||||
break;
|
||||
}
|
||||
|
||||
trace!("gc thread for tenant {} waking up", tenantid);
|
||||
|
||||
// Garbage collect old files that are not needed for PITR anymore
|
||||
if conf.gc_horizon > 0 {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
repo.gc_iteration(None, conf.gc_horizon, false).unwrap();
|
||||
}
|
||||
|
||||
// TODO Write it in more adequate way using
|
||||
// condvar.wait_timeout() or something
|
||||
let mut sleep_time = conf.gc_period.as_secs();
|
||||
while sleep_time > 0 && tenant_mgr::get_tenant_state(tenantid) == TenantState::Active {
|
||||
sleep_time -= 1;
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
trace!(
|
||||
"GC thread stopped for tenant {} state is {}",
|
||||
tenantid,
|
||||
tenant_mgr::get_tenant_state(tenantid)
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
@@ -284,12 +284,14 @@ fn walreceiver_main(
|
||||
if let Some(last_lsn) = status_update {
|
||||
// TODO: More thought should go into what values are sent here.
|
||||
let last_lsn = PgLsn::from(u64::from(last_lsn));
|
||||
let write_lsn = last_lsn;
|
||||
// We are using disk consistent LSN as `write_lsn`, i.e. LSN at which page server
|
||||
// may guarantee persistence of all received data. Safekeeper is not free to remove
|
||||
// WAL preceding `write_lsn`: it should not be requested by this page server.
|
||||
let write_lsn = PgLsn::from(u64::from(timeline.get_disk_consistent_lsn()));
|
||||
let flush_lsn = last_lsn;
|
||||
let apply_lsn = PgLsn::from(0);
|
||||
let ts = SystemTime::now();
|
||||
const NO_REPLY: u8 = 0;
|
||||
|
||||
physical_stream.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)?;
|
||||
}
|
||||
|
||||
|
||||
@@ -153,6 +153,13 @@ struct WalRedoRequest {
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
}
|
||||
|
||||
impl WalRedoRequest {
|
||||
// Can this request be served by zenith redo funcitons
|
||||
// or we need to pass it to wal-redo postgres process?
|
||||
fn can_apply_in_zenith(&self) -> bool {
|
||||
!matches!(self.rel, RelishTag::Relation(_))
|
||||
}
|
||||
}
|
||||
/// An error happened in WAL redo
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WalRedoError {
|
||||
@@ -161,6 +168,8 @@ pub enum WalRedoError {
|
||||
|
||||
#[error("cannot perform WAL redo now")]
|
||||
InvalidState,
|
||||
#[error("cannot perform WAL redo for this request")]
|
||||
InvalidRequest,
|
||||
}
|
||||
|
||||
///
|
||||
@@ -182,7 +191,6 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let start_time;
|
||||
let lock_time;
|
||||
let end_time;
|
||||
|
||||
let request = WalRedoRequest {
|
||||
@@ -194,9 +202,16 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
};
|
||||
|
||||
start_time = Instant::now();
|
||||
let result = {
|
||||
let result;
|
||||
|
||||
if request.can_apply_in_zenith() {
|
||||
result = self.handle_apply_request_zenith(&request);
|
||||
|
||||
end_time = Instant::now();
|
||||
WAL_REDO_TIME.observe(end_time.duration_since(start_time).as_secs_f64());
|
||||
} else {
|
||||
let mut process_guard = self.process.lock().unwrap();
|
||||
lock_time = Instant::now();
|
||||
let lock_time = Instant::now();
|
||||
|
||||
// launch the WAL redo process on first use
|
||||
if process_guard.is_none() {
|
||||
@@ -207,13 +222,14 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
}
|
||||
let process = process_guard.as_mut().unwrap();
|
||||
|
||||
self.runtime
|
||||
.block_on(self.handle_apply_request(process, &request))
|
||||
};
|
||||
end_time = Instant::now();
|
||||
result = self
|
||||
.runtime
|
||||
.block_on(self.handle_apply_request_postgres(process, &request));
|
||||
|
||||
WAL_REDO_WAIT_TIME.observe(lock_time.duration_since(start_time).as_secs_f64());
|
||||
WAL_REDO_TIME.observe(end_time.duration_since(lock_time).as_secs_f64());
|
||||
WAL_REDO_WAIT_TIME.observe(lock_time.duration_since(start_time).as_secs_f64());
|
||||
end_time = Instant::now();
|
||||
WAL_REDO_TIME.observe(end_time.duration_since(lock_time).as_secs_f64());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
@@ -242,13 +258,47 @@ impl PostgresRedoManager {
|
||||
}
|
||||
|
||||
///
|
||||
/// Process one request for WAL redo.
|
||||
/// Process one request for WAL redo using wal-redo postgres
|
||||
///
|
||||
async fn handle_apply_request(
|
||||
async fn handle_apply_request_postgres(
|
||||
&self,
|
||||
process: &mut PostgresRedoProcess,
|
||||
request: &WalRedoRequest,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let blknum = request.blknum;
|
||||
let lsn = request.lsn;
|
||||
let base_img = request.base_img.clone();
|
||||
let records = &request.records;
|
||||
let nrecords = records.len();
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let apply_result: Result<Bytes, Error>;
|
||||
|
||||
if let RelishTag::Relation(rel) = request.rel {
|
||||
// Relational WAL records are applied using wal-redo-postgres
|
||||
let buf_tag = BufferTag { rel, blknum };
|
||||
apply_result = process.apply_wal_records(buf_tag, base_img, records).await;
|
||||
|
||||
let duration = start.elapsed();
|
||||
|
||||
debug!(
|
||||
"postgres applied {} WAL records in {} ms to reconstruct page image at LSN {}",
|
||||
nrecords,
|
||||
duration.as_millis(),
|
||||
lsn
|
||||
);
|
||||
|
||||
apply_result.map_err(WalRedoError::IoError)
|
||||
} else {
|
||||
Err(WalRedoError::InvalidRequest)
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Process one request for WAL redo using custom zenith code
|
||||
///
|
||||
fn handle_apply_request_zenith(&self, request: &WalRedoRequest) -> Result<Bytes, WalRedoError> {
|
||||
let rel = request.rel;
|
||||
let blknum = request.blknum;
|
||||
let lsn = request.lsn;
|
||||
@@ -260,178 +310,158 @@ impl PostgresRedoManager {
|
||||
let start = Instant::now();
|
||||
|
||||
let apply_result: Result<Bytes, Error>;
|
||||
if let RelishTag::Relation(rel) = rel {
|
||||
// Relational WAL records are applied using wal-redo-postgres
|
||||
let buf_tag = BufferTag { rel, blknum };
|
||||
apply_result = process.apply_wal_records(buf_tag, base_img, records).await;
|
||||
|
||||
// Non-relational WAL records are handled here, with custom code that has the
|
||||
// same effects as the corresponding Postgres WAL redo function.
|
||||
const ZERO_PAGE: [u8; 8192] = [0u8; 8192];
|
||||
let mut page = BytesMut::new();
|
||||
if let Some(fpi) = base_img {
|
||||
// If full-page image is provided, then use it...
|
||||
page.extend_from_slice(&fpi[..]);
|
||||
} else {
|
||||
// Non-relational WAL records are handled here, with custom code that has the
|
||||
// same effects as the corresponding Postgres WAL redo function.
|
||||
const ZERO_PAGE: [u8; 8192] = [0u8; 8192];
|
||||
let mut page = BytesMut::new();
|
||||
if let Some(fpi) = base_img {
|
||||
// If full-page image is provided, then use it...
|
||||
page.extend_from_slice(&fpi[..]);
|
||||
} else {
|
||||
// otherwise initialize page with zeros
|
||||
page.extend_from_slice(&ZERO_PAGE);
|
||||
// otherwise initialize page with zeros
|
||||
page.extend_from_slice(&ZERO_PAGE);
|
||||
}
|
||||
// Apply all collected WAL records
|
||||
for (_lsn, record) in records {
|
||||
let mut buf = record.rec.clone();
|
||||
|
||||
WAL_REDO_RECORD_COUNTER.inc();
|
||||
|
||||
// 1. Parse XLogRecord struct
|
||||
// FIXME: refactor to avoid code duplication.
|
||||
let xlogrec = XLogRecord::from_bytes(&mut buf);
|
||||
|
||||
//move to main data
|
||||
// TODO probably, we should store some records in our special format
|
||||
// to avoid this weird parsing on replay
|
||||
let skip = (record.main_data_offset - pg_constants::SIZEOF_XLOGRECORD) as usize;
|
||||
if buf.remaining() > skip {
|
||||
buf.advance(skip);
|
||||
}
|
||||
// Apply all collected WAL records
|
||||
for (_lsn, record) in records {
|
||||
let mut buf = record.rec.clone();
|
||||
|
||||
WAL_REDO_RECORD_COUNTER.inc();
|
||||
|
||||
// 1. Parse XLogRecord struct
|
||||
// FIXME: refactor to avoid code duplication.
|
||||
let xlogrec = XLogRecord::from_bytes(&mut buf);
|
||||
|
||||
//move to main data
|
||||
// TODO probably, we should store some records in our special format
|
||||
// to avoid this weird parsing on replay
|
||||
let skip = (record.main_data_offset - pg_constants::SIZEOF_XLOGRECORD) as usize;
|
||||
if buf.remaining() > skip {
|
||||
buf.advance(skip);
|
||||
}
|
||||
|
||||
if xlogrec.xl_rmid == pg_constants::RM_XACT_ID {
|
||||
// Transaction manager stuff
|
||||
let rec_segno = match rel {
|
||||
RelishTag::Slru { slru, segno } => {
|
||||
assert!(
|
||||
slru == SlruKind::Clog,
|
||||
"Not valid XACT relish tag {:?}",
|
||||
rel
|
||||
if xlogrec.xl_rmid == pg_constants::RM_XACT_ID {
|
||||
// Transaction manager stuff
|
||||
let rec_segno = match rel {
|
||||
RelishTag::Slru { slru, segno } => {
|
||||
assert!(
|
||||
slru == SlruKind::Clog,
|
||||
"Not valid XACT relish tag {:?}",
|
||||
rel
|
||||
);
|
||||
segno
|
||||
}
|
||||
_ => panic!("Not valid XACT relish tag {:?}", rel),
|
||||
};
|
||||
let parsed_xact =
|
||||
XlXactParsedRecord::decode(&mut buf, xlogrec.xl_xid, xlogrec.xl_info);
|
||||
if parsed_xact.info == pg_constants::XLOG_XACT_COMMIT
|
||||
|| parsed_xact.info == pg_constants::XLOG_XACT_COMMIT_PREPARED
|
||||
{
|
||||
transaction_id_set_status(
|
||||
parsed_xact.xid,
|
||||
pg_constants::TRANSACTION_STATUS_COMMITTED,
|
||||
&mut page,
|
||||
);
|
||||
for subxact in &parsed_xact.subxacts {
|
||||
let pageno = *subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
// only update xids on the requested page
|
||||
if rec_segno == segno && blknum == rpageno {
|
||||
transaction_id_set_status(
|
||||
*subxact,
|
||||
pg_constants::TRANSACTION_STATUS_COMMITTED,
|
||||
&mut page,
|
||||
);
|
||||
segno
|
||||
}
|
||||
_ => panic!("Not valid XACT relish tag {:?}", rel),
|
||||
};
|
||||
let parsed_xact =
|
||||
XlXactParsedRecord::decode(&mut buf, xlogrec.xl_xid, xlogrec.xl_info);
|
||||
if parsed_xact.info == pg_constants::XLOG_XACT_COMMIT
|
||||
|| parsed_xact.info == pg_constants::XLOG_XACT_COMMIT_PREPARED
|
||||
{
|
||||
transaction_id_set_status(
|
||||
parsed_xact.xid,
|
||||
pg_constants::TRANSACTION_STATUS_COMMITTED,
|
||||
&mut page,
|
||||
);
|
||||
for subxact in &parsed_xact.subxacts {
|
||||
let pageno = *subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
// only update xids on the requested page
|
||||
if rec_segno == segno && blknum == rpageno {
|
||||
transaction_id_set_status(
|
||||
*subxact,
|
||||
pg_constants::TRANSACTION_STATUS_COMMITTED,
|
||||
&mut page,
|
||||
);
|
||||
}
|
||||
}
|
||||
} else if parsed_xact.info == pg_constants::XLOG_XACT_ABORT
|
||||
|| parsed_xact.info == pg_constants::XLOG_XACT_ABORT_PREPARED
|
||||
{
|
||||
transaction_id_set_status(
|
||||
parsed_xact.xid,
|
||||
pg_constants::TRANSACTION_STATUS_ABORTED,
|
||||
&mut page,
|
||||
);
|
||||
for subxact in &parsed_xact.subxacts {
|
||||
let pageno = *subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
// only update xids on the requested page
|
||||
if rec_segno == segno && blknum == rpageno {
|
||||
transaction_id_set_status(
|
||||
*subxact,
|
||||
pg_constants::TRANSACTION_STATUS_ABORTED,
|
||||
&mut page,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if xlogrec.xl_rmid == pg_constants::RM_MULTIXACT_ID {
|
||||
// Multixact operations
|
||||
let info = xlogrec.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
|
||||
let xlrec = XlMultiXactCreate::decode(&mut buf);
|
||||
if let RelishTag::Slru {
|
||||
slru,
|
||||
segno: rec_segno,
|
||||
} = rel
|
||||
{
|
||||
if slru == SlruKind::MultiXactMembers {
|
||||
for i in 0..xlrec.nmembers {
|
||||
let pageno =
|
||||
i / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
if segno == rec_segno && rpageno == blknum {
|
||||
// update only target block
|
||||
let offset = xlrec.moff + i;
|
||||
let memberoff = mx_offset_to_member_offset(offset);
|
||||
let flagsoff = mx_offset_to_flags_offset(offset);
|
||||
let bshift = mx_offset_to_flags_bitshift(offset);
|
||||
let mut flagsval =
|
||||
LittleEndian::read_u32(&page[flagsoff..flagsoff + 4]);
|
||||
flagsval &= !(((1
|
||||
<< pg_constants::MXACT_MEMBER_BITS_PER_XACT)
|
||||
- 1)
|
||||
} else if parsed_xact.info == pg_constants::XLOG_XACT_ABORT
|
||||
|| parsed_xact.info == pg_constants::XLOG_XACT_ABORT_PREPARED
|
||||
{
|
||||
transaction_id_set_status(
|
||||
parsed_xact.xid,
|
||||
pg_constants::TRANSACTION_STATUS_ABORTED,
|
||||
&mut page,
|
||||
);
|
||||
for subxact in &parsed_xact.subxacts {
|
||||
let pageno = *subxact as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
// only update xids on the requested page
|
||||
if rec_segno == segno && blknum == rpageno {
|
||||
transaction_id_set_status(
|
||||
*subxact,
|
||||
pg_constants::TRANSACTION_STATUS_ABORTED,
|
||||
&mut page,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if xlogrec.xl_rmid == pg_constants::RM_MULTIXACT_ID {
|
||||
// Multixact operations
|
||||
let info = xlogrec.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
|
||||
let xlrec = XlMultiXactCreate::decode(&mut buf);
|
||||
if let RelishTag::Slru {
|
||||
slru,
|
||||
segno: rec_segno,
|
||||
} = rel
|
||||
{
|
||||
if slru == SlruKind::MultiXactMembers {
|
||||
for i in 0..xlrec.nmembers {
|
||||
let pageno = i / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
if segno == rec_segno && rpageno == blknum {
|
||||
// update only target block
|
||||
let offset = xlrec.moff + i;
|
||||
let memberoff = mx_offset_to_member_offset(offset);
|
||||
let flagsoff = mx_offset_to_flags_offset(offset);
|
||||
let bshift = mx_offset_to_flags_bitshift(offset);
|
||||
let mut flagsval =
|
||||
LittleEndian::read_u32(&page[flagsoff..flagsoff + 4]);
|
||||
flagsval &=
|
||||
!(((1 << pg_constants::MXACT_MEMBER_BITS_PER_XACT) - 1)
|
||||
<< bshift);
|
||||
flagsval |= xlrec.members[i as usize].status << bshift;
|
||||
LittleEndian::write_u32(
|
||||
&mut page[flagsoff..flagsoff + 4],
|
||||
flagsval,
|
||||
);
|
||||
LittleEndian::write_u32(
|
||||
&mut page[memberoff..memberoff + 4],
|
||||
xlrec.members[i as usize].xid,
|
||||
);
|
||||
}
|
||||
flagsval |= xlrec.members[i as usize].status << bshift;
|
||||
LittleEndian::write_u32(
|
||||
&mut page[flagsoff..flagsoff + 4],
|
||||
flagsval,
|
||||
);
|
||||
LittleEndian::write_u32(
|
||||
&mut page[memberoff..memberoff + 4],
|
||||
xlrec.members[i as usize].xid,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// Multixact offsets SLRU
|
||||
let offs = (xlrec.mid
|
||||
% pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32
|
||||
* 4) as usize;
|
||||
LittleEndian::write_u32(&mut page[offs..offs + 4], xlrec.moff);
|
||||
}
|
||||
} else {
|
||||
panic!();
|
||||
// Multixact offsets SLRU
|
||||
let offs = (xlrec.mid % pg_constants::MULTIXACT_OFFSETS_PER_PAGE as u32
|
||||
* 4) as usize;
|
||||
LittleEndian::write_u32(&mut page[offs..offs + 4], xlrec.moff);
|
||||
}
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
|
||||
apply_result = Ok::<Bytes, Error>(page.freeze());
|
||||
}
|
||||
|
||||
apply_result = Ok::<Bytes, Error>(page.freeze());
|
||||
|
||||
let duration = start.elapsed();
|
||||
|
||||
let result: Result<Bytes, WalRedoError>;
|
||||
|
||||
debug!(
|
||||
"applied {} WAL records in {} ms to reconstruct page image at LSN {}",
|
||||
"zenith applied {} WAL records in {} ms to reconstruct page image at LSN {}",
|
||||
nrecords,
|
||||
duration.as_millis(),
|
||||
lsn
|
||||
);
|
||||
|
||||
if let Err(e) = apply_result {
|
||||
error!("could not apply WAL records: {:#}", e);
|
||||
result = Err(WalRedoError::IoError(e));
|
||||
} else {
|
||||
let img = apply_result.unwrap();
|
||||
|
||||
result = Ok(img);
|
||||
}
|
||||
|
||||
// The caller is responsible for sending the response
|
||||
result
|
||||
apply_result.map_err(WalRedoError::IoError)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
use crate::pg_constants;
|
||||
use crate::CheckPoint;
|
||||
use crate::ControlFileData;
|
||||
use crate::FullTransactionId;
|
||||
use crate::XLogLongPageHeaderData;
|
||||
use crate::XLogPageHeaderData;
|
||||
@@ -18,8 +17,8 @@ use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use crc32c::*;
|
||||
use log::*;
|
||||
use std::cmp::max;
|
||||
@@ -410,27 +409,25 @@ impl CheckPoint {
|
||||
}
|
||||
|
||||
//
|
||||
// Generate new WAL segment with single XLOG_CHECKPOINT_SHUTDOWN record.
|
||||
// Generate new, empty WAL segment.
|
||||
// We need this segment to start compute node.
|
||||
// In order to minimize changes in Postgres core, we prefer to
|
||||
// provide WAL segment from which is can extract checkpoint record in standard way,
|
||||
// rather then implement some alternative mechanism.
|
||||
//
|
||||
pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
|
||||
let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);
|
||||
|
||||
let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE);
|
||||
let hdr = XLogLongPageHeaderData {
|
||||
std: {
|
||||
XLogPageHeaderData {
|
||||
xlp_magic: XLOG_PAGE_MAGIC as u16,
|
||||
xlp_info: pg_constants::XLP_LONG_HEADER,
|
||||
xlp_tli: 1, // FIXME: always use Postgres timeline 1
|
||||
xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64,
|
||||
xlp_pageaddr: pageaddr,
|
||||
xlp_rem_len: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
}
|
||||
},
|
||||
xlp_sysid: pg_control.system_identifier,
|
||||
xlp_sysid: system_id,
|
||||
xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32,
|
||||
xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
|
||||
};
|
||||
@@ -438,36 +435,6 @@ pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
let hdr_bytes = hdr.encode();
|
||||
seg_buf.extend_from_slice(&hdr_bytes);
|
||||
|
||||
let rec_hdr = XLogRecord {
|
||||
xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD
|
||||
+ SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT
|
||||
+ SIZEOF_CHECKPOINT) as u32,
|
||||
xl_xid: 0, //0 is for InvalidTransactionId
|
||||
xl_prev: 0,
|
||||
xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN,
|
||||
xl_rmid: pg_constants::RM_XLOG_ID,
|
||||
xl_crc: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
};
|
||||
|
||||
let mut rec_shord_hdr_bytes = BytesMut::new();
|
||||
rec_shord_hdr_bytes.put_u8(pg_constants::XLR_BLOCK_ID_DATA_SHORT);
|
||||
rec_shord_hdr_bytes.put_u8(SIZEOF_CHECKPOINT as u8);
|
||||
|
||||
let rec_bytes = rec_hdr.encode();
|
||||
let checkpoint_bytes = pg_control.checkPointCopy.encode();
|
||||
|
||||
//calculate record checksum
|
||||
let mut crc = 0;
|
||||
crc = crc32c_append(crc, &rec_shord_hdr_bytes[..]);
|
||||
crc = crc32c_append(crc, &checkpoint_bytes[..]);
|
||||
crc = crc32c_append(crc, &rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
|
||||
seg_buf.extend_from_slice(&rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
seg_buf.put_u32_le(crc);
|
||||
seg_buf.extend_from_slice(&rec_shord_hdr_bytes);
|
||||
seg_buf.extend_from_slice(&checkpoint_bytes);
|
||||
|
||||
//zero out the rest of the file
|
||||
seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
|
||||
seg_buf.freeze()
|
||||
|
||||
@@ -12,7 +12,14 @@ pub struct DatabaseInfo {
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: String,
|
||||
pub password: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct ProxyAuthResult {
|
||||
pub ready: bool,
|
||||
pub error: Option<String>,
|
||||
pub conn_info: Option<DatabaseInfo>,
|
||||
}
|
||||
|
||||
impl DatabaseInfo {
|
||||
@@ -24,12 +31,23 @@ impl DatabaseInfo {
|
||||
.next()
|
||||
.ok_or_else(|| anyhow::Error::msg("cannot resolve at least one SocketAddr"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn conn_string(&self) -> String {
|
||||
format!(
|
||||
"dbname={} user={} password={}",
|
||||
self.dbname, self.user, self.password
|
||||
)
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,22 +62,25 @@ impl CPlaneApi {
|
||||
database: &str,
|
||||
md5_response: &[u8],
|
||||
salt: &[u8; 4],
|
||||
) -> Result<DatabaseInfo> {
|
||||
psql_session_id: &str,
|
||||
) -> Result<ProxyAuthResult> {
|
||||
let mut url = reqwest::Url::parse(self.auth_endpoint)?;
|
||||
url.query_pairs_mut()
|
||||
.append_pair("login", user)
|
||||
.append_pair("database", database)
|
||||
.append_pair("md5response", std::str::from_utf8(md5_response)?)
|
||||
.append_pair("salt", &hex::encode(salt));
|
||||
.append_pair("salt", &hex::encode(salt))
|
||||
.append_pair("psql_session_id", psql_session_id);
|
||||
|
||||
println!("cplane request: {}", url.as_str());
|
||||
|
||||
let resp = reqwest::blocking::get(url)?;
|
||||
|
||||
if resp.status().is_success() {
|
||||
let conn_info: DatabaseInfo = serde_json::from_str(resp.text()?.as_str())?;
|
||||
println!("got conn info: #{:?}", conn_info);
|
||||
Ok(conn_info)
|
||||
let auth_info: ProxyAuthResult = serde_json::from_str(resp.text()?.as_str())?;
|
||||
println!("got auth info: #{:?}", auth_info);
|
||||
|
||||
Ok(auth_info)
|
||||
} else {
|
||||
bail!("Auth failed")
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
///
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
net::{SocketAddr, TcpListener},
|
||||
net::SocketAddr,
|
||||
sync::{mpsc, Arc, Mutex},
|
||||
thread,
|
||||
};
|
||||
@@ -17,6 +17,7 @@ use clap::{App, Arg, ArgMatches};
|
||||
|
||||
use cplane_api::DatabaseInfo;
|
||||
use rustls::{internal::pemfile, NoClientAuth, ProtocolVersion, ServerConfig};
|
||||
use zenith_utils::tcp_listener;
|
||||
|
||||
mod cplane_api;
|
||||
mod mgmt;
|
||||
@@ -99,7 +100,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.long("uri")
|
||||
.takes_value(true)
|
||||
.help("redirect unauthenticated users to given uri")
|
||||
.default_value("http://127.0.0.1:3000/psql_session/"),
|
||||
.default_value("http://localhost:3000/psql_session/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("auth-endpoint")
|
||||
@@ -107,7 +108,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.long("auth-endpoint")
|
||||
.takes_value(true)
|
||||
.help("redirect unauthenticated users to given uri")
|
||||
.default_value("http://127.0.0.1:3000/authenticate_proxy_request/"),
|
||||
.default_value("http://localhost:3000/authenticate_proxy_request/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("ssl-key")
|
||||
@@ -140,23 +141,23 @@ fn main() -> anyhow::Result<()> {
|
||||
|
||||
// Check that we can bind to address before further initialization
|
||||
println!("Starting proxy on {}", state.conf.proxy_address);
|
||||
let pageserver_listener = TcpListener::bind(state.conf.proxy_address)?;
|
||||
let pageserver_listener = tcp_listener::bind(state.conf.proxy_address)?;
|
||||
|
||||
println!("Starting mgmt on {}", state.conf.mgmt_address);
|
||||
let mgmt_listener = TcpListener::bind(state.conf.mgmt_address)?;
|
||||
let mgmt_listener = tcp_listener::bind(state.conf.mgmt_address)?;
|
||||
|
||||
let threads = vec![
|
||||
let threads = [
|
||||
// Spawn a thread to listen for connections. It will spawn further threads
|
||||
// for each connection.
|
||||
thread::Builder::new()
|
||||
.name("Proxy thread".into())
|
||||
.name("Listener thread".into())
|
||||
.spawn(move || proxy::thread_main(state, pageserver_listener))?,
|
||||
thread::Builder::new()
|
||||
.name("Mgmt thread".into())
|
||||
.spawn(move || mgmt::thread_main(state, mgmt_listener))?,
|
||||
];
|
||||
|
||||
for t in threads.into_iter() {
|
||||
for t in threads {
|
||||
t.join().unwrap()?;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ use anyhow::bail;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
use rand::Rng;
|
||||
use std::io::Write;
|
||||
use std::{io, sync::mpsc::channel, thread};
|
||||
use zenith_utils::postgres_backend::Stream;
|
||||
use zenith_utils::postgres_backend::{PostgresBackend, ProtoState};
|
||||
@@ -28,11 +27,13 @@ pub fn thread_main(
|
||||
println!("accepted connection from {}", peer_addr);
|
||||
socket.set_nodelay(true).unwrap();
|
||||
|
||||
thread::spawn(move || {
|
||||
if let Err(err) = proxy_conn_main(state, socket) {
|
||||
println!("error: {}", err);
|
||||
}
|
||||
});
|
||||
thread::Builder::new()
|
||||
.name("Proxy thread".into())
|
||||
.spawn(move || {
|
||||
if let Err(err) = proxy_conn_main(state, socket) {
|
||||
println!("error: {}", err);
|
||||
}
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,8 +75,12 @@ pub fn proxy_conn_main(
|
||||
// This will set conn.existing_user and we can decide on next actions
|
||||
conn.handle_startup()?;
|
||||
|
||||
let mut psql_session_id_buf = [0u8; 8];
|
||||
rand::thread_rng().fill(&mut psql_session_id_buf);
|
||||
conn.psql_session_id = hex::encode(psql_session_id_buf);
|
||||
|
||||
// both scenarious here should end up producing database connection string
|
||||
let db_info = if conn.is_existing_user() {
|
||||
let conn_info = if conn.is_existing_user() {
|
||||
conn.handle_existing_user()?
|
||||
} else {
|
||||
conn.handle_new_user()?
|
||||
@@ -83,7 +88,7 @@ pub fn proxy_conn_main(
|
||||
|
||||
// XXX: move that inside handle_new_user/handle_existing_user to be able to
|
||||
// report wrong connection error.
|
||||
proxy_pass(conn.pgb, db_info)
|
||||
proxy_pass(conn.pgb, conn_info)
|
||||
}
|
||||
|
||||
impl ProxyConnection {
|
||||
@@ -155,9 +160,25 @@ impl ProxyConnection {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Wait for proxy kick form the console with conninfo
|
||||
fn wait_for_conninfo(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
let (tx, rx) = channel::<anyhow::Result<DatabaseInfo>>();
|
||||
let _ = self
|
||||
.state
|
||||
.waiters
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(self.psql_session_id.clone(), tx);
|
||||
|
||||
// Wait for web console response
|
||||
// TODO: respond with error to client
|
||||
rx.recv()?
|
||||
}
|
||||
|
||||
fn handle_existing_user(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
// ask password
|
||||
rand::thread_rng().fill(&mut self.md5_salt);
|
||||
|
||||
self.pgb
|
||||
.write_message(&BeMessage::AuthenticationMD5Password(&self.md5_salt))?;
|
||||
self.pgb.state = ProtoState::Authentication; // XXX
|
||||
@@ -180,14 +201,41 @@ impl ProxyConnection {
|
||||
self.database.as_str(),
|
||||
md5_response,
|
||||
&self.md5_salt,
|
||||
&self.psql_session_id,
|
||||
) {
|
||||
Err(e) => {
|
||||
self.pgb
|
||||
.write_message(&BeMessage::ErrorResponse(format!("{}", e)))?;
|
||||
self.pgb.write_message(&BeMessage::ErrorResponse(format!(
|
||||
"cannot authenticate proxy: {}",
|
||||
e
|
||||
)))?;
|
||||
|
||||
bail!("auth failed: {}", e);
|
||||
}
|
||||
Ok(conn_info) => {
|
||||
|
||||
Ok(auth_info) => {
|
||||
let conn_info = if auth_info.ready {
|
||||
// Cluster is ready, so just take `conn_info` and respond to the client.
|
||||
auth_info
|
||||
.conn_info
|
||||
.expect("conn_info should be provided with ready cluster")
|
||||
} else {
|
||||
match auth_info.error {
|
||||
Some(e) => {
|
||||
self.pgb.write_message(&BeMessage::ErrorResponse(format!(
|
||||
"cannot authenticate proxy: {}",
|
||||
e
|
||||
)))?;
|
||||
|
||||
bail!("auth failed: {}", e);
|
||||
}
|
||||
None => {
|
||||
// Cluster exists, but isn't active, await its start and proxy kick
|
||||
// with `conn_info`.
|
||||
self.wait_for_conninfo()?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
self.pgb
|
||||
.write_message_noflush(&BeMessage::AuthenticationOk)?;
|
||||
self.pgb
|
||||
@@ -203,10 +251,6 @@ impl ProxyConnection {
|
||||
}
|
||||
|
||||
fn handle_new_user(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
let mut psql_session_id_buf = [0u8; 8];
|
||||
rand::thread_rng().fill(&mut psql_session_id_buf);
|
||||
self.psql_session_id = hex::encode(psql_session_id_buf);
|
||||
|
||||
let hello_message = format!("☀️ Welcome to Zenith!
|
||||
|
||||
To proceed with database creation, open the following link:
|
||||
@@ -225,76 +269,83 @@ databases without opening the browser.
|
||||
self.pgb
|
||||
.write_message(&BeMessage::NoticeResponse(hello_message))?;
|
||||
|
||||
// await for database creation
|
||||
let (tx, rx) = channel::<anyhow::Result<DatabaseInfo>>();
|
||||
let _ = self
|
||||
.state
|
||||
.waiters
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(self.psql_session_id.clone(), tx);
|
||||
|
||||
// Wait for web console response
|
||||
// XXX: respond with error to client
|
||||
let dbinfo = rx.recv()??;
|
||||
// We requested the DB creation from the console. Now wait for conninfo
|
||||
let conn_info = self.wait_for_conninfo()?;
|
||||
|
||||
self.pgb.write_message_noflush(&BeMessage::NoticeResponse(
|
||||
"Connecting to database.".to_string(),
|
||||
))?;
|
||||
self.pgb.write_message(&BeMessage::ReadyForQuery)?;
|
||||
|
||||
Ok(dbinfo)
|
||||
Ok(conn_info)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message
|
||||
async fn connect_to_db(db_info: DatabaseInfo) -> anyhow::Result<tokio::net::TcpStream> {
|
||||
let mut socket = tokio::net::TcpStream::connect(db_info.socket_addr()?).await?;
|
||||
let config = db_info.conn_string().parse::<tokio_postgres::Config>()?;
|
||||
let config = tokio_postgres::Config::from(db_info);
|
||||
let _ = config.connect_raw(&mut socket, NoTls).await?;
|
||||
Ok(socket)
|
||||
}
|
||||
|
||||
/// Concurrently proxy both directions of the client and server connections
|
||||
fn proxy(
|
||||
client_read: ReadStream,
|
||||
client_write: WriteStream,
|
||||
server_read: ReadStream,
|
||||
server_write: WriteStream,
|
||||
(client_read, client_write): (ReadStream, WriteStream),
|
||||
(server_read, server_write): (ReadStream, WriteStream),
|
||||
) -> anyhow::Result<()> {
|
||||
fn do_proxy(mut reader: ReadStream, mut writer: WriteStream) -> io::Result<()> {
|
||||
std::io::copy(&mut reader, &mut writer)?;
|
||||
writer.flush()?;
|
||||
writer.shutdown(std::net::Shutdown::Both)
|
||||
fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result<u64> {
|
||||
/// FlushWriter will make sure that every message is sent as soon as possible
|
||||
struct FlushWriter<W>(W);
|
||||
|
||||
impl<W: io::Write> io::Write for FlushWriter<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
// `std::io::copy` is guaranteed to exit if we return an error,
|
||||
// so we can afford to lose `res` in case `flush` fails
|
||||
let res = self.0.write(buf);
|
||||
if res.is_ok() {
|
||||
self.0.flush()?;
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.0.flush()
|
||||
}
|
||||
}
|
||||
|
||||
let res = std::io::copy(&mut reader, &mut FlushWriter(&mut writer));
|
||||
writer.shutdown(std::net::Shutdown::Both)?;
|
||||
res
|
||||
}
|
||||
|
||||
let client_to_server_jh = thread::spawn(move || do_proxy(client_read, server_write));
|
||||
|
||||
let res1 = do_proxy(server_read, client_write);
|
||||
let res2 = client_to_server_jh.join().unwrap();
|
||||
res1?;
|
||||
res2?;
|
||||
do_proxy(server_read, client_write)?;
|
||||
client_to_server_jh.join().unwrap()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Proxy a client connection to a postgres database
|
||||
fn proxy_pass(pgb: PostgresBackend, db_info: DatabaseInfo) -> anyhow::Result<()> {
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
let db_stream = runtime.block_on(connect_to_db(db_info))?;
|
||||
let db_stream = db_stream.into_std()?;
|
||||
db_stream.set_nonblocking(false)?;
|
||||
let db_stream = {
|
||||
// We'll get rid of this once migration to async is complete
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
let db_stream = zenith_utils::sock_split::BidiStream::from_tcp(db_stream);
|
||||
let (db_read, db_write) = db_stream.split();
|
||||
let stream = runtime.block_on(connect_to_db(db_info))?.into_std()?;
|
||||
stream.set_nonblocking(false)?;
|
||||
stream
|
||||
};
|
||||
|
||||
let stream = match pgb.into_stream() {
|
||||
let db = zenith_utils::sock_split::BidiStream::from_tcp(db_stream);
|
||||
|
||||
let client = match pgb.into_stream() {
|
||||
Stream::Bidirectional(bidi_stream) => bidi_stream,
|
||||
_ => bail!("invalid stream"),
|
||||
};
|
||||
|
||||
let (client_read, client_write) = stream.split();
|
||||
proxy(client_read, client_write, db_read, db_write)
|
||||
proxy(client.split(), db.split())
|
||||
}
|
||||
|
||||
@@ -11,12 +11,19 @@ pyjwt = {extras = ["crypto"], version = "*"}
|
||||
requests = "*"
|
||||
pytest-xdist = "*"
|
||||
asyncpg = "*"
|
||||
cached-property = "*"
|
||||
|
||||
[dev-packages]
|
||||
yapf = "*"
|
||||
# Behavior may change slightly between versions. These are run continuously,
|
||||
# so we pin exact versions to avoid suprising breaks. Update if comfortable.
|
||||
yapf = "==0.31.0"
|
||||
mypy = "==0.910"
|
||||
# Non-pinned packages follow.
|
||||
pipenv = "*"
|
||||
flake8 = "*"
|
||||
mypy = "*"
|
||||
types-requests = "*"
|
||||
types-psycopg2 = "*"
|
||||
|
||||
[requires]
|
||||
# we need at least 3.6, but pipenv doesn't allow to say this directly
|
||||
# we need at least 3.7, but pipenv doesn't allow to say this directly
|
||||
python_version = "3"
|
||||
|
||||
352
test_runner/Pipfile.lock
generated
352
test_runner/Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "3cdc048691824d0b93912b6b78a0aa01dc98f278212c1badb0cc2edbd2103c3a"
|
||||
"sha256": "63b72760ef37375186a638066ba0ad5804dbace99ddc503ea654e9749070ab24"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -43,94 +43,108 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.2.0"
|
||||
},
|
||||
"cached-property": {
|
||||
"hashes": [
|
||||
"sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130",
|
||||
"sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.5.2"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
|
||||
"sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
|
||||
],
|
||||
"version": "==2021.5.30"
|
||||
"version": "==2021.10.8"
|
||||
},
|
||||
"cffi": {
|
||||
"hashes": [
|
||||
"sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d",
|
||||
"sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771",
|
||||
"sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872",
|
||||
"sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c",
|
||||
"sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc",
|
||||
"sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762",
|
||||
"sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202",
|
||||
"sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5",
|
||||
"sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548",
|
||||
"sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a",
|
||||
"sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f",
|
||||
"sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20",
|
||||
"sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218",
|
||||
"sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c",
|
||||
"sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e",
|
||||
"sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56",
|
||||
"sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224",
|
||||
"sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a",
|
||||
"sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2",
|
||||
"sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a",
|
||||
"sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819",
|
||||
"sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346",
|
||||
"sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b",
|
||||
"sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e",
|
||||
"sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534",
|
||||
"sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb",
|
||||
"sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0",
|
||||
"sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156",
|
||||
"sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd",
|
||||
"sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87",
|
||||
"sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc",
|
||||
"sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195",
|
||||
"sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33",
|
||||
"sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f",
|
||||
"sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d",
|
||||
"sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd",
|
||||
"sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728",
|
||||
"sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7",
|
||||
"sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca",
|
||||
"sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99",
|
||||
"sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf",
|
||||
"sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e",
|
||||
"sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c",
|
||||
"sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5",
|
||||
"sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69"
|
||||
"sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
|
||||
"sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
|
||||
"sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
|
||||
"sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
|
||||
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
|
||||
"sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
|
||||
"sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
|
||||
"sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
|
||||
"sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
|
||||
"sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
|
||||
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
|
||||
"sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
|
||||
"sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
|
||||
"sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
|
||||
"sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
|
||||
"sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
|
||||
"sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
|
||||
"sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
|
||||
"sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
|
||||
"sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
|
||||
"sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
|
||||
"sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
|
||||
"sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
|
||||
"sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
|
||||
"sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
|
||||
"sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
|
||||
"sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
|
||||
"sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
|
||||
"sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
|
||||
"sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
|
||||
"sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
|
||||
"sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
|
||||
"sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
|
||||
"sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
|
||||
"sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
|
||||
"sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
|
||||
"sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
|
||||
"sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
|
||||
"sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
|
||||
"sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
|
||||
"sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
|
||||
"sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
|
||||
"sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
|
||||
"sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
|
||||
"sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
|
||||
"sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
|
||||
"sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
|
||||
"sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
|
||||
"sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
|
||||
"sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
|
||||
],
|
||||
"version": "==1.14.6"
|
||||
"version": "==1.15.0"
|
||||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:5d209c0a931f215cee683b6445e2d77677e7e75e159f78def0db09d68fafcaa6",
|
||||
"sha256:5ec46d183433dcbd0ab716f2d7f29d8dee50505b3fdb40c6b985c7c4f5a3591f"
|
||||
"sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
|
||||
"sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.6"
|
||||
"version": "==2.0.7"
|
||||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e",
|
||||
"sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b",
|
||||
"sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7",
|
||||
"sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085",
|
||||
"sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc",
|
||||
"sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d",
|
||||
"sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a",
|
||||
"sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498",
|
||||
"sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89",
|
||||
"sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9",
|
||||
"sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c",
|
||||
"sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7",
|
||||
"sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb",
|
||||
"sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14",
|
||||
"sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af",
|
||||
"sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e",
|
||||
"sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5",
|
||||
"sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06",
|
||||
"sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"
|
||||
"sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
|
||||
"sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
|
||||
"sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
|
||||
"sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
|
||||
"sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
|
||||
"sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
|
||||
"sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
|
||||
"sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
|
||||
"sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
|
||||
"sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
|
||||
"sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
|
||||
"sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
|
||||
"sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
|
||||
"sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
|
||||
"sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
|
||||
"sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
|
||||
"sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
|
||||
"sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
|
||||
"sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
|
||||
"sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
|
||||
],
|
||||
"version": "==3.4.8"
|
||||
"version": "==35.0.0"
|
||||
},
|
||||
"execnet": {
|
||||
"hashes": [
|
||||
@@ -142,11 +156,19 @@
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
|
||||
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
|
||||
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
|
||||
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==3.2"
|
||||
"version": "==3.3"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
|
||||
"sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==4.8.1"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
@@ -207,11 +229,11 @@
|
||||
"crypto"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:934d73fbba91b0483d3857d1aff50e96b2a892384ee2c17417ed3203f173fca1",
|
||||
"sha256:fba44e7898bbca160a2b2b501f492824fc8382485d3a6f11ba5d0c1937ce6130"
|
||||
"sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41",
|
||||
"sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.1.0"
|
||||
"version": "==2.3.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
@@ -272,21 +294,67 @@
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
|
||||
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
|
||||
"sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
|
||||
"sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.6"
|
||||
"version": "==1.26.7"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
|
||||
"sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.6.0"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"backports.entry-points-selectable": {
|
||||
"hashes": [
|
||||
"sha256:988468260ec1c196dab6ae1149260e2f5472c9110334e5d51adcb77867361f6a",
|
||||
"sha256:a6d9a871cde5e15b4c4a53e3d43ba890cc6861ec1332c9c2428c92f977192acc"
|
||||
],
|
||||
"markers": "python_version >= '2.7'",
|
||||
"version": "==1.1.0"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
|
||||
],
|
||||
"version": "==2021.10.8"
|
||||
},
|
||||
"distlib": {
|
||||
"hashes": [
|
||||
"sha256:c8b54e8454e5bf6237cc84c20e8264c3e991e824ef27e8f1e81049867d861e31",
|
||||
"sha256:d982d0751ff6eaaab5e2ec8e691d949ee80eddf01a62eaa96ddb11531fe16b05"
|
||||
],
|
||||
"version": "==0.3.3"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:2b5eb3589e7fdda14599e7eb1a50e09b4cc14f34ed98b8ba56d33bfaafcbef2f",
|
||||
"sha256:34a9f35f95c441e7b38209775d6e0337f9a3759f3565f6c5798f19618527c76f"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.3.1"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b",
|
||||
"sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"
|
||||
"sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d",
|
||||
"sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.9.2"
|
||||
"version": "==4.0.1"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
|
||||
"sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==4.8.1"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
@@ -331,21 +399,45 @@
|
||||
],
|
||||
"version": "==0.4.3"
|
||||
},
|
||||
"pipenv": {
|
||||
"hashes": [
|
||||
"sha256:05958fadcd70b2de6a27542fcd2bd72dd5c59c6d35307fdac3e06361fb06e30e",
|
||||
"sha256:d180f5be4775c552fd5e69ae18a9d6099d9dafb462efe54f11c72cb5f4d5e977"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2021.5.29"
|
||||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2",
|
||||
"sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068",
|
||||
"sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"
|
||||
"sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20",
|
||||
"sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.7.0"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==2.8.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3",
|
||||
"sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"
|
||||
"sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c",
|
||||
"sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.3.1"
|
||||
"version": "==2.4.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
|
||||
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.16.0"
|
||||
},
|
||||
"toml": {
|
||||
"hashes": [
|
||||
@@ -355,6 +447,58 @@
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.10.2"
|
||||
},
|
||||
"typed-ast": {
|
||||
"hashes": [
|
||||
"sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace",
|
||||
"sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff",
|
||||
"sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266",
|
||||
"sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528",
|
||||
"sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6",
|
||||
"sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808",
|
||||
"sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4",
|
||||
"sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363",
|
||||
"sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341",
|
||||
"sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04",
|
||||
"sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41",
|
||||
"sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e",
|
||||
"sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3",
|
||||
"sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899",
|
||||
"sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805",
|
||||
"sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c",
|
||||
"sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c",
|
||||
"sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39",
|
||||
"sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a",
|
||||
"sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3",
|
||||
"sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7",
|
||||
"sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f",
|
||||
"sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075",
|
||||
"sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0",
|
||||
"sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40",
|
||||
"sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428",
|
||||
"sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927",
|
||||
"sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3",
|
||||
"sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
|
||||
"sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==1.4.3"
|
||||
},
|
||||
"types-psycopg2": {
|
||||
"hashes": [
|
||||
"sha256:77ed80f2668582654623e04fb3d741ecce93effcc39c929d7e02f4a917a538ce",
|
||||
"sha256:98a6e0e9580cd7eb4bd4d20f7c7063d154b2589a2b90c0ce4e3ca6085cde77c6"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.9.1"
|
||||
},
|
||||
"types-requests": {
|
||||
"hashes": [
|
||||
"sha256:b279284e51f668e38ee12d9665e4d789089f532dc2a0be4a1508ca0efd98ba9e",
|
||||
"sha256:ba1d108d512e294b6080c37f6ae7cb2a2abf527560e2b671d1786c1fc46b541a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.25.11"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
|
||||
@@ -364,6 +508,22 @@
|
||||
"index": "pypi",
|
||||
"version": "==3.10.0.2"
|
||||
},
|
||||
"virtualenv": {
|
||||
"hashes": [
|
||||
"sha256:10062e34c204b5e4ec5f62e6ef2473f8ba76513a9a617e873f1f8fb4a519d300",
|
||||
"sha256:bcc17f0b3a29670dd777d6f0755a4c04f28815395bca279cdcb213b97199a6b8"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==20.8.1"
|
||||
},
|
||||
"virtualenv-clone": {
|
||||
"hashes": [
|
||||
"sha256:418ee935c36152f8f153c79824bb93eaf6f0f7984bae31d3f48f350b9183501a",
|
||||
"sha256:44d5263bceed0bac3e1424d64f798095233b64def1c5689afa43dc3223caf5b0"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.5.7"
|
||||
},
|
||||
"yapf": {
|
||||
"hashes": [
|
||||
"sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d",
|
||||
@@ -371,6 +531,14 @@
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.31.0"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
|
||||
"sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.6.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,13 @@
|
||||
This directory contains integration tests.
|
||||
|
||||
Prerequisites:
|
||||
- Python 3.6 or later
|
||||
- Python 3.7 or later
|
||||
- Development headers may also be needed to build `psycopg2` from source.
|
||||
- Python 3.7 is recommended if you want to update tests.
|
||||
- Dependencies: install them via `pipenv install`. Note that Debian/Ubuntu
|
||||
packages are stale, as it commonly happens, so manual installation is not
|
||||
recommended.
|
||||
Exact version of `pipenv` is not important unless you change dependencies.
|
||||
Run `pipenv shell` to activate the venv or use `pipenv run` to run a single
|
||||
command in the venv, e.g. `pipenv run pytest`.
|
||||
- Zenith and Postgres binaries
|
||||
@@ -53,8 +56,8 @@ Useful environment variables:
|
||||
should go.
|
||||
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
|
||||
|
||||
Let stdout and stderr go to the terminal instead of capturing them:
|
||||
`pytest -s ...`
|
||||
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
|
||||
`pytest -s --log-cli-level=INFO ...`
|
||||
(Note many tests capture subprocess outputs separately, so this may not
|
||||
show much.)
|
||||
|
||||
@@ -62,44 +65,87 @@ Exit after the first test failure:
|
||||
`pytest -x ...`
|
||||
(there are many more pytest options; run `pytest -h` to see them.)
|
||||
|
||||
### Writing a test
|
||||
|
||||
### Building new tests
|
||||
Every test needs a Zenith Environment, or ZenithEnv to operate in. A Zenith Environment
|
||||
is like a little cloud-in-a-box, and consists of a Pageserver, 0-N Safekeepers, and
|
||||
compute Postgres nodes. The connections between them can be configured to use JWT
|
||||
authentication tokens, and some other configuration options can be tweaked too.
|
||||
|
||||
The tests make heavy use of pytest fixtures. You can read about how they work here: https://docs.pytest.org/en/stable/fixture.html
|
||||
The easiest way to get access to a Zenith Environment is by using the `zenith_simple_env`
|
||||
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
|
||||
or make other destructive changes in that environment. Also don't assume that
|
||||
there are no tenants or branches or data in the cluster. For convenience, there is a
|
||||
branch called `empty`, though. The convention is to create a test-specific branch of
|
||||
that and load any test data there, instead of the 'main' branch.
|
||||
|
||||
Essentially, this means that each time you see a fixture named as an input parameter, the function with that name will be run and passed as a parameter to the function.
|
||||
|
||||
So this code:
|
||||
For more complicated cases, you can build a custom Zenith Environment, with the `zenith_env`
|
||||
fixture:
|
||||
|
||||
```python
|
||||
def test_something(zenith_cli, pg_bin):
|
||||
pass
|
||||
def test_foobar(zenith_env_builder: ZenithEnvBuilder):
|
||||
# Prescribe the environment.
|
||||
# We want to have 3 safekeeper nodes, and use JWT authentication in the
|
||||
# connections to the page server
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
zenith_env_builder.set_pageserver_auth(True)
|
||||
|
||||
# Now create the environment. This initializes the repository, and starts
|
||||
# up the page server and the safekeepers
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
# Run the test
|
||||
...
|
||||
```
|
||||
|
||||
... will run the fixtures called `zenith_cli` and `pg_bin` and deliver those results to the test function.
|
||||
For more information about pytest fixtures, see https://docs.pytest.org/en/stable/fixture.html
|
||||
|
||||
Fixtures can't be imported using the normal python syntax. Instead, use this:
|
||||
At the end of a test, all the nodes in the environment are automatically stopped, so you
|
||||
don't need to worry about cleaning up. Logs and test data are preserved for the analysis,
|
||||
in a directory under `../test_output/<testname>`
|
||||
|
||||
```python
|
||||
pytest_plugins = ("fixtures.something")
|
||||
### Before submitting a patch
|
||||
#### Obligatory checks
|
||||
Install dev dependencies via `pipenv --python 3.7 install --dev` (better)
|
||||
or `pipenv install --dev` (if you don't have Python 3.7 and don't need to change dependencies).
|
||||
|
||||
We force code formatting via yapf and type hints via mypy.
|
||||
Run the following commands in the `test_runner/` directory:
|
||||
|
||||
```bash
|
||||
pipenv run yapf -ri . # All code is reformatted
|
||||
pipenv run mypy . # Ensure there are no typing errors
|
||||
```
|
||||
|
||||
That will make all the fixtures in the `fixtures/something.py` file available.
|
||||
|
||||
Anything that's likely to be used in multiple tests should be built into a fixture.
|
||||
|
||||
Note that fixtures can clean up after themselves if they use the `yield` syntax.
|
||||
Cleanup will happen even if the test fails (raises an unhandled exception).
|
||||
Python destructors, e.g. `__del__()` aren't recommended for cleanup.
|
||||
|
||||
|
||||
### Code quality
|
||||
|
||||
Before submitting a patch, please consider:
|
||||
|
||||
#### Advisable actions
|
||||
* Writing a couple of docstrings to clarify the reasoning behind a new test.
|
||||
* Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any.
|
||||
* Formatting the code with `yapf -r -i .` (TODO: implement an opt-in pre-commit hook for that).
|
||||
* (Optional) Typechecking the code with `mypy .`. Currently this mostly affects `fixtures/zenith_fixtures.py`.
|
||||
* Adding more type hints to your code to avoid `Any`, especially:
|
||||
* For fixture parameters, they are not automatically deduced.
|
||||
* For function arguments and return values.
|
||||
|
||||
The tools can be installed with `pipenv install --dev`.
|
||||
#### Changing dependencies
|
||||
You have to update `Pipfile.lock` if you have changed `Pipfile`:
|
||||
|
||||
```bash
|
||||
pipenv --python 3.7 install --dev # Re-create venv for Python 3.7 and install recent pipenv inside
|
||||
pipenv run pipenv --version # Should be at least 2021.5.29
|
||||
pipenv run pipenv lock # Regenerate Pipfile.lock
|
||||
```
|
||||
|
||||
As the minimal supported version is Python 3.7 and we use it in CI,
|
||||
you have to use a Python 3.7 environment when updating `Pipfile.lock`.
|
||||
Otherwise some back-compatibility packages will be missing.
|
||||
|
||||
It is also important to run recent `pipenv`.
|
||||
Older versions remove markers from `Pipfile.lock`.
|
||||
|
||||
If you don't have Python 3.7, you should install it and its headers (for `psycopg2`)
|
||||
separately, e.g.:
|
||||
|
||||
```bash
|
||||
# In Ubuntu
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt update
|
||||
sudo apt install python3.7 python3.7-dev
|
||||
```
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
|
||||
from contextlib import closing
|
||||
from typing import Iterator
|
||||
from uuid import uuid4
|
||||
import psycopg2
|
||||
from fixtures.zenith_fixtures import PortDistributor, Postgres, ZenithCli, ZenithPageserver, PgBin
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
import pytest
|
||||
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_pageserver_auth(pageserver_auth_enabled: ZenithPageserver):
|
||||
ps = pageserver_auth_enabled
|
||||
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
tenant_token = ps.auth_keys.generate_tenant_token(ps.initial_tenant)
|
||||
invalid_tenant_token = ps.auth_keys.generate_tenant_token(uuid4().hex)
|
||||
management_token = ps.auth_keys.generate_management_token()
|
||||
ps = env.pageserver
|
||||
|
||||
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)
|
||||
invalid_tenant_token = env.auth_keys.generate_tenant_token(uuid4().hex)
|
||||
management_token = env.auth_keys.generate_management_token()
|
||||
|
||||
# this does not invoke auth check and only decodes jwt and checks it for validity
|
||||
# check both tokens
|
||||
@@ -23,57 +24,41 @@ def test_pageserver_auth(pageserver_auth_enabled: ZenithPageserver):
|
||||
ps.safe_psql("status", password=management_token)
|
||||
|
||||
# tenant can create branches
|
||||
ps.safe_psql(f"branch_create {ps.initial_tenant} new1 main", password=tenant_token)
|
||||
ps.safe_psql(f"branch_create {env.initial_tenant} new1 main", password=tenant_token)
|
||||
# console can create branches for tenant
|
||||
ps.safe_psql(f"branch_create {ps.initial_tenant} new2 main", password=management_token)
|
||||
ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=management_token)
|
||||
|
||||
# fail to create branch using token with different tenantid
|
||||
with pytest.raises(psycopg2.DatabaseError, match='Tenant id mismatch. Permission denied'):
|
||||
ps.safe_psql(f"branch_create {ps.initial_tenant} new2 main", password=invalid_tenant_token)
|
||||
ps.safe_psql(f"branch_create {env.initial_tenant} new2 main", password=invalid_tenant_token)
|
||||
|
||||
# create tenant using management token
|
||||
ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token)
|
||||
|
||||
# fail to create tenant using tenant token
|
||||
with pytest.raises(psycopg2.DatabaseError, match='Attempt to access management api with tenant scope. Permission denied'):
|
||||
with pytest.raises(
|
||||
psycopg2.DatabaseError,
|
||||
match='Attempt to access management api with tenant scope. Permission denied'):
|
||||
ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
|
||||
def test_compute_auth_to_pageserver(
|
||||
zenith_cli: ZenithCli,
|
||||
wa_factory,
|
||||
pageserver_auth_enabled: ZenithPageserver,
|
||||
repo_dir: str,
|
||||
with_wal_acceptors: bool,
|
||||
pg_bin: PgBin,
|
||||
port_distributor: PortDistributor,
|
||||
):
|
||||
ps = pageserver_auth_enabled
|
||||
# since we are in progress of refactoring protocols between compute safekeeper and page server
|
||||
# use hardcoded management token in safekeeper
|
||||
management_token = ps.auth_keys.generate_management_token()
|
||||
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_wal_acceptors: bool):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
if with_wal_acceptors:
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
branch = f"test_compute_auth_to_pageserver{with_wal_acceptors}"
|
||||
zenith_cli.run(["branch", branch, "empty"])
|
||||
if with_wal_acceptors:
|
||||
wa_factory.start_n_new(3, management_token)
|
||||
env.zenith_cli(["branch", branch, "main"])
|
||||
|
||||
with Postgres(
|
||||
zenith_cli=zenith_cli,
|
||||
repo_dir=repo_dir,
|
||||
pg_bin=pg_bin,
|
||||
tenant_id=ps.initial_tenant,
|
||||
port=port_distributor.get_port(),
|
||||
).create_start(
|
||||
branch,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
) as pg:
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
cur.execute('CREATE TABLE t(key int primary key, value text)')
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
cur.execute('SELECT sum(key) FROM t')
|
||||
assert cur.fetchone() == (5000050000, )
|
||||
pg = env.postgres.create_start(branch)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
cur.execute('CREATE TABLE t(key int primary key, value text)')
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
cur.execute('SELECT sum(key) FROM t')
|
||||
assert cur.fetchone() == (5000050000, )
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -8,11 +8,12 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Create a couple of branches off the main branch, at a historical point in time.
|
||||
#
|
||||
def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
def test_branch_behind(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_branch_behind", "empty"])
|
||||
env.zenith_cli(["branch", "test_branch_behind", "empty"])
|
||||
|
||||
pgmain = postgres.create_start('test_branch_behind')
|
||||
pgmain = env.postgres.create_start('test_branch_behind')
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
@@ -40,7 +41,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
log.info(f'LSN after 200100 rows: {lsn_b}')
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_branch_behind_hundred", "test_branch_behind@" + lsn_a])
|
||||
env.zenith_cli(["branch", "test_branch_behind_hundred", "test_branch_behind@" + lsn_a])
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
main_cur.execute('''
|
||||
@@ -55,10 +56,10 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
log.info(f'LSN after 400100 rows: {lsn_c}')
|
||||
|
||||
# Branch at the point where only 200100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_branch_behind_more", "test_branch_behind@" + lsn_b])
|
||||
env.zenith_cli(["branch", "test_branch_behind_more", "test_branch_behind@" + lsn_b])
|
||||
|
||||
pg_hundred = postgres.create_start("test_branch_behind_hundred")
|
||||
pg_more = postgres.create_start("test_branch_behind_more")
|
||||
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
|
||||
pg_more = env.postgres.create_start("test_branch_behind_more")
|
||||
|
||||
# On the 'hundred' branch, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
@@ -79,14 +80,17 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
# Check bad lsn's for branching
|
||||
|
||||
# branch at segment boundary
|
||||
zenith_cli.run(["branch", "test_branch_segment_boundary", "test_branch_behind@0/3000000"])
|
||||
pg = postgres.create_start("test_branch_segment_boundary")
|
||||
env.zenith_cli(["branch", "test_branch_segment_boundary", "test_branch_behind@0/3000000"])
|
||||
pg = env.postgres.create_start("test_branch_segment_boundary")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
#
|
||||
# FIXME: This works currently, but probably shouldn't be allowed
|
||||
try:
|
||||
zenith_cli.run(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
|
||||
env.zenith_cli(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
|
||||
# FIXME: assert false, "branch with invalid LSN should have failed"
|
||||
except subprocess.CalledProcessError:
|
||||
log.info("Branch creation with pre-initdb LSN failed (as expected)")
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -12,19 +12,23 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test compute node start after clog truncation
|
||||
#
|
||||
def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
def test_clog_truncate(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_clog_truncate", "empty"])
|
||||
env.zenith_cli(["branch", "test_clog_truncate", "empty"])
|
||||
|
||||
# set agressive autovacuum to make sure that truncation will happen
|
||||
config = [
|
||||
'autovacuum_max_workers=10', 'autovacuum_vacuum_threshold=0',
|
||||
'autovacuum_vacuum_insert_threshold=0', 'autovacuum_vacuum_cost_delay=0',
|
||||
'autovacuum_vacuum_cost_limit=10000', 'autovacuum_naptime =1s',
|
||||
'autovacuum_max_workers=10',
|
||||
'autovacuum_vacuum_threshold=0',
|
||||
'autovacuum_vacuum_insert_threshold=0',
|
||||
'autovacuum_vacuum_cost_delay=0',
|
||||
'autovacuum_vacuum_cost_limit=10000',
|
||||
'autovacuum_naptime =1s',
|
||||
'autovacuum_freeze_max_age=100000'
|
||||
]
|
||||
|
||||
pg = postgres.create_start('test_clog_truncate', config_lines=config)
|
||||
pg = env.postgres.create_start('test_clog_truncate', config_lines=config)
|
||||
log.info('postgres is running on test_clog_truncate branch')
|
||||
|
||||
# Install extension containing function needed for test
|
||||
@@ -61,10 +65,10 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
|
||||
zenith_cli.run(
|
||||
env.zenith_cli(
|
||||
["branch", "test_clog_truncate_new", "test_clog_truncate@" + lsn_after_truncation])
|
||||
|
||||
pg2 = postgres.create_start('test_clog_truncate_new')
|
||||
pg2 = env.postgres.create_start('test_clog_truncate_new')
|
||||
log.info('postgres is running on test_clog_truncate_new branch')
|
||||
|
||||
# check that new node doesn't contain truncated segment
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -9,12 +9,13 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test starting Postgres with custom options
|
||||
#
|
||||
def test_config(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
def test_config(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_config", "empty"])
|
||||
env.zenith_cli(["branch", "test_config", "empty"])
|
||||
|
||||
# change config
|
||||
pg = postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
|
||||
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
|
||||
log.info('postgres is running on test_config branch')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import pathlib
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory, ZenithCli, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -11,15 +11,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test CREATE DATABASE when there have been relmapper changes
|
||||
#
|
||||
def test_createdb(
|
||||
zenith_cli: ZenithCli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
):
|
||||
zenith_cli.run(["branch", "test_createdb", "empty"])
|
||||
def test_createdb(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_createdb", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_createdb')
|
||||
pg = env.postgres.create_start('test_createdb')
|
||||
log.info("postgres is running on 'test_createdb' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -33,27 +29,23 @@ def test_createdb(
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
zenith_cli.run(["branch", "test_createdb2", "test_createdb@" + lsn])
|
||||
env.zenith_cli(["branch", "test_createdb2", "test_createdb@" + lsn])
|
||||
|
||||
pg2 = postgres.create_start('test_createdb2')
|
||||
pg2 = env.postgres.create_start('test_createdb2')
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
for db in (pg, pg2):
|
||||
db.connect(dbname='foodb').close()
|
||||
|
||||
|
||||
#
|
||||
# Test DROP DATABASE
|
||||
#
|
||||
def test_dropdb(
|
||||
zenith_cli: ZenithCli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
test_output_dir
|
||||
):
|
||||
zenith_cli.run(["branch", "test_dropdb", "empty"])
|
||||
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_dropdb", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_dropdb')
|
||||
pg = env.postgres.create_start('test_dropdb')
|
||||
log.info("postgres is running on 'test_dropdb' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -66,7 +58,6 @@ def test_dropdb(
|
||||
cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';")
|
||||
dboid = cur.fetchone()[0]
|
||||
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE foodb')
|
||||
@@ -76,28 +67,29 @@ def test_dropdb(
|
||||
cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_after_drop = cur.fetchone()[0]
|
||||
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
zenith_cli.run(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop])
|
||||
pg_before = postgres.create_start('test_before_dropdb')
|
||||
env.zenith_cli(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop])
|
||||
pg_before = env.postgres.create_start('test_before_dropdb')
|
||||
|
||||
zenith_cli.run(["branch", "test_after_dropdb", "test_dropdb@" + lsn_after_drop])
|
||||
pg_after = postgres.create_start('test_after_dropdb')
|
||||
env.zenith_cli(["branch", "test_after_dropdb", "test_dropdb@" + lsn_after_drop])
|
||||
pg_after = env.postgres.create_start('test_after_dropdb')
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
pg_before.connect(dbname='foodb').close()
|
||||
|
||||
# Test that database subdir exists on the branch before drop
|
||||
assert pg_before.pgdata_dir
|
||||
dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) == True
|
||||
|
||||
# Test that database subdir doesn't exist on the branch after drop
|
||||
assert pg_after.pgdata_dir
|
||||
dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) == False
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(zenith_cli, test_output_dir, pg, pageserver.service_port.pg)
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -9,10 +9,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test CREATE USER to check shared catalog restore
|
||||
#
|
||||
def test_createuser(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
zenith_cli.run(["branch", "test_createuser", "empty"])
|
||||
def test_createuser(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_createuser", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_createuser')
|
||||
pg = env.postgres.create_start('test_createuser')
|
||||
log.info("postgres is running on 'test_createuser' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -26,9 +27,9 @@ def test_createuser(zenith_cli, pageserver: ZenithPageserver, postgres: Postgres
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
zenith_cli.run(["branch", "test_createuser2", "test_createuser@" + lsn])
|
||||
env.zenith_cli(["branch", "test_createuser2", "test_createuser@" + lsn])
|
||||
|
||||
pg2 = postgres.create_start('test_createuser2')
|
||||
pg2 = env.postgres.create_start('test_createuser2')
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
assert pg2.safe_psql('select current_user', username='testuser') == [('testuser', )]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -10,11 +10,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
# it only checks next_multixact_id field in restored pg_control,
|
||||
# since we don't have functions to check multixact internals.
|
||||
#
|
||||
def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
pg_bin, zenith_cli, base_dir, test_output_dir):
|
||||
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_multixact", "empty"])
|
||||
pg = postgres.create_start('test_multixact')
|
||||
env.zenith_cli(["branch", "test_multixact", "empty"])
|
||||
pg = env.postgres.create_start('test_multixact')
|
||||
|
||||
log.info("postgres is running on 'test_multixact' branch")
|
||||
pg_conn = pg.connect()
|
||||
@@ -53,8 +53,8 @@ def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
assert int(next_multixact_id) > int(next_multixact_id_old)
|
||||
|
||||
# Branch at this point
|
||||
zenith_cli.run(["branch", "test_multixact_new", "test_multixact@" + lsn])
|
||||
pg_new = postgres.create_start('test_multixact_new')
|
||||
env.zenith_cli(["branch", "test_multixact_new", "test_multixact@" + lsn])
|
||||
pg_new = env.postgres.create_start('test_multixact_new')
|
||||
|
||||
log.info("postgres is running on 'test_multixact_new' branch")
|
||||
pg_new_conn = pg_new.connect()
|
||||
@@ -67,4 +67,4 @@ def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
assert next_multixact_id_new == next_multixact_id
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(zenith_cli, test_output_dir, pg_new, pageserver.service_port.pg)
|
||||
check_restored_datadir_content(test_output_dir, env, pg_new)
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Test where Postgres generates a lot of WAL, and it's garbage collected away, but
|
||||
# no pages are evicted so that Postgres uses an old LSN in a GetPage request.
|
||||
@@ -15,10 +16,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
# just a hint that the page hasn't been modified since that LSN, and the page
|
||||
# server should return the latest page version regardless of the LSN.
|
||||
#
|
||||
def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
def test_old_request_lsn(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_old_request_lsn", "empty"])
|
||||
pg = postgres.create_start('test_old_request_lsn')
|
||||
env.zenith_cli(["branch", "test_old_request_lsn", "empty"])
|
||||
pg = env.postgres.create_start('test_old_request_lsn')
|
||||
log.info('postgres is running on test_old_request_lsn branch')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
@@ -28,7 +30,7 @@ def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
timeline = cur.fetchone()[0]
|
||||
|
||||
psconn = pageserver.connect()
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Create table, and insert some rows. Make it big enough that it doesn't fit in
|
||||
@@ -47,20 +49,20 @@ def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
|
||||
from pg_settings where name = 'shared_buffers'
|
||||
''')
|
||||
row = cur.fetchone()
|
||||
log.info(f'shared_buffers is {row[0]}, table size {row[1]}');
|
||||
log.info(f'shared_buffers is {row[0]}, table size {row[1]}')
|
||||
assert int(row[0]) < int(row[1])
|
||||
|
||||
cur.execute('VACUUM foo');
|
||||
cur.execute('VACUUM foo')
|
||||
|
||||
# Make a lot of updates on a single row, generating a lot of WAL. Trigger
|
||||
# garbage collections so that the page server will remove old page versions.
|
||||
for i in range(10):
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
for j in range(100):
|
||||
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;');
|
||||
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;')
|
||||
|
||||
# All (or at least most of) the updates should've been on the same page, so
|
||||
# that we haven't had to evict any dirty pages for a long time. Now run
|
||||
# a query that sends GetPage@LSN requests with the old LSN.
|
||||
cur.execute("SELECT COUNT(*), SUM(val) FROM foo");
|
||||
cur.execute("SELECT COUNT(*), SUM(val) FROM foo")
|
||||
assert cur.fetchone() == (100000, 101000)
|
||||
|
||||
@@ -3,25 +3,28 @@ from uuid import uuid4
|
||||
import pytest
|
||||
import psycopg2
|
||||
import requests
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, ZenithPageserverHttpClient
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
|
||||
from typing import cast
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_status_psql(pageserver):
|
||||
assert pageserver.safe_psql('status') == [
|
||||
def test_status_psql(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
assert env.pageserver.safe_psql('status') == [
|
||||
('hello world', ),
|
||||
]
|
||||
|
||||
|
||||
def test_branch_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
def test_branch_list_psql(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_branch_list_main", "empty"])
|
||||
env.zenith_cli(["branch", "test_branch_list_main", "empty"])
|
||||
|
||||
conn = pageserver.connect()
|
||||
conn = env.pageserver.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute(f'branch_list {pageserver.initial_tenant}')
|
||||
cur.execute(f'branch_list {env.initial_tenant}')
|
||||
branches = json.loads(cur.fetchone()[0])
|
||||
# Filter out branches created by other tests
|
||||
branches = [x for x in branches if x['name'].startswith('test_branch_list')]
|
||||
@@ -34,10 +37,10 @@ def test_branch_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
assert 'ancestor_lsn' in branches[0]
|
||||
|
||||
# Create another branch, and start Postgres on it
|
||||
zenith_cli.run(['branch', 'test_branch_list_experimental', 'test_branch_list_main'])
|
||||
zenith_cli.run(['pg', 'create', 'test_branch_list_experimental'])
|
||||
env.zenith_cli(['branch', 'test_branch_list_experimental', 'test_branch_list_main'])
|
||||
env.zenith_cli(['pg', 'create', 'test_branch_list_experimental'])
|
||||
|
||||
cur.execute(f'branch_list {pageserver.initial_tenant}')
|
||||
cur.execute(f'branch_list {env.initial_tenant}')
|
||||
new_branches = json.loads(cur.fetchone()[0])
|
||||
# Filter out branches created by other tests
|
||||
new_branches = [x for x in new_branches if x['name'].startswith('test_branch_list')]
|
||||
@@ -53,18 +56,22 @@ def test_branch_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_tenant_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
res = zenith_cli.run(["tenant", "list"])
|
||||
res.check_returncode()
|
||||
tenants = res.stdout.splitlines()
|
||||
assert tenants == [pageserver.initial_tenant]
|
||||
def test_tenant_list_psql(zenith_env_builder: ZenithEnvBuilder):
|
||||
# don't use zenith_simple_env, because there might be other tenants there,
|
||||
# left over from other tests.
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
conn = pageserver.connect()
|
||||
res = env.zenith_cli(["tenant", "list"])
|
||||
res.check_returncode()
|
||||
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
assert tenants == [env.initial_tenant]
|
||||
|
||||
conn = env.pageserver.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
# check same tenant cannot be created twice
|
||||
with pytest.raises(psycopg2.DatabaseError, match=f'tenant {pageserver.initial_tenant} already exists'):
|
||||
cur.execute(f'tenant_create {pageserver.initial_tenant}')
|
||||
with pytest.raises(psycopg2.DatabaseError, match=f'tenant {env.initial_tenant} already exists'):
|
||||
cur.execute(f'tenant_create {env.initial_tenant}')
|
||||
|
||||
# create one more tenant
|
||||
tenant1 = uuid4().hex
|
||||
@@ -73,20 +80,20 @@ def test_tenant_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
cur.execute('tenant_list')
|
||||
|
||||
# compare tenants list
|
||||
new_tenants = sorted(json.loads(cur.fetchone()[0]))
|
||||
assert sorted([pageserver.initial_tenant, tenant1]) == new_tenants
|
||||
new_tenants = sorted(map(lambda t: cast(str, t['id']), json.loads(cur.fetchone()[0])))
|
||||
assert sorted([env.initial_tenant, tenant1]) == new_tenants
|
||||
|
||||
|
||||
def check_client(client: ZenithPageserverHttpClient, initial_tenant: str):
|
||||
client.check_status()
|
||||
|
||||
# check initial tenant is there
|
||||
assert initial_tenant in set(client.tenant_list())
|
||||
assert initial_tenant in {t['id'] for t in client.tenant_list()}
|
||||
|
||||
# create new tenant and check it is also there
|
||||
tenant_id = uuid4()
|
||||
client.tenant_create(tenant_id)
|
||||
assert tenant_id.hex in set(client.tenant_list())
|
||||
assert tenant_id.hex in {t['id'] for t in client.tenant_list()}
|
||||
|
||||
# create branch
|
||||
branch_name = uuid4().hex
|
||||
@@ -96,11 +103,17 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: str):
|
||||
assert branch_name in {b['name'] for b in client.branch_list(tenant_id)}
|
||||
|
||||
|
||||
def test_pageserver_http_api_client(pageserver: ZenithPageserver):
|
||||
client = pageserver.http_client()
|
||||
check_client(client, pageserver.initial_tenant)
|
||||
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
client = env.pageserver.http_client()
|
||||
check_client(client, env.initial_tenant)
|
||||
|
||||
|
||||
def test_pageserver_http_api_client_auth_enabled(pageserver_auth_enabled: ZenithPageserver):
|
||||
client = pageserver_auth_enabled.http_client(auth_token=pageserver_auth_enabled.auth_keys.generate_management_token())
|
||||
check_client(client, pageserver_auth_enabled.initial_tenant)
|
||||
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
management_token = env.auth_keys.generate_management_token()
|
||||
|
||||
client = env.pageserver.http_client(auth_token=management_token)
|
||||
check_client(client, env.initial_tenant)
|
||||
|
||||
@@ -4,22 +4,22 @@ import time
|
||||
|
||||
from contextlib import closing
|
||||
from multiprocessing import Process, Value
|
||||
from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
# times, with fault_probability chance of getting a wal acceptor down or up
|
||||
# along the way. 2 of 3 are always alive, so the work keeps going.
|
||||
def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
|
||||
|
||||
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
|
||||
# One safekeeper is enough for this test.
|
||||
wa_factory.start_n_new(1)
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
zenith_cli.run(["branch", "test_pageserver_restart", "empty"])
|
||||
pg = postgres.create_start('test_pageserver_restart',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
env.zenith_cli(["branch", "test_pageserver_restart", "main"])
|
||||
pg = env.postgres.create_start('test_pageserver_restart')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -41,14 +41,14 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres:
|
||||
from pg_settings where name = 'shared_buffers'
|
||||
''')
|
||||
row = cur.fetchone()
|
||||
log.info(f"shared_buffers is {row[0]}, table size {row[1]}");
|
||||
log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
|
||||
assert int(row[0]) < int(row[1])
|
||||
|
||||
# Stop and restart pageserver. This is a more or less graceful shutdown, although
|
||||
# the page server doesn't currently have a shutdown routine so there's no difference
|
||||
# between stopping and crashing.
|
||||
pageserver.stop();
|
||||
pageserver.start();
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
# Stopping the pageserver breaks the connection from the postgres backend to
|
||||
# the page server, and causes the next query on the connection to fail. Start a new
|
||||
@@ -62,6 +62,5 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres:
|
||||
assert cur.fetchone() == (100000, )
|
||||
|
||||
# Stop the page server by force, and restart it
|
||||
pageserver.stop();
|
||||
pageserver.start();
|
||||
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_pgbench(postgres: PostgresFactory, pg_bin, zenith_cli):
|
||||
def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_pgbench", "empty"])
|
||||
env.zenith_cli(["branch", "test_pgbench", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_pgbench')
|
||||
pg = env.postgres.create_start('test_pgbench')
|
||||
log.info("postgres is running on 'test_pgbench' branch")
|
||||
|
||||
connstr = pg.connstr()
|
||||
|
||||
91
test_runner/batch_others/test_readonly_node.py
Normal file
91
test_runner/batch_others/test_readonly_node.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Create read-only compute nodes, anchored at historical points in time.
|
||||
#
|
||||
# This is very similar to the 'test_branch_behind' test, but instead of
|
||||
# creating branches, creates read-only nodes.
|
||||
#
|
||||
def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_readonly_node", "empty"])
|
||||
|
||||
pgmain = env.postgres.create_start('test_readonly_node')
|
||||
print("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
main_cur.execute('CREATE TABLE foo (t text)')
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 100) g
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_a = main_cur.fetchone()[0]
|
||||
print('LSN after 100 rows: ' + lsn_a)
|
||||
|
||||
# Insert some more rows. (This generates enough WAL to fill a few segments.)
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 200000) g
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_b = main_cur.fetchone()[0]
|
||||
print('LSN after 200100 rows: ' + lsn_b)
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 200000) g
|
||||
''')
|
||||
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_c = main_cur.fetchone()[0]
|
||||
print('LSN after 400100 rows: ' + lsn_c)
|
||||
|
||||
# Create first read-only node at the point where only 100 rows were inserted
|
||||
pg_hundred = env.postgres.create_start("test_readonly_node_hundred",
|
||||
branch=f'test_readonly_node@{lsn_a}')
|
||||
|
||||
# And another at the point where 200100 rows were inserted
|
||||
pg_more = env.postgres.create_start("test_readonly_node_more",
|
||||
branch=f'test_readonly_node@{lsn_b}')
|
||||
|
||||
# On the 'hundred' node, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
hundred_cur = hundred_pg_conn.cursor()
|
||||
hundred_cur.execute('SELECT count(*) FROM foo')
|
||||
assert hundred_cur.fetchone() == (100, )
|
||||
|
||||
# On the 'more' node, we should see 100200 rows
|
||||
more_pg_conn = pg_more.connect()
|
||||
more_cur = more_pg_conn.cursor()
|
||||
more_cur.execute('SELECT count(*) FROM foo')
|
||||
assert more_cur.fetchone() == (200100, )
|
||||
|
||||
# All the rows are visible on the main branch
|
||||
main_cur.execute('SELECT count(*) FROM foo')
|
||||
assert main_cur.fetchone() == (400100, )
|
||||
|
||||
# Check creating a node at segment boundary
|
||||
pg = env.postgres.create_start("test_branch_segment_boundary",
|
||||
branch="test_readonly_node@0/3000000")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# Create node at pre-initdb lsn
|
||||
try:
|
||||
env.zenith_cli(["pg", "start", "test_branch_preinitdb", "test_readonly_node@0/42"])
|
||||
assert False, "compute node startup with invalid LSN should have failed"
|
||||
except Exception:
|
||||
print("Node creation with pre-initdb LSN failed (as expected)")
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -11,23 +11,15 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
# Test restarting and recreating a postgres instance
|
||||
#
|
||||
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
|
||||
def test_restart_compute(
|
||||
zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
wa_factory,
|
||||
with_wal_acceptors: bool,
|
||||
):
|
||||
wal_acceptor_connstrs = None
|
||||
zenith_cli.run(["branch", "test_restart_compute", "empty"])
|
||||
|
||||
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_wal_acceptors: bool):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
if with_wal_acceptors:
|
||||
wa_factory.start_n_new(3)
|
||||
wal_acceptor_connstrs = wa_factory.get_connstrs()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
pg = postgres.create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
env.zenith_cli(["branch", "test_restart_compute", "main"])
|
||||
|
||||
pg = env.postgres.create_start('test_restart_compute')
|
||||
log.info("postgres is running on 'test_restart_compute' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -40,9 +32,7 @@ def test_restart_compute(
|
||||
log.info(f"res = {r}")
|
||||
|
||||
# Remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
|
||||
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -61,8 +51,7 @@ def test_restart_compute(
|
||||
log.info(f"res = {r}")
|
||||
|
||||
# Again remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||
|
||||
# That select causes lots of FPI's and increases probability of wakeepers
|
||||
# lagging behind after query completion
|
||||
@@ -76,8 +65,7 @@ def test_restart_compute(
|
||||
log.info(f"res = {r}")
|
||||
|
||||
# And again remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
from contextlib import closing
|
||||
import psycopg2.extras
|
||||
import time
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def print_gc_result(row):
|
||||
log.info("GC duration {elapsed} ms".format_map(row));
|
||||
log.info(" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row))
|
||||
log.info(" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row))
|
||||
log.info("GC duration {elapsed} ms".format_map(row))
|
||||
log.info(
|
||||
" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}"
|
||||
.format_map(row))
|
||||
log.info(
|
||||
" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}"
|
||||
.format_map(row))
|
||||
|
||||
|
||||
#
|
||||
@@ -17,14 +23,15 @@ def print_gc_result(row):
|
||||
# This test is pretty tightly coupled with the current implementation of layered
|
||||
# storage, in layered_repository.rs.
|
||||
#
|
||||
def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
zenith_cli.run(["branch", "test_layerfiles_gc", "empty"])
|
||||
pg = postgres.create_start('test_layerfiles_gc')
|
||||
def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_layerfiles_gc", "empty"])
|
||||
pg = env.postgres.create_start('test_layerfiles_gc')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
with closing(pageserver.connect()) as psconn:
|
||||
with psconn.cursor(cursor_factory = psycopg2.extras.DictCursor) as pscur:
|
||||
with closing(env.pageserver.connect()) as psconn:
|
||||
with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
@@ -34,9 +41,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
cur.execute("CREATE TABLE foo(x integer)")
|
||||
cur.execute("INSERT INTO foo VALUES (1)")
|
||||
|
||||
cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass");
|
||||
row = cur.fetchone();
|
||||
log.info(f"relfilenode is {row[0]}");
|
||||
cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass")
|
||||
row = cur.fetchone()
|
||||
log.info(f"relfilenode is {row[0]}")
|
||||
|
||||
# Run GC, to clear out any garbage left behind in the catalogs by
|
||||
# the CREATE TABLE command. We want to have a clean slate with no garbage
|
||||
@@ -52,11 +59,12 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
cur.execute("DELETE FROM foo")
|
||||
|
||||
log.info("Running GC before test")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
# remember the number of files
|
||||
layer_relfiles_remain = row['layer_relfiles_total'] - row['layer_relfiles_removed']
|
||||
layer_relfiles_remain = (row['layer_relfiles_total'] -
|
||||
row['layer_relfiles_removed'])
|
||||
assert layer_relfiles_remain > 0
|
||||
|
||||
# Insert a row and run GC. Checkpoint should freeze the layer
|
||||
@@ -64,9 +72,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
# removing the old image and delta layer.
|
||||
log.info("Inserting one row and running GC")
|
||||
cur.execute("INSERT INTO foo VALUES (1)")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
@@ -78,9 +86,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
cur.execute("INSERT INTO foo VALUES (2)")
|
||||
cur.execute("INSERT INTO foo VALUES (3)")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
@@ -90,18 +98,18 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
cur.execute("INSERT INTO foo VALUES (2)")
|
||||
cur.execute("INSERT INTO foo VALUES (3)")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
|
||||
# Run GC again, with no changes in the database. Should not remove anything.
|
||||
log.info("Run GC again, with nothing to do")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain
|
||||
assert row['layer_relfiles_removed'] == 0
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
@@ -109,12 +117,12 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
#
|
||||
# Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage
|
||||
#
|
||||
log.info("Drop table and run GC again");
|
||||
log.info("Drop table and run GC again")
|
||||
cur.execute("DROP TABLE foo")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row);
|
||||
print_gc_result(row)
|
||||
|
||||
# We still cannot remove the latest layers
|
||||
# because they serve as tombstones for earlier layers.
|
||||
|
||||
@@ -2,39 +2,41 @@ from contextlib import closing
|
||||
|
||||
import pytest
|
||||
|
||||
from fixtures.zenith_fixtures import (
|
||||
TenantFactory,
|
||||
ZenithCli,
|
||||
PostgresFactory,
|
||||
)
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
|
||||
def test_tenants_normal_work(
|
||||
zenith_cli: ZenithCli,
|
||||
tenant_factory: TenantFactory,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory,
|
||||
with_wal_acceptors: bool,
|
||||
):
|
||||
"""Tests tenants with and without wal acceptors"""
|
||||
tenant_1 = tenant_factory.create()
|
||||
tenant_2 = tenant_factory.create()
|
||||
|
||||
zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_1}"])
|
||||
zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_2}"])
|
||||
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_wal_acceptors: bool):
|
||||
if with_wal_acceptors:
|
||||
wa_factory.start_n_new(3)
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
|
||||
pg_tenant1 = postgres.create_start(
|
||||
env = zenith_env_builder.init()
|
||||
"""Tests tenants with and without wal acceptors"""
|
||||
tenant_1 = env.create_tenant()
|
||||
tenant_2 = env.create_tenant()
|
||||
|
||||
env.zenith_cli([
|
||||
"branch",
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
f"--tenantid={tenant_1}"
|
||||
])
|
||||
env.zenith_cli([
|
||||
"branch",
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
f"--tenantid={tenant_2}"
|
||||
])
|
||||
|
||||
pg_tenant1 = env.postgres.create_start(
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_1,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
)
|
||||
pg_tenant2 = postgres.create_start(
|
||||
pg_tenant2 = env.postgres.create_start(
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_2,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
)
|
||||
|
||||
for pg in [pg_tenant1, pg_tenant2]:
|
||||
@@ -45,4 +47,4 @@ def test_tenants_normal_work(
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
cur.execute("SELECT sum(key) FROM t")
|
||||
assert cur.fetchone() == (5000050000,)
|
||||
assert cur.fetchone() == (5000050000, )
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
from contextlib import closing
|
||||
from uuid import UUID
|
||||
import psycopg2.extras
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
def test_timeline_size(
|
||||
zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin
|
||||
):
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_timeline_size", "empty"])
|
||||
|
||||
client = pageserver.http_client()
|
||||
res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size")
|
||||
def test_timeline_size(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.zenith_cli(["branch", "test_timeline_size", "empty"])
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
pgmain = postgres.create_start("test_timeline_size")
|
||||
pgmain = env.postgres.create_start("test_timeline_size")
|
||||
log.info("postgres is running on 'test_timeline_size' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
@@ -23,17 +23,15 @@ def test_timeline_size(
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
cur.execute(
|
||||
"""
|
||||
cur.execute("""
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 10) g
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size")
|
||||
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
cur.execute("TRUNCATE foo")
|
||||
|
||||
res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size")
|
||||
res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, PgBin
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -9,10 +9,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test branching, when a transaction is in prepared state
|
||||
#
|
||||
def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin: PgBin):
|
||||
zenith_cli.run(["branch", "test_twophase", "empty"])
|
||||
def test_twophase(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli(["branch", "test_twophase", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
|
||||
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
|
||||
log.info("postgres is running on 'test_twophase' branch")
|
||||
|
||||
conn = pg.connect()
|
||||
@@ -57,10 +58,10 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
zenith_cli.run(["branch", "test_twophase_prepared", "test_twophase"])
|
||||
env.zenith_cli(["branch", "test_twophase_prepared", "test_twophase"])
|
||||
|
||||
# Start compute on the new branch
|
||||
pg2 = postgres.create_start(
|
||||
pg2 = env.postgres.create_start(
|
||||
'test_twophase_prepared',
|
||||
config_lines=['max_prepared_transactions=5'],
|
||||
)
|
||||
@@ -79,8 +80,8 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
|
||||
cur2.execute("ROLLBACK PREPARED 'insert_two'")
|
||||
|
||||
cur2.execute('SELECT * FROM foo')
|
||||
assert cur2.fetchall() == [('one',), ('three',)]
|
||||
assert cur2.fetchall() == [('one', ), ('three', )]
|
||||
|
||||
# Only one committed insert is visible on the original branch
|
||||
cur.execute('SELECT * FROM foo')
|
||||
assert cur.fetchall() == [('three',)]
|
||||
assert cur.fetchall() == [('three', )]
|
||||
|
||||
@@ -1,16 +1,19 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
|
||||
# HEAP_UPDATE record.
|
||||
#
|
||||
def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, base_dir):
|
||||
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_vm_bit_clear", "empty"])
|
||||
pg = postgres.create_start('test_vm_bit_clear')
|
||||
env.zenith_cli(["branch", "test_vm_bit_clear", "empty"])
|
||||
pg = env.postgres.create_start('test_vm_bit_clear')
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear' branch")
|
||||
pg_conn = pg.connect()
|
||||
@@ -33,7 +36,7 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
|
||||
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
|
||||
|
||||
# Branch at this point, to test that later
|
||||
zenith_cli.run(["branch", "test_vm_bit_clear_new", "test_vm_bit_clear"])
|
||||
env.zenith_cli(["branch", "test_vm_bit_clear_new", "test_vm_bit_clear"])
|
||||
|
||||
# Clear the buffer cache, to force the VM page to be re-fetched from
|
||||
# the page server
|
||||
@@ -49,20 +52,19 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
|
||||
''')
|
||||
|
||||
cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
|
||||
assert(cur.fetchall() == []);
|
||||
assert (cur.fetchall() == [])
|
||||
cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
|
||||
assert(cur.fetchall() == []);
|
||||
assert (cur.fetchall() == [])
|
||||
|
||||
cur.close()
|
||||
|
||||
|
||||
# Check the same thing on the branch that we created right after the DELETE
|
||||
#
|
||||
# As of this writing, the code in smgrwrite() creates a full-page image whenever
|
||||
# a dirty VM page is evicted. If the VM bit was not correctly cleared by the
|
||||
# earlier WAL record, the full-page image hides the problem. Starting a new
|
||||
# server at the right point-in-time avoids that full-page image.
|
||||
pg_new = postgres.create_start('test_vm_bit_clear_new')
|
||||
pg_new = env.postgres.create_start('test_vm_bit_clear_new')
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear_new' branch")
|
||||
pg_new_conn = pg_new.connect()
|
||||
@@ -75,6 +77,6 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p
|
||||
''')
|
||||
|
||||
cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
|
||||
assert(cur_new.fetchall() == []);
|
||||
assert (cur_new.fetchall() == [])
|
||||
cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
|
||||
assert(cur_new.fetchall() == []);
|
||||
assert (cur_new.fetchall() == [])
|
||||
|
||||
@@ -7,7 +7,7 @@ import uuid
|
||||
|
||||
from contextlib import closing
|
||||
from multiprocessing import Process, Value
|
||||
from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory, PgBin
|
||||
from fixtures.zenith_fixtures import PgBin, ZenithEnv, ZenithEnvBuilder
|
||||
from fixtures.utils import lsn_to_hex, mkdir_if_needed
|
||||
from fixtures.log_helper import log
|
||||
|
||||
@@ -16,11 +16,13 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
# basic test, write something in setup with wal acceptors, ensure that commits
|
||||
# succeed and data is written
|
||||
def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_normal_work", "empty"])
|
||||
wa_factory.start_n_new(3)
|
||||
pg = postgres.create_start('test_wal_acceptors_normal_work',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
env.zenith_cli(["branch", "test_wal_acceptors_normal_work", "main"])
|
||||
|
||||
pg = env.postgres.create_start('test_wal_acceptors_normal_work')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -34,18 +36,19 @@ def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: Postgre
|
||||
|
||||
# Run page server and multiple acceptors, and multiple compute nodes running
|
||||
# against different timelines.
|
||||
def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
|
||||
n_timelines = 2
|
||||
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
n_timelines = 2
|
||||
|
||||
branches = ["test_wal_acceptors_many_timelines_{}".format(tlin) for tlin in range(n_timelines)]
|
||||
|
||||
# start postgres on each timeline
|
||||
pgs = []
|
||||
for branch in branches:
|
||||
zenith_cli.run(["branch", branch, "empty"])
|
||||
pgs.append(postgres.create_start(branch, wal_acceptors=wa_factory.get_connstrs()))
|
||||
env.zenith_cli(["branch", branch, "main"])
|
||||
pgs.append(env.postgres.create_start(branch))
|
||||
|
||||
# Do everything in different loops to have actions on different timelines
|
||||
# interleaved.
|
||||
@@ -66,16 +69,16 @@ def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: Post
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
# times, with fault_probability chance of getting a wal acceptor down or up
|
||||
# along the way. 2 of 3 are always alive, so the work keeps going.
|
||||
def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
|
||||
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
|
||||
fault_probability = 0.01
|
||||
n_inserts = 1000
|
||||
n_acceptors = 3
|
||||
|
||||
wa_factory.start_n_new(n_acceptors)
|
||||
zenith_env_builder.num_safekeepers = n_acceptors
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_restarts", "empty"])
|
||||
pg = postgres.create_start('test_wal_acceptors_restarts',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
env.zenith_cli(["branch", "test_wal_acceptors_restarts", "main"])
|
||||
pg = env.postgres.create_start('test_wal_acceptors_restarts')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -89,7 +92,7 @@ def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa
|
||||
|
||||
if random.random() <= fault_probability:
|
||||
if failed_node is None:
|
||||
failed_node = wa_factory.instances[random.randrange(0, n_acceptors)]
|
||||
failed_node = env.safekeepers[random.randrange(0, n_acceptors)]
|
||||
failed_node.stop()
|
||||
else:
|
||||
failed_node.start()
|
||||
@@ -107,12 +110,12 @@ def delayed_wal_acceptor_start(wa):
|
||||
|
||||
|
||||
# When majority of acceptors is offline, commits are expected to be frozen
|
||||
def test_unavailability(zenith_cli, postgres: PostgresFactory, wa_factory):
|
||||
wa_factory.start_n_new(2)
|
||||
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 2
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_unavailability", "empty"])
|
||||
pg = postgres.create_start('test_wal_acceptors_unavailability',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
env.zenith_cli(["branch", "test_wal_acceptors_unavailability", "main"])
|
||||
pg = env.postgres.create_start('test_wal_acceptors_unavailability')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -124,9 +127,9 @@ def test_unavailability(zenith_cli, postgres: PostgresFactory, wa_factory):
|
||||
cur.execute("INSERT INTO t values (1, 'payload')")
|
||||
|
||||
# shutdown one of two acceptors, that is, majority
|
||||
wa_factory.instances[0].stop()
|
||||
env.safekeepers[0].stop()
|
||||
|
||||
proc = Process(target=delayed_wal_acceptor_start, args=(wa_factory.instances[0], ))
|
||||
proc = Process(target=delayed_wal_acceptor_start, args=(env.safekeepers[0], ))
|
||||
proc.start()
|
||||
|
||||
start = time.time()
|
||||
@@ -136,9 +139,9 @@ def test_unavailability(zenith_cli, postgres: PostgresFactory, wa_factory):
|
||||
proc.join()
|
||||
|
||||
# for the world's balance, do the same with second acceptor
|
||||
wa_factory.instances[1].stop()
|
||||
env.safekeepers[1].stop()
|
||||
|
||||
proc = Process(target=delayed_wal_acceptor_start, args=(wa_factory.instances[1], ))
|
||||
proc = Process(target=delayed_wal_acceptor_start, args=(env.safekeepers[1], ))
|
||||
proc.start()
|
||||
|
||||
start = time.time()
|
||||
@@ -177,13 +180,13 @@ def stop_value():
|
||||
|
||||
|
||||
# do inserts while concurrently getting up/down subsets of acceptors
|
||||
def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory, stop_value):
|
||||
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_race_conditions", "empty"])
|
||||
pg = postgres.create_start('test_wal_acceptors_race_conditions',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
env.zenith_cli(["branch", "test_wal_acceptors_race_conditions", "main"])
|
||||
pg = env.postgres.create_start('test_wal_acceptors_race_conditions')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
@@ -192,7 +195,7 @@ def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
|
||||
|
||||
cur.execute('CREATE TABLE t(key int primary key, value text)')
|
||||
|
||||
proc = Process(target=xmas_garland, args=(wa_factory.instances, stop_value))
|
||||
proc = Process(target=xmas_garland, args=(env.safekeepers, stop_value))
|
||||
proc.start()
|
||||
|
||||
for i in range(1000):
|
||||
@@ -207,7 +210,8 @@ def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: Pos
|
||||
|
||||
class ProposerPostgres:
|
||||
"""Object for running safekeepers sync with walproposer"""
|
||||
def __init__(self, pgdata_dir: str, pg_bin: PgBin, timeline_id: str, tenant_id: str):
|
||||
def __init__(self, env: ZenithEnv, pgdata_dir: str, pg_bin, timeline_id: str, tenant_id: str):
|
||||
self.env = env
|
||||
self.pgdata_dir: str = pgdata_dir
|
||||
self.pg_bin: PgBin = pg_bin
|
||||
self.timeline_id: str = timeline_id
|
||||
@@ -253,16 +257,20 @@ class ProposerPostgres:
|
||||
|
||||
|
||||
# insert wal in all safekeepers and run sync on proposer
|
||||
def test_sync_safekeepers(repo_dir: str, pg_bin: PgBin, wa_factory: WalAcceptorFactory):
|
||||
wa_factory.start_n_new(3)
|
||||
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder, pg_bin: PgBin):
|
||||
|
||||
# We don't really need the full environment for this test, just the
|
||||
# safekeepers would be enough.
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
timeline_id = uuid.uuid4().hex
|
||||
tenant_id = uuid.uuid4().hex
|
||||
|
||||
# write config for proposer
|
||||
pgdata_dir = os.path.join(repo_dir, "proposer_pgdata")
|
||||
pg = ProposerPostgres(pgdata_dir, pg_bin, timeline_id, tenant_id)
|
||||
pg.create_dir_config(wa_factory.get_connstrs())
|
||||
pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
|
||||
pg = ProposerPostgres(env, pgdata_dir, pg_bin, timeline_id, tenant_id)
|
||||
pg.create_dir_config(env.get_safekeeper_connstrs())
|
||||
|
||||
# valid lsn, which is not in the segment start, nor in zero segment
|
||||
epoch_start_lsn = 0x16B9188 # 0/16B9188
|
||||
@@ -271,7 +279,7 @@ def test_sync_safekeepers(repo_dir: str, pg_bin: PgBin, wa_factory: WalAcceptorF
|
||||
# append and commit WAL
|
||||
lsn_after_append = []
|
||||
for i in range(3):
|
||||
res = wa_factory.instances[i].append_logical_message(
|
||||
res = env.safekeepers[i].append_logical_message(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
{
|
||||
@@ -295,13 +303,15 @@ def test_sync_safekeepers(repo_dir: str, pg_bin: PgBin, wa_factory: WalAcceptorF
|
||||
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
|
||||
|
||||
|
||||
def test_timeline_status(zenith_cli, pageserver, postgres, wa_factory: WalAcceptorFactory):
|
||||
wa_factory.start_n_new(1)
|
||||
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
zenith_cli.run(["branch", "test_timeline_status", "empty"])
|
||||
pg = postgres.create_start('test_timeline_status', wal_acceptors=wa_factory.get_connstrs())
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
wa = wa_factory.instances[0]
|
||||
env.zenith_cli(["branch", "test_timeline_status", "main"])
|
||||
pg = env.postgres.create_start('test_timeline_status')
|
||||
|
||||
wa = env.safekeepers[0]
|
||||
wa_http_cli = wa.http_client()
|
||||
wa_http_cli.check_status()
|
||||
|
||||
@@ -318,6 +328,5 @@ def test_timeline_status(zenith_cli, pageserver, postgres, wa_factory: WalAccept
|
||||
pg.stop().start()
|
||||
pg.safe_psql("insert into t values(10)")
|
||||
|
||||
epoch_after_reboot = wa_http_cli.timeline_status(tenant_id,
|
||||
timeline_id).acceptor_epoch
|
||||
epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
|
||||
assert epoch_after_reboot > epoch
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import asyncio
|
||||
import asyncpg
|
||||
import random
|
||||
import time
|
||||
|
||||
from fixtures.zenith_fixtures import WalAcceptor, WalAcceptorFactory, ZenithPageserver, PostgresFactory, Postgres
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, Postgres, Safekeeper
|
||||
from fixtures.log_helper import getLogger
|
||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||
from typing import List
|
||||
|
||||
log = getLogger('root.wal_acceptor_async')
|
||||
@@ -19,13 +21,16 @@ class BankClient(object):
|
||||
async def initdb(self):
|
||||
await self.conn.execute('DROP TABLE IF EXISTS bank_accs')
|
||||
await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)')
|
||||
await self.conn.execute('''
|
||||
await self.conn.execute(
|
||||
'''
|
||||
INSERT INTO bank_accs
|
||||
SELECT *, $1 FROM generate_series(0, $2)
|
||||
''', self.init_amount, self.n_accounts - 1)
|
||||
''',
|
||||
self.init_amount,
|
||||
self.n_accounts - 1)
|
||||
await self.conn.execute('DROP TABLE IF EXISTS bank_log')
|
||||
await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)')
|
||||
|
||||
|
||||
# TODO: Remove when https://github.com/zenithdb/zenith/issues/644 is fixed
|
||||
await self.conn.execute('ALTER TABLE bank_accs SET (autovacuum_enabled = false)')
|
||||
await self.conn.execute('ALTER TABLE bank_log SET (autovacuum_enabled = false)')
|
||||
@@ -34,6 +39,7 @@ class BankClient(object):
|
||||
row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs')
|
||||
assert row['sum'] == self.n_accounts * self.init_amount
|
||||
|
||||
|
||||
async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
|
||||
# avoid deadlocks by sorting uids
|
||||
if from_uid > to_uid:
|
||||
@@ -42,16 +48,22 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
|
||||
async with conn.transaction():
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2',
|
||||
amount, to_uid,
|
||||
amount,
|
||||
to_uid,
|
||||
)
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2',
|
||||
amount, from_uid,
|
||||
amount,
|
||||
from_uid,
|
||||
)
|
||||
await conn.execute('INSERT INTO bank_log VALUES ($1, $2, $3)',
|
||||
from_uid, to_uid, amount,
|
||||
await conn.execute(
|
||||
'INSERT INTO bank_log VALUES ($1, $2, $3)',
|
||||
from_uid,
|
||||
to_uid,
|
||||
amount,
|
||||
)
|
||||
|
||||
|
||||
class WorkerStats(object):
|
||||
def __init__(self, n_workers):
|
||||
self.counters = [0] * n_workers
|
||||
@@ -92,11 +104,43 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou
|
||||
await pg_conn.close()
|
||||
|
||||
|
||||
async def wait_for_lsn(safekeeper: Safekeeper,
|
||||
tenant_id: str,
|
||||
timeline_id: str,
|
||||
wait_lsn: str,
|
||||
polling_interval=1,
|
||||
timeout=600):
|
||||
"""
|
||||
Poll flush_lsn from safekeeper until it's greater or equal than
|
||||
provided wait_lsn. To do that, timeline_status is fetched from
|
||||
safekeeper every polling_interval seconds.
|
||||
"""
|
||||
|
||||
started_at = time.time()
|
||||
client = safekeeper.http_client()
|
||||
|
||||
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
|
||||
log.info(
|
||||
f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}'
|
||||
)
|
||||
|
||||
while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn):
|
||||
elapsed = time.time() - started_at
|
||||
if elapsed > timeout:
|
||||
raise RuntimeError(
|
||||
f"timed out waiting for safekeeper at port {safekeeper.port.pg} to reach {wait_lsn}, current lsn is {flush_lsn}"
|
||||
)
|
||||
|
||||
await asyncio.sleep(polling_interval)
|
||||
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
|
||||
log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}')
|
||||
|
||||
|
||||
# This test will run several iterations and check progress in each of them.
|
||||
# On each iteration 1 acceptor is stopped, and 2 others should allow
|
||||
# background workers execute transactions. In the end, state should remain
|
||||
# consistent.
|
||||
async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_workers=10):
|
||||
async def run_restarts_under_load(pg: Postgres, acceptors: List[Safekeeper], n_workers=10):
|
||||
n_accounts = 100
|
||||
init_amount = 100000
|
||||
max_transfer = 100
|
||||
@@ -104,6 +148,9 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
iterations = 6
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
tenant_id = await pg_conn.fetchval("show zenith.zenith_tenant")
|
||||
timeline_id = await pg_conn.fetchval("show zenith.zenith_timeline")
|
||||
|
||||
bank = BankClient(pg_conn, n_accounts=n_accounts, init_amount=init_amount)
|
||||
# create tables and initial balances
|
||||
await bank.initdb()
|
||||
@@ -114,19 +161,19 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
|
||||
workers.append(asyncio.create_task(worker))
|
||||
|
||||
|
||||
for it in range(iterations):
|
||||
victim = acceptors[it % len(acceptors)]
|
||||
victim_idx = it % len(acceptors)
|
||||
victim = acceptors[victim_idx]
|
||||
victim.stop()
|
||||
|
||||
# Wait till previous victim recovers so it is ready for the next
|
||||
# iteration by making any writing xact.
|
||||
conn = await pg.connect_async()
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount WHERE uid = 1',
|
||||
timeout=120
|
||||
)
|
||||
await conn.close()
|
||||
flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()')
|
||||
flush_lsn = lsn_to_hex(flush_lsn)
|
||||
log.info(f'Postgres flush_lsn {flush_lsn}')
|
||||
|
||||
# Wait until alive safekeepers catch up with postgres
|
||||
for idx, safekeeper in enumerate(acceptors):
|
||||
if idx != victim_idx:
|
||||
await wait_for_lsn(safekeeper, tenant_id, timeline_id, flush_lsn)
|
||||
|
||||
stats.reset()
|
||||
await asyncio.sleep(period_time)
|
||||
@@ -145,16 +192,14 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
|
||||
|
||||
# restart acceptors one by one, while executing and validating bank transactions
|
||||
def test_restarts_under_load(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
wa_factory: WalAcceptorFactory):
|
||||
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
env.zenith_cli(["branch", "test_wal_acceptors_restarts_under_load", "main"])
|
||||
pg = env.postgres.create_start('test_wal_acceptors_restarts_under_load')
|
||||
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_restarts_under_load", "empty"])
|
||||
pg = postgres.create_start('test_wal_acceptors_restarts_under_load',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
|
||||
asyncio.run(run_restarts_under_load(pg, wa_factory.instances))
|
||||
asyncio.run(run_restarts_under_load(pg, env.safekeepers))
|
||||
|
||||
# TODO: Remove when https://github.com/zenithdb/zenith/issues/644 is fixed
|
||||
pg.stop()
|
||||
|
||||
@@ -1,97 +1,107 @@
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from fixtures.zenith_fixtures import ZenithCli, ZenithPageserver
|
||||
from psycopg2.extensions import cursor as PgCursor
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from typing import cast
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def helper_compare_branch_list(page_server_cur, zenith_cli, initial_tenant: str):
|
||||
def helper_compare_branch_list(page_server_cur: PgCursor, env: ZenithEnv, initial_tenant: str):
|
||||
"""
|
||||
Compare branches list returned by CLI and directly via API.
|
||||
Filters out branches created by other tests.
|
||||
"""
|
||||
|
||||
page_server_cur.execute(f'branch_list {initial_tenant}')
|
||||
branches_api = sorted(map(lambda b: b['name'], json.loads(page_server_cur.fetchone()[0])))
|
||||
branches_api = sorted(
|
||||
map(lambda b: cast(str, b['name']), json.loads(page_server_cur.fetchone()[0])))
|
||||
branches_api = [b for b in branches_api if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
|
||||
res = zenith_cli.run(["branch"])
|
||||
res = env.zenith_cli(["branch"])
|
||||
res.check_returncode()
|
||||
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
|
||||
res = zenith_cli.run(["branch", f"--tenantid={initial_tenant}"])
|
||||
res = env.zenith_cli(["branch", f"--tenantid={initial_tenant}"])
|
||||
res.check_returncode()
|
||||
branches_cli_with_tenant_arg = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli_with_tenant_arg = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
branches_cli_with_tenant_arg = sorted(
|
||||
map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli_with_tenant_arg = [
|
||||
b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')
|
||||
]
|
||||
|
||||
assert branches_api == branches_cli == branches_cli_with_tenant_arg
|
||||
|
||||
|
||||
def test_cli_branch_list(pageserver: ZenithPageserver, zenith_cli):
|
||||
page_server_conn = pageserver.connect()
|
||||
def test_cli_branch_list(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
page_server_conn = env.pageserver.connect()
|
||||
page_server_cur = page_server_conn.cursor()
|
||||
|
||||
# Initial sanity check
|
||||
helper_compare_branch_list(page_server_cur, zenith_cli, pageserver.initial_tenant)
|
||||
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
|
||||
|
||||
# Create a branch for us
|
||||
res = zenith_cli.run(["branch", "test_cli_branch_list_main", "main"])
|
||||
res = env.zenith_cli(["branch", "test_cli_branch_list_main", "empty"])
|
||||
assert res.stderr == ''
|
||||
helper_compare_branch_list(page_server_cur, zenith_cli, pageserver.initial_tenant)
|
||||
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
|
||||
|
||||
# Create a nested branch
|
||||
res = zenith_cli.run(["branch", "test_cli_branch_list_nested", "test_cli_branch_list_main"])
|
||||
res = env.zenith_cli(["branch", "test_cli_branch_list_nested", "test_cli_branch_list_main"])
|
||||
assert res.stderr == ''
|
||||
helper_compare_branch_list(page_server_cur, zenith_cli, pageserver.initial_tenant)
|
||||
helper_compare_branch_list(page_server_cur, env, env.initial_tenant)
|
||||
|
||||
# Check that all new branches are visible via CLI
|
||||
res = zenith_cli.run(["branch"])
|
||||
res = env.zenith_cli(["branch"])
|
||||
assert res.stderr == ''
|
||||
branches_cli = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
|
||||
assert 'test_cli_branch_list_main' in branches_cli
|
||||
assert 'test_cli_branch_list_nested' in branches_cli
|
||||
|
||||
def helper_compare_tenant_list(page_server_cur, zenith_cli: ZenithCli):
|
||||
page_server_cur.execute(f'tenant_list')
|
||||
tenants_api = sorted(json.loads(page_server_cur.fetchone()[0]))
|
||||
|
||||
res = zenith_cli.run(["tenant", "list"])
|
||||
def helper_compare_tenant_list(page_server_cur: PgCursor, env: ZenithEnv):
|
||||
page_server_cur.execute(f'tenant_list')
|
||||
tenants_api = sorted(
|
||||
map(lambda t: cast(str, t['id']), json.loads(page_server_cur.fetchone()[0])))
|
||||
|
||||
res = env.zenith_cli(["tenant", "list"])
|
||||
assert res.stderr == ''
|
||||
tenants_cli = sorted(res.stdout.splitlines())
|
||||
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert tenants_api == tenants_cli
|
||||
|
||||
|
||||
def test_cli_tenant_list(pageserver: ZenithPageserver, zenith_cli: ZenithCli):
|
||||
page_server_conn = pageserver.connect()
|
||||
def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
page_server_conn = env.pageserver.connect()
|
||||
page_server_cur = page_server_conn.cursor()
|
||||
|
||||
# Initial sanity check
|
||||
helper_compare_tenant_list(page_server_cur, zenith_cli)
|
||||
helper_compare_tenant_list(page_server_cur, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant1 = uuid.uuid4().hex
|
||||
res = zenith_cli.run(["tenant", "create", tenant1])
|
||||
res = env.zenith_cli(["tenant", "create", tenant1])
|
||||
res.check_returncode()
|
||||
|
||||
# check tenant1 appeared
|
||||
helper_compare_tenant_list(page_server_cur, zenith_cli)
|
||||
helper_compare_tenant_list(page_server_cur, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant2 = uuid.uuid4().hex
|
||||
res = zenith_cli.run(["tenant", "create", tenant2])
|
||||
res = env.zenith_cli(["tenant", "create", tenant2])
|
||||
res.check_returncode()
|
||||
|
||||
# check tenant2 appeared
|
||||
helper_compare_tenant_list(page_server_cur, zenith_cli)
|
||||
helper_compare_tenant_list(page_server_cur, env)
|
||||
|
||||
res = zenith_cli.run(["tenant", "list"])
|
||||
res = env.zenith_cli(["tenant", "list"])
|
||||
res.check_returncode()
|
||||
tenants = sorted(res.stdout.splitlines())
|
||||
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert pageserver.initial_tenant in tenants
|
||||
assert env.initial_tenant in tenants
|
||||
assert tenant1 in tenants
|
||||
assert tenant2 in tenants
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
import os
|
||||
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
|
||||
from fixtures.zenith_fixtures import ZenithEnv, base_dir, pg_distrib_dir
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys):
|
||||
def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_isolation", "empty"])
|
||||
env.zenith_cli(["branch", "test_isolation", "empty"])
|
||||
|
||||
# Connect to postgres and create a database called "regression".
|
||||
# isolation tests use prepared transactions, so enable them
|
||||
pg = postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])
|
||||
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])
|
||||
pg.safe_psql('CREATE DATABASE isolation_regression')
|
||||
|
||||
# Create some local directories for pg_isolation_regress to run in.
|
||||
@@ -38,7 +38,7 @@ def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_b
|
||||
'--schedule={}'.format(schedule),
|
||||
]
|
||||
|
||||
env = {
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
@@ -48,4 +48,4 @@ def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_b
|
||||
# We don't capture the output. It's not too chatty, and it always
|
||||
# logs the exact same data to `regression.out` anyway.
|
||||
with capsys.disabled():
|
||||
pg_bin.run(pg_isolation_regress_command, env=env, cwd=runpath)
|
||||
pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import os
|
||||
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys):
|
||||
def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_pg_regress", "empty"])
|
||||
env.zenith_cli(["branch", "test_pg_regress", "empty"])
|
||||
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = postgres.create_start('test_pg_regress')
|
||||
pg = env.postgres.create_start('test_pg_regress')
|
||||
pg.safe_psql('CREATE DATABASE regression')
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
@@ -38,7 +38,7 @@ def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_
|
||||
'--inputdir={}'.format(src_path),
|
||||
]
|
||||
|
||||
env = {
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
@@ -48,11 +48,11 @@ def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_
|
||||
# We don't capture the output. It's not too chatty, and it always
|
||||
# logs the exact same data to `regression.out` anyway.
|
||||
with capsys.disabled():
|
||||
pg_bin.run(pg_regress_command, env=env, cwd=runpath)
|
||||
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
|
||||
|
||||
# checkpoint one more time to ensure that the lsn we get is the latest one
|
||||
pg.safe_psql('CHECKPOINT')
|
||||
lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(zenith_cli, test_output_dir, pg, pageserver.service_port.pg)
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
|
||||
@@ -1,20 +1,23 @@
|
||||
import os
|
||||
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.zenith_fixtures import PageserverPort, PostgresFactory, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import (ZenithEnv,
|
||||
check_restored_datadir_content,
|
||||
base_dir,
|
||||
pg_distrib_dir)
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys, pageserver_port: PageserverPort):
|
||||
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_zenith_regress", "empty"])
|
||||
env.zenith_cli(["branch", "test_zenith_regress", "empty"])
|
||||
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = postgres.create_start('test_zenith_regress')
|
||||
pg = env.postgres.create_start('test_zenith_regress')
|
||||
pg.safe_psql('CREATE DATABASE regression')
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
@@ -40,7 +43,7 @@ def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_outp
|
||||
]
|
||||
|
||||
log.info(pg_regress_command)
|
||||
env = {
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
@@ -50,11 +53,11 @@ def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_outp
|
||||
# We don't capture the output. It's not too chatty, and it always
|
||||
# logs the exact same data to `regression.out` anyway.
|
||||
with capsys.disabled():
|
||||
pg_bin.run(pg_regress_command, env=env, cwd=runpath)
|
||||
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
|
||||
|
||||
# checkpoint one more time to ensure that the lsn we get is the latest one
|
||||
pg.safe_psql('CHECKPOINT')
|
||||
lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
check_restored_datadir_content(zenith_cli, test_output_dir, pg, pageserver_port.pg)
|
||||
check_restored_datadir_content(test_output_dir, env, pg)
|
||||
|
||||
@@ -24,7 +24,6 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast
|
||||
from typing_extensions import Literal
|
||||
|
||||
from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture)
|
||||
|
||||
"""
|
||||
This file contains fixtures for micro-benchmarks.
|
||||
|
||||
@@ -32,11 +31,11 @@ To use, declare the 'zenbenchmark' fixture in the test function. Run the
|
||||
bencmark, and then record the result by calling zenbenchmark.record. For example:
|
||||
|
||||
import timeit
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
def test_mybench(postgres: PostgresFactory, pageserver: ZenithPageserver, zenbenchmark):
|
||||
def test_mybench(zenith_simple_env: env, zenbenchmark):
|
||||
|
||||
# Initialize the test
|
||||
...
|
||||
@@ -56,15 +55,9 @@ in the test initialization, or measure disk usage after the test query.
|
||||
"""
|
||||
|
||||
|
||||
# All the results are collected in this list, as a tuple:
|
||||
# (test_name: str, metric_name: str, metric_value: float, unit: str)
|
||||
#
|
||||
# TODO: It would perhaps be better to store the results as additional
|
||||
# properties in the pytest TestReport objects, to make them visible to
|
||||
# other pytest tools.
|
||||
global zenbenchmark_results
|
||||
zenbenchmark_results = []
|
||||
|
||||
class ZenithBenchmarkResults:
|
||||
""" An object for recording benchmark results. """
|
||||
def __init__(self):
|
||||
@@ -77,6 +70,11 @@ class ZenithBenchmarkResults:
|
||||
|
||||
self.results.append((test_name, metric_name, metric_value, unit))
|
||||
|
||||
|
||||
# Will be recreated in each session.
|
||||
zenbenchmark_results: ZenithBenchmarkResults = ZenithBenchmarkResults()
|
||||
|
||||
|
||||
# Session scope fixture that initializes the results object
|
||||
@pytest.fixture(autouse=True, scope='session')
|
||||
def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
|
||||
@@ -88,6 +86,7 @@ def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
|
||||
|
||||
yield zenbenchmark_results
|
||||
|
||||
|
||||
class ZenithBenchmarker:
|
||||
"""
|
||||
An object for recording benchmark results. This is created for each test
|
||||
@@ -103,7 +102,6 @@ class ZenithBenchmarker:
|
||||
"""
|
||||
self.results.record(self.request.node.name, metric_name, metric_value, unit)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def record_duration(self, metric_name):
|
||||
"""
|
||||
@@ -134,8 +132,10 @@ class ZenithBenchmarker:
|
||||
# The metric should be an integer, as it's a number of bytes. But in general
|
||||
# all prometheus metrics are floats. So to be pedantic, read it as a float
|
||||
# and round to integer.
|
||||
matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$', all_metrics,
|
||||
matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$',
|
||||
all_metrics,
|
||||
re.MULTILINE)
|
||||
assert matches
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
def get_peak_mem(self, pageserver) -> int:
|
||||
@@ -145,8 +145,8 @@ class ZenithBenchmarker:
|
||||
# Fetch all the exposed prometheus metrics from page server
|
||||
all_metrics = pageserver.http_client().get_metrics()
|
||||
# See comment in get_io_writes()
|
||||
matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics,
|
||||
re.MULTILINE)
|
||||
matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE)
|
||||
assert matches
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
def get_timeline_size(self, repo_dir: str, tenantid: str, timelineid: str):
|
||||
@@ -171,7 +171,11 @@ class ZenithBenchmarker:
|
||||
yield
|
||||
after = self.get_io_writes(pageserver)
|
||||
|
||||
self.results.record(self.request.node.name, metric_name, round((after - before) / (1024 * 1024)), 'MB')
|
||||
self.results.record(self.request.node.name,
|
||||
metric_name,
|
||||
round((after - before) / (1024 * 1024)),
|
||||
'MB')
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
|
||||
@@ -185,9 +189,7 @@ def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
|
||||
|
||||
# Hook to print the results at the end
|
||||
@pytest.hookimpl(hookwrapper=True)
|
||||
def pytest_terminal_summary(
|
||||
terminalreporter: TerminalReporter, exitstatus: int, config: Config
|
||||
):
|
||||
def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config):
|
||||
yield
|
||||
|
||||
global zenbenchmark_results
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
"""
|
||||
This file configures logging to use in python tests.
|
||||
Logs are automatically captured and shown in their
|
||||
@@ -27,17 +26,19 @@ LOGGING = {
|
||||
"level": "INFO"
|
||||
},
|
||||
"root.wal_acceptor_async": {
|
||||
"level": "INFO" # a lot of logs on DEBUG level
|
||||
"level": "INFO" # a lot of logs on DEBUG level
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def getLogger(name='root') -> logging.Logger:
|
||||
"""Method to get logger for tests.
|
||||
|
||||
Should be used to get correctly initialized logger. """
|
||||
return logging.getLogger(name)
|
||||
|
||||
|
||||
# default logger for tests
|
||||
log = getLogger()
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import subprocess
|
||||
from typing import Any, List
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def get_self_dir() -> str:
|
||||
""" Get the path to the directory where this script lives. """
|
||||
return os.path.dirname(os.path.abspath(__file__))
|
||||
@@ -58,6 +59,13 @@ def global_counter() -> int:
|
||||
_global_counter += 1
|
||||
return _global_counter
|
||||
|
||||
|
||||
def lsn_to_hex(num: int) -> str:
|
||||
""" Convert lsn from int to standard hex notation. """
|
||||
return "{:X}/{:X}".format(num >> 32, num & 0xffffffff)
|
||||
|
||||
|
||||
def lsn_from_hex(lsn_hex: str) -> int:
|
||||
""" Convert lsn from hex notation to int. """
|
||||
l, r = lsn_hex.split('/')
|
||||
return (int(l, 16) << 32) + int(r, 16)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
while true; do
|
||||
echo -n "==== CURRENT TIME:" >> /tmp/test_output/netstat.stdout
|
||||
date +"%T.%N" >> /tmp/test_output/netstat.stdout
|
||||
sudo netstat -vpnoa | grep tcp | sort >> /tmp/test_output/netstat.stdout
|
||||
sleep 0.5
|
||||
done
|
||||
@@ -1,10 +1,11 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Run bulk INSERT test.
|
||||
#
|
||||
@@ -15,16 +16,17 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
# 3. Disk space used
|
||||
# 4. Peak memory usage
|
||||
#
|
||||
def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_bulk_insert", "empty"])
|
||||
env.zenith_cli(["branch", "test_bulk_insert", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_bulk_insert')
|
||||
pg = env.postgres.create_start('test_bulk_insert')
|
||||
log.info("postgres is running on 'test_bulk_insert' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect();
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
@@ -36,17 +38,19 @@ def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg
|
||||
cur.execute("create table huge (i int, j int);")
|
||||
|
||||
# Run INSERT, recording the time and I/O it takes
|
||||
with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_duration('insert'):
|
||||
cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
|
||||
|
||||
# Flush the layers from memory to disk. This is included in the reported
|
||||
# time and I/O
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
|
||||
# Record peak memory usage
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB')
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
|
||||
env.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
import timeit
|
||||
import pytest
|
||||
|
||||
from fixtures.zenith_fixtures import (
|
||||
TenantFactory,
|
||||
ZenithCli,
|
||||
PostgresFactory,
|
||||
)
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
pytest_plugins = ("fixtures.benchmark_fixture")
|
||||
|
||||
@@ -20,34 +16,37 @@ pytest_plugins = ("fixtures.benchmark_fixture")
|
||||
@pytest.mark.parametrize('tenants_count', [1, 5, 10])
|
||||
@pytest.mark.parametrize('use_wal_acceptors', ['with_wa', 'without_wa'])
|
||||
def test_bulk_tenant_create(
|
||||
zenith_cli: ZenithCli,
|
||||
tenant_factory: TenantFactory,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory,
|
||||
zenith_env_builder: ZenithEnvBuilder,
|
||||
use_wal_acceptors: str,
|
||||
tenants_count: int,
|
||||
zenbenchmark,
|
||||
):
|
||||
"""Measure tenant creation time (with and without wal acceptors)"""
|
||||
if use_wal_acceptors == 'with_wa':
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
time_slices = []
|
||||
|
||||
for i in range(tenants_count):
|
||||
start = timeit.default_timer()
|
||||
|
||||
tenant = tenant_factory.create()
|
||||
zenith_cli.run([
|
||||
"branch", f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", "main",
|
||||
tenant = env.create_tenant()
|
||||
env.zenith_cli([
|
||||
"branch",
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
"main",
|
||||
f"--tenantid={tenant}"
|
||||
])
|
||||
|
||||
if use_wal_acceptors == 'with_wa':
|
||||
wa_factory.start_n_new(3)
|
||||
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
|
||||
#if use_wal_acceptors == 'with_wa':
|
||||
# wa_factory.start_n_new(3)
|
||||
|
||||
pg_tenant = postgres.create_start(
|
||||
pg_tenant = env.postgres.create_start(
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant,
|
||||
wal_acceptors=wa_factory.get_connstrs() if use_wal_acceptors == 'with_wa' else None,
|
||||
)
|
||||
|
||||
end = timeit.default_timer()
|
||||
|
||||
@@ -1,25 +1,27 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Test buffering GisT build. It WAL-logs the whole relation, in 32-page chunks.
|
||||
# As of this writing, we're duplicate those giant WAL records for each page,
|
||||
# which makes the delta layer about 32x larger than it needs to be.
|
||||
#
|
||||
def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_gist_buffering_build", "empty"])
|
||||
env.zenith_cli(["branch", "test_gist_buffering_build", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_gist_buffering_build')
|
||||
pg = env.postgres.create_start('test_gist_buffering_build')
|
||||
log.info("postgres is running on 'test_gist_buffering_build' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect();
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
@@ -29,21 +31,27 @@ def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPages
|
||||
timeline = cur.fetchone()[0]
|
||||
|
||||
# Create test table.
|
||||
cur.execute("create table gist_point_tbl(id int4, p point)");
|
||||
cur.execute("insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;");
|
||||
cur.execute("create table gist_point_tbl(id int4, p point)")
|
||||
cur.execute(
|
||||
"insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;"
|
||||
)
|
||||
|
||||
# Build the index.
|
||||
with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_duration('build'):
|
||||
cur.execute("create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)");
|
||||
cur.execute(
|
||||
"create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
|
||||
)
|
||||
|
||||
# Flush the layers from memory to disk. This is included in the reported
|
||||
# time and I/O
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 1000000")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 1000000")
|
||||
|
||||
# Record peak memory usage
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB')
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
|
||||
env.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Run a very short pgbench test.
|
||||
#
|
||||
@@ -14,16 +15,17 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
# 2. Time to run 5000 pgbench transactions
|
||||
# 3. Disk space used
|
||||
#
|
||||
def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin, zenbenchmark):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_pgbench_perf", "empty"])
|
||||
env.zenith_cli(["branch", "test_pgbench_perf", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_pgbench_perf')
|
||||
pg = env.postgres.create_start('test_pgbench_perf')
|
||||
log.info("postgres is running on 'test_pgbench_perf' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect();
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
@@ -35,13 +37,13 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin
|
||||
connstr = pg.connstr()
|
||||
|
||||
# Initialize pgbench database, recording the time and I/O it takes
|
||||
with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_duration('init'):
|
||||
pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
|
||||
|
||||
# Flush the layers from memory to disk. This is included in the reported
|
||||
# time and I/O
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
|
||||
# Run pgbench for 5000 transactions
|
||||
with zenbenchmark.record_duration('5000_xacts'):
|
||||
@@ -49,8 +51,8 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin
|
||||
|
||||
# Flush the layers to disk again. This is *not' included in the reported time,
|
||||
# though.
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
|
||||
@@ -12,21 +12,23 @@
|
||||
# Amplification problem at its finest.
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_write_amplification", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_write_amplification')
|
||||
def test_write_amplification(zenith_simple_env: ZenithEnv, zenbenchmark):
|
||||
env = zenith_simple_env
|
||||
# Create a branch for us
|
||||
env.zenith_cli(["branch", "test_write_amplification", "empty"])
|
||||
|
||||
pg = env.postgres.create_start('test_write_amplification')
|
||||
log.info("postgres is running on 'test_write_amplification' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect();
|
||||
psconn = env.pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -35,7 +37,7 @@ def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPagese
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
timeline = cur.fetchone()[0]
|
||||
|
||||
with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_duration('run'):
|
||||
|
||||
# NOTE: Because each iteration updates every table already created,
|
||||
@@ -68,8 +70,10 @@ def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPagese
|
||||
# slower, adding some delays in this loop. But forcing
|
||||
# the the checkpointing and GC makes the test go faster,
|
||||
# with the same total I/O effect.
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
|
||||
env.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
|
||||
@@ -2,3 +2,4 @@
|
||||
minversion = 6.0
|
||||
log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
|
||||
log_date_format = %Y-%m-%d %H:%M:%S
|
||||
log_cli = true
|
||||
|
||||
@@ -10,6 +10,7 @@ max-line-length = 100
|
||||
[yapf]
|
||||
based_on_style = pep8
|
||||
column_limit = 100
|
||||
split_all_top_level_comma_separated_values = true
|
||||
|
||||
[mypy]
|
||||
# some tests don't typecheck when this flag is set
|
||||
@@ -21,7 +22,11 @@ disallow_untyped_decorators = false
|
||||
disallow_untyped_defs = false
|
||||
strict = true
|
||||
|
||||
[mypy-psycopg2.*]
|
||||
[mypy-asyncpg.*]
|
||||
# There is some work in progress, though: https://github.com/MagicStack/asyncpg/pull/577
|
||||
ignore_missing_imports = true
|
||||
|
||||
[mypy-cached_property.*]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[mypy-pytest.*]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
import os
|
||||
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
@@ -19,11 +20,13 @@ run_broken = pytest.mark.skipif(os.environ.get('RUN_BROKEN') is None,
|
||||
|
||||
|
||||
@run_broken
|
||||
def test_broken(zenith_cli, pageserver, postgres, pg_bin):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_broken", "empty"])
|
||||
def test_broken(zenith_simple_env: ZenithEnv, pg_bin):
|
||||
env = zenith_simple_env
|
||||
|
||||
postgres.create_start("test_broken")
|
||||
# Create a branch for us
|
||||
env.zenith_cli(["branch", "test_broken", "empty"])
|
||||
|
||||
env.postgres.create_start("test_broken")
|
||||
log.info('postgres is running')
|
||||
|
||||
log.info('THIS NEXT COMMAND WILL FAIL:')
|
||||
|
||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: 8e82cb4a36...a902dd8a4e
@@ -7,17 +7,16 @@ use const_format::formatcp;
|
||||
use daemonize::Daemonize;
|
||||
use log::*;
|
||||
use std::env;
|
||||
use std::net::TcpListener;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::thread;
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::logging;
|
||||
use zenith_utils::{logging, tcp_listener};
|
||||
|
||||
use walkeeper::defaults::{DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR};
|
||||
use walkeeper::http;
|
||||
use walkeeper::s3_offload;
|
||||
use walkeeper::wal_service;
|
||||
use walkeeper::WalAcceptorConf;
|
||||
use walkeeper::SafeKeeperConf;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
zenith_metrics::set_common_metrics_prefix("safekeeper");
|
||||
@@ -54,7 +53,7 @@ fn main() -> Result<()> {
|
||||
Arg::with_name("ttl")
|
||||
.long("ttl")
|
||||
.takes_value(true)
|
||||
.help("interval for keeping WAL as walkeeper node, after which them will be uploaded to S3 and removed locally"),
|
||||
.help("interval for keeping WAL at safekeeper node, after which them will be uploaded to S3 and removed locally"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("recall")
|
||||
@@ -78,8 +77,11 @@ fn main() -> Result<()> {
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let mut conf = WalAcceptorConf {
|
||||
data_dir: PathBuf::from("./"),
|
||||
let mut conf = SafeKeeperConf {
|
||||
// Always set to './'. We will chdir into the directory specified on the
|
||||
// command line, so that when the server is running, all paths are relative
|
||||
// to that.
|
||||
workdir: PathBuf::from("./"),
|
||||
daemonize: false,
|
||||
no_sync: false,
|
||||
pageserver_addr: None,
|
||||
@@ -91,10 +93,8 @@ fn main() -> Result<()> {
|
||||
};
|
||||
|
||||
if let Some(dir) = arg_matches.value_of("datadir") {
|
||||
conf.data_dir = PathBuf::from(dir);
|
||||
|
||||
// change into the data directory.
|
||||
std::env::set_current_dir(&conf.data_dir)?;
|
||||
std::env::set_current_dir(PathBuf::from(dir))?;
|
||||
}
|
||||
|
||||
if arg_matches.is_present("no-sync") {
|
||||
@@ -125,20 +125,19 @@ fn main() -> Result<()> {
|
||||
conf.recall_period = Some(humantime::parse_duration(recall)?);
|
||||
}
|
||||
|
||||
start_wal_acceptor(conf)
|
||||
start_safekeeper(conf)
|
||||
}
|
||||
|
||||
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||
let log_filename = conf.data_dir.join("safekeeper.log");
|
||||
let log_file = logging::init(log_filename, conf.daemonize)?;
|
||||
fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
||||
let log_file = logging::init("safekeeper.log", conf.daemonize)?;
|
||||
|
||||
let http_listener = TcpListener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
||||
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
info!("Starting safekeeper on {}", conf.listen_pg_addr);
|
||||
let pg_listener = TcpListener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
@@ -10,7 +10,7 @@ use zenith_utils::lsn::Lsn;
|
||||
use crate::safekeeper::AcceptorState;
|
||||
use crate::timeline::CreateControlFile;
|
||||
use crate::timeline::GlobalTimelines;
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::http::error::ApiError;
|
||||
use zenith_utils::http::json::json_response;
|
||||
@@ -22,9 +22,9 @@ async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Ok(json_response(StatusCode::OK, "")?)
|
||||
}
|
||||
|
||||
fn get_conf(request: &Request<Body>) -> &WalAcceptorConf {
|
||||
fn get_conf(request: &Request<Body>) -> &SafeKeeperConf {
|
||||
request
|
||||
.data::<Arc<WalAcceptorConf>>()
|
||||
.data::<Arc<SafeKeeperConf>>()
|
||||
.expect("unknown state type")
|
||||
.as_ref()
|
||||
}
|
||||
@@ -49,6 +49,8 @@ struct TimelineStatus {
|
||||
commit_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
truncate_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
flush_lsn: Lsn,
|
||||
}
|
||||
|
||||
/// Report info about timeline.
|
||||
@@ -64,6 +66,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
|
||||
)
|
||||
.map_err(ApiError::from_err)?;
|
||||
let sk_state = tli.get_info();
|
||||
let (flush_lsn, _) = tli.get_end_of_wal();
|
||||
|
||||
let status = TimelineStatus {
|
||||
tenant_id,
|
||||
@@ -71,12 +74,13 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
|
||||
acceptor_state: sk_state.acceptor_state,
|
||||
commit_lsn: sk_state.commit_lsn,
|
||||
truncate_lsn: sk_state.truncate_lsn,
|
||||
flush_lsn,
|
||||
};
|
||||
Ok(json_response(StatusCode::OK, status)?)
|
||||
}
|
||||
|
||||
/// Safekeeper http router.
|
||||
pub fn make_router(conf: WalAcceptorConf) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let router = endpoint::make_router();
|
||||
router
|
||||
.data(Arc::new(conf))
|
||||
|
||||
@@ -23,8 +23,15 @@ pub mod defaults {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WalAcceptorConf {
|
||||
pub data_dir: PathBuf,
|
||||
pub struct SafeKeeperConf {
|
||||
// Repository directory, relative to current working directory.
|
||||
// Normally, the safekeeper changes the current working directory
|
||||
// to the repository, and 'workdir' is always '.'. But we don't do
|
||||
// that during unit testing, because the current directory is global
|
||||
// to the process but different unit tests work on different
|
||||
// data directories to avoid clashing with each other.
|
||||
pub workdir: PathBuf,
|
||||
|
||||
pub daemonize: bool,
|
||||
pub no_sync: bool,
|
||||
pub listen_pg_addr: String,
|
||||
|
||||
@@ -16,7 +16,7 @@ use crate::safekeeper::ProposerAcceptorMessage;
|
||||
|
||||
use crate::send_wal::SendWalHandler;
|
||||
use crate::timeline::TimelineTools;
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
use zenith_utils::connstring::connection_host_port;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
use zenith_utils::pq_proto::{BeMessage, FeMessage};
|
||||
@@ -33,7 +33,7 @@ pub struct ReceiveWalConn<'pg> {
|
||||
/// Periodically request pageserver to call back.
|
||||
/// If pageserver already has replication channel, it will just ignore this request
|
||||
///
|
||||
fn request_callback(conf: WalAcceptorConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
|
||||
fn request_callback(conf: SafeKeeperConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
|
||||
let ps_addr = conf.pageserver_addr.unwrap();
|
||||
let ps_connstr = format!(
|
||||
"postgresql://no_user:{}@{}/no_db",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
//! with the "START_REPLICATION" message.
|
||||
|
||||
use crate::send_wal::SendWalHandler;
|
||||
use crate::timeline::{Timeline, TimelineTools};
|
||||
use crate::timeline::{ReplicaState, Timeline, TimelineTools};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use bytes::Bytes;
|
||||
use log::*;
|
||||
@@ -20,7 +20,7 @@ use std::{str, thread};
|
||||
use zenith_utils::bin_ser::BeSer;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
use zenith_utils::pq_proto::{BeMessage, FeMessage, XLogDataBody};
|
||||
use zenith_utils::pq_proto::{BeMessage, FeMessage, WalSndKeepAlive, XLogDataBody};
|
||||
use zenith_utils::sock_split::ReadStream;
|
||||
|
||||
pub const END_REPLICATION_MARKER: Lsn = Lsn::MAX;
|
||||
@@ -32,7 +32,7 @@ const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r';
|
||||
type FullTransactionId = u64;
|
||||
|
||||
/// Hot standby feedback received from replica
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
|
||||
pub struct HotStandbyFeedback {
|
||||
pub ts: TimestampTz,
|
||||
pub xmin: FullTransactionId,
|
||||
@@ -49,6 +49,16 @@ impl HotStandbyFeedback {
|
||||
}
|
||||
}
|
||||
|
||||
/// Standby status update
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StandbyReply {
|
||||
pub write_lsn: Lsn, // disk consistent lSN
|
||||
pub flush_lsn: Lsn, // LSN committedby quorum
|
||||
pub apply_lsn: Lsn, // not used
|
||||
pub reply_ts: TimestampTz,
|
||||
pub reply_requested: bool,
|
||||
}
|
||||
|
||||
/// A network connection that's speaking the replication protocol.
|
||||
pub struct ReplicationConn {
|
||||
/// This is an `Option` because we will spawn a background thread that will
|
||||
@@ -56,16 +66,15 @@ pub struct ReplicationConn {
|
||||
stream_in: Option<ReadStream>,
|
||||
}
|
||||
|
||||
// TODO: move this to crate::timeline when there's more users
|
||||
// TODO: design a proper Timeline mock api
|
||||
trait HsFeedbackSubscriber {
|
||||
fn add_hs_feedback(&self, _feedback: HotStandbyFeedback) {}
|
||||
/// Scope guard to unregister replication connection from timeline
|
||||
struct ReplicationConnGuard {
|
||||
replica: usize, // replica internal ID assigned by timeline
|
||||
timeline: Arc<Timeline>,
|
||||
}
|
||||
|
||||
impl HsFeedbackSubscriber for Arc<Timeline> {
|
||||
#[inline(always)]
|
||||
fn add_hs_feedback(&self, feedback: HotStandbyFeedback) {
|
||||
Timeline::add_hs_feedback(self, feedback);
|
||||
impl Drop for ReplicationConnGuard {
|
||||
fn drop(&mut self) {
|
||||
self.timeline.update_replica_state(self.replica, None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,26 +88,33 @@ impl ReplicationConn {
|
||||
|
||||
/// Handle incoming messages from the network.
|
||||
/// This is spawned into the background by `handle_start_replication`.
|
||||
fn background_thread(
|
||||
mut stream_in: impl Read,
|
||||
subscriber: impl HsFeedbackSubscriber,
|
||||
) -> Result<()> {
|
||||
fn background_thread(mut stream_in: impl Read, timeline: Arc<Timeline>) -> Result<()> {
|
||||
let mut state = ReplicaState::new();
|
||||
let replica = timeline.add_replica(state);
|
||||
let _guard = ReplicationConnGuard {
|
||||
replica,
|
||||
timeline: timeline.clone(),
|
||||
};
|
||||
// Wait for replica's feedback.
|
||||
while let Some(msg) = FeMessage::read(&mut stream_in)? {
|
||||
match &msg {
|
||||
FeMessage::CopyData(m) => {
|
||||
// There's two possible data messages that the client is supposed to send here:
|
||||
// `HotStandbyFeedback` and `StandbyStatusUpdate`. We only handle hot standby
|
||||
// feedback.
|
||||
// `HotStandbyFeedback` and `StandbyStatusUpdate`.
|
||||
|
||||
match m.first().cloned() {
|
||||
Some(HOT_STANDBY_FEEDBACK_TAG_BYTE) => {
|
||||
// Note: deserializing is on m[1..] because we skip the tag byte.
|
||||
let feedback = HotStandbyFeedback::des(&m[1..])
|
||||
state.hs_feedback = HotStandbyFeedback::des(&m[1..])
|
||||
.context("failed to deserialize HotStandbyFeedback")?;
|
||||
subscriber.add_hs_feedback(feedback);
|
||||
timeline.update_replica_state(replica, Some(state));
|
||||
}
|
||||
Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => {
|
||||
let reply = StandbyReply::des(&m[1..])
|
||||
.context("failed to deserialize StandbyReply")?;
|
||||
state.disk_consistent_lsn = reply.write_lsn;
|
||||
timeline.update_replica_state(replica, Some(state));
|
||||
}
|
||||
Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => (),
|
||||
_ => warn!("unexpected message {:?}", msg),
|
||||
}
|
||||
}
|
||||
@@ -187,7 +203,7 @@ impl ReplicationConn {
|
||||
// switch to copy
|
||||
pgb.write_message(&BeMessage::CopyBothResponse)?;
|
||||
|
||||
let mut end_pos: Lsn;
|
||||
let mut end_pos = Lsn(0);
|
||||
let mut wal_file: Option<File> = None;
|
||||
|
||||
loop {
|
||||
@@ -202,7 +218,18 @@ impl ReplicationConn {
|
||||
} else {
|
||||
/* normal mode */
|
||||
let timeline = swh.timeline.get();
|
||||
end_pos = timeline.wait_for_lsn(start_pos);
|
||||
if let Some(lsn) = timeline.wait_for_lsn(start_pos) {
|
||||
end_pos = lsn
|
||||
} else {
|
||||
// timeout expired: request pageserver status
|
||||
pgb.write_message(&BeMessage::KeepAlive(WalSndKeepAlive {
|
||||
sent_ptr: end_pos.0,
|
||||
timestamp: get_current_timestamp(),
|
||||
request_reply: true,
|
||||
}))
|
||||
.context("Failed to send KeepAlive message")?;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if end_pos == END_REPLICATION_MARKER {
|
||||
break;
|
||||
@@ -216,7 +243,7 @@ impl ReplicationConn {
|
||||
let segno = start_pos.segment_number(wal_seg_size);
|
||||
let wal_file_name = XLogFileName(timeline, segno, wal_seg_size);
|
||||
let timeline_id = swh.timeline.get().timelineid.to_string();
|
||||
let wal_file_path = swh.conf.data_dir.join(timeline_id).join(wal_file_name);
|
||||
let wal_file_path = swh.conf.workdir.join(timeline_id).join(wal_file_name);
|
||||
Self::open_wal_file(&wal_file_path)?
|
||||
}
|
||||
};
|
||||
@@ -257,18 +284,3 @@ impl ReplicationConn {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// A no-op impl for tests
|
||||
impl HsFeedbackSubscriber for () {}
|
||||
|
||||
#[test]
|
||||
fn test_replication_conn_background_thread_eof() {
|
||||
// Test that background_thread recognizes EOF
|
||||
let stream: &[u8] = &[];
|
||||
ReplicationConn::background_thread(stream, ()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,9 +18,9 @@ use tokio::runtime;
|
||||
use tokio::time::sleep;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
|
||||
pub fn thread_main(conf: WalAcceptorConf) {
|
||||
pub fn thread_main(conf: SafeKeeperConf) {
|
||||
// Create a new thread pool
|
||||
//
|
||||
// FIXME: keep it single-threaded for now, make it easier to debug with gdb,
|
||||
@@ -42,7 +42,7 @@ async fn offload_files(
|
||||
bucket: &Bucket,
|
||||
listing: &HashSet<String>,
|
||||
dir_path: &Path,
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
) -> Result<u64> {
|
||||
let horizon = SystemTime::now() - conf.ttl.unwrap();
|
||||
let mut n: u64 = 0;
|
||||
@@ -54,7 +54,7 @@ async fn offload_files(
|
||||
&& IsXLogFileName(entry.file_name().to_str().unwrap())
|
||||
&& entry.metadata().unwrap().created().unwrap() <= horizon
|
||||
{
|
||||
let relpath = path.strip_prefix(&conf.data_dir).unwrap();
|
||||
let relpath = path.strip_prefix(&conf.workdir).unwrap();
|
||||
let s3path = String::from("walarchive/") + relpath.to_str().unwrap();
|
||||
if !listing.contains(&s3path) {
|
||||
let mut file = File::open(&path)?;
|
||||
@@ -70,7 +70,7 @@ async fn offload_files(
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
async fn main_loop(conf: &WalAcceptorConf) -> Result<()> {
|
||||
async fn main_loop(conf: &SafeKeeperConf) -> Result<()> {
|
||||
let region = Region::Custom {
|
||||
region: env::var("S3_REGION").unwrap(),
|
||||
endpoint: env::var("S3_ENDPOINT").unwrap(),
|
||||
@@ -97,7 +97,7 @@ async fn main_loop(conf: &WalAcceptorConf) -> Result<()> {
|
||||
.flat_map(|b| b.contents.iter().map(|o| o.key.clone()))
|
||||
.collect();
|
||||
|
||||
let n = offload_files(&bucket, &listing, &conf.data_dir, conf).await?;
|
||||
let n = offload_files(&bucket, &listing, &conf.workdir, conf).await?;
|
||||
info!("Offload {} files to S3", n);
|
||||
sleep(conf.ttl.unwrap()).await;
|
||||
}
|
||||
|
||||
@@ -19,7 +19,10 @@ use lazy_static::lazy_static;
|
||||
|
||||
use crate::replication::HotStandbyFeedback;
|
||||
use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
|
||||
use zenith_metrics::{register_gauge_vec, Gauge, GaugeVec};
|
||||
use zenith_metrics::{
|
||||
register_gauge_vec, register_histogram_vec, Gauge, GaugeVec, Histogram, HistogramVec,
|
||||
DISK_WRITE_SECONDS_BUCKETS,
|
||||
};
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::pq_proto::SystemId;
|
||||
@@ -191,6 +194,8 @@ pub struct AppendResponse {
|
||||
// We report back our awareness about which WAL is committed, as this is
|
||||
// a criterion for walproposer --sync mode exit
|
||||
pub commit_lsn: Lsn,
|
||||
// Min disk consistent lsn of pageservers (portion of WAL applied and written to the disk by pageservers)
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
pub hs_feedback: HotStandbyFeedback,
|
||||
}
|
||||
|
||||
@@ -297,11 +302,27 @@ lazy_static! {
|
||||
&["ztli"]
|
||||
)
|
||||
.expect("Failed to register safekeeper_commit_lsn gauge vec");
|
||||
static ref WRITE_WAL_BYTES: HistogramVec = register_histogram_vec!(
|
||||
"safekeeper_write_wal_bytes",
|
||||
"Bytes written to WAL in a single request, grouped by timeline",
|
||||
&["timeline_id"],
|
||||
vec![1.0, 10.0, 100.0, 1024.0, 8192.0, 128.0 * 1024.0, 1024.0 * 1024.0, 10.0 * 1024.0 * 1024.0]
|
||||
)
|
||||
.expect("Failed to register safekeeper_write_wal_bytes histogram vec");
|
||||
static ref WRITE_WAL_SECONDS: HistogramVec = register_histogram_vec!(
|
||||
"safekeeper_write_wal_seconds",
|
||||
"Seconds spent writing and syncing WAL to a disk in a single request, grouped by timeline",
|
||||
&["timeline_id"],
|
||||
DISK_WRITE_SECONDS_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_write_wal_seconds histogram vec");
|
||||
}
|
||||
|
||||
struct SafeKeeperMetrics {
|
||||
flush_lsn: Gauge,
|
||||
commit_lsn: Gauge,
|
||||
write_wal_bytes: Histogram,
|
||||
write_wal_seconds: Histogram,
|
||||
}
|
||||
|
||||
impl SafeKeeperMetrics {
|
||||
@@ -310,6 +331,8 @@ impl SafeKeeperMetrics {
|
||||
SafeKeeperMetrics {
|
||||
flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&[&ztli_str]),
|
||||
commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&[&ztli_str]),
|
||||
write_wal_bytes: WRITE_WAL_BYTES.with_label_values(&[&ztli_str]),
|
||||
write_wal_seconds: WRITE_WAL_SECONDS.with_label_values(&[&ztli_str]),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -317,6 +340,8 @@ impl SafeKeeperMetrics {
|
||||
SafeKeeperMetrics {
|
||||
flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&["n/a"]),
|
||||
commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&["n/a"]),
|
||||
write_wal_bytes: WRITE_WAL_BYTES.with_label_values(&["n/a"]),
|
||||
write_wal_seconds: WRITE_WAL_SECONDS.with_label_values(&["n/a"]),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -458,6 +483,7 @@ where
|
||||
epoch: self.s.acceptor_state.epoch,
|
||||
commit_lsn: Lsn(0),
|
||||
flush_lsn: Lsn(0),
|
||||
disk_consistent_lsn: Lsn(0),
|
||||
hs_feedback: HotStandbyFeedback::empty(),
|
||||
};
|
||||
return Ok(AcceptorProposerMessage::AppendResponse(resp));
|
||||
@@ -469,8 +495,14 @@ where
|
||||
// do the job
|
||||
let mut last_rec_lsn = Lsn(0);
|
||||
if !msg.wal_data.is_empty() {
|
||||
self.storage
|
||||
.write_wal(&self.s.server, msg.h.begin_lsn, &msg.wal_data)?;
|
||||
self.metrics
|
||||
.write_wal_bytes
|
||||
.observe(msg.wal_data.len() as f64);
|
||||
{
|
||||
let _timer = self.metrics.write_wal_seconds.start_timer();
|
||||
self.storage
|
||||
.write_wal(&self.s.server, msg.h.begin_lsn, &msg.wal_data)?;
|
||||
}
|
||||
|
||||
// figure out last record's end lsn for reporting (if we got the
|
||||
// whole record)
|
||||
@@ -567,6 +599,7 @@ where
|
||||
epoch: self.s.acceptor_state.epoch,
|
||||
flush_lsn: self.flush_lsn,
|
||||
commit_lsn: self.s.commit_lsn,
|
||||
disk_consistent_lsn: Lsn(0),
|
||||
// will be filled by caller code to avoid bothering safekeeper
|
||||
hs_feedback: HotStandbyFeedback::empty(),
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Part of WAL acceptor pretending to be Postgres, streaming xlog to
|
||||
//! Part of Safekeeper pretending to be Postgres, streaming xlog to
|
||||
//! pageserver/any other consumer.
|
||||
//!
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::json_ctrl::handle_json_ctrl;
|
||||
use crate::receive_wal::ReceiveWalConn;
|
||||
use crate::replication::ReplicationConn;
|
||||
use crate::timeline::{Timeline, TimelineTools};
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use bytes::Bytes;
|
||||
use std::str::FromStr;
|
||||
@@ -20,7 +20,7 @@ use crate::timeline::CreateControlFile;
|
||||
|
||||
/// Handler for streaming WAL from acceptor
|
||||
pub struct SendWalHandler {
|
||||
pub conf: WalAcceptorConf,
|
||||
pub conf: SafeKeeperConf,
|
||||
/// assigned application name
|
||||
pub appname: Option<String>,
|
||||
pub tenantid: Option<ZTenantId>,
|
||||
@@ -85,7 +85,7 @@ impl postgres_backend::Handler for SendWalHandler {
|
||||
}
|
||||
|
||||
impl SendWalHandler {
|
||||
pub fn new(conf: WalAcceptorConf) -> Self {
|
||||
pub fn new(conf: SafeKeeperConf) -> Self {
|
||||
SendWalHandler {
|
||||
conf,
|
||||
appname: None,
|
||||
|
||||
@@ -11,9 +11,10 @@ use std::collections::HashMap;
|
||||
use std::fs::{self, File, OpenOptions};
|
||||
use std::io::{Seek, SeekFrom, Write};
|
||||
use std::sync::{Arc, Condvar, Mutex};
|
||||
use std::time::Duration;
|
||||
use zenith_metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use crate::replication::{HotStandbyFeedback, END_REPLICATION_MARKER};
|
||||
@@ -21,10 +22,39 @@ use crate::safekeeper::{
|
||||
AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, SafeKeeperState, ServerInfo,
|
||||
Storage, SK_FORMAT_VERSION, SK_MAGIC,
|
||||
};
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
use postgres_ffi::xlog_utils::{XLogFileName, XLOG_BLCKSZ};
|
||||
|
||||
const CONTROL_FILE_NAME: &str = "safekeeper.control";
|
||||
const POLL_STATE_TIMEOUT: Duration = Duration::from_secs(1);
|
||||
|
||||
/// Replica status: host standby feedback + disk consistent lsn
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct ReplicaState {
|
||||
/// combined disk_consistent_lsn of pageservers
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
/// combined hot standby feedback from all replicas
|
||||
pub hs_feedback: HotStandbyFeedback,
|
||||
}
|
||||
|
||||
impl Default for ReplicaState {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ReplicaState {
|
||||
pub fn new() -> ReplicaState {
|
||||
ReplicaState {
|
||||
disk_consistent_lsn: Lsn(u64::MAX),
|
||||
hs_feedback: HotStandbyFeedback {
|
||||
ts: 0,
|
||||
xmin: u64::MAX,
|
||||
catalog_xmin: u64::MAX,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared state associated with database instance (tenant)
|
||||
struct SharedState {
|
||||
@@ -33,8 +63,8 @@ struct SharedState {
|
||||
/// For receiving-sending wal cooperation
|
||||
/// quorum commit LSN we've notified walsenders about
|
||||
notified_commit_lsn: Lsn,
|
||||
/// combined hot standby feedback from all replicas
|
||||
hs_feedback: HotStandbyFeedback,
|
||||
/// State of replicas
|
||||
replicas: Vec<Option<ReplicaState>>,
|
||||
}
|
||||
|
||||
// A named boolean.
|
||||
@@ -44,23 +74,70 @@ pub enum CreateControlFile {
|
||||
False,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref PERSIST_SYNC_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
|
||||
"safekeeper_persist_sync_control_file_seconds",
|
||||
"Seconds to persist and sync control file, grouped by timeline",
|
||||
&["timeline_id"],
|
||||
DISK_WRITE_SECONDS_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_persist_sync_control_file_seconds histogram vec");
|
||||
static ref PERSIST_NOSYNC_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
|
||||
"safekeeper_persist_nosync_control_file_seconds",
|
||||
"Seconds to persist and sync control file, grouped by timeline",
|
||||
&["timeline_id"],
|
||||
DISK_WRITE_SECONDS_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_persist_nosync_control_file_seconds histogram vec");
|
||||
}
|
||||
|
||||
impl SharedState {
|
||||
/// Get combined stateof all alive replicas
|
||||
pub fn get_replicas_state(&self) -> ReplicaState {
|
||||
let mut acc = ReplicaState::new();
|
||||
for state in self.replicas.iter().flatten() {
|
||||
acc.hs_feedback.ts = max(acc.hs_feedback.ts, state.hs_feedback.ts);
|
||||
acc.hs_feedback.xmin = min(acc.hs_feedback.xmin, state.hs_feedback.xmin);
|
||||
acc.hs_feedback.catalog_xmin =
|
||||
min(acc.hs_feedback.catalog_xmin, state.hs_feedback.catalog_xmin);
|
||||
acc.disk_consistent_lsn = Lsn::min(acc.disk_consistent_lsn, state.disk_consistent_lsn);
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
/// Assign new replica ID. We choose first empty cell in the replicas vector
|
||||
/// or extend the vector if there are not free items.
|
||||
pub fn add_replica(&mut self, state: ReplicaState) -> usize {
|
||||
if let Some(pos) = self.replicas.iter().position(|r| r.is_none()) {
|
||||
self.replicas[pos] = Some(state);
|
||||
return pos;
|
||||
}
|
||||
let pos = self.replicas.len();
|
||||
self.replicas.push(Some(state));
|
||||
pos
|
||||
}
|
||||
|
||||
/// Restore SharedState from control file. Locks the control file along the
|
||||
/// way to prevent running more than one instance of safekeeper on the same
|
||||
/// data dir.
|
||||
/// If create=false and file doesn't exist, bails out.
|
||||
fn create_restore(
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
timelineid: ZTimelineId,
|
||||
create: CreateControlFile,
|
||||
) -> Result<Self> {
|
||||
let (cf, state) = SharedState::load_control_file(conf, timelineid, create)?;
|
||||
let timelineid_str = format!("{}", timelineid);
|
||||
let storage = FileStorage {
|
||||
control_file: cf,
|
||||
conf: conf.clone(),
|
||||
persist_sync_control_file_seconds: PERSIST_SYNC_CONTROL_FILE_SECONDS
|
||||
.with_label_values(&[&timelineid_str]),
|
||||
persist_nosync_control_file_seconds: PERSIST_NOSYNC_CONTROL_FILE_SECONDS
|
||||
.with_label_values(&[&timelineid_str]),
|
||||
};
|
||||
let (flush_lsn, tli) = if state.server.wal_seg_size != 0 {
|
||||
let wal_dir = conf.data_dir.join(format!("{}", timelineid));
|
||||
let wal_dir = conf.workdir.join(format!("{}", timelineid));
|
||||
find_end_of_wal(
|
||||
&wal_dir,
|
||||
state.server.wal_seg_size as usize,
|
||||
@@ -74,30 +151,19 @@ impl SharedState {
|
||||
Ok(Self {
|
||||
notified_commit_lsn: Lsn(0),
|
||||
sk: SafeKeeper::new(Lsn(flush_lsn), tli, storage, state),
|
||||
hs_feedback: HotStandbyFeedback {
|
||||
ts: 0,
|
||||
xmin: u64::MAX,
|
||||
catalog_xmin: u64::MAX,
|
||||
},
|
||||
replicas: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Accumulate hot standby feedbacks from replicas
|
||||
pub fn add_hs_feedback(&mut self, feedback: HotStandbyFeedback) {
|
||||
self.hs_feedback.xmin = min(self.hs_feedback.xmin, feedback.xmin);
|
||||
self.hs_feedback.catalog_xmin = min(self.hs_feedback.catalog_xmin, feedback.catalog_xmin);
|
||||
self.hs_feedback.ts = max(self.hs_feedback.ts, feedback.ts);
|
||||
}
|
||||
|
||||
/// Fetch and lock control file (prevent running more than one instance of safekeeper)
|
||||
/// If create=false and file doesn't exist, bails out.
|
||||
fn load_control_file(
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
timelineid: ZTimelineId,
|
||||
create: CreateControlFile,
|
||||
) -> Result<(File, SafeKeeperState)> {
|
||||
let control_file_path = conf
|
||||
.data_dir
|
||||
.workdir
|
||||
.join(timelineid.to_string())
|
||||
.join(CONTROL_FILE_NAME);
|
||||
info!(
|
||||
@@ -178,20 +244,27 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wait for an LSN to be committed.
|
||||
/// Timed wait for an LSN to be committed.
|
||||
///
|
||||
/// Returns the last committed LSN, which will be at least
|
||||
/// as high as the LSN waited for.
|
||||
/// as high as the LSN waited for, or None if timeout expired.
|
||||
///
|
||||
pub fn wait_for_lsn(&self, lsn: Lsn) -> Lsn {
|
||||
pub fn wait_for_lsn(&self, lsn: Lsn) -> Option<Lsn> {
|
||||
let mut shared_state = self.mutex.lock().unwrap();
|
||||
loop {
|
||||
let commit_lsn = shared_state.notified_commit_lsn;
|
||||
// This must be `>`, not `>=`.
|
||||
if commit_lsn > lsn {
|
||||
return commit_lsn;
|
||||
return Some(commit_lsn);
|
||||
}
|
||||
shared_state = self.cond.wait(shared_state).unwrap();
|
||||
let result = self
|
||||
.cond
|
||||
.wait_timeout(shared_state, POLL_STATE_TIMEOUT)
|
||||
.unwrap();
|
||||
if result.1.timed_out() {
|
||||
return None;
|
||||
}
|
||||
shared_state = result.0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,9 +292,11 @@ impl Timeline {
|
||||
// commit_lsn if we are catching up safekeeper.
|
||||
commit_lsn = shared_state.sk.commit_lsn;
|
||||
|
||||
// if this is AppendResponse, fill in proper hot standby feedback
|
||||
// if this is AppendResponse, fill in proper hot standby feedback and disk consistent lsn
|
||||
if let AcceptorProposerMessage::AppendResponse(ref mut resp) = rmsg {
|
||||
resp.hs_feedback = shared_state.hs_feedback.clone();
|
||||
let state = shared_state.get_replicas_state();
|
||||
resp.hs_feedback = state.hs_feedback;
|
||||
resp.disk_consistent_lsn = state.disk_consistent_lsn;
|
||||
}
|
||||
}
|
||||
// Ping wal sender that new data might be available.
|
||||
@@ -233,15 +308,14 @@ impl Timeline {
|
||||
self.mutex.lock().unwrap().sk.s.clone()
|
||||
}
|
||||
|
||||
// Accumulate hot standby feedbacks from replicas
|
||||
pub fn add_hs_feedback(&self, feedback: HotStandbyFeedback) {
|
||||
pub fn add_replica(&self, state: ReplicaState) -> usize {
|
||||
let mut shared_state = self.mutex.lock().unwrap();
|
||||
shared_state.add_hs_feedback(feedback);
|
||||
shared_state.add_replica(state)
|
||||
}
|
||||
|
||||
pub fn get_hs_feedback(&self) -> HotStandbyFeedback {
|
||||
let shared_state = self.mutex.lock().unwrap();
|
||||
shared_state.hs_feedback.clone()
|
||||
pub fn update_replica_state(&self, id: usize, state: Option<ReplicaState>) {
|
||||
let mut shared_state = self.mutex.lock().unwrap();
|
||||
shared_state.replicas[id] = state;
|
||||
}
|
||||
|
||||
pub fn get_end_of_wal(&self) -> (Lsn, u32) {
|
||||
@@ -254,7 +328,7 @@ impl Timeline {
|
||||
pub trait TimelineTools {
|
||||
fn set(
|
||||
&mut self,
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
create: CreateControlFile,
|
||||
@@ -266,7 +340,7 @@ pub trait TimelineTools {
|
||||
impl TimelineTools for Option<Arc<Timeline>> {
|
||||
fn set(
|
||||
&mut self,
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
create: CreateControlFile,
|
||||
@@ -295,7 +369,7 @@ impl GlobalTimelines {
|
||||
/// Get a timeline with control file loaded from the global TIMELINES map.
|
||||
/// If control file doesn't exist and create=false, bails out.
|
||||
pub fn get(
|
||||
conf: &WalAcceptorConf,
|
||||
conf: &SafeKeeperConf,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
create: CreateControlFile,
|
||||
@@ -324,11 +398,19 @@ impl GlobalTimelines {
|
||||
#[derive(Debug)]
|
||||
struct FileStorage {
|
||||
control_file: File,
|
||||
conf: WalAcceptorConf,
|
||||
conf: SafeKeeperConf,
|
||||
persist_sync_control_file_seconds: Histogram,
|
||||
persist_nosync_control_file_seconds: Histogram,
|
||||
}
|
||||
|
||||
impl Storage for FileStorage {
|
||||
fn persist(&mut self, s: &SafeKeeperState, sync: bool) -> Result<()> {
|
||||
let _timer = if sync {
|
||||
&self.persist_sync_control_file_seconds
|
||||
} else {
|
||||
&self.persist_nosync_control_file_seconds
|
||||
}
|
||||
.start_timer();
|
||||
self.control_file.seek(SeekFrom::Start(0))?;
|
||||
s.ser_into(&mut self.control_file)?;
|
||||
if sync {
|
||||
@@ -368,12 +450,12 @@ impl Storage for FileStorage {
|
||||
let wal_file_name = XLogFileName(server.tli, segno, wal_seg_size);
|
||||
let wal_file_path = self
|
||||
.conf
|
||||
.data_dir
|
||||
.workdir
|
||||
.join(ztli.to_string())
|
||||
.join(wal_file_name.clone());
|
||||
let wal_file_partial_path = self
|
||||
.conf
|
||||
.data_dir
|
||||
.workdir
|
||||
.join(ztli.to_string())
|
||||
.join(wal_file_name.clone() + ".partial");
|
||||
|
||||
|
||||
@@ -8,11 +8,11 @@ use std::net::{TcpListener, TcpStream};
|
||||
use std::thread;
|
||||
|
||||
use crate::send_wal::SendWalHandler;
|
||||
use crate::WalAcceptorConf;
|
||||
use crate::SafeKeeperConf;
|
||||
use zenith_utils::postgres_backend::{AuthType, PostgresBackend};
|
||||
|
||||
/// Accept incoming TCP connections and spawn them into a background thread.
|
||||
pub fn thread_main(conf: WalAcceptorConf, listener: TcpListener) -> Result<()> {
|
||||
pub fn thread_main(conf: SafeKeeperConf, listener: TcpListener) -> Result<()> {
|
||||
loop {
|
||||
match listener.accept() {
|
||||
Ok((socket, peer_addr)) => {
|
||||
@@ -31,7 +31,7 @@ pub fn thread_main(conf: WalAcceptorConf, listener: TcpListener) -> Result<()> {
|
||||
|
||||
/// This is run by `thread_main` above, inside a background thread.
|
||||
///
|
||||
fn handle_socket(socket: TcpStream, conf: WalAcceptorConf) -> Result<()> {
|
||||
fn handle_socket(socket: TcpStream, conf: SafeKeeperConf) -> Result<()> {
|
||||
socket.set_nodelay(true)?;
|
||||
|
||||
let mut conn_handler = SendWalHandler::new(conf);
|
||||
|
||||
@@ -15,6 +15,7 @@ postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbf
|
||||
# FIXME: 'pageserver' is needed for BranchInfo. Refactor
|
||||
pageserver = { path = "../pageserver" }
|
||||
control_plane = { path = "../control_plane" }
|
||||
walkeeper = { path = "../walkeeper" }
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
workspace_hack = { path = "../workspace_hack" }
|
||||
|
||||
@@ -1,19 +1,54 @@
|
||||
use anyhow::anyhow;
|
||||
use anyhow::{anyhow, bail};
|
||||
use anyhow::{Context, Result};
|
||||
use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
|
||||
use control_plane::compute::ComputeControlPlane;
|
||||
use control_plane::local_env;
|
||||
use control_plane::local_env::LocalEnv;
|
||||
use control_plane::safekeeper::SafekeeperNode;
|
||||
use control_plane::storage::PageServerNode;
|
||||
use pageserver::defaults::{DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_PORT};
|
||||
use pageserver::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
use zenith_utils::auth::{encode_from_key_path, Claims, Scope};
|
||||
use walkeeper::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
};
|
||||
use zenith_utils::auth::{Claims, Scope};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use pageserver::branches::BranchInfo;
|
||||
|
||||
// Default name of a safekeeper node, if not specified on the command line.
|
||||
const DEFAULT_SAFEKEEPER_NAME: &str = "single";
|
||||
|
||||
fn default_conf() -> String {
|
||||
format!(
|
||||
r#"
|
||||
# Default built-in configuration, defined in main.rs
|
||||
[pageserver]
|
||||
pg_port = {pageserver_pg_port}
|
||||
http_port = {pageserver_http_port}
|
||||
auth_type = '{pageserver_auth_type}'
|
||||
|
||||
[[safekeepers]]
|
||||
name = '{safekeeper_name}'
|
||||
pg_port = {safekeeper_pg_port}
|
||||
http_port = {safekeeper_http_port}
|
||||
"#,
|
||||
pageserver_pg_port = DEFAULT_PAGESERVER_PG_PORT,
|
||||
pageserver_http_port = DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
pageserver_auth_type = AuthType::Trust,
|
||||
safekeeper_name = DEFAULT_SAFEKEEPER_NAME,
|
||||
safekeeper_pg_port = DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
safekeeper_http_port = DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
)
|
||||
}
|
||||
|
||||
///
|
||||
/// Branches tree element used as a value in the HashMap.
|
||||
///
|
||||
@@ -32,12 +67,21 @@ struct BranchTreeEl {
|
||||
// * Providing CLI api to the pageserver
|
||||
// * TODO: export/import to/from usual postgres
|
||||
fn main() -> Result<()> {
|
||||
let timeline_arg = Arg::with_name("timeline")
|
||||
.short("n")
|
||||
let pg_node_arg = Arg::with_name("node")
|
||||
.index(1)
|
||||
.help("Timeline name")
|
||||
.help("Node name")
|
||||
.required(true);
|
||||
|
||||
let safekeeper_node_arg = Arg::with_name("node")
|
||||
.index(1)
|
||||
.help("Node name")
|
||||
.required(false);
|
||||
|
||||
let timeline_arg = Arg::with_name("timeline")
|
||||
.index(2)
|
||||
.help("Branch name or a point-in time specification")
|
||||
.required(false);
|
||||
|
||||
let tenantid_arg = Arg::with_name("tenantid")
|
||||
.long("tenantid")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
@@ -49,29 +93,25 @@ fn main() -> Result<()> {
|
||||
.required(false)
|
||||
.value_name("port");
|
||||
|
||||
let stop_mode_arg = Arg::with_name("stop-mode")
|
||||
.short("m")
|
||||
.takes_value(true)
|
||||
.possible_values(&["fast", "immediate"])
|
||||
.help("If 'immediate', don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let matches = App::new("Zenith CLI")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.subcommand(
|
||||
SubCommand::with_name("init")
|
||||
.about("Initialize a new Zenith repository")
|
||||
.arg(
|
||||
Arg::with_name("pageserver-pg-port")
|
||||
.long("pageserver-pg-port")
|
||||
Arg::with_name("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_name("pageserver-pg-port"),
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("pageserver-http-port")
|
||||
.long("pageserver-http-port")
|
||||
.required(false)
|
||||
.value_name("pageserver-http-port"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("enable-auth")
|
||||
.long("enable-auth")
|
||||
.takes_value(false)
|
||||
.help("Enable authentication using ZenithJWT")
|
||||
),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("branch")
|
||||
@@ -86,15 +126,35 @@ fn main() -> Result<()> {
|
||||
.subcommand(SubCommand::with_name("list"))
|
||||
.subcommand(SubCommand::with_name("create").arg(Arg::with_name("tenantid").required(false).index(1)))
|
||||
)
|
||||
.subcommand(SubCommand::with_name("status"))
|
||||
.subcommand(SubCommand::with_name("start").about("Start local pageserver"))
|
||||
.subcommand(SubCommand::with_name("stop").about("Stop local pageserver")
|
||||
.arg(Arg::with_name("immediate")
|
||||
.help("Don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("pageserver")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage pageserver")
|
||||
.subcommand(SubCommand::with_name("status"))
|
||||
.subcommand(SubCommand::with_name("start").about("Start local pageserver"))
|
||||
.subcommand(SubCommand::with_name("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
.subcommand(SubCommand::with_name("restart").about("Restart local pageserver"))
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("safekeeper")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage safekeepers")
|
||||
.subcommand(SubCommand::with_name("start")
|
||||
.about("Start local safekeeper")
|
||||
.arg(safekeeper_node_arg.clone())
|
||||
)
|
||||
.subcommand(SubCommand::with_name("stop")
|
||||
.about("Stop local safekeeper")
|
||||
.arg(safekeeper_node_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.subcommand(SubCommand::with_name("restart")
|
||||
.about("Restart local safekeeper")
|
||||
.arg(safekeeper_node_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(SubCommand::with_name("restart").about("Restart local pageserver"))
|
||||
.subcommand(
|
||||
SubCommand::with_name("pg")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
@@ -102,7 +162,10 @@ fn main() -> Result<()> {
|
||||
.subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone()))
|
||||
.subcommand(SubCommand::with_name("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(timeline_arg.clone()).arg(tenantid_arg.clone()).arg(port_arg.clone())
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::with_name("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
@@ -111,13 +174,13 @@ fn main() -> Result<()> {
|
||||
))
|
||||
.subcommand(SubCommand::with_name("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(
|
||||
timeline_arg.clone()
|
||||
).arg(
|
||||
tenantid_arg.clone()
|
||||
).arg(port_arg.clone()))
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone()))
|
||||
.subcommand(
|
||||
SubCommand::with_name("stop")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(
|
||||
@@ -129,116 +192,49 @@ fn main() -> Result<()> {
|
||||
)
|
||||
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("start")
|
||||
.about("Start page server and safekeepers")
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("stop")
|
||||
.about("Stop page server and safekeepers")
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
// Create config file
|
||||
if let ("init", Some(init_match)) = matches.subcommand() {
|
||||
let tenantid = ZTenantId::generate();
|
||||
let pageserver_pg_port = match init_match.value_of("pageserver-pg-port") {
|
||||
Some(v) => v.parse()?,
|
||||
None => DEFAULT_PG_LISTEN_PORT,
|
||||
};
|
||||
let pageserver_http_port = match init_match.value_of("pageserver-http-port") {
|
||||
Some(v) => v.parse()?,
|
||||
None => DEFAULT_HTTP_LISTEN_PORT,
|
||||
let (sub_name, sub_args) = matches.subcommand();
|
||||
let sub_args = sub_args.expect("no subcommand");
|
||||
|
||||
// Check for 'zenith init' command first.
|
||||
let subcmd_result = if sub_name == "init" {
|
||||
handle_init(sub_args)
|
||||
} else {
|
||||
// all other commands need an existing config
|
||||
let env = match LocalEnv::load_config() {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading config: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let auth_type = if init_match.is_present("enable-auth") {
|
||||
AuthType::ZenithJWT
|
||||
} else {
|
||||
AuthType::Trust
|
||||
};
|
||||
|
||||
local_env::init(
|
||||
pageserver_pg_port,
|
||||
pageserver_http_port,
|
||||
tenantid,
|
||||
auth_type,
|
||||
)
|
||||
.with_context(|| "Failed to create config file")?;
|
||||
match sub_name {
|
||||
"tenant" => handle_tenant(sub_args, &env),
|
||||
"branch" => handle_branch(sub_args, &env),
|
||||
"start" => handle_start_all(sub_args, &env),
|
||||
"stop" => handle_stop_all(sub_args, &env),
|
||||
"pageserver" => handle_pageserver(sub_args, &env),
|
||||
"pg" => handle_pg(sub_args, &env),
|
||||
"safekeeper" => handle_safekeeper(sub_args, &env),
|
||||
_ => bail!("unexpected subcommand {}", sub_name),
|
||||
}
|
||||
};
|
||||
if let Err(e) = subcmd_result {
|
||||
eprintln!("command failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// all other commands would need config
|
||||
let env = match local_env::load_config() {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading config: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match matches.subcommand() {
|
||||
("init", Some(init_match)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
if let Err(e) = pageserver.init(
|
||||
Some(&env.tenantid.to_string()),
|
||||
init_match.is_present("enable-auth"),
|
||||
) {
|
||||
eprintln!("pageserver init failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
("tenant", Some(args)) => {
|
||||
if let Err(e) = handle_tenant(args, &env) {
|
||||
eprintln!("tenant command failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("branch", Some(sub_args)) => {
|
||||
if let Err(e) = handle_branch(sub_args, &env) {
|
||||
eprintln!("branch command failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("start", Some(_sub_m)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
|
||||
if let Err(e) = pageserver.start() {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("stop", Some(stop_match)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
|
||||
let immediate = stop_match.is_present("immediate");
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("restart", Some(_sub_m)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
|
||||
//TODO what shutdown strategy should we use here?
|
||||
if let Err(e) = pageserver.stop(false) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = pageserver.start() {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("status", Some(_sub_m)) => {}
|
||||
|
||||
("pg", Some(pg_match)) => {
|
||||
if let Err(e) = handle_pg(pg_match, &env) {
|
||||
eprintln!("pg operation failed: {:?}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -373,12 +369,52 @@ fn get_branch_infos(
|
||||
Ok(branch_infos)
|
||||
}
|
||||
|
||||
// Helper function to parse --tenantid option, or get the default from config file
|
||||
fn get_tenantid(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<ZTenantId> {
|
||||
if let Some(tenantid_cmd) = sub_match.value_of("tenantid") {
|
||||
Ok(ZTenantId::from_str(tenantid_cmd)?)
|
||||
} else if let Some(tenantid_conf) = env.default_tenantid {
|
||||
Ok(tenantid_conf)
|
||||
} else {
|
||||
bail!("No tenantid. Use --tenantid, or set 'default_tenantid' in the config file");
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_init(init_match: &ArgMatches) -> Result<()> {
|
||||
// Create config file
|
||||
let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
|
||||
// load and parse the file
|
||||
std::fs::read_to_string(std::path::Path::new(config_path))
|
||||
.with_context(|| format!("Could not read configuration file \"{}\"", config_path))?
|
||||
} else {
|
||||
// Built-in default config
|
||||
default_conf()
|
||||
};
|
||||
|
||||
let mut env = LocalEnv::create_config(&toml_file)
|
||||
.with_context(|| "Failed to create zenith configuration")?;
|
||||
env.init()
|
||||
.with_context(|| "Failed to initialize zenith repository")?;
|
||||
|
||||
// Call 'pageserver init'.
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
if let Err(e) = pageserver.init(
|
||||
// default_tenantid was generated by the `env.init()` call above
|
||||
Some(&env.default_tenantid.unwrap().to_string()),
|
||||
) {
|
||||
eprintln!("pageserver init failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_tenant(tenant_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
match tenant_match.subcommand() {
|
||||
("list", Some(_)) => {
|
||||
for tenant in pageserver.tenant_list()? {
|
||||
println!("{}", tenant);
|
||||
for t in pageserver.tenant_list()? {
|
||||
println!("{} {}", t.id, t.state);
|
||||
}
|
||||
}
|
||||
("create", Some(create_match)) => {
|
||||
@@ -390,7 +426,10 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &local_env::LocalEnv) -> Result
|
||||
pageserver.tenant_create(tenantid)?;
|
||||
println!("tenant successfully created on the pageserver");
|
||||
}
|
||||
_ => {}
|
||||
|
||||
(sub_name, _) => {
|
||||
bail!("Unexpected tenant subcommand '{}'", sub_name)
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -398,22 +437,18 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &local_env::LocalEnv) -> Result
|
||||
fn handle_branch(branch_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
let tenantid = get_tenantid(branch_match, env)?;
|
||||
|
||||
if let Some(branchname) = branch_match.value_of("branchname") {
|
||||
let startpoint_str = branch_match
|
||||
.value_of("start-point")
|
||||
.ok_or_else(|| anyhow!("Missing start-point"))?;
|
||||
let tenantid: ZTenantId = branch_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
let branch = pageserver.branch_create(branchname, startpoint_str, &tenantid)?;
|
||||
println!(
|
||||
"Created branch '{}' at {:?} for tenant: {}",
|
||||
branch.name, branch.latest_valid_lsn, tenantid,
|
||||
);
|
||||
} else {
|
||||
let tenantid: ZTenantId = branch_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
// No arguments, list branches for tenant
|
||||
let branches = pageserver.branch_list(&tenantid)?;
|
||||
print_branches_tree(branches)?;
|
||||
@@ -423,74 +458,79 @@ fn handle_branch(branch_match: &ArgMatches, env: &local_env::LocalEnv) -> Result
|
||||
}
|
||||
|
||||
fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let (sub_name, sub_args) = pg_match.subcommand();
|
||||
let sub_args = sub_args.expect("no pg subcommand");
|
||||
|
||||
let mut cplane = ComputeControlPlane::load(env.clone())?;
|
||||
|
||||
match pg_match.subcommand() {
|
||||
("list", Some(list_match)) => {
|
||||
let tenantid: ZTenantId = list_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
// All subcommands take an optional --tenantid option
|
||||
let tenantid = get_tenantid(sub_args, env)?;
|
||||
|
||||
match sub_name {
|
||||
"list" => {
|
||||
let branch_infos = get_branch_infos(env, &tenantid).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load branch info: {}", e);
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
println!("BRANCH\tADDRESS\t\tLSN\t\tSTATUS");
|
||||
for ((_, timeline_name), node) in cplane
|
||||
println!("NODE\tADDRESS\t\tBRANCH\tLSN\t\tSTATUS");
|
||||
for ((_, node_name), node) in cplane
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|((node_tenantid, _), _)| node_tenantid == &tenantid)
|
||||
{
|
||||
// FIXME: This shows the LSN at the end of the timeline. It's not the
|
||||
// right thing to do for read-only nodes that might be anchored at an
|
||||
// older point in time, or following but lagging behind the primary.
|
||||
let lsn_str = branch_infos
|
||||
.get(&node.timelineid)
|
||||
.map(|bi| bi.latest_valid_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
|
||||
println!(
|
||||
"{}\t{}\t{}\t{}",
|
||||
timeline_name,
|
||||
"{}\t{}\t{}\t{}\t{}",
|
||||
node_name,
|
||||
node.address,
|
||||
branch_infos
|
||||
.get(&node.timelineid)
|
||||
.map(|bi| bi.latest_valid_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string()),
|
||||
node.timelineid, // FIXME: resolve human-friendly branch name
|
||||
lsn_str,
|
||||
node.status(),
|
||||
);
|
||||
}
|
||||
}
|
||||
("create", Some(create_match)) => {
|
||||
let tenantid: ZTenantId = create_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
let timeline_name = create_match.value_of("timeline").unwrap_or("main");
|
||||
"create" => {
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let timeline_name = sub_args.value_of("timeline").unwrap_or(node_name);
|
||||
|
||||
let port: Option<u16> = match create_match.value_of("port") {
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
cplane.new_node(tenantid, timeline_name, port)?;
|
||||
cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
}
|
||||
("start", Some(start_match)) => {
|
||||
let tenantid: ZTenantId = start_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
let timeline_name = start_match.value_of("timeline").unwrap_or("main");
|
||||
"start" => {
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let timeline_name = sub_args.value_of("timeline");
|
||||
|
||||
let port: Option<u16> = match start_match.value_of("port") {
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let node = cplane.nodes.get(&(tenantid, timeline_name.to_owned()));
|
||||
let node = cplane.nodes.get(&(tenantid, node_name.to_owned()));
|
||||
|
||||
let auth_token = if matches!(env.auth_type, AuthType::ZenithJWT) {
|
||||
let auth_token = if matches!(env.pageserver.auth_type, AuthType::ZenithJWT) {
|
||||
let claims = Claims::new(Some(tenantid), Scope::Tenant);
|
||||
Some(encode_from_key_path(&claims, &env.private_key_path)?)
|
||||
|
||||
Some(env.generate_auth_token(&claims)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
println!(
|
||||
"Starting {} postgres on timeline {}...",
|
||||
if node.is_some() { "existing" } else { "new" },
|
||||
timeline_name
|
||||
);
|
||||
if let Some(node) = node {
|
||||
if timeline_name.is_some() {
|
||||
println!("timeline name ignored because node exists already");
|
||||
}
|
||||
println!("Starting existing postgres {}...", node_name);
|
||||
node.start(&auth_token)?;
|
||||
} else {
|
||||
// when used with custom port this results in non obvious behaviour
|
||||
@@ -498,26 +538,170 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
// start --port X
|
||||
// stop
|
||||
// start <-- will also use port X even without explicit port argument
|
||||
let node = cplane.new_node(tenantid, timeline_name, port)?;
|
||||
let timeline_name = timeline_name.unwrap_or(node_name);
|
||||
println!(
|
||||
"Starting new postgres {} on {}...",
|
||||
node_name, timeline_name
|
||||
);
|
||||
let node = cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
node.start(&auth_token)?;
|
||||
}
|
||||
}
|
||||
("stop", Some(stop_match)) => {
|
||||
let timeline_name = stop_match.value_of("timeline").unwrap_or("main");
|
||||
let destroy = stop_match.is_present("destroy");
|
||||
let tenantid: ZTenantId = stop_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
"stop" => {
|
||||
let node_name = sub_args.value_of("node").unwrap_or("main");
|
||||
let destroy = sub_args.is_present("destroy");
|
||||
|
||||
let node = cplane
|
||||
.nodes
|
||||
.get(&(tenantid, timeline_name.to_owned()))
|
||||
.ok_or_else(|| anyhow!("postgres {} is not found", timeline_name))?;
|
||||
.get(&(tenantid, node_name.to_owned()))
|
||||
.ok_or_else(|| anyhow!("postgres {} is not found", node_name))?;
|
||||
node.stop(destroy)?;
|
||||
}
|
||||
|
||||
_ => {}
|
||||
_ => {
|
||||
bail!("Unexpected pg subcommand '{}'", sub_name)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
match sub_match.subcommand() {
|
||||
("start", Some(_sub_m)) => {
|
||||
if let Err(e) = pageserver.start() {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("stop", Some(stop_match)) => {
|
||||
let immediate = stop_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
("restart", Some(_sub_m)) => {
|
||||
//TODO what shutdown strategy should we use here?
|
||||
if let Err(e) = pageserver.stop(false) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = pageserver.start() {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
(sub_name, _) => {
|
||||
bail!("Unexpected pageserver subcommand '{}'", sub_name)
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_safekeeper(env: &local_env::LocalEnv, name: &str) -> Result<SafekeeperNode> {
|
||||
if let Some(node) = env.safekeepers.iter().find(|node| node.name == name) {
|
||||
Ok(SafekeeperNode::from_env(env, node))
|
||||
} else {
|
||||
bail!("could not find safekeeper '{}'", name)
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let (sub_name, sub_args) = sub_match.subcommand();
|
||||
let sub_args = sub_args.expect("no safekeeper subcommand");
|
||||
|
||||
// All the commands take an optional safekeeper name argument
|
||||
let node_name = sub_args.value_of("node").unwrap_or(DEFAULT_SAFEKEEPER_NAME);
|
||||
let safekeeper = get_safekeeper(env, node_name)?;
|
||||
|
||||
match sub_name {
|
||||
"start" => {
|
||||
if let Err(e) = safekeeper.start() {
|
||||
eprintln!("safekeeper start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
"stop" => {
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
"restart" => {
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = safekeeper.start() {
|
||||
eprintln!("safekeeper start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
bail!("Unexpected safekeeper subcommand '{}'", sub_name)
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_start_all(_sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
// Postgres nodes are not started automatically
|
||||
|
||||
if let Err(e) = pageserver.start() {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for node in env.safekeepers.iter() {
|
||||
let safekeeper = SafekeeperNode::from_env(env, node);
|
||||
if let Err(e) = safekeeper.start() {
|
||||
eprintln!("safekeeper '{}' start failed: {}", safekeeper.name, e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let immediate = sub_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
// Stop all compute nodes
|
||||
let cplane = ComputeControlPlane::load(env.clone())?;
|
||||
for (_k, node) in cplane.nodes {
|
||||
if let Err(e) = node.stop(false) {
|
||||
eprintln!("postgres stop failed: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
}
|
||||
|
||||
for node in env.safekeepers.iter() {
|
||||
let safekeeper = SafekeeperNode::from_env(env, node);
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper '{}' stop failed: {}", safekeeper.name, e);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -74,6 +74,10 @@ lazy_static! {
|
||||
.expect("Failed to register maxrss_kb int gauge");
|
||||
}
|
||||
|
||||
pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
|
||||
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
|
||||
];
|
||||
|
||||
// Records I/O stats in a "cross-platform" way.
|
||||
// Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
|
||||
// An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.
|
||||
|
||||
@@ -11,7 +11,6 @@ byteorder = "1.4.3"
|
||||
bytes = "1.0.1"
|
||||
hyper = { version = "0.14.7", features = ["full"] }
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
routerify = "2"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
@@ -19,8 +18,8 @@ serde_json = "1"
|
||||
thiserror = "1.0"
|
||||
tokio = "1.11"
|
||||
tracing = "0.1"
|
||||
tracing-log = "0.1"
|
||||
tracing-subscriber = "0.2"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
nix = "0.23.0"
|
||||
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
workspace_hack = { path = "../workspace_hack" }
|
||||
|
||||
@@ -104,8 +104,8 @@ impl JwtAuth {
|
||||
}
|
||||
|
||||
pub fn from_key_path(key_path: &Path) -> Result<Self> {
|
||||
let public_key = fs::read_to_string(key_path)?;
|
||||
Ok(Self::new(DecodingKey::from_rsa_pem(public_key.as_bytes())?))
|
||||
let public_key = fs::read(key_path)?;
|
||||
Ok(Self::new(DecodingKey::from_rsa_pem(&public_key)?))
|
||||
}
|
||||
|
||||
pub fn decode(&self, token: &str) -> Result<TokenData<Claims>> {
|
||||
@@ -114,8 +114,7 @@ impl JwtAuth {
|
||||
}
|
||||
|
||||
// this function is used only for testing purposes in CLI e g generate tokens during init
|
||||
pub fn encode_from_key_path(claims: &Claims, key_path: &Path) -> Result<String> {
|
||||
let key_data = fs::read_to_string(key_path)?;
|
||||
let key = EncodingKey::from_rsa_pem(key_data.as_bytes())?;
|
||||
pub fn encode_from_key_file(claims: &Claims, key_data: &[u8]) -> Result<String> {
|
||||
let key = EncodingKey::from_rsa_pem(key_data)?;
|
||||
Ok(encode(&Header::new(JWT_ALGORITHM), claims, &key)?)
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user