Compare commits


37 Commits

Author SHA1 Message Date
Bojan Serafimov
13cddbb10d WIP 2022-06-15 18:19:21 -04:00
Anastasia Lubennikova
d11c9f9fcb Use random ports for the proxy and local pg in tests
Fixes #1931
Author: Dmitry Ivanov
2022-06-15 20:21:58 +03:00
Kirill Bulatov
d8a37452c8 Rename ZenithFeedback (#1912) 2022-06-11 00:44:05 +03:00
chaitanya sharma
e1336f451d renamed .zenith data-dir to .neon. 2022-06-09 18:19:18 +02:00
Arseny Sher
a4d8261390 Save Postgres log in test_find_end_of_wal_* tests. 2022-06-09 19:16:43 +04:00
Egor Suvorov
e2a5a31595 Safekeeper HTTP router: add comment about /v1/timeline 2022-06-09 17:14:46 +02:00
Egor Suvorov
0ac0fba77a test_runner: test Safekeeper HTTP API Auth
All endpoints except POST /v1/timeline are tested; that one is not tested in any way yet.
Three attempts are made for each endpoint: correctly authenticated, badly authenticated, and unauthenticated.
2022-06-09 17:14:46 +02:00
Egor Suvorov
a001052cdd test_runner: SafekeeperHttpClient: support auth 2022-06-09 17:14:46 +02:00
Egor Suvorov
1f1d852204 ZenithEnvBuilder: rename pageserver_auth_enabled --> auth_enabled 2022-06-09 17:14:46 +02:00
Egor Suvorov
f7b878611a Implement JWT authentication in Safekeeper HTTP API (#1753)
* `control_plane` crate (used by `neon_local`) now parses an `auth_enabled` bool for each Safekeeper
* If auth is enabled, a Safekeeper is passed a path to a public key via a new command line argument
* Added TODO comments to other places needing auth
2022-06-09 17:14:46 +02:00
Arseny Sher
a51b2dac9a Don't s3 offload from newly joined safekeeper not having required WAL.
I made the check at the launcher level, with a view to eventually moving the
election (the decision about who offloads) there.

Also log timeline 'active' changes.
2022-06-09 18:30:16 +04:00
Thang Pham
e22d9cee3a fix ZeroDivisionError in scripts/generate_perf_report_page (#1906)
Fixes the `ZeroDivisionError` by adding `EPS=1e-6` when doing the calculation.
2022-06-08 09:15:12 -04:00
Arthur Petukhovsky
a01999bc4a Replace most common remote logs with metrics (#1909) 2022-06-08 13:36:49 +03:00
chaitanya sharma
32e64afd54 Use better parallel build instructions in readme.md (#1908) 2022-06-08 11:25:37 +03:00
Kirill Bulatov
8a53472e4f Force etcd broker keys not to intersect 2022-06-08 11:21:05 +03:00
Dmitry Rodionov
6e26588d17 Allow customizing the shutdown condition in PostgresBackend
Use it in PageServerHandler to check the per-thread shutdown condition
from thread_mgr, which takes tenants and timelines into account
2022-06-07 22:11:54 +03:00
Arseny Sher
0b93253b3c Fix leaked keepalive task in s3 offloading leader election.
I still don't like the surroundings and feel we'd be better off not using the
election API at all, but this is a quick fix to keep CI green.

ref #1815
2022-06-07 15:17:57 +04:00
Dmitry Rodionov
7dc6beacbd make it possible to associate thread with a tenant after thread start 2022-06-07 12:59:35 +03:00
Thang Pham
6cfebc096f Add read/write throughput performance tests (#1883)
Part of #1467 

This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) obtained when running PG benchmarks against Neon and vanilla PG to measure the read/write throughput of the DB.
2022-06-06 12:32:10 -04:00
KlimentSerafimov
fecad1ca34 Resolving issue #1745. Added cluster option for SNI data (#1813)
* Added project option in case SNI data is missing. Resolving issue #1745.

* Added invariant checking for project name: if both sni_data and project_name are available then they should match.
2022-06-06 08:14:41 -04:00
bojanserafimov
92de8423af Remove dead code (#1886) 2022-06-05 09:18:11 -04:00
Dmitry Rodionov
e442f5357b unify two identical failpoints in flush_frozen_layer
probably a merge artifact
2022-06-03 19:36:09 +03:00
Arseny Sher
5a723d44cd Parametrize test_normal_work.
I like to run a small test locally, but let's avoid duplication.
2022-06-03 20:32:53 +04:00
Kirill Bulatov
2623193876 Remove pageserver_connstr from WAL stream logic 2022-06-03 17:30:36 +03:00
Arseny Sher
70a53c4b03 Get backup test_safekeeper_normal_work, but skip by default.
It is handy for development.
2022-06-03 16:12:14 +04:00
Arseny Sher
9e108102b3 Silence etcd safekeeper info key parse errors.
When we subscribe to everything, it is OK to receive updates other than safekeeper
timeline updates.
2022-06-03 16:12:14 +04:00
huming
9c846a93e8 chore(doc) 2022-06-03 14:24:27 +03:00
Kirill Bulatov
c5007d3916 Remove unused module 2022-06-03 00:23:13 +03:00
Kirill Bulatov
5b06599770 Simplify etcd key regex parsing 2022-06-03 00:23:13 +03:00
Kirill Bulatov
1d16ee92d4 Fix the Lsn difference reconnection 2022-06-03 00:23:13 +03:00
Kirill Bulatov
7933804284 Fix and test regex parsing 2022-06-03 00:23:13 +03:00
Kirill Bulatov
a91e0c299d Reproduce etcd parsing bug in Python tests 2022-06-03 00:23:13 +03:00
Kirill Bulatov
b0c4ec0594 Log storage sync and etcd events a bit better 2022-06-03 00:23:13 +03:00
bojanserafimov
90e2c9ee1f Rename zenith to neon in python tests (#1871) 2022-06-02 16:21:28 -04:00
Egor Suvorov
aba5e5f8b5 GitHub Actions: pin Rust version to 1.58 like on CircleCI
* Fix failing `cargo clippy` while we're here.
  The behavior has been changed in Rust 1.60: https://github.com/rust-lang/rust-clippy/issues/8928
* Add Rust version to the Cargo deps cache key
2022-06-02 17:45:53 +02:00
Dmitry Rodionov
b155fe0e2f avoid perf test result context for pg regress 2022-06-02 17:41:34 +03:00
Ryan Russell
c71faae2c6 Docs readability cont
Signed-off-by: Ryan Russell <git@ryanrussell.org>
2022-06-02 15:05:12 +02:00
128 changed files with 2098 additions and 2244 deletions

View File

@@ -57,7 +57,7 @@
args:
creates: "/storage/pageserver/data/tenants"
environment:
ZENITH_REPO_DIR: "/storage/pageserver/data"
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
@@ -131,7 +131,7 @@
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
ZENITH_REPO_DIR: "/storage/safekeeper/data"
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -750,7 +750,6 @@ workflows:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
context: PERF_TEST_RESULT_CONNSTR
matrix:
parameters:
build_type: ["debug", "release"]

View File

@@ -9,8 +9,8 @@ tmp_install
tmp_check_cli
test_output
.vscode
.zenith
integration_tests/.zenith
.neon
integration_tests/.neon
.mypy_cache
Dockerfile

View File

@@ -12,7 +12,7 @@ jobs:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 30
name: run regression test suite
@@ -87,7 +87,7 @@ jobs:
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh

5
.gitignore vendored
View File

@@ -5,8 +5,9 @@
__pycache__/
test_output/
.vscode
/.zenith
/integration_tests/.zenith
.idea
/.neon
/integration_tests/.neon
# Coverage
*.profraw

View File

@@ -6,5 +6,5 @@ target/
tmp_install/
__pycache__/
test_output/
.zenith/
.neon/
.git/

1
Cargo.lock generated
View File

@@ -811,6 +811,7 @@ name = "etcd_broker"
version = "0.1.0"
dependencies = [
"etcd-client",
"once_cell",
"regex",
"serde",
"serde_json",

View File

@@ -80,7 +80,7 @@ brew link --force libpq
```sh
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
make -j5
make -j`nproc`
```
#### dependency installation notes
@@ -93,7 +93,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
#### running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .zenith with proper paths to binaries and data
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -103,16 +103,16 @@ pageserver init succeeded
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting pageserver at '127.0.0.1:64000' in '.zenith'
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
initializing for sk 1 for 7676
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances
@@ -149,7 +149,7 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances

View File

@@ -21,9 +21,9 @@ use utils::{
use crate::safekeeper::SafekeeperNode;
//
// This data structures represents zenith CLI config
// This data structures represents neon_local CLI config
//
// It is deserialized from the .zenith/config file, or the config file passed
// It is deserialized from the .neon/config file, or the config file passed
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
@@ -34,8 +34,8 @@ pub struct LocalEnv {
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
// '.zenith' if not given.
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
@@ -177,6 +177,7 @@ pub struct SafekeeperConf {
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
}
impl Default for SafekeeperConf {
@@ -188,6 +189,7 @@ impl Default for SafekeeperConf {
sync: true,
remote_storage: None,
backup_threads: None,
auth_enabled: false,
}
}
}
@@ -337,7 +339,7 @@ impl LocalEnv {
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeper node. It is read by the 'zenith' command-line
@@ -481,9 +483,9 @@ impl LocalEnv {
}
fn base_path() -> PathBuf {
match std::env::var_os("ZENITH_REPO_DIR") {
match std::env::var_os("NEON_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".zenith"),
None => PathBuf::from(".neon"),
}
}

View File

@@ -149,6 +149,11 @@ impl SafekeeperNode {
if let Some(ref remote_storage) = self.conf.remote_storage {
cmd.args(&["--remote-storage", remote_storage]);
}
if self.conf.auth_enabled {
cmd.arg("--auth-validation-public-key-path");
// PathBuf is better passed as is, not via `String`.
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
}
fill_aws_secrets_vars(&mut cmd);
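
Connecting this to the `auth_enabled` field added to `SafekeeperConf` in the previous file, the overall flow can be pictured with the standalone sketch below (simplified stand-in types, not the real `control_plane` API):

```rust
use std::path::Path;
use std::process::Command;

// Simplified stand-in for control_plane's SafekeeperConf.
struct SafekeeperConf {
    auth_enabled: bool, // parsed from the neon_local config, defaults to false
}

// Simplified stand-in for SafekeeperNode::start(): if auth is enabled, pass the
// public key path to the safekeeper via the new command line argument.
fn safekeeper_command(conf: &SafekeeperConf, base_data_dir: &Path) -> Command {
    let mut cmd = Command::new("safekeeper");
    if conf.auth_enabled {
        cmd.arg("--auth-validation-public-key-path");
        cmd.arg(base_data_dir.join("auth_public_key.pem"));
    }
    cmd
}

fn main() {
    let conf = SafekeeperConf { auth_enabled: true };
    println!("{:?}", safekeeper_command(&conf, Path::new(".neon")));
}
```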

View File

@@ -188,7 +188,7 @@ Not currently committed but proposed:
3. Prefetching
- Why?
As far as pages in Zenith are loaded on demand, to reduce node startup time
and also sppedup some massive queries we need some mechanism for bulk loading to
and also speedup some massive queries we need some mechanism for bulk loading to
reduce page request round-trip overhead.
Currently Postgres is supporting prefetching only for bitmap scan.

View File

@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and
### **Checkpointing**
No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
No such mechanism is needed. Or we may look at the storage node as at kind of continuous checkpointer.
### **Full page writes (torn page protection)**

View File

@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
Alternatively, we could count only relation data. As in pg_database_size().
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
We will need to refactor the timeline_size counter or add another counter to implement it.
We will need to refactor the timeline_size counter or add another counter to implement it.
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
Then this size should be reported to compute node.
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
@@ -64,11 +64,11 @@ We should warn users if the limit is soon to be reached.
### **Reliability, failure modes and corner cases**
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.

View File

@@ -154,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
#### workdir (-D)
A directory in the file system, where pageserver will store its files.
The default is `./.zenith/`.
The default is `./.neon/`.
This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.

View File

@@ -9,6 +9,7 @@
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
once_cell = "1.8.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -1,90 +1,43 @@
//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
//! Intended to connect services to each other, not to store their data.
/// All broker keys, that are used when dealing with etcd.
pub mod subscription_key;
/// All broker values, possible to use when dealing with etcd.
pub mod subscription_value;
use std::{
collections::{hash_map, HashMap},
fmt::Display,
str::FromStr,
};
use regex::{Captures, Regex};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
pub use etcd_client::*;
use serde::de::DeserializeOwned;
use subscription_key::SubscriptionKey;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use utils::{
lsn::Lsn,
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};
use utils::zid::{NodeId, ZTenantTimelineId};
use crate::subscription_key::SubscriptionFullKey;
pub use etcd_client::*;
/// Default value to use for prefixing all etcd keys.
/// This allows isolating safekeeper/pageserver groups in the same etcd cluster.
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
#[derive(Debug, Deserialize, Serialize)]
struct SafekeeperTimeline {
safekeeper_id: NodeId,
info: SkTimelineInfo,
}
/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde(default)]
pub safekeeper_connstr: Option<String>,
#[serde(default)]
pub pageserver_connstr: Option<String>,
}
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd data: {0}")]
ParsingError(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// A way to control the data retrieval from a certain subscription.
pub struct SkTimelineSubscription {
safekeeper_timeline_updates:
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
kind: SkTimelineSubscriptionKind,
pub struct BrokerSubscription<V> {
value_updates: mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>>,
key: SubscriptionKey,
watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
impl SkTimelineSubscription {
impl<V> BrokerSubscription<V> {
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
pub async fn fetch_data(
&mut self,
) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
self.safekeeper_timeline_updates.recv().await
pub async fn fetch_data(&mut self) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, V>>> {
self.value_updates.recv().await
}
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
@@ -92,142 +45,90 @@ impl SkTimelineSubscription {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!(
"Failed to cancel timeline subscription, kind: {:?}",
self.kind
),
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
)
})?;
self.watcher_handle.await.map_err(|e| {
BrokerError::InternalError(format!(
"Failed to join the timeline updates task, kind: {:?}, error: {e}",
self.kind
"Failed to join the broker value updates task, kind: {:?}, error: {e}",
self.key
))
})?
}
}
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SkTimelineSubscriptionKind {
broker_etcd_prefix: String,
kind: SubscriptionKind,
}
impl SkTimelineSubscriptionKind {
pub fn all(broker_etcd_prefix: String) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::All,
}
}
pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Tenant(tenant),
}
}
pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Timeline(timeline),
}
}
fn watch_regex(&self) -> Regex {
match self.kind {
SubscriptionKind::All => Regex::new(&format!(
r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'everything' subscription"),
SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'tenant' subscription"),
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => Regex::new(&format!(
r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'timeline' subscription"),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
match self.kind {
SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
SubscriptionKind::Tenant(tenant_id) => {
format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
}
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => format!(
"{}/{tenant_id}/{timeline_id}/safekeeper",
self.broker_etcd_prefix
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SubscriptionKind {
/// Get every timeline update.
All,
/// Get certain tenant timelines' updates.
Tenant(ZTenantId),
/// Get certain timeline updates.
Timeline(ZTenantTimelineId),
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd key: {0}")]
KeyNotParsed(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// Creates a background task to poll etcd for timeline updates from safekeepers.
/// Stops and returns `Err` on any error during etcd communication.
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
/// exiting normally in such cases.
pub async fn subscribe_to_safekeeper_timeline_updates(
/// Etcd values are parsed as JSON into the type specified in the generic parameter.
pub async fn subscribe_for_json_values<V>(
client: &mut Client,
subscription: SkTimelineSubscriptionKind,
) -> Result<SkTimelineSubscription, BrokerError> {
info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
key: SubscriptionKey,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: DeserializeOwned + Send + 'static,
{
subscribe_for_values(client, key, |_, value_str| {
match serde_json::from_str::<V>(value_str) {
Ok(value) => Some(value),
Err(e) => {
error!("Failed to parse value str '{value_str}': {e}");
None
}
}
})
.await
}
/// Same as [`subscribe_for_json_values`], but allows specifying a custom parser for the etcd value string.
pub async fn subscribe_for_values<P, V>(
client: &mut Client,
key: SubscriptionKey,
value_parser: P,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: Send + 'static,
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
{
info!("Subscribing to broker value updates, key: {key:?}");
let subscription_key = key.clone();
let (watcher, mut stream) = client
.watch(
subscription.watch_key(),
Some(WatchOptions::new().with_prefix()),
)
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
.await
.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to init the watch for subscription {subscription:?}"),
format!("Failed to init the watch for subscription {key:?}"),
)
})?;
let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
let subscription_kind = subscription.kind;
let regex = subscription.watch_regex();
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
let watcher_handle = tokio::spawn(async move {
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
"Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
)))? {
if resp.canceled() {
info!("Watch for timeline updates subscription was canceled, exiting");
break;
}
let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
let mut value_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, V>> = HashMap::new();
// Keep track that the timeline data updates from etcd arrive in the right order.
// https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
// > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
let mut value_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
let events = resp.events();
@@ -238,113 +139,77 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
if let Some(new_etcd_kv) = event.kv() {
let new_kv_version = new_etcd_kv.version();
match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
Ok(Some((zttid, timeline))) => {
match timeline_updates
.entry(zttid)
.or_default()
.entry(timeline.safekeeper_id)
{
hash_map::Entry::Occupied(mut o) => {
let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
if old_etcd_kv_version < new_kv_version {
o.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
Ok(Some((key, value))) => match value_updates
.entry(key.id)
.or_default()
.entry(key.node_id)
{
hash_map::Entry::Occupied(mut o) => {
let old_etcd_kv_version = value_etcd_versions.get(&key.id).copied().unwrap_or(i64::MIN);
if old_etcd_kv_version < new_kv_version {
o.insert(value);
value_etcd_versions.insert(key.id,new_kv_version);
} else {
debug!("Skipping etcd timeline update due to older version compared to one that's already stored");
}
}
}
hash_map::Entry::Vacant(v) => {
v.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
}
}
}
Ok(None) => {}
Err(e) => error!("Failed to parse timeline update: {e}"),
hash_map::Entry::Vacant(v) => {
v.insert(value);
value_etcd_versions.insert(key.id,new_kv_version);
}
},
Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
};
}
}
}
if let Err(e) = timeline_updates_sender.send(timeline_updates) {
info!("Timeline updates sender got dropped, exiting: {e}");
break;
if !value_updates.is_empty() {
if let Err(e) = value_updates_sender.send(value_updates) {
info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
break;
}
}
}
Ok(())
});
}.instrument(info_span!("etcd_broker")));
Ok(SkTimelineSubscription {
kind: subscription,
safekeeper_timeline_updates,
Ok(BrokerSubscription {
key: subscription_key,
value_updates: value_updates_receiver,
watcher_handle,
watcher,
})
}
fn parse_etcd_key_value(
subscription_kind: SubscriptionKind,
regex: &Regex,
fn parse_etcd_kv<P, V>(
kv: &KeyValue,
) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
})?) {
caps
} else {
return Ok(None);
};
let (zttid, safekeeper_id) = match subscription_kind {
SubscriptionKind::All => (
ZTenantTimelineId::new(
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
),
NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Tenant(tenant_id) => (
ZTenantTimelineId::new(
tenant_id,
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
),
NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Timeline(zttid) => (
zttid,
NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
),
};
let info_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
})?;
Ok(Some((
zttid,
SafekeeperTimeline {
safekeeper_id,
info: serde_json::from_str(info_str).map_err(|e| {
BrokerError::ParsingError(format!(
"Failed to parse '{info_str}' as safekeeper timeline info: {e}"
))
})?,
},
)))
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
value_parser: &P,
cluster_prefix: &str,
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
where
T: FromStr,
<T as FromStr>::Err: Display,
P: Fn(SubscriptionFullKey, &str) -> Option<V>,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
let key_str = kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
})?;
let value_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
})?;
if !key_str.starts_with(cluster_prefix) {
return Err(BrokerError::KeyNotParsed(format!(
"KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
)));
}
let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
})?;
Ok(value_parser(key, value_str).map(|value| (key, value)))
}
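
A hedged usage sketch of the new generic subscription API shown above (it assumes the refactored `etcd_broker` crate, the `utils` crate for IDs, `anyhow` for error handling, and an etcd instance on the default port; this is not taken from the diff itself):

```rust
use etcd_broker::{
    subscribe_for_json_values, subscription_key::SubscriptionKey,
    subscription_value::SkTimelineInfo, Client, DEFAULT_NEON_BROKER_ETCD_PREFIX,
};
use utils::zid::ZTenantTimelineId;

async fn watch_timeline(id: ZTenantTimelineId) -> anyhow::Result<()> {
    let mut client = Client::connect(["127.0.0.1:2379"], None).await?;
    // Subscribe to safekeeper timeline_info values for one timeline.
    let key = SubscriptionKey::sk_timeline_info(DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(), id);
    let mut subscription = subscribe_for_json_values::<SkTimelineInfo>(&mut client, key).await?;

    // Each batch maps timeline id -> (safekeeper NodeId -> parsed SkTimelineInfo).
    while let Some(updates) = subscription.fetch_data().await {
        for (timeline, per_safekeeper) in updates {
            println!("{timeline}: updates from {} safekeepers", per_safekeeper.len());
        }
    }
    Ok(())
}
```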

View File

@@ -0,0 +1,310 @@
//! Etcd broker keys, used in the project and shared between instances.
//! The keys are split into two categories:
//!
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
//! Always returned from etcd in this form; always starts with the user key provided.
//!
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
//! Full key always starts with the user input one, due to etcd subscription properties.
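//!
//! For illustration only, reusing the example tenant and timeline IDs from the README
//! earlier in this diff (the trailing node id `1` is hypothetical), a full key published
//! by a safekeeper's `timeline_info` operation would look like:
//! `neon/9ef87a5bf0d92544f6fafeeb3239695c/de200bd42b49cc1814412c7e592dd6e9/safekeeper/timeline_info/1`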
use std::{fmt::Display, str::FromStr};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SubscriptionKey {
/// Generic cluster prefix, allowing multiple logical groups to use the same etcd instance.
pub cluster_prefix: String,
/// The subscription kind.
pub kind: SubscriptionKind,
}
/// All currently possible key kinds of an etcd broker subscription.
/// Etcd works such that every key that starts with the given subscription key is considered matching and
/// returned as part of the subscription.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubscriptionKind {
/// Get every update in etcd.
All,
/// Get etcd updates for any timeline of a certain tenant, affected by any operation from any node kind.
TenantTimelines(ZTenantId),
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
Timeline(ZTenantTimelineId),
/// Get etcd timeline updates, specific to a certain node kind.
Node(ZTenantTimelineId, NodeKind),
/// Get etcd timeline updates for a certain operation on specific nodes.
Operation(ZTenantTimelineId, NodeKind, OperationKind),
}
/// All kinds of nodes, able to write into etcd.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
Safekeeper,
Pageserver,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperationKind {
Safekeeper(SkOperationKind),
}
/// Current operations, running inside the safekeeper node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkOperationKind {
TimelineInfo,
WalBackup,
}
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
.expect("wrong subscription full etcd key regex")
});
/// Full key, received from etcd during any of the component's work.
/// No other etcd keys are considered during system's work.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SubscriptionFullKey {
pub id: ZTenantTimelineId,
pub node_kind: NodeKind,
pub operation: OperationKind,
pub node_id: NodeId,
}
impl SubscriptionKey {
/// Subscribes for all etcd updates.
pub fn all(cluster_prefix: String) -> Self {
SubscriptionKey {
cluster_prefix,
kind: SubscriptionKind::All,
}
}
/// Subscribes to a given timeline info updates from safekeepers.
pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(
timeline,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
),
}
}
/// Subscribes to all timeline updates during specific operations, running on the corresponding nodes.
pub fn operation(
cluster_prefix: String,
timeline: ZTenantTimelineId,
node_kind: NodeKind,
operation: OperationKind,
) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
let cluster_prefix = &self.cluster_prefix;
match self.kind {
SubscriptionKind::All => cluster_prefix.to_string(),
SubscriptionKind::TenantTimelines(tenant_id) => {
format!("{cluster_prefix}/{tenant_id}")
}
SubscriptionKind::Timeline(id) => {
format!("{cluster_prefix}/{id}")
}
SubscriptionKind::Node(id, node_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}")
}
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
}
}
}
}
impl Display for OperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OperationKind::Safekeeper(o) => o.fmt(f),
}
}
}
impl FromStr for OperationKind {
type Err = String;
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
match operation_kind_str {
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
}
}
}
impl Display for SubscriptionFullKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
id,
node_kind,
operation,
node_id,
} = self;
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
}
}
impl FromStr for SubscriptionFullKey {
type Err = String;
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
Some(captures) => captures,
None => {
return Err(format!(
"Subscription kind str does not match a subscription full key regex {}",
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
));
}
};
Ok(Self {
id: ZTenantTimelineId::new(
parse_capture(&key_captures, 1)?,
parse_capture(&key_captures, 2)?,
),
node_kind: parse_capture(&key_captures, 3)?,
operation: parse_capture(&key_captures, 4)?,
node_id: NodeId(parse_capture(&key_captures, 5)?),
})
}
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}
impl Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Safekeeper => write!(f, "safekeeper"),
Self::Pageserver => write!(f, "pageserver"),
}
}
}
impl FromStr for NodeKind {
type Err = String;
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
match node_kind_str {
"safekeeper" => Ok(Self::Safekeeper),
"pageserver" => Ok(Self::Pageserver),
_ => Err(format!("Invalid node kind: {node_kind_str}")),
}
}
}
impl Display for SkOperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TimelineInfo => write!(f, "timeline_info"),
Self::WalBackup => write!(f, "wal_backup"),
}
}
}
impl FromStr for SkOperationKind {
type Err = String;
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
match operation_str {
"timeline_info" => Ok(Self::TimelineInfo),
"wal_backup" => Ok(Self::WalBackup),
_ => Err(format!("Invalid operation: {operation_str}")),
}
}
}
#[cfg(test)]
mod tests {
use utils::zid::ZTimelineId;
use super::*;
#[test]
fn full_cluster_key_parsing() {
let prefix = "neon";
let node_kind = NodeKind::Safekeeper;
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
let tenant_id = ZTenantId::generate();
let timeline_id = ZTimelineId::generate();
let id = ZTenantTimelineId::new(tenant_id, timeline_id);
let node_id = NodeId(1);
let timeline_subscription_keys = [
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::All,
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::TenantTimelines(tenant_id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Timeline(id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Node(id, node_kind),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
},
];
let full_key_string = format!(
"{}/{node_id}",
timeline_subscription_keys.last().unwrap().watch_key()
);
for key in timeline_subscription_keys {
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with each of the keys, but {key:?} did not match");
}
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
});
assert_eq!(
full_key,
SubscriptionFullKey {
id,
node_kind,
operation: operation_kind,
node_id
}
)
}
}

View File

@@ -0,0 +1,35 @@
//! Module for the values to put into etcd.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
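///
/// Stored in etcd as a JSON value; an illustrative example (hypothetical numbers, LSNs in
/// their usual `X/Y` display form):
/// `{"last_log_term":1,"flush_lsn":"0/16F9A00","commit_lsn":"0/16F9A00","backup_lsn":null,"remote_consistent_lsn":null,"peer_horizon_lsn":null,"safekeeper_connstr":"127.0.0.1:5454"}`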
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}

View File

@@ -4,6 +4,7 @@ use log::*;
use postgres::types::PgLsn;
use postgres::Client;
use std::cmp::Ordering;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Instant;
@@ -69,6 +70,12 @@ impl Conf {
pub fn start_server(&self) -> Result<PostgresServer> {
info!("Starting Postgres server in {:?}", self.datadir);
let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
format!(
"Failed to create pg.log file in directory {}",
self.datadir.display()
)
})?;
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
let server_process = self
@@ -84,7 +91,7 @@ impl Conf {
// Disable background processes as much as possible
.args(&["-c", "wal_writer_delay=10s"])
.args(&["-c", "autovacuum=off"])
.stderr(Stdio::null())
.stderr(Stdio::from(log_file))
.spawn()?;
let server = PostgresServer {
process: server_process,

View File

@@ -13,13 +13,10 @@ use std::fmt;
use std::io::{self, Write};
use std::net::{Shutdown, SocketAddr, TcpStream};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tracing::*;
static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
pub trait Handler {
/// Handle single query.
/// postgres_backend will issue ReadyForQuery after calling this (this
@@ -45,6 +42,10 @@ pub trait Handler {
fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
bail!("JWT auth failed")
}
fn is_shutdown_requested(&self) -> bool {
false
}
}
/// PostgresBackend protocol state.
@@ -274,7 +275,7 @@ impl PostgresBackend {
let mut unnamed_query_string = Bytes::new();
while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
while !handler.is_shutdown_requested() {
match self.read_message() {
Ok(message) => {
if let Some(msg) = message {
@@ -493,8 +494,3 @@ impl PostgresBackend {
Ok(ProcessMsgResult::Continue)
}
}
// Set the flag to inform connections to cancel
pub fn set_pgbackend_shutdown_requested() {
PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
}
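
The replacement design in one self-contained sketch (simplified stand-ins for `Handler` and `PageServerHandler`; in the real code the pageserver handler consults `thread_mgr`, as the page_service diff further down shows):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Simplified stand-in for postgres_backend::Handler.
trait Handler {
    fn is_shutdown_requested(&self) -> bool {
        false // default: keep serving
    }
}

// Simplified stand-in for PageServerHandler.
struct PageServerHandler<'a> {
    thread_shutdown: &'a AtomicBool,
}

impl Handler for PageServerHandler<'_> {
    fn is_shutdown_requested(&self) -> bool {
        self.thread_shutdown.load(Ordering::Relaxed)
    }
}

// The backend's message loop now asks the handler instead of a global flag.
fn run(handler: &dyn Handler) {
    while !handler.is_shutdown_requested() {
        // read_message() / process_query() would run here.
        break; // keep the sketch finite
    }
}

fn main() {
    let shutdown = AtomicBool::new(false);
    run(&PageServerHandler { thread_shutdown: &shutdown });
}
```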

View File

@@ -269,15 +269,18 @@ impl FeStartupPacket {
.next()
.context("expected even number of params in StartupMessage")?;
if name == "options" {
//parsing options arguments "..&options=<var>:<val>,.."
//extended example and set of options:
//https://github.com/neondatabase/neon/blob/main/docs/rfcs/016-connection-routing.md#connection-url
for cmdopt in value.split(',') {
let nameval: Vec<&str> = cmdopt.split(':').collect();
// parsing options arguments "...&options=<var0>%3D<val0>+<var1>%3D<val1>..."
// '%3D' is '=' and '+' is ' '
// Note: we allow users that don't have SNI capabilities,
// to pass a special keyword argument 'project'
// to be used to determine the cluster name by the proxy.
// TODO: write a unit test for this and refactor it into its own function.
for cmdopt in value.split(' ') {
let nameval: Vec<&str> = cmdopt.split('=').collect();
if nameval.len() == 2 {
params.insert(nameval[0].to_string(), nameval[1].to_string());
} else {
// TODO: inform the user / return an error message if the options format is wrong.
}
}
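// For example (illustrative values): a connection whose URL carried
// "...&options=project%3Dmyproject" reaches this point, after the decoding described
// above, with value = "project=myproject"; the loop then stores
// params["project"] = "myproject", which the proxy can use to pick the cluster.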
} else {
@@ -923,10 +926,10 @@ impl<'a> BeMessage<'a> {
}
}
// Zenith extension of postgres replication protocol
// See ZENITH_STATUS_UPDATE_TAG_BYTE
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ZenithFeedback {
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,
// Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -936,13 +939,13 @@ pub struct ZenithFeedback {
pub ps_replytime: SystemTime,
}
// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
// Do not remove previously available fields because this might be backwards incompatible.
pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;
impl ZenithFeedback {
pub fn empty() -> ZenithFeedback {
ZenithFeedback {
impl ReplicationFeedback {
pub fn empty() -> ReplicationFeedback {
ReplicationFeedback {
current_timeline_size: 0,
ps_writelsn: 0,
ps_applylsn: 0,
@@ -951,7 +954,7 @@ impl ZenithFeedback {
}
}
// Serialize ZenithFeedback using custom format
// Serialize ReplicationFeedback using custom format
// to support protocol extensibility.
//
// Following layout is used:
@@ -962,7 +965,7 @@ impl ZenithFeedback {
// uint32 - value length in bytes
// value itself
pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
write_cstr(&Bytes::from("current_timeline_size"), buf)?;
buf.put_i32(8);
buf.put_u64(self.current_timeline_size);
@@ -989,9 +992,9 @@ impl ZenithFeedback {
Ok(())
}
// Deserialize ZenithFeedback message
pub fn parse(mut buf: Bytes) -> ZenithFeedback {
let mut zf = ZenithFeedback::empty();
// Deserialize ReplicationFeedback message
pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
let mut zf = ReplicationFeedback::empty();
let nfields = buf.get_u8();
let mut i = 0;
while i < nfields {
@@ -1032,14 +1035,14 @@ impl ZenithFeedback {
_ => {
let len = buf.get_i32();
warn!(
"ZenithFeedback parse. unknown key {} of len {}. Skip it.",
"ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
key, len
);
buf.advance(len as usize);
}
}
}
trace!("ZenithFeedback parsed is {:?}", zf);
trace!("ReplicationFeedback parsed is {:?}", zf);
zf
}
}
@@ -1049,8 +1052,8 @@ mod tests {
use super::*;
#[test]
fn test_zenithfeedback_serialization() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_serialization() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1059,13 +1062,13 @@ mod tests {
let mut data = BytesMut::new();
zf.serialize(&mut data).unwrap();
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
#[test]
fn test_zenithfeedback_unknown_key() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_unknown_key() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1076,7 +1079,7 @@ mod tests {
// Add an extra field to the buffer and adjust number of keys
if let Some(first) = data.first_mut() {
*first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
*first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
}
write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1084,7 +1087,7 @@ mod tests {
data.put_u64(42);
// Parse serialized data and check that new field is not parsed
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
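
For reference, the custom feedback format exercised by these tests is a simple tag-length-value layout (field count, then per field a null-terminated name, an i32 length, and the raw value). A standalone sketch of encoding just the `current_timeline_size` field, using the `bytes` crate like the real code (an illustration, not the actual `ReplicationFeedback::serialize`):

```rust
use bytes::{BufMut, BytesMut};

// Standalone illustration of the layout described above ReplicationFeedback::serialize:
// u8 number of fields, then for each field a null-terminated key name,
// an i32 value length, and the value bytes themselves.
fn encode_single_field(current_timeline_size: u64) -> BytesMut {
    let mut buf = BytesMut::new();
    buf.put_u8(1); // number of keys in this sketch
    buf.put_slice(b"current_timeline_size\0"); // write_cstr equivalent
    buf.put_i32(8); // value length in bytes
    buf.put_u64(current_timeline_size);
    buf
}

fn main() {
    let buf = encode_single_field(12_345_678);
    // 1 (count) + 22 (name + NUL) + 4 (length) + 8 (value) bytes
    assert_eq!(buf.len(), 35);
    println!("{} bytes", buf.len());
}
```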

View File

@@ -69,7 +69,7 @@ Repository
The repository stores all the page versions, or WAL records needed to
reconstruct them. Each tenant has a separate Repository, which is
stored in the .zenith/tenants/<tenantid> directory.
stored in the .neon/tenants/<tenantid> directory.
Repository is an abstract trait, defined in `repository.rs`. It is
implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
records, whenever we need to reconstruct a page version from WAL to
satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
for a page. The WAL redo manager uses a Postgres process running in
special zenith wal-redo mode to do the actual WAL redo, and
special Neon wal-redo mode to do the actual WAL redo, and
communicates with the process using a pipe.

View File

@@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
let workdir = workdir
.canonicalize()
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;

View File

@@ -4,7 +4,7 @@
//! The functions here are responsible for locating the correct layer for the
//! get/put call, tracing timeline branching history as needed.
//!
//! The files are stored in the .zenith/tenants/<tenantid>/timelines/<timelineid>
//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
//! directory. See layered_repository/README for how the files are managed.
//! In addition to the layer files, there is a metadata file in the same
//! directory that contains information about the timeline, in particular its
@@ -148,7 +148,7 @@ lazy_static! {
.expect("failed to define a metric");
}
/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
/// Parts of the `.neon/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
///
@@ -1727,9 +1727,7 @@ impl LayeredTimeline {
new_delta_path.clone(),
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
])?;
fail_point!("checkpoint-before-sync");
fail_point!("flush-frozen");
fail_point!("flush-frozen-before-sync");
// Finally, replace the frozen in-memory layer with the new on-disk layer
{

View File

@@ -123,7 +123,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
a range of LSNs (or a single LSN, in case of image layers). You can think of it
as a rectangle in the two-dimensional key-LSN space. The layer files for each
timeline are stored in the timeline's subdirectory under
`.zenith/tenants/<tenantid>/timelines`.
`.neon/tenants/<tenantid>/timelines`.
There are two kind of layer files: images, and delta layers. An image file
contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -178,7 +178,7 @@ version, and how branching and GC works is still valid.
The full path of a delta file looks like this:
```
.zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
.neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
```
For simplicity, the examples below use a simplified notation for the
@@ -409,7 +409,7 @@ removed because there is no newer layer file for the table.
Things get slightly more complicated with multiple branches. All of
the above still holds, but in addition to recent files we must also
retain older shapshot files that are still needed by child branches.
retain older snapshot files that are still needed by child branches.
For example, if child branch is created at LSN 150, and the 'customers'
table is updated on the branch, you would have these files:

View File

@@ -24,7 +24,6 @@ pub mod walredo;
use lazy_static::lazy_static;
use tracing::info;
use utils::postgres_backend;
use crate::thread_mgr::ThreadKind;
use metrics::{register_int_gauge_vec, IntGaugeVec};
@@ -73,7 +72,6 @@ pub fn shutdown_pageserver(exit_code: i32) {
thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);
// Shut down any page service threads.
postgres_backend::set_pgbackend_shutdown_requested();
thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);
// Shut down all the tenants. This flushes everything to disk and kills

View File

@@ -20,7 +20,7 @@
//! assign a buffer for a page, you must hold the mapping lock and the lock on
//! the slot at the same time.
//!
//! Whenever you need to hold both locks simultenously, the slot lock must be
//! Whenever you need to hold both locks simultaneously, the slot lock must be
//! acquired first. This consistent ordering avoids deadlocks. To look up a page
//! in the cache, you would first look up the mapping, while holding the mapping
//! lock, and then lock the slot. You must release the mapping lock in between,

View File

@@ -370,6 +370,10 @@ impl PageServerHandler {
) -> anyhow::Result<()> {
let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
// NOTE: pagerequests handler exits when connection is closed,
// so there is no need to reset the association
thread_mgr::associate_with(Some(tenantid), Some(timelineid));
// Check that the timeline exists
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
.context("Cannot load local timeline")?;
@@ -672,6 +676,10 @@ impl postgres_backend::Handler for PageServerHandler {
Ok(())
}
fn is_shutdown_requested(&self) -> bool {
thread_mgr::is_shutdown_requested()
}
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
@@ -802,7 +810,6 @@ impl postgres_backend::Handler for PageServerHandler {
.map(|h| h.as_str().parse())
.unwrap_or_else(|| Ok(repo.get_gc_horizon()))?;
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// Use tenant's pitr setting
let pitr = repo.get_pitr_interval();
let result = repo.gc_iteration(Some(timelineid), gc_horizon, pitr, true)?;

View File

@@ -1,223 +0,0 @@
//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use tracing::{debug, error, info};
use utils::zid::ZTenantTimelineId;
use crate::remote_storage::{
storage_sync::{SyncQueue, SyncTask},
RemoteStorage,
};
use super::{LayersDeletion, SyncData};
/// Attempts to remove the timeline layers from the remote storage.
/// If the task had not adjusted the metadata before, the deletion will fail.
pub(super) async fn delete_timeline_layers<'a, P, S>(
storage: &'a S,
sync_queue: &SyncQueue,
sync_id: ZTenantTimelineId,
mut delete_data: SyncData<LayersDeletion>,
) -> bool
where
P: std::fmt::Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
if !delete_data.data.deletion_registered {
error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
return false;
}
if delete_data.data.layers_to_delete.is_empty() {
info!("No layers to delete, skipping");
return true;
}
let layers_to_delete = delete_data
.data
.layers_to_delete
.drain()
.collect::<Vec<_>>();
debug!("Layers to delete: {layers_to_delete:?}");
info!("Deleting {} timeline layers", layers_to_delete.len());
let mut delete_tasks = layers_to_delete
.into_iter()
.map(|local_layer_path| async {
let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
local_layer_path.display()
)
}) {
Ok(path) => path,
Err(e) => return Err((e, local_layer_path)),
};
match storage.delete(&storage_path).await.with_context(|| {
format!(
"Failed to delete remote layer from storage at '{:?}'",
storage_path
)
}) {
Ok(()) => Ok(local_layer_path),
Err(e) => Err((e, local_layer_path)),
}
})
.collect::<FuturesUnordered<_>>();
let mut errored = false;
while let Some(deletion_result) = delete_tasks.next().await {
match deletion_result {
Ok(local_layer_path) => {
debug!(
"Successfully deleted layer {} for timeline {sync_id}",
local_layer_path.display()
);
delete_data.data.deleted_layers.insert(local_layer_path);
}
Err((e, local_layer_path)) => {
errored = true;
error!(
"Failed to delete layer {} for timeline {sync_id}: {e:?}",
local_layer_path.display()
);
delete_data.data.layers_to_delete.insert(local_layer_path);
}
}
}
if errored {
debug!("Reenqueuing failed delete task for timeline {sync_id}");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
}
errored
}
#[cfg(test)]
mod tests {
use std::{collections::HashSet, num::NonZeroUsize};
use itertools::Itertools;
use tempfile::tempdir;
use tokio::fs;
use utils::lsn::Lsn;
use crate::{
remote_storage::{
storage_sync::test_utils::{create_local_timeline, dummy_metadata},
LocalFs,
},
repository::repo_harness::{RepoHarness, TIMELINE_ID},
};
use super::*;
#[tokio::test]
async fn delete_timeline_negative() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline_negative")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: 1,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::new(),
deletion_registered: false,
},
},
)
.await;
assert!(
!deleted,
"Should not start the deletion for task with delete metadata unregistered"
);
Ok(())
}
#[tokio::test]
async fn delete_timeline() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let layer_files = ["a", "b", "c", "d"];
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let current_retries = 3;
let metadata = dummy_metadata(Lsn(0x30));
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
let timeline_upload =
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
for local_path in timeline_upload.layers_to_upload {
let remote_path = storage.storage_path(&local_path)?;
let remote_parent_dir = remote_path.parent().unwrap();
if !remote_parent_dir.exists() {
fs::create_dir_all(&remote_parent_dir).await?;
}
fs::copy(&local_path, &remote_path).await?;
}
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
layer_files
.iter()
.map(|layer_str| layer_str.to_string())
.sorted()
.collect::<Vec<_>>(),
"Expect to have all layer files remotely before deletion"
);
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: current_retries,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::from([
local_timeline_path.join("a"),
local_timeline_path.join("c"),
local_timeline_path.join("something_different"),
]),
deletion_registered: true,
},
},
)
.await;
assert!(deleted, "Should be able to delete timeline files");
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
vec!["b".to_string(), "d".to_string()],
"Expect to have only non-deleted files remotely"
);
Ok(())
}
}

View File

@@ -195,8 +195,9 @@ impl Display for TimelineSyncStatusUpdate {
f.write_str(s)
}
}
///
/// A repository corresponds to one .zenith directory. One repository holds multiple
/// A repository corresponds to one .neon directory. One repository holds multiple
/// timelines, forked off from the same initial call to 'initdb'.
pub trait Repository: Send + Sync {
type Timeline: Timeline;
@@ -242,7 +243,7 @@ pub trait Repository: Send + Sync {
///
/// 'timelineid' specifies the timeline to GC, or None for all.
/// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
/// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
/// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
/// to make tests more deterministic.
/// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
fn gc_iteration(

View File

@@ -186,8 +186,8 @@ use crate::{
};
use metrics::{
register_histogram_vec, register_int_counter, register_int_gauge, HistogramVec, IntCounter,
IntGauge,
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
@@ -208,14 +208,17 @@ lazy_static! {
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
"pageserver_remote_storage_image_sync_seconds",
"Time took to synchronize (download or upload) a whole pageserver image. \
Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
&["operation_kind", "status"],
vec![
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
]
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
&["tenant_id", "timeline_id", "operation_kind", "status"],
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
)
.expect("failed to register pageserver image sync time histogram vec");
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_remote_index_uploads_total",
"Number of remote index uploads",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver remote index upload vec");
}
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
@@ -892,7 +895,7 @@ fn storage_sync_loop<P, S>(
REMAINING_SYNC_ITEMS.set(remaining_queue_length as i64);
if remaining_queue_length > 0 || !batched_tasks.is_empty() {
info!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
debug!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
} else {
debug!("No tasks to process");
continue;
@@ -1146,19 +1149,19 @@ where
.await
{
DownloadedTimeline::Abort => {
register_sync_status(sync_start, task_name, None);
register_sync_status(sync_id, sync_start, task_name, None);
if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
}
}
DownloadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
DownloadedTimeline::Successful(mut download_data) => {
match update_local_metadata(conf, sync_id, current_remote_timeline).await {
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
register_sync_status(sync_id, sync_start, task_name, Some(true));
return Some(TimelineSyncStatusUpdate::Downloaded);
}
Err(e) => {
@@ -1169,7 +1172,7 @@ where
error!("Failed to update local timeline metadata: {e:?}");
download_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Download(download_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
@@ -1186,7 +1189,7 @@ async fn update_local_metadata(
let remote_metadata = match remote_timeline {
Some(timeline) => &timeline.metadata,
None => {
info!("No remote timeline to update local metadata from, skipping the update");
debug!("No remote timeline to update local metadata from, skipping the update");
return Ok(());
}
};
@@ -1265,14 +1268,14 @@ async fn delete_timeline_data<P, S>(
error!("Failed to update remote timeline {sync_id}: {e:?}");
new_delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
}
timeline_delete.deletion_registered = true;
let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
register_sync_status(sync_start, task_name, Some(sync_status));
register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
}
async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1306,7 +1309,7 @@ async fn upload_timeline_data<P, S>(
.await
{
UploadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1325,13 +1328,13 @@ async fn upload_timeline_data<P, S>(
.await
{
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
register_sync_status(sync_id, sync_start, task_name, Some(true));
}
Err(e) => {
error!("Failed to update remote timeline {sync_id}: {e:?}");
uploaded_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
@@ -1421,7 +1424,14 @@ where
IndexPart::from_remote_timeline(&timeline_path, updated_remote_timeline)
.context("Failed to create an index part from the updated remote timeline")?;
info!("Uploading remote index for the timeline");
debug!("Uploading remote index for the timeline");
REMOTE_INDEX_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
upload_index_part(conf, storage, sync_id, new_index_part)
.await
.context("Failed to upload new index part")
@@ -1590,12 +1600,24 @@ fn compare_local_and_remote_timeline(
(initial_timeline_status, awaits_download)
}
fn register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option<bool>) {
fn register_sync_status(
sync_id: ZTenantTimelineId,
sync_start: Instant,
sync_name: &str,
sync_status: Option<bool>,
) {
let secs_elapsed = sync_start.elapsed().as_secs_f64();
info!("Processed a sync task in {secs_elapsed:.2} seconds");
debug!("Processed a sync task in {secs_elapsed:.2} seconds");
let tenant_id = sync_id.tenant_id.to_string();
let timeline_id = sync_id.timeline_id.to_string();
match sync_status {
Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]),
Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]),
Some(true) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
}
Some(false) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
}
None => return,
}
.observe(secs_elapsed)
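For reference, a minimal sketch (not part of the diff) of how the relabelled histogram is exercised, mirroring register_sync_status above; the tenant and timeline ids are placeholders:
// Record a successful 0.42-second upload for one tenant/timeline.
IMAGE_SYNC_TIME
    .with_label_values(&["<tenant_id>", "<timeline_id>", "upload", "success"])
    .observe(0.42);
// With the standard Prometheus exposition this produces per-tenant series such as
// pageserver_remote_storage_image_sync_seconds_bucket{tenant_id="...",timeline_id="...",operation_kind="upload",status="success",le="0.5"},
// so sync latency can now be broken down by tenant and timeline.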

View File

@@ -4,6 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use lazy_static::lazy_static;
use remote_storage::RemoteStorage;
use tokio::fs;
use tracing::{debug, error, info, warn};
@@ -17,6 +18,16 @@ use super::{
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use metrics::{register_int_counter_vec, IntCounterVec};
lazy_static! {
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_no_layers_uploads_total",
"Number of skipped uploads due to no layers",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver no layers upload vec");
}
/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<P, S>(
@@ -102,7 +113,13 @@ where
.collect::<Vec<_>>();
if layers_to_upload.is_empty() {
info!("No layers to upload after filtering, aborting");
debug!("No layers to upload after filtering, aborting");
NO_LAYERS_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
return UploadedTimeline::Successful(upload_data);
}

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1_000_000;
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
}
/// Per-tenant configuration options

View File

@@ -108,15 +108,21 @@ pub enum ThreadKind {
StorageSync,
}
struct MutableThreadState {
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
/// Handle for waiting for the thread to exit. It can be None if the
/// thread has already exited.
join_handle: Option<JoinHandle<()>>,
}
struct PageServerThread {
_thread_id: u64,
kind: ThreadKind,
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
name: String,
// To request thread shutdown, set the flag, and send a dummy message to the
@@ -124,9 +130,7 @@ struct PageServerThread {
shutdown_requested: AtomicBool,
shutdown_tx: watch::Sender<()>,
/// Handle for waiting for the thread to exit. It can be None, if the
/// the thread has already exited.
join_handle: Mutex<Option<JoinHandle<()>>>,
mutable: Mutex<MutableThreadState>,
}
/// Launch a new thread
@@ -145,29 +149,27 @@ where
{
let (shutdown_tx, shutdown_rx) = watch::channel(());
let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
let thread = PageServerThread {
let thread = Arc::new(PageServerThread {
_thread_id: thread_id,
kind,
tenant_id,
timeline_id,
name: name.to_string(),
shutdown_requested: AtomicBool::new(false),
shutdown_tx,
join_handle: Mutex::new(None),
};
let thread_rc = Arc::new(thread);
let mut jh_guard = thread_rc.join_handle.lock().unwrap();
mutable: Mutex::new(MutableThreadState {
tenant_id,
timeline_id,
join_handle: None,
}),
});
THREADS
.lock()
.unwrap()
.insert(thread_id, Arc::clone(&thread_rc));
.insert(thread_id, Arc::clone(&thread));
let thread_rc2 = Arc::clone(&thread_rc);
let mut thread_mut = thread.mutable.lock().unwrap();
let thread_cloned = Arc::clone(&thread);
let thread_name = name.to_string();
let join_handle = match thread::Builder::new()
.name(name.to_string())
@@ -175,7 +177,7 @@ where
thread_wrapper(
thread_name,
thread_id,
thread_rc2,
thread_cloned,
shutdown_rx,
shutdown_process_on_error,
f,
@@ -189,8 +191,8 @@ where
return Err(err);
}
};
*jh_guard = Some(join_handle);
drop(jh_guard);
thread_mut.join_handle = Some(join_handle);
drop(thread_mut);
// The thread is now running. Nothing more to do here
Ok(thread_id)
@@ -229,19 +231,20 @@ fn thread_wrapper<F>(
.remove(&thread_id)
.expect("no thread in registry");
let thread_mut = thread.mutable.lock().unwrap();
match result {
Ok(Ok(())) => debug!("Thread '{}' exited normally", thread_name),
Ok(Err(err)) => {
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
@@ -249,19 +252,29 @@ fn thread_wrapper<F>(
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
}
}
// Expected to be called from within the thread itself: it updates the calling thread's tenant/timeline association.
pub fn associate_with(tenant_id: Option<ZTenantId>, timeline_id: Option<ZTimelineId>) {
CURRENT_THREAD.with(|ct| {
let borrowed = ct.borrow();
let mut thread_mut = borrowed.as_ref().unwrap().mutable.lock().unwrap();
thread_mut.tenant_id = tenant_id;
thread_mut.timeline_id = timeline_id;
});
}
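For illustration (not part of the diff), a minimal sketch of how a serving thread uses the new hook, matching the page-service call shown at the top of this compare view; the shutdown_threads argument order is assumed from the filter in its body below:
// Inside the thread, once it knows which tenant/timeline it serves:
thread_mgr::associate_with(Some(tenantid), Some(timelineid));
// A later per-tenant shutdown can then join exactly those threads
// (argument order assumed: kind, tenant_id, timeline_id):
thread_mgr::shutdown_threads(None, Some(tenantid), None);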
/// Is there a thread running that matches the criteria
/// Signal and wait for threads to shut down.
@@ -285,9 +298,10 @@ pub fn shutdown_threads(
let threads = THREADS.lock().unwrap();
for thread in threads.values() {
let thread_mut = thread.mutable.lock().unwrap();
if (kind.is_none() || Some(thread.kind) == kind)
&& (tenant_id.is_none() || thread.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread.timeline_id == timeline_id)
&& (tenant_id.is_none() || thread_mut.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread_mut.timeline_id == timeline_id)
{
thread.shutdown_requested.store(true, Ordering::Relaxed);
// FIXME: handle error?
@@ -298,8 +312,10 @@ pub fn shutdown_threads(
drop(threads);
for thread in victim_threads {
let mut thread_mut = thread.mutable.lock().unwrap();
info!("waiting for {} to shut down", thread.name);
if let Some(join_handle) = thread.join_handle.lock().unwrap().take() {
if let Some(join_handle) = thread_mut.join_handle.take() {
drop(thread_mut);
let _ = join_handle.join();
} else {
// The thread had not even fully started yet. Or it was shut down

View File

@@ -50,7 +50,10 @@ use crate::thread_mgr::ThreadKind;
use crate::{thread_mgr, DatadirTimelineImpl};
use anyhow::{ensure, Context};
use chrono::{NaiveDateTime, Utc};
use etcd_broker::{Client, SkTimelineInfo, SkTimelineSubscription, SkTimelineSubscriptionKind};
use etcd_broker::{
subscription_key::SubscriptionKey, subscription_value::SkTimelineInfo, BrokerSubscription,
Client,
};
use itertools::Itertools;
use once_cell::sync::Lazy;
use std::cell::Cell;
@@ -68,7 +71,7 @@ use tokio::{
use tracing::*;
use url::Url;
use utils::lsn::Lsn;
use utils::pq_proto::ZenithFeedback;
use utils::pq_proto::ReplicationFeedback;
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
use self::connection_handler::{WalConnectionEvent, WalReceiverConnection};
@@ -403,7 +406,7 @@ async fn timeline_wal_broker_loop_step(
// Endlessly try to subscribe for broker updates for a given timeline.
// If there are no safekeepers to maintain the lease, the timeline subscription will be unavailable in the broker and the operation will fail constantly.
// This is ok: pageservers should keep trying to subscribe (with some backoff), since it's the only way they can get the timeline WAL.
let mut broker_subscription: SkTimelineSubscription;
let mut broker_subscription: BrokerSubscription<SkTimelineInfo>;
let mut attempt = 0;
loop {
select! {
@@ -420,9 +423,9 @@ async fn timeline_wal_broker_loop_step(
info!("Broker subscription loop cancelled, shutting down");
return Ok(ControlFlow::Break(()));
},
new_subscription = etcd_broker::subscribe_to_safekeeper_timeline_updates(
new_subscription = etcd_broker::subscribe_for_json_values(
etcd_client,
SkTimelineSubscriptionKind::timeline(broker_prefix.to_owned(), id),
SubscriptionKey::sk_timeline_info(broker_prefix.to_owned(), id),
)
.instrument(info_span!("etcd_subscription")) => match new_subscription {
Ok(new_subscription) => {
@@ -468,14 +471,21 @@ async fn timeline_wal_broker_loop_step(
// finally, if no other tasks are completed, get another broker update and possibly reconnect
updates = broker_subscription.fetch_data() => match updates {
Some(mut all_timeline_updates) => {
if let Some(subscribed_timeline_updates) = all_timeline_updates.remove(&id) {
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
Some(candidate) => {
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
},
None => {}
match all_timeline_updates.remove(&id) {
Some(subscribed_timeline_updates) => {
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
Some(candidate) => {
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
},
None => debug!("No connection candidate was selected for timeline"),
}
}
// XXX: If we subscribe for a certain timeline, we expect only its data to come.
// But somebody could propagate a new etcd key that has the same prefix as the subscribed one, and then we'll get odd data.
// This is an error, since we don't want overlapping prefixes for timelines, but we can complain and throw those entries away instead of panicking,
// since the next poll might bring the correct data.
None => error!("Timeline has an active broker subscription, but got no updates. Other data length: {}", all_timeline_updates.len()),
}
},
None => {
@@ -511,7 +521,7 @@ struct WalConnectionData {
safekeeper_id: NodeId,
connection: WalReceiverConnection,
connection_init_time: NaiveDateTime,
last_wal_receiver_data: Option<(ZenithFeedback, NaiveDateTime)>,
last_wal_receiver_data: Option<(ReplicationFeedback, NaiveDateTime)>,
}
#[derive(Debug, PartialEq, Eq)]
@@ -622,18 +632,28 @@ impl WalConnectionManager {
/// Checks current state against every fetched safekeeper state of a given timeline.
/// Returns a new candidate, if the current state is somewhat lagging, or `None` otherwise.
/// The current rules for approving new candidates:
/// * pick the safekeeper with biggest `commit_lsn` that's after than pageserver's latest Lsn for the timeline
/// * if the leader is a different SK and either
/// * no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection) — reconnect
/// * same time amount had passed since the connection, WAL updates happened recently, but the new leader SK has timeline Lsn way ahead of the old one — reconnect
/// * pick the etcd entry for the currently connected safekeeper (if any) out of the input data
/// * out of the remaining entries, pick the one with the biggest `commit_lsn` that is ahead of the pageserver's latest Lsn for the timeline
/// * if there's no such entry, no new candidate is found, abort
/// * otherwise, check whether the etcd updates contain the currently connected safekeeper
/// * if they don't, no WAL updates have arrived for a while (either none since the connection was made, or none since the last event on it);
/// reconnect if that time exceeds the threshold
/// * if they do, compare its Lsn with the candidate's and reconnect if the candidate is ahead by more than the threshold
///
/// This way we keep up with the most up-to-date safekeeper without jumping from one safekeeper to another too frequently.
/// Both thresholds are configured per tenant (a worked numeric example of the lag threshold follows the select_connection_candidate hunks below).
fn select_connection_candidate(
&self,
safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
mut safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
) -> Option<NewWalConnectionCandidate> {
let (&new_sk_id, new_sk_timeline, new_wal_producer_connstr) = safekeeper_timelines
let current_sk_data_updated =
self.wal_connection_data
.as_ref()
.and_then(|connection_data| {
safekeeper_timelines.remove(&connection_data.safekeeper_id)
});
let candidate_sk_data = safekeeper_timelines
.iter()
.filter(|(_, info)| {
info.commit_lsn > Some(self.timeline.tline.get_last_record_lsn())
@@ -642,7 +662,6 @@ impl WalConnectionManager {
match wal_stream_connection_string(
self.id,
info.safekeeper_connstr.as_deref()?,
info.pageserver_connstr.as_deref()?,
) {
Ok(connstr) => Some((sk_id, info, connstr)),
Err(e) => {
@@ -651,68 +670,78 @@ impl WalConnectionManager {
}
}
})
.max_by_key(|(_, info, _)| info.commit_lsn)?;
.max_by_key(|(_, info, _)| info.commit_lsn);
match self.wal_connection_data.as_ref() {
None => Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoExistingConnection,
}),
Some(current_connection) => {
if current_connection.safekeeper_id == new_sk_id {
None
} else {
self.reason_to_reconnect(current_connection, new_sk_timeline)
.map(|reason| NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason,
})
match (current_sk_data_updated, candidate_sk_data) {
// No better candidate than one we're already connected to:
// whatever data update comes for the connected one, we don't have a better candidate
(_, None) => None,
// No updates from the old SK in this batch, but some candidate is available:
// check how long time ago did we receive updates from the current SK, switch connections in case it's over the threshold
(None, Some((&new_sk_id, _, new_wal_producer_connstr))) => {
match self.wal_connection_data.as_ref() {
Some(current_connection) => {
let last_sk_interaction_time =
match current_connection.last_wal_receiver_data.as_ref() {
Some((_, data_submission_time)) => *data_submission_time,
None => current_connection.connection_init_time,
};
let now = Utc::now().naive_utc();
match (now - last_sk_interaction_time).to_std() {
Ok(last_interaction) => {
if last_interaction > self.lagging_wal_timeout {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoWalTimeout {
last_wal_interaction: last_sk_interaction_time,
check_time: now,
threshold: self.lagging_wal_timeout,
},
});
}
}
Err(_e) => {
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}", current_connection.safekeeper_id);
}
}
None
}
None => Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::NoExistingConnection,
}),
}
}
}
}
fn reason_to_reconnect(
&self,
current_connection: &WalConnectionData,
new_sk_timeline: &SkTimelineInfo,
) -> Option<ReconnectReason> {
let last_sk_interaction_time = match current_connection.last_wal_receiver_data.as_ref() {
Some((last_wal_receiver_data, data_submission_time)) => {
let new_lsn = new_sk_timeline.commit_lsn?;
match new_lsn.0.checked_sub(last_wal_receiver_data.ps_writelsn)
// Both current SK got updated via etcd and there's another candidate with suitable Lsn:
// check how bigger the new SK Lsn is in the future compared to the current SK, switch connections in case it's over the threshold
(
Some(current_sk_timeline),
Some((&new_sk_id, new_sk_timeline, new_wal_producer_connstr)),
) => {
let new_lsn = new_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
let current_lsn = current_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
match new_lsn.0.checked_sub(current_lsn.0)
{
Some(sk_lsn_advantage) => {
if sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(ReconnectReason::LaggingWal { current_lsn: Lsn(last_wal_receiver_data.ps_writelsn), new_lsn, threshold: self.max_lsn_wal_lag });
Some(new_sk_lsn_advantage) => {
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(
NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
});
}
}
None => debug!("Best SK candidate has its commit Lsn behind the current timeline's latest consistent Lsn"),
}
*data_submission_time
}
None => current_connection.connection_init_time,
};
let now = Utc::now().naive_utc();
match (now - last_sk_interaction_time).to_std() {
Ok(last_interaction) => {
if last_interaction > self.lagging_wal_timeout {
return Some(ReconnectReason::NoWalTimeout {
last_wal_interaction: last_sk_interaction_time,
check_time: now,
threshold: self.lagging_wal_timeout,
});
}
}
Err(_e) => {
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}",
current_connection.safekeeper_id);
None
}
}
None
}
}
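A worked numeric example (not from the diff) of the LaggingWal rule documented above, assuming the new DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG of 10_000 from the config change earlier in this compare view; the Lsn values are illustrative:
// The connected safekeeper last reported commit_lsn = 100_000 via etcd,
// while the best other candidate reports 120_000.
let current_lsn = Lsn(100_000);
let new_lsn = Lsn(120_000);
let max_lsn_wal_lag: u64 = 10_000; // DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG
// Mirrors the checked_sub comparison in select_connection_candidate:
let advantage = new_lsn.0.checked_sub(current_lsn.0); // Some(20_000)
assert!(matches!(advantage, Some(adv) if adv >= max_lsn_wal_lag)); // -> ReconnectReason::LaggingWal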
@@ -722,7 +751,6 @@ fn wal_stream_connection_string(
timeline_id,
}: ZTenantTimelineId,
listen_pg_addr_str: &str,
pageserver_connstr: &str,
) -> anyhow::Result<String> {
let sk_connstr = format!("postgresql://no_user@{listen_pg_addr_str}/no_db");
let me_conf = sk_connstr
@@ -732,7 +760,7 @@ fn wal_stream_connection_string(
})?;
let (host, port) = utils::connstring::connection_host_port(&me_conf);
Ok(format!(
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id} pageserver_connstr={pageserver_connstr}'",
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
))
}
@@ -765,20 +793,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: None,
pageserver_connstr: Some("no safekeeper_connstr".to_string()),
},
),
(
NodeId(1),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(Lsn(1)),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no pageserver_connstr".to_string()),
pageserver_connstr: None,
},
),
(
@@ -791,7 +805,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no commit_lsn".to_string()),
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
},
),
(
@@ -804,7 +817,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("no commit_lsn".to_string()),
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
},
),
]));
@@ -834,7 +846,7 @@ mod tests {
.await;
let now = Utc::now().naive_utc();
dummy_connection_data.last_wal_receiver_data = Some((
ZenithFeedback {
ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: 1,
ps_applylsn: current_lsn,
@@ -860,7 +872,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -873,7 +884,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not advanced Lsn".to_string()),
pageserver_connstr: Some("not advanced Lsn (p)".to_string()),
},
),
(
@@ -888,7 +898,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not enough advanced Lsn".to_string()),
pageserver_connstr: Some("not enough advanced Lsn (p)".to_string()),
},
),
]));
@@ -920,7 +929,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
)]))
.expect("Expected one candidate selected out of the only data option, but got none");
@@ -933,9 +941,6 @@ mod tests {
assert!(only_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(only_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
let selected_lsn = 100_000;
let biggest_wal_candidate = data_manager_with_no_connection
@@ -950,7 +955,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("smaller commit_lsn".to_string()),
pageserver_connstr: Some("smaller commit_lsn (p)".to_string()),
},
),
(
@@ -963,7 +967,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -976,9 +979,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: None,
pageserver_connstr: Some(
"no safekeeper_connstr despite bigger commit_lsn".to_string(),
),
},
),
]))
@@ -995,9 +995,6 @@ mod tests {
assert!(biggest_wal_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(biggest_wal_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
Ok(())
}
@@ -1014,13 +1011,13 @@ mod tests {
let mut data_manager_with_connection = dummy_wal_connection_manager(&harness);
let connected_sk_id = NodeId(0);
let mut dummy_connection_data = dummy_connection_data(id, NodeId(0)).await;
let mut dummy_connection_data = dummy_connection_data(id, connected_sk_id).await;
let lagging_wal_timeout =
chrono::Duration::from_std(data_manager_with_connection.lagging_wal_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
dummy_connection_data.last_wal_receiver_data = Some((
ZenithFeedback {
ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: current_lsn.0,
ps_applylsn: 1,
@@ -1044,7 +1041,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
@@ -1057,7 +1053,6 @@ mod tests {
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("advanced by Lsn safekeeper".to_string()),
pageserver_connstr: Some("advanced by Lsn safekeeper (p)".to_string()),
},
),
]);
@@ -1081,16 +1076,13 @@ mod tests {
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains("advanced by Lsn safekeeper"));
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains("advanced by Lsn safekeeper (p)"));
Ok(())
}
#[tokio::test]
async fn timeout_wal_over_threshcurrent_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshcurrent_candidate")?;
async fn timeout_wal_over_threshold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshold_current_candidate")?;
let current_lsn = Lsn(100_000).align();
let id = ZTenantTimelineId {
@@ -1108,36 +1100,19 @@ mod tests {
dummy_connection_data.connection_init_time = time_over_threshold;
data_manager_with_connection.wal_connection_data = Some(dummy_connection_data);
let new_lsn = Lsn(current_lsn.0 + data_manager_with_connection.max_lsn_wal_lag.get() + 1);
let over_threshcurrent_candidate = data_manager_with_connection
.select_connection_candidate(HashMap::from([
(
NodeId(0),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(new_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
},
),
(
NodeId(1),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some("not advanced by Lsn safekeeper".to_string()),
pageserver_connstr: Some("not advanced by Lsn safekeeper".to_string()),
},
),
]))
.select_connection_candidate(HashMap::from([(
NodeId(0),
SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
)]))
.expect(
"Expected one candidate selected out of multiple valid data options, but got none",
);
@@ -1157,9 +1132,6 @@ mod tests {
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
assert!(over_threshcurrent_candidate
.wal_producer_connstr
.contains(DUMMY_PAGESERVER_CONNSTR));
Ok(())
}
@@ -1186,7 +1158,6 @@ mod tests {
}
const DUMMY_SAFEKEEPER_CONNSTR: &str = "safekeeper_connstr";
const DUMMY_PAGESERVER_CONNSTR: &str = "pageserver_connstr";
// the function itself does not need async, but it spawns a tokio::task underneath and hence needs
// a runtime to avoid panicking
@@ -1194,9 +1165,8 @@ mod tests {
id: ZTenantTimelineId,
safekeeper_id: NodeId,
) -> WalConnectionData {
let dummy_connstr =
wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR, DUMMY_PAGESERVER_CONNSTR)
.expect("Failed to construct dummy wal producer connstr");
let dummy_connstr = wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR)
.expect("Failed to construct dummy wal producer connstr");
WalConnectionData {
safekeeper_id,
connection: WalReceiverConnection::open(

View File

@@ -19,7 +19,7 @@ use tokio_stream::StreamExt;
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
pq_proto::ReplicationFeedback,
zid::{NodeId, ZTenantTimelineId},
};
@@ -33,7 +33,7 @@ use crate::{
#[derive(Debug, Clone)]
pub enum WalConnectionEvent {
Started,
NewWal(ZenithFeedback),
NewWal(ReplicationFeedback),
End(Result<(), String>),
}
@@ -328,7 +328,7 @@ async fn handle_walreceiver_connection(
// Send zenith feedback message.
// Regular standby_status_update fields are put into this message.
let zenith_status_update = ZenithFeedback {
let zenith_status_update = ReplicationFeedback {
current_timeline_size: timeline.get_current_logical_size() as u64,
ps_writelsn: write_lsn,
ps_flushlsn: flush_lsn,

View File

@@ -27,9 +27,10 @@ pub struct ClientCredentials {
// Other Auth backends don't need it.
pub sni_data: Option<String>,
// cluster_option is passed as argument from options from url.
// To be used to determine cluster name in case sni_data is missing.
pub project_option: Option<String>,
// project_name is passed as an argument in the options from the url.
// If sni_data is missing, project_name is used to determine the cluster name.
// If sni_data is present, project_name and sni_data must match (otherwise an error is returned).
pub project_name: Option<String>,
}
impl ClientCredentials {
@@ -41,29 +42,42 @@ impl ClientCredentials {
#[derive(Debug, Error)]
pub enum ProjectNameError {
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '..&options=project:<project name>..'.")]
#[error("SNI is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '...&options=project%3D<project-name>...'.")]
Missing,
#[error("SNI is malformed.")]
Bad,
#[error("Inconsistent project name inferred from SNI and project option. String from SNI: '{0}', String from project option: '{1}'")]
Inconsistent(String, String),
}
impl UserFacingError for ProjectNameError {}
impl ClientCredentials {
/// Determine project name from SNI.
/// Determine the project name from SNI or from the project_name parameter of the options argument.
pub fn project_name(&self) -> Result<&str, ProjectNameError> {
let ret = match &self.sni_data {
//if sni_data exists, use it to determine project name
Some(sni_data) => {
sni_data
.split_once('.')
.ok_or(ProjectNameError::Bad)?
.0
// If both sni_data and project_name are set, check that they match;
// otherwise a ProjectNameError::Inconsistent error is returned.
if let Some(sni_data) = &self.sni_data {
let project_name_from_sni_data =
sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0;
if let Some(project_name_from_options) = &self.project_name {
if !project_name_from_options.eq(project_name_from_sni_data) {
return Err(ProjectNameError::Inconsistent(
project_name_from_sni_data.to_string(),
project_name_from_options.to_string(),
));
}
}
//otherwise use project_option if it was manually set thought ..&options=project:<name> parameter
}
// determine the project name from self.sni_data if it exists, otherwise from self.project_name.
let ret = match &self.sni_data {
// if sni_data exists, use it to determine project name
Some(sni_data) => sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0,
// otherwise use project_name if it was manually set through the options parameter.
None => self
.project_option
.project_name
.as_ref()
.ok_or(ProjectNameError::Missing)?
.as_str(),
@@ -84,17 +98,13 @@ impl TryFrom<HashMap<String, String>> for ClientCredentials {
let user = get_param("user")?;
let dbname = get_param("database")?;
let project = get_param("project");
let project_option = match project {
Ok(project) => Some(project),
Err(_) => None,
};
let project_name = get_param("project").ok();
Ok(Self {
user,
dbname,
sni_data: None,
project_option,
project_name,
})
}
}
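A minimal usage sketch (not part of the diff) of the resolution rules above, assuming all ClientCredentials fields shown in this file are public strings and using an illustrative SNI host name:
// SNI and the option agree, so the project name comes from the first SNI label.
let creds = ClientCredentials {
    user: "john_doe".to_string(),
    dbname: "postgres".to_string(),
    sni_data: Some("myproject.cloud.example.com".to_string()),
    // As passed by the client via '...&options=project%3Dmyproject...'.
    project_name: Some("myproject".to_string()),
};
assert_eq!(creds.project_name().unwrap(), "myproject");
// With sni_data = Some("other.cloud.example.com") instead, project_name() would
// return ProjectNameError::Inconsistent("other", "myproject").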

View File

@@ -1,251 +0,0 @@
//! Declaration of Cloud API V2.
use crate::{
auth::{self, AuthFlow},
compute, scram,
};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::auth::ClientCredentials;
use crate::stream::PqStream;
use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
#[derive(Debug, Error)]
pub enum ConsoleAuthError {
// We shouldn't include the actual secret here.
#[error("Bad authentication secret")]
BadSecret,
#[error("Bad client credentials: {0:?}")]
BadCredentials(crate::auth::ClientCredentials),
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass ..&options=cluster:<project name>.. parameter")]
SniMissingAndProjectNameMissing,
#[error("Unexpected SNI content")]
SniWrong,
#[error(transparent)]
BadUrl(#[from] url::ParseError),
#[error(transparent)]
Io(#[from] std::io::Error),
/// HTTP status (other than 200) returned by the console.
#[error("Console responded with an HTTP status: {0}")]
HttpStatus(reqwest::StatusCode),
#[error(transparent)]
Transport(#[from] reqwest::Error),
#[error("Console responded with a malformed JSON: '{0}'")]
MalformedResponse(#[from] serde_json::Error),
#[error("Console responded with a malformed compute address: '{0}'")]
MalformedComputeAddress(String),
}
#[derive(Serialize, Deserialize, Debug)]
struct GetRoleSecretResponse {
role_secret: String,
}
#[derive(Serialize, Deserialize, Debug)]
struct GetWakeComputeResponse {
address: String,
}
/// Auth secret which is managed by the cloud.
pub enum AuthInfo {
/// Md5 hash of user's password.
Md5([u8; 16]),
/// [SCRAM](crate::scram) authentication info.
Scram(scram::ServerSecret),
}
/// Compute node connection params provided by the cloud.
/// Note how it implements serde traits, since we receive it over the wire.
#[derive(Serialize, Deserialize, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
/// [Cloud API V1](super::legacy) returns cleartext password,
/// but [Cloud API V2](super::api) implements [SCRAM](crate::scram)
/// authentication, so we can leverage this method and cope without password.
pub password: Option<String>,
}
// Manually implement debug to omit personal and sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.finish()
}
}
impl From<DatabaseInfo> for tokio_postgres::Config {
fn from(db_info: DatabaseInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
config
}
}
async fn get_auth_info(
auth_endpoint: &str,
user: &str,
cluster: &str,
) -> Result<AuthInfo, ConsoleAuthError> {
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_get_role_secret"))?;
url.query_pairs_mut()
.append_pair("project", cluster)
.append_pair("role", user);
// TODO: use a proper logger
println!("cplane request: {}", url);
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetRoleSecretResponse = serde_json::from_str(resp.text().await?.as_str())?;
scram::ServerSecret::parse(response.role_secret.as_str())
.map(AuthInfo::Scram)
.ok_or(ConsoleAuthError::BadSecret)
}
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
auth_endpoint: &str,
cluster: &str,
) -> Result<(String, u16), ConsoleAuthError> {
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_wake_compute"))?;
url.query_pairs_mut().append_pair("project", cluster);
// TODO: use a proper logger
println!("cplane request: {}", url);
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetWakeComputeResponse = serde_json::from_str(resp.text().await?.as_str())?;
let (host, port) = response
.address
.split_once(':')
.ok_or_else(|| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
let port: u16 = port
.parse()
.map_err(|_| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
Ok((host.to_string(), port))
}
pub async fn handle_user(
auth_endpoint: &str,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
creds: &ClientCredentials,
) -> Result<compute::NodeInfo, crate::auth::AuthError> {
// Determine cluster name from SNI (creds.sni_data) or from creds.cluster_option.
let cluster = match &creds.sni_data {
//if sni_data exists, use it
Some(sni_data) => {
sni_data
.split_once('.')
.ok_or(ConsoleAuthError::SniWrong)?
.0
}
//otherwise use cluster_option if it was manually set thought ..&options=cluster:<name> parameter
None => creds
.cluster_option
.as_ref()
.ok_or(ConsoleAuthError::SniMissingAndProjectNameMissing)?
.as_str(),
};
let user = creds.user.as_str();
// Step 1: get the auth secret
let auth_info = get_auth_info(auth_endpoint, user, cluster).await?;
let flow = AuthFlow::new(client);
let scram_keys = match auth_info {
AuthInfo::Md5(_) => {
// TODO: decide if we should support MD5 in api v2
return Err(crate::auth::AuthErrorImpl::auth_failed("MD5 is not supported").into());
}
AuthInfo::Scram(secret) => {
let scram = auth::Scram(&secret);
Some(compute::ScramKeys {
client_key: flow.begin(scram).await?.authenticate().await?.as_bytes(),
server_key: secret.server_key.as_bytes(),
})
}
};
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
// Step 2: wake compute
let (host, port) = wake_compute(auth_endpoint, cluster).await?;
Ok(compute::NodeInfo {
db_info: DatabaseInfo {
host,
port,
dbname: creds.dbname.clone(),
user: creds.user.clone(),
password: None,
},
scram_keys,
})
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn parse_db_info() -> anyhow::Result<()> {
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"password": "password",
}))?;
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
}))?;
Ok(())
}
}

View File

@@ -10,6 +10,7 @@ use remote_storage::RemoteStorageConfig;
use std::fs::{self, File};
use std::io::{ErrorKind, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::thread;
use tokio::sync::mpsc;
use toml_edit::Document;
@@ -27,6 +28,7 @@ use safekeeper::timeline::GlobalTimelines;
use safekeeper::wal_backup;
use safekeeper::wal_service;
use safekeeper::SafeKeeperConf;
use utils::auth::JwtAuth;
use utils::{
http::endpoint, logging, project_git_version, shutdown::exit_now, signals, tcp_listener,
zid::NodeId,
@@ -132,6 +134,12 @@ fn main() -> anyhow::Result<()> {
.default_missing_value("true")
.help("Enable/disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring WAL backup horizon."),
)
.arg(
Arg::new("auth-validation-public-key-path")
.long("auth-validation-public-key-path")
.takes_value(true)
.help("Path to an RSA .pem public key which is used to check JWT tokens")
)
.get_matches();
if let Some(addr) = arg_matches.value_of("dump-control-file") {
@@ -204,6 +212,10 @@ fn main() -> anyhow::Result<()> {
.parse()
.context("failed to parse bool enable-s3-offload bool")?;
conf.auth_validation_public_key_path = arg_matches
.value_of("auth-validation-public-key-path")
.map(PathBuf::from);
start_safekeeper(conf, given_id, arg_matches.is_present("init"))
}
@@ -239,6 +251,19 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
e
})?;
let auth = match conf.auth_validation_public_key_path.as_ref() {
None => {
info!("Auth is disabled");
None
}
Some(path) => {
info!("Loading JWT auth key from {}", path.display());
Some(Arc::new(
JwtAuth::from_key_path(path).context("failed to load the auth key")?,
))
}
};
// XXX: Don't spawn any threads before daemonizing!
if conf.daemonize {
info!("daemonizing...");
@@ -280,8 +305,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("http_endpoint_thread".into())
.spawn(|| {
// TODO authentication
let router = http::make_router(conf_);
let router = http::make_router(conf_, auth);
endpoint::serve_thread_main(
router,
http_listener,
@@ -295,6 +319,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
let safekeeper_thread = thread::Builder::new()
.name("Safekeeper thread".into())
.spawn(|| {
// TODO: add auth
if let Err(e) = wal_service::thread_main(conf_cloned, pg_listener) {
info!("safekeeper thread terminated: {e}");
}
@@ -309,6 +334,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("broker thread".into())
.spawn(|| {
// TODO: add auth?
broker::thread_main(conf_);
})?,
);
@@ -321,6 +347,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("WAL removal thread".into())
.spawn(|| {
// TODO: add auth?
remove_wal::thread_main(conf_);
})?,
);
@@ -330,6 +357,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
thread::Builder::new()
.name("wal backup launcher thread".into())
.spawn(move || {
// TODO: add auth?
wal_backup::wal_backup_launcher_thread_main(conf_, wal_backup_launcher_rx);
})?,
);

View File

@@ -4,9 +4,7 @@ use anyhow::anyhow;
use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use etcd_broker::Client;
use etcd_broker::PutOptions;
use etcd_broker::SkTimelineSubscriptionKind;
use etcd_broker::subscription_value::SkTimelineInfo;
use std::time::Duration;
use tokio::spawn;
use tokio::task::JoinHandle;
@@ -15,6 +13,10 @@ use tracing::*;
use url::Url;
use crate::{timeline::GlobalTimelines, SafeKeeperConf};
use etcd_broker::{
subscription_key::{OperationKind, SkOperationKind, SubscriptionKey},
Client, PutOptions,
};
use utils::zid::{NodeId, ZTenantTimelineId};
const RETRY_INTERVAL_MSEC: u64 = 1000;
@@ -43,7 +45,7 @@ fn timeline_safekeeper_path(
) -> String {
format!(
"{}/{sk_id}",
SkTimelineSubscriptionKind::timeline(broker_etcd_prefix, zttid).watch_key()
SubscriptionKey::sk_timeline_info(broker_etcd_prefix, zttid).watch_key()
)
}
@@ -90,7 +92,7 @@ impl ElectionLeader {
}
}
pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {
pub async fn get_leader(req: &Election, leader: &mut Option<ElectionLeader>) -> Result<()> {
let mut client = Client::connect(req.broker_endpoints.clone(), None)
.await
.context("Could not connect to etcd")?;
@@ -102,22 +104,27 @@ pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {
let lease_id = lease.map(|l| l.id()).unwrap();
let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
// kill previous keepalive, if any
if let Some(l) = leader.take() {
l.give_up().await;
}
if let Err(e) = client
let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
// immediately save handle to kill task if we get canceled below
*leader = Some(ElectionLeader {
client: client.clone(),
keep_alive,
});
client
.campaign(
req.election_name.clone(),
req.candidate_name.clone(),
lease_id,
)
.await
{
keep_alive.abort();
let _ = keep_alive.await;
return Err(e.into());
}
.await?;
Ok(ElectionLeader { client, keep_alive })
Ok(())
}
async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
@@ -143,20 +150,8 @@ async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
}
}
pub fn get_campaign_name(
election_name: String,
broker_prefix: String,
timeline_id: &ZTenantTimelineId,
) -> String {
return format!(
"{}/{}",
SkTimelineSubscriptionKind::timeline(broker_prefix, *timeline_id).watch_key(),
election_name
);
}
pub fn get_candiate_name(system_id: NodeId) -> String {
format!("id_{}", system_id)
format!("id_{system_id}")
}
/// Push once in a while data about all active timelines to the broker.
@@ -208,9 +203,20 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
let mut client = Client::connect(&conf.broker_endpoints, None).await?;
let mut subscription = etcd_broker::subscribe_to_safekeeper_timeline_updates(
let mut subscription = etcd_broker::subscribe_for_values(
&mut client,
SkTimelineSubscriptionKind::all(conf.broker_etcd_prefix.clone()),
SubscriptionKey::all(conf.broker_etcd_prefix.clone()),
|full_key, value_str| {
if full_key.operation == OperationKind::Safekeeper(SkOperationKind::TimelineInfo) {
match serde_json::from_str::<SkTimelineInfo>(value_str) {
Ok(new_info) => return Some(new_info),
Err(e) => {
error!("Failed to parse timeline info from value str '{value_str}': {e}")
}
}
}
None
},
)
.await
.context("failed to subscribe for safekeeper info")?;

View File

@@ -1,305 +0,0 @@
//!
//! Callmemaybe module is responsible for periodically requesting
//! pageserver to initiate wal streaming.
//!
//! Other threads can use CallmeEvent messages to subscribe or unsubscribe
//! from the call list.
//!
use crate::SafeKeeperConf;
use anyhow::{Context, Result};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::{Duration, Instant};
use tokio::runtime;
use tokio::sync::mpsc::UnboundedReceiver;
use tokio::task;
use tokio_postgres::NoTls;
use tracing::*;
use utils::{
connstring::connection_host_port,
zid::{ZTenantId, ZTimelineId},
};
async fn request_callback(
pageserver_connstr: String,
listen_pg_addr_str: String,
timelineid: ZTimelineId,
tenantid: ZTenantId,
) -> Result<()> {
info!(
"callmemaybe request_callback Connecting to pageserver {}",
&pageserver_connstr
);
let (client, connection) = tokio_postgres::connect(&pageserver_connstr, NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
// use Config parsing because SockAddr parsing doesn't allow using host names instead of ip addresses
let me_connstr = format!("postgresql://no_user@{}/no_db", listen_pg_addr_str);
let me_conf: postgres::config::Config = me_connstr.parse().unwrap();
let (host, port) = connection_host_port(&me_conf);
// pageserver connstr is needed to be able to distinguish between different pageservers
// it is required to correctly manage callmemaybe subscriptions when more than one pageserver is involved
// TODO it is better to use some sort of a unique id instead of connection string, see https://github.com/zenithdb/zenith/issues/1105
let callme = format!(
"callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={} pageserver_connstr={}'",
tenantid, timelineid, host, port, timelineid, tenantid, pageserver_connstr,
);
let _ = client.simple_query(&callme).await?;
Ok(())
}
pub fn thread_main(conf: SafeKeeperConf, rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
let runtime = runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
runtime.block_on(main_loop(conf, rx))
}
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct SubscriptionStateKey {
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
pageserver_connstr: String,
}
impl SubscriptionStateKey {
pub fn new(tenant_id: ZTenantId, timeline_id: ZTimelineId, pageserver_connstr: String) -> Self {
Self {
tenant_id,
timeline_id,
pageserver_connstr,
}
}
}
/// Messages to the callmemaybe thread
#[derive(Debug)]
pub enum CallmeEvent {
// add new subscription to the list
Subscribe(SubscriptionStateKey),
// remove the subscription from the list
Unsubscribe(SubscriptionStateKey),
// don't serve this subscription, but keep it in the list
Pause(SubscriptionStateKey),
// resume this subscription, if it exists,
// but don't create a new one if it is gone
Resume(SubscriptionStateKey),
// TODO how do we delete from subscriptions?
}
#[derive(Debug)]
struct SubscriptionState {
tenantid: ZTenantId,
timelineid: ZTimelineId,
pageserver_connstr: String,
handle: Option<task::JoinHandle<()>>,
last_call_time: Instant,
paused: bool,
}
impl SubscriptionState {
fn new(
tenantid: ZTenantId,
timelineid: ZTimelineId,
pageserver_connstr: String,
) -> SubscriptionState {
SubscriptionState {
tenantid,
timelineid,
pageserver_connstr,
handle: None,
last_call_time: Instant::now(),
paused: false,
}
}
fn pause(&mut self) {
self.paused = true;
self.abort_handle();
}
fn resume(&mut self) {
self.paused = false;
}
// Most likely, the task has already completed successfully
// and abort() won't have any effect.
fn abort_handle(&mut self) {
if let Some(handle) = self.handle.take() {
handle.abort();
let timelineid = self.timelineid;
let tenantid = self.tenantid;
let pageserver_connstr = self.pageserver_connstr.clone();
tokio::spawn(async move {
if let Err(err) = handle.await {
if err.is_cancelled() {
warn!("callback task for timelineid={} tenantid={} was cancelled before spawning a new one",
timelineid, tenantid);
} else {
error!(
"callback task for timelineid={} tenantid={} pageserver_connstr={} failed: {}",
timelineid, tenantid, pageserver_connstr, err
);
}
}
});
}
}
fn call(&mut self, recall_period: Duration, listen_pg_addr: String) {
// Ignore call request if this subscription is paused
if self.paused {
debug!(
"ignore call request for paused subscription \
tenantid: {}, timelineid: {}",
self.tenantid, self.timelineid
);
return;
}
// Check if it is too early to recall
if self.handle.is_some() && self.last_call_time.elapsed() < recall_period {
debug!(
"too early to recall. self.last_call_time.elapsed(): {:?}, recall_period: {:?} \
tenantid: {}, timelineid: {}",
self.last_call_time.elapsed(), recall_period, self.tenantid, self.timelineid
);
return;
}
// If the previous task didn't complete within recall_period, it must be hanging,
// so don't wait for it forever; just abort it and try again.
self.abort_handle();
let timelineid = self.timelineid;
let tenantid = self.tenantid;
let pageserver_connstr = self.pageserver_connstr.clone();
self.handle = Some(tokio::spawn(async move {
request_callback(pageserver_connstr, listen_pg_addr, timelineid, tenantid)
.await
.unwrap_or_else(|e| {
error!(
"callback task for timelineid={} tenantid={} failed: {}",
timelineid, tenantid, e
)
});
}));
// Update last_call_time
self.last_call_time = Instant::now();
info!(
"new call spawned. last call time {:?} tenantid: {}, timelineid: {}",
self.last_call_time, self.tenantid, self.timelineid
);
}
}
impl Drop for SubscriptionState {
fn drop(&mut self) {
self.abort_handle();
}
}
pub async fn main_loop(conf: SafeKeeperConf, mut rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
let subscriptions: Mutex<HashMap<SubscriptionStateKey, SubscriptionState>> =
Mutex::new(HashMap::new());
let mut ticker = tokio::time::interval(conf.recall_period);
loop {
tokio::select! {
request = rx.recv() =>
{
match request.context("done")?
{
CallmeEvent::Subscribe(key) =>
{
let _enter = info_span!("callmemaybe: subscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
let mut subscriptions = subscriptions.lock().unwrap();
// XXX this clone is ugly, is there a way to use the trick with Borrow trait with entry API?
// when we switch to node id instead of the connection string key will be Copy and there will be no need to clone
match subscriptions.entry(key.clone()) {
Entry::Occupied(_) => {
// Do nothing if the subscription already exists.
// If it is paused, a replication connection is already established.
// If it is not paused, it will be polled together with the other subscriptions when the timeout expires.
// This can occur when the replication channel is established before the subscription is added.
info!(
"subscription already exists",
);
}
Entry::Vacant(entry) => {
let subscription = entry.insert(SubscriptionState::new(
key.tenant_id,
key.timeline_id,
key.pageserver_connstr,
));
subscription.call(conf.recall_period, conf.listen_pg_addr.clone());
}
}
},
CallmeEvent::Unsubscribe(key) => {
let _enter = debug_span!("callmemaybe: unsubscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
debug!("unsubscribe");
let mut subscriptions = subscriptions.lock().unwrap();
subscriptions.remove(&key);
},
CallmeEvent::Pause(key) => {
let _enter = debug_span!("callmemaybe: pause", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
let mut subscriptions = subscriptions.lock().unwrap();
// If a pause is received when no corresponding subscription exists, someone started replication
// without using callmemaybe, so we create a subscription and pause it.
// In the tenant relocation scenario the subscribe call is executed after the pause, when compute is restarted.
// In that case there is no need to create a new subscription or unpause the existing one.
match subscriptions.entry(key.clone()) {
Entry::Occupied(mut sub) => {
debug!("pause existing");
sub.get_mut().pause();
}
Entry::Vacant(entry) => {
debug!("create paused");
let subscription = entry.insert(SubscriptionState::new(
key.tenant_id,
key.timeline_id,
key.pageserver_connstr,
));
subscription.pause();
}
}
},
CallmeEvent::Resume(key) => {
debug!(
"callmemaybe. thread_main. resume callback request for timelineid={} tenantid={} pageserver_connstr={}",
key.timeline_id, key.tenant_id, key.pageserver_connstr,
);
let mut subscriptions = subscriptions.lock().unwrap();
if let Some(sub) = subscriptions.get_mut(&key)
{
sub.resume();
};
},
}
},
_ = ticker.tick() => {
let _enter = debug_span!("callmemaybe: tick").entered();
let mut subscriptions = subscriptions.lock().unwrap();
for (_, state) in subscriptions.iter_mut() {
state.call(conf.recall_period, conf.listen_pg_addr.clone());
}
},
};
}
}

View File

@@ -29,12 +29,11 @@ pub struct SafekeeperPostgresHandler {
pub ztenantid: Option<ZTenantId>,
pub ztimelineid: Option<ZTimelineId>,
pub timeline: Option<Arc<Timeline>>,
pageserver_connstr: Option<String>,
}
/// Parsed Postgres command.
enum SafekeeperPostgresCommand {
StartWalPush { pageserver_connstr: Option<String> },
StartWalPush,
StartReplication { start_lsn: Lsn },
IdentifySystem,
JSONCtrl { cmd: AppendLogicalMessage },
@@ -42,11 +41,7 @@ enum SafekeeperPostgresCommand {
fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
if cmd.starts_with("START_WAL_PUSH") {
let re = Regex::new(r"START_WAL_PUSH(?: (.+))?").unwrap();
let caps = re.captures(cmd).unwrap();
let pageserver_connstr = caps.get(1).map(|m| m.as_str().to_owned());
Ok(SafekeeperPostgresCommand::StartWalPush { pageserver_connstr })
Ok(SafekeeperPostgresCommand::StartWalPush)
} else if cmd.starts_with("START_REPLICATION") {
let re =
Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap();
@@ -86,8 +81,6 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
self.appname = Some(app_name.clone());
}
self.pageserver_connstr = params.get("pageserver_connstr").cloned();
Ok(())
} else {
bail!("Safekeeper received unexpected initial message: {:?}", sm);
@@ -113,14 +106,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
}
match cmd {
SafekeeperPostgresCommand::StartWalPush { pageserver_connstr } => {
ReceiveWalConn::new(pgb, pageserver_connstr)
SafekeeperPostgresCommand::StartWalPush => {
ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn")?;
}
SafekeeperPostgresCommand::StartReplication { start_lsn } => {
ReplicationConn::new(pgb)
.run(self, pgb, start_lsn, self.pageserver_connstr.clone())
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn")?;
}
SafekeeperPostgresCommand::IdentifySystem => {
@@ -142,7 +135,6 @@ impl SafekeeperPostgresHandler {
ztenantid: None,
ztimelineid: None,
timeline: None,
pageserver_connstr: None,
}
}

View File

@@ -1,9 +1,9 @@
use etcd_broker::SkTimelineInfo;
use hyper::{Body, Request, Response, StatusCode};
use hyper::{Body, Request, Response, StatusCode, Uri};
use once_cell::sync::Lazy;
use serde::Serialize;
use serde::Serializer;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fmt::Display;
use std::sync::Arc;
@@ -11,9 +11,11 @@ use crate::safekeeper::Term;
use crate::safekeeper::TermHistory;
use crate::timeline::{GlobalTimelines, TimelineDeleteForceResult};
use crate::SafeKeeperConf;
use etcd_broker::subscription_value::SkTimelineInfo;
use utils::{
auth::JwtAuth,
http::{
endpoint,
endpoint::{self, auth_middleware, check_permission},
error::ApiError,
json::{json_request, json_response},
request::{ensure_no_body, parse_request_param},
@@ -32,6 +34,7 @@ struct SafekeeperStatus {
/// Healthcheck handler.
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let conf = get_conf(&request);
let status = SafekeeperStatus { id: conf.my_id };
json_response(StatusCode::OK, status)
@@ -91,6 +94,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
let (inmem, state) = tli.get_state();
@@ -125,6 +129,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
tenant_id: request_data.tenant_id,
timeline_id: request_data.timeline_id,
};
check_permission(&request, Some(zttid.tenant_id))?;
GlobalTimelines::create(get_conf(&request), zttid, request_data.peer_ids)
.map_err(ApiError::from_err)?;
@@ -145,6 +150,7 @@ async fn timeline_delete_force_handler(
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
ensure_no_body(&mut request).await?;
json_response(
StatusCode::OK,
@@ -160,6 +166,7 @@ async fn tenant_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
ensure_no_body(&mut request).await?;
json_response(
StatusCode::OK,
@@ -178,6 +185,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
check_permission(&request, Some(zttid.tenant_id))?;
let safekeeper_info: SkTimelineInfo = json_request(&mut request).await?;
let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
@@ -188,15 +196,33 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
}
/// Safekeeper http router.
pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
let router = endpoint::make_router();
pub fn make_router(
conf: SafeKeeperConf,
auth: Option<Arc<JwtAuth>>,
) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router();
if auth.is_some() {
router = router.middleware(auth_middleware(|request| {
#[allow(clippy::mutable_key_type)]
static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> =
Lazy::new(|| ["/v1/status"].iter().map(|v| v.parse().unwrap()).collect());
if ALLOWLIST_ROUTES.contains(request.uri()) {
None
} else {
// Option<Arc<JwtAuth>> is always provided as data below, hence unwrap().
request.data::<Option<Arc<JwtAuth>>>().unwrap().as_deref()
}
}))
}
router
.data(Arc::new(conf))
.data(auth)
.get("/v1/status", status_handler)
.get(
"/v1/timeline/:tenant_id/:timeline_id",
timeline_status_handler,
)
// Will be used in the future instead of implicit timeline creation
.post("/v1/timeline", timeline_create_handler)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id",

View File

@@ -57,6 +57,7 @@ pub struct SafeKeeperConf {
pub my_id: NodeId,
pub broker_endpoints: Vec<Url>,
pub broker_etcd_prefix: String,
pub auth_validation_public_key_path: Option<PathBuf>,
}
impl SafeKeeperConf {
@@ -88,6 +89,7 @@ impl Default for SafeKeeperConf {
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
backup_runtime_threads: DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
wal_backup_enabled: true,
auth_validation_public_key_path: None,
}
}
}

View File

@@ -242,9 +242,9 @@ impl Collector for TimelineCollector {
let timeline_id = tli.zttid.timeline_id.to_string();
let labels = &[tenant_id.as_str(), timeline_id.as_str()];
let mut most_advanced: Option<utils::pq_proto::ZenithFeedback> = None;
let mut most_advanced: Option<utils::pq_proto::ReplicationFeedback> = None;
for replica in tli.replicas.iter() {
if let Some(replica_feedback) = replica.zenith_feedback {
if let Some(replica_feedback) = replica.pageserver_feedback {
if let Some(current) = most_advanced {
if current.ps_writelsn < replica_feedback.ps_writelsn {
most_advanced = Some(replica_feedback);

View File

@@ -32,22 +32,14 @@ pub struct ReceiveWalConn<'pg> {
pg_backend: &'pg mut PostgresBackend,
/// The cached result of `pg_backend.socket().peer_addr()` (roughly)
peer_addr: SocketAddr,
/// Pageserver connection string forwarded from compute
/// NOTE that it is allowed to operate without a pageserver.
/// So if compute has no pageserver configured do not use it.
pageserver_connstr: Option<String>,
}
impl<'pg> ReceiveWalConn<'pg> {
pub fn new(
pg: &'pg mut PostgresBackend,
pageserver_connstr: Option<String>,
) -> ReceiveWalConn<'pg> {
pub fn new(pg: &'pg mut PostgresBackend) -> ReceiveWalConn<'pg> {
let peer_addr = *pg.get_peer_addr();
ReceiveWalConn {
pg_backend: pg,
peer_addr,
pageserver_connstr,
}
}
@@ -120,9 +112,7 @@ impl<'pg> ReceiveWalConn<'pg> {
// Register the connection and defer unregister. Do that only
// after processing first message, as it sets wal_seg_size,
// wanted by many.
spg.timeline
.get()
.on_compute_connect(self.pageserver_connstr.as_ref())?;
spg.timeline.get().on_compute_connect()?;
_guard = Some(ComputeConnectionGuard {
timeline: Arc::clone(spg.timeline.get()),
});

View File

@@ -4,7 +4,7 @@ use anyhow::{bail, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use etcd_broker::SkTimelineInfo;
use etcd_broker::subscription_value::SkTimelineInfo;
use postgres_ffi::xlog_utils::TimeLineID;
use postgres_ffi::xlog_utils::XLogSegNo;
@@ -23,7 +23,7 @@ use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
use utils::{
bin_ser::LeSer,
lsn::Lsn,
pq_proto::{SystemId, ZenithFeedback},
pq_proto::{ReplicationFeedback, SystemId},
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};
@@ -348,7 +348,7 @@ pub struct AppendResponse {
// a criterion for walproposer --sync mode exit
pub commit_lsn: Lsn,
pub hs_feedback: HotStandbyFeedback,
pub zenith_feedback: ZenithFeedback,
pub pageserver_feedback: ReplicationFeedback,
}
impl AppendResponse {
@@ -358,7 +358,7 @@ impl AppendResponse {
flush_lsn: Lsn(0),
commit_lsn: Lsn(0),
hs_feedback: HotStandbyFeedback::empty(),
zenith_feedback: ZenithFeedback::empty(),
pageserver_feedback: ReplicationFeedback::empty(),
}
}
}
@@ -476,7 +476,7 @@ impl AcceptorProposerMessage {
buf.put_u64_le(msg.hs_feedback.xmin);
buf.put_u64_le(msg.hs_feedback.catalog_xmin);
msg.zenith_feedback.serialize(buf)?
msg.pageserver_feedback.serialize(buf)?
}
}
@@ -677,7 +677,7 @@ where
commit_lsn: self.state.commit_lsn,
// will be filled by the upper code to avoid bothering safekeeper
hs_feedback: HotStandbyFeedback::empty(),
zenith_feedback: ZenithFeedback::empty(),
pageserver_feedback: ReplicationFeedback::empty(),
};
trace!("formed AppendResponse {:?}", ar);
ar

View File

@@ -21,7 +21,7 @@ use utils::{
bin_ser::BeSer,
lsn::Lsn,
postgres_backend::PostgresBackend,
pq_proto::{BeMessage, FeMessage, WalSndKeepAlive, XLogDataBody, ZenithFeedback},
pq_proto::{BeMessage, FeMessage, ReplicationFeedback, WalSndKeepAlive, XLogDataBody},
sock_split::ReadStream,
};
@@ -29,7 +29,7 @@ use utils::{
const HOT_STANDBY_FEEDBACK_TAG_BYTE: u8 = b'h';
const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r';
// zenith extension of replication protocol
const ZENITH_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
type FullTransactionId = u64;
@@ -122,15 +122,15 @@ impl ReplicationConn {
warn!("unexpected StandbyReply. Read-only postgres replicas are not supported in safekeepers yet.");
// timeline.update_replica_state(replica_id, Some(state));
}
Some(ZENITH_STATUS_UPDATE_TAG_BYTE) => {
Some(NEON_STATUS_UPDATE_TAG_BYTE) => {
// Note: deserializing is on m[9..] because we skip the tag byte and len bytes.
let buf = Bytes::copy_from_slice(&m[9..]);
let reply = ZenithFeedback::parse(buf);
let reply = ReplicationFeedback::parse(buf);
trace!("ZenithFeedback is {:?}", reply);
// Only pageserver sends ZenithFeedback, so set the flag.
trace!("ReplicationFeedback is {:?}", reply);
// Only pageserver sends ReplicationFeedback, so set the flag.
// This replica is the source of information to resend to compute.
state.zenith_feedback = Some(reply);
state.pageserver_feedback = Some(reply);
timeline.update_replica_state(replica_id, state);
}
@@ -162,9 +162,8 @@ impl ReplicationConn {
spg: &mut SafekeeperPostgresHandler,
pgb: &mut PostgresBackend,
mut start_pos: Lsn,
pageserver_connstr: Option<String>,
) -> Result<()> {
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap(), pageserver_connstr = %pageserver_connstr.as_deref().unwrap_or_default()).entered();
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap()).entered();
// spawn the background thread which receives HotStandbyFeedback messages.
let bg_timeline = Arc::clone(spg.timeline.get());

View File

@@ -3,7 +3,7 @@
use anyhow::{bail, Context, Result};
use etcd_broker::SkTimelineInfo;
use etcd_broker::subscription_value::SkTimelineInfo;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::XLogSegNo;
@@ -21,7 +21,7 @@ use tracing::*;
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
pq_proto::ReplicationFeedback,
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};
@@ -48,8 +48,8 @@ pub struct ReplicaState {
pub remote_consistent_lsn: Lsn,
/// combined hot standby feedback from all replicas
pub hs_feedback: HotStandbyFeedback,
/// Zenith specific feedback received from pageserver, if any
pub zenith_feedback: Option<ZenithFeedback>,
/// Replication specific feedback received from pageserver, if any
pub pageserver_feedback: Option<ReplicationFeedback>,
}
impl Default for ReplicaState {
@@ -68,7 +68,7 @@ impl ReplicaState {
xmin: u64::MAX,
catalog_xmin: u64::MAX,
},
zenith_feedback: None,
pageserver_feedback: None,
}
}
}
@@ -95,7 +95,6 @@ struct SharedState {
/// when tli is inactive instead of having this flag.
active: bool,
num_computes: u32,
pageserver_connstr: Option<String>,
last_removed_segno: XLogSegNo,
}
@@ -119,7 +118,6 @@ impl SharedState {
wal_backup_active: false,
active: false,
num_computes: 0,
pageserver_connstr: None,
last_removed_segno: 0,
})
}
@@ -139,7 +137,6 @@ impl SharedState {
wal_backup_active: false,
active: false,
num_computes: 0,
pageserver_connstr: None,
last_removed_segno: 0,
})
}
@@ -152,8 +149,12 @@ impl SharedState {
/// Mark timeline active/inactive and return whether s3 offloading requires
/// start/stop action.
fn update_status(&mut self) -> bool {
self.active = self.is_active();
fn update_status(&mut self, ttid: ZTenantTimelineId) -> bool {
let is_active = self.is_active();
if self.active != is_active {
info!("timeline {} active={} now", ttid, is_active);
}
self.active = is_active;
self.is_wal_backup_action_pending()
}
@@ -190,33 +191,10 @@ impl SharedState {
self.wal_backup_active
}
/// Activate timeline's walsender: start/change timeline information propagated into etcd for further pageserver connections.
fn activate_walsender(
&mut self,
zttid: &ZTenantTimelineId,
new_pageserver_connstr: Option<String>,
) {
if self.pageserver_connstr != new_pageserver_connstr {
self.deactivate_walsender(zttid);
if new_pageserver_connstr.is_some() {
info!(
"timeline {} has activated its walsender with connstr {new_pageserver_connstr:?}",
zttid.timeline_id,
);
}
self.pageserver_connstr = new_pageserver_connstr;
}
}
/// Deactivate the timeline: stop sending the timeline data into etcd, so no pageserver can connect for WAL streaming.
fn deactivate_walsender(&mut self, zttid: &ZTenantTimelineId) {
if let Some(pageserver_connstr) = self.pageserver_connstr.take() {
info!(
"timeline {} had deactivated its wallsender with connstr {pageserver_connstr:?}",
zttid.timeline_id,
)
}
// Can this safekeeper offload to s3? Recently joined safekeepers might not
// have necessary WAL.
fn can_wal_backup(&self) -> bool {
self.sk.state.local_start_lsn <= self.sk.inmem.backup_lsn
}
fn get_wal_seg_size(&self) -> usize {
@@ -243,25 +221,25 @@ impl SharedState {
// we need to know which pageserver compute node considers to be main.
// See https://github.com/zenithdb/zenith/issues/1171
//
if let Some(zenith_feedback) = state.zenith_feedback {
if let Some(acc_feedback) = acc.zenith_feedback {
if acc_feedback.ps_writelsn < zenith_feedback.ps_writelsn {
if let Some(pageserver_feedback) = state.pageserver_feedback {
if let Some(acc_feedback) = acc.pageserver_feedback {
if acc_feedback.ps_writelsn < pageserver_feedback.ps_writelsn {
warn!("More than one pageserver is streaming WAL for the timeline. Feedback resolving is not fully supported yet.");
acc.zenith_feedback = Some(zenith_feedback);
acc.pageserver_feedback = Some(pageserver_feedback);
}
} else {
acc.zenith_feedback = Some(zenith_feedback);
acc.pageserver_feedback = Some(pageserver_feedback);
}
// last lsn received by pageserver
// FIXME if multiple pageservers are streaming WAL, last_received_lsn must be tracked per pageserver.
// See https://github.com/zenithdb/zenith/issues/1171
acc.last_received_lsn = Lsn::from(zenith_feedback.ps_writelsn);
acc.last_received_lsn = Lsn::from(pageserver_feedback.ps_writelsn);
// When at least one pageserver has preserved data up to remote_consistent_lsn,
// safekeeper is free to delete it, so choose max of all pageservers.
acc.remote_consistent_lsn = max(
Lsn::from(zenith_feedback.ps_applylsn),
Lsn::from(pageserver_feedback.ps_applylsn),
acc.remote_consistent_lsn,
);
}
@@ -318,17 +296,12 @@ impl Timeline {
/// Register compute connection, starting timeline-related activity if it is
/// not running yet.
/// Can fail only if channel to a static thread got closed, which is not normal at all.
pub fn on_compute_connect(&self, pageserver_connstr: Option<&String>) -> Result<()> {
pub fn on_compute_connect(&self) -> Result<()> {
let is_wal_backup_action_pending: bool;
{
let mut shared_state = self.mutex.lock().unwrap();
shared_state.num_computes += 1;
is_wal_backup_action_pending = shared_state.update_status();
// FIXME: currently we always adopt latest pageserver connstr, but we
// should have kind of generations assigned by compute to distinguish
// the latest one or even pass it through consensus to reliably deliver
// to all safekeepers.
shared_state.activate_walsender(&self.zttid, pageserver_connstr.cloned());
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
}
// Wake up wal backup launcher, if offloading not started yet.
if is_wal_backup_action_pending {
@@ -345,7 +318,7 @@ impl Timeline {
{
let mut shared_state = self.mutex.lock().unwrap();
shared_state.num_computes -= 1;
is_wal_backup_action_pending = shared_state.update_status();
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
}
// Wake up wal backup launcher, if it is time to stop the offloading.
if is_wal_backup_action_pending {
@@ -364,7 +337,7 @@ impl Timeline {
(replica_state.remote_consistent_lsn != Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
replica_state.remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
if stop {
shared_state.deactivate_walsender(&self.zttid);
shared_state.update_status(self.zttid);
return Ok(true);
}
}
@@ -378,6 +351,12 @@ impl Timeline {
shared_state.wal_backup_attend()
}
// Can this safekeeper offload to s3? Recently joined safekeepers might not
// have necessary WAL.
pub fn can_wal_backup(&self) -> bool {
self.mutex.lock().unwrap().can_wal_backup()
}
/// Deactivates the timeline, assuming it is being deleted.
/// Returns whether the timeline was already active.
///
@@ -478,8 +457,8 @@ impl Timeline {
if let Some(AcceptorProposerMessage::AppendResponse(ref mut resp)) = rmsg {
let state = shared_state.get_replicas_state();
resp.hs_feedback = state.hs_feedback;
if let Some(zenith_feedback) = state.zenith_feedback {
resp.zenith_feedback = zenith_feedback;
if let Some(pageserver_feedback) = state.pageserver_feedback {
resp.pageserver_feedback = pageserver_feedback;
}
}
@@ -525,7 +504,6 @@ impl Timeline {
)),
peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
pageserver_connstr: shared_state.pageserver_connstr.clone(),
backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
})
}
@@ -547,7 +525,7 @@ impl Timeline {
}
shared_state.sk.record_safekeeper_info(sk_info)?;
self.notify_wal_senders(&mut shared_state);
is_wal_backup_action_pending = shared_state.update_status();
is_wal_backup_action_pending = shared_state.update_status(self.zttid);
commit_lsn = shared_state.sk.inmem.commit_lsn;
}
self.commit_lsn_watch_tx.send(commit_lsn)?;

View File

@@ -1,4 +1,7 @@
use anyhow::{Context, Result};
use etcd_broker::subscription_key::{
NodeKind, OperationKind, SkOperationKind, SubscriptionKey, SubscriptionKind,
};
use tokio::task::JoinHandle;
use std::cmp::min;
@@ -26,8 +29,6 @@ use crate::{broker, SafeKeeperConf};
use once_cell::sync::OnceCell;
const BACKUP_ELECTION_NAME: &str = "WAL_BACKUP";
const BROKER_CONNECTION_RETRY_DELAY_MS: u64 = 1000;
const UPLOAD_FAILURE_RETRY_MIN_MS: u64 = 10;
@@ -48,14 +49,10 @@ pub fn wal_backup_launcher_thread_main(
});
}
/// Check whether wal backup is required for timeline and mark that launcher is
/// aware of current status (if timeline exists).
fn is_wal_backup_required(zttid: ZTenantTimelineId) -> bool {
if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
tli.wal_backup_attend()
} else {
false
}
/// Check whether wal backup is required for timeline. If yes, mark that launcher is
/// aware of current status and return the timeline.
fn is_wal_backup_required(zttid: ZTenantTimelineId) -> Option<Arc<Timeline>> {
GlobalTimelines::get_loaded(zttid).filter(|t| t.wal_backup_attend())
}
struct WalBackupTaskHandle {
@@ -63,6 +60,56 @@ struct WalBackupTaskHandle {
handle: JoinHandle<()>,
}
struct WalBackupTimelineEntry {
timeline: Arc<Timeline>,
handle: Option<WalBackupTaskHandle>,
}
/// Start per timeline task, if it makes sense for this safekeeper to offload.
fn consider_start_task(
conf: &SafeKeeperConf,
zttid: ZTenantTimelineId,
task: &mut WalBackupTimelineEntry,
) {
if !task.timeline.can_wal_backup() {
return;
}
info!("starting WAL backup task for {}", zttid);
// TODO: decide who should offload right here by simply checking current
// state instead of running elections in offloading task.
let election_name = SubscriptionKey {
cluster_prefix: conf.broker_etcd_prefix.clone(),
kind: SubscriptionKind::Operation(
zttid,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::WalBackup),
),
}
.watch_key();
let my_candidate_name = broker::get_candiate_name(conf.my_id);
let election = broker::Election::new(
election_name,
my_candidate_name,
conf.broker_endpoints.clone(),
);
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
let timeline_dir = conf.timeline_dir(&zttid);
let handle = tokio::spawn(
backup_task_main(zttid, timeline_dir, shutdown_rx, election)
.instrument(info_span!("WAL backup task", zttid = %zttid)),
);
task.handle = Some(WalBackupTaskHandle {
shutdown_tx,
handle,
});
}
const CHECK_TASKS_INTERVAL_MSEC: u64 = 1000;
/// Sits on wal_backup_launcher_rx and starts/stops per timeline wal backup
/// tasks. Having this in a separate task simplifies locking, allows reaping
/// panics, and separates elections from offloading itself.
@@ -71,7 +118,7 @@ async fn wal_backup_launcher_main_loop(
mut wal_backup_launcher_rx: Receiver<ZTenantTimelineId>,
) {
info!(
"WAL backup launcher: started, remote config {:?}",
"WAL backup launcher started, remote config {:?}",
conf.remote_storage
);
@@ -82,60 +129,50 @@ async fn wal_backup_launcher_main_loop(
})
});
let mut tasks: HashMap<ZTenantTimelineId, WalBackupTaskHandle> = HashMap::new();
// Presence in this map means the launcher is aware s3 offloading is needed for
// the timeline, but a task is started only if it makes sense to offload
// from this safekeeper.
let mut tasks: HashMap<ZTenantTimelineId, WalBackupTimelineEntry> = HashMap::new();
let mut ticker = tokio::time::interval(Duration::from_millis(CHECK_TASKS_INTERVAL_MSEC));
loop {
// channel is never expected to get closed
let zttid = wal_backup_launcher_rx.recv().await.unwrap();
let is_wal_backup_required = is_wal_backup_required(zttid);
if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
continue; /* just drain the channel and do nothing */
}
// do we need to do anything at all?
if is_wal_backup_required != tasks.contains_key(&zttid) {
if is_wal_backup_required {
// need to start the task
info!("starting WAL backup task for {}", zttid);
tokio::select! {
zttid = wal_backup_launcher_rx.recv() => {
// channel is never expected to get closed
let zttid = zttid.unwrap();
if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
continue; /* just drain the channel and do nothing */
}
let timeline = is_wal_backup_required(zttid);
// do we need to do anything at all?
if timeline.is_some() != tasks.contains_key(&zttid) {
if let Some(timeline) = timeline {
// need to start the task
let entry = tasks.entry(zttid).or_insert(WalBackupTimelineEntry {
timeline,
handle: None,
});
consider_start_task(&conf, zttid, entry);
} else {
// need to stop the task
info!("stopping WAL backup task for {}", zttid);
// TODO: decide who should offload in launcher itself by simply checking current state
let election_name = broker::get_campaign_name(
BACKUP_ELECTION_NAME.to_string(),
conf.broker_etcd_prefix.clone(),
&zttid,
);
let my_candidate_name = broker::get_candiate_name(conf.my_id);
let election = broker::Election::new(
election_name,
my_candidate_name,
conf.broker_endpoints.clone(),
);
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
let timeline_dir = conf.timeline_dir(&zttid);
let handle = tokio::spawn(
backup_task_main(zttid, timeline_dir, shutdown_rx, election)
.instrument(info_span!("WAL backup task", zttid = %zttid)),
);
tasks.insert(
zttid,
WalBackupTaskHandle {
shutdown_tx,
handle,
},
);
} else {
// need to stop the task
info!("stopping WAL backup task for {}", zttid);
let wb_handle = tasks.remove(&zttid).unwrap();
// Tell the task to shutdown. Error means task exited earlier, that's ok.
let _ = wb_handle.shutdown_tx.send(()).await;
// Await the task itself. TODO: restart panicked tasks earlier.
// Hm, why I can't await on reference to handle?
if let Err(e) = wb_handle.handle.await {
warn!("WAL backup task for {} panicked: {}", zttid, e);
let entry = tasks.remove(&zttid).unwrap();
if let Some(wb_handle) = entry.handle {
// Tell the task to shutdown. Error means task exited earlier, that's ok.
let _ = wb_handle.shutdown_tx.send(()).await;
// Await the task itself. TODO: restart panicked tasks earlier.
if let Err(e) = wb_handle.handle.await {
warn!("WAL backup task for {} panicked: {}", zttid, e);
}
}
}
}
}
// Start known tasks, if needed and possible.
_ = ticker.tick() => {
for (zttid, entry) in tasks.iter_mut().filter(|(_, entry)| entry.handle.is_none()) {
consider_start_task(&conf, *zttid, entry);
}
}
}
@@ -200,20 +237,11 @@ impl WalBackupTask {
loop {
let mut retry_attempt = 0u32;
if let Some(l) = self.leader.take() {
l.give_up().await;
}
info!("acquiring leadership");
match broker::get_leader(&self.election).await {
Ok(l) => {
self.leader = Some(l);
}
Err(e) => {
error!("error during leader election {:?}", e);
sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
continue;
}
if let Err(e) = broker::get_leader(&self.election, &mut self.leader).await {
error!("error during leader election {:?}", e);
sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
continue;
}
info!("acquired leadership");

View File

@@ -26,6 +26,7 @@ KEY_EXCLUDE_FIELDS = frozenset({
})
NEGATIVE_COLOR = 'negative'
POSITIVE_COLOR = 'positive'
EPS = 1e-6
@dataclass
@@ -120,7 +121,8 @@ def get_row_values(columns: List[str], run_result: SuitRun,
# this might happen when a new metric is added and there is no value for it in the previous run
# leave this here for now; TODO: add proper handling when this actually happens
raise ValueError(f'{column} not found in previous result')
ratio = float(value) / float(prev_value['value']) - 1
# adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero
ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1
ratio_display, color = format_ratio(ratio, current_value['report'])
row_values.append(RowValue(value, color, ratio_display))
return row_values
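
For context, a standalone sketch of the guarded ratio calculation (the `safe_ratio` helper name is illustrative, not part of the script):

```python
EPS = 1e-6

def safe_ratio(value: float, prev_value: float) -> float:
    # Add EPS to both terms so a zero previous value cannot raise ZeroDivisionError
    return (float(value) + EPS) / (float(prev_value) + EPS) - 1

assert safe_ratio(10.0, 10.0) == 0.0   # unchanged metric -> ratio 0
assert safe_ratio(0.0, 0.0) == 0.0     # both zero -> treated as unchanged
print(safe_ratio(10.0, 0.0))           # finite large ratio instead of a crash
```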

View File

@@ -1,14 +1,14 @@
## Zenith test runner
## Neon test runner
This directory contains integration tests.
Prerequisites:
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
- Zenith and Postgres binaries
- Neon and Postgres binaries
- See the root [README.md](/README.md) for build directions
- Tests can be run from the git tree; or see the environment variables
below to run from other directories.
- The zenith git repo, including the postgres submodule
- The neon git repo, including the postgres submodule
(for some tests, e.g. `pg_regress`)
- Some tests (involving storage node coordination) require etcd to be installed. Follow
[`the guide`](https://etcd.io/docs/v3.5/install/) to obtain it.
@@ -51,8 +51,8 @@ Useful environment variables:
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
`--pageserver-config-override=${value}` parameter values when zenith cli is invoked
`RUST_LOG`: logging configuration to pass into Zenith CLI
`--pageserver-config-override=${value}` parameter values when neon_local cli is invoked
`RUST_LOG`: logging configuration to pass into Neon CLI
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
`./scripts/pytest -s --log-cli-level=INFO ...`
@@ -65,32 +65,32 @@ Exit after the first test failure:
### Writing a test
Every test needs a Zenith Environment, or ZenithEnv to operate in. A Zenith Environment
Every test needs a Neon Environment, or NeonEnv to operate in. A Neon Environment
is like a little cloud-in-a-box, and consists of a Pageserver, 0-N Safekeepers, and
compute Postgres nodes. The connections between them can be configured to use JWT
authentication tokens, and some other configuration options can be tweaked too.
The easiest way to get access to a Zenith Environment is by using the `zenith_simple_env`
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
or make other destructive changes in that environment. Also don't assume that
there are no tenants or branches or data in the cluster. For convenience, there is a
branch called `empty`, though. The convention is to create a test-specific branch of
that and load any test data there, instead of the 'main' branch.
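
For example, a minimal sketch of that convention (the test name and table are illustrative):

```python
from fixtures.neon_fixtures import NeonEnv

def test_my_feature(neon_simple_env: NeonEnv):
    env = neon_simple_env
    # Branch off 'empty' so the shared environment stays untouched
    env.neon_cli.create_branch("test_my_feature", "empty")
    pg = env.postgres.create_start("test_my_feature")

    cur = pg.connect().cursor()
    cur.execute("CREATE TABLE t (x int)")
    cur.execute("INSERT INTO t SELECT generate_series(1, 100)")
    cur.execute("SELECT count(*) FROM t")
    assert cur.fetchone() == (100, )
```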
For more complicated cases, you can build a custom Zenith Environment, with the `zenith_env`
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
fixture:
```python
def test_foobar(zenith_env_builder: ZenithEnvBuilder):
def test_foobar(neon_env_builder: NeonEnvBuilder):
# Prescribe the environment.
# We want to have 3 safekeeper nodes, and use JWT authentication in the
# connections to the page server
zenith_env_builder.num_safekeepers = 3
zenith_env_builder.set_pageserver_auth(True)
neon_env_builder.num_safekeepers = 3
neon_env_builder.set_pageserver_auth(True)
# Now create the environment. This initializes the repository, and starts
# up the page server and the safekeepers
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
# Run the test
...

View File

@@ -3,18 +3,18 @@ from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
#
# Create ancestor branches off the main branch.
#
def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Override defaults, 1M gc_horizon and 4M checkpoint_distance.
# Extend compaction_period and gc_period to disable background compaction and gc.
tenant, _ = env.zenith_cli.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '10 m',
'gc_horizon': '1048576',
@@ -24,7 +24,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
'compaction_target_size': '4194304',
})
env.pageserver.safe_psql("failpoints flush-frozen=sleep(10000)")
env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)")
pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
branch0_cur = pg_branch0.connect().cursor()
@@ -48,7 +48,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 100k rows: {lsn_100}')
# Create branch1.
env.zenith_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
log.info("postgres is running on 'branch1' branch")
@@ -72,7 +72,7 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 200k rows: {lsn_200}')
# Create branch2.
env.zenith_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
log.info("postgres is running on 'branch2' branch")
branch2_cur = pg_branch2.connect().cursor()
@@ -110,15 +110,14 @@ def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
assert branch2_cur.fetchone() == (300000, )
def test_ancestor_branch_detach(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_ancestor_branch_detach(neon_simple_env: NeonEnv):
env = neon_simple_env
parent_timeline_id = env.zenith_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
env.zenith_cli.create_branch("test_ancestor_branch_detach_branch1",
"test_ancestor_branch_detach_parent")
env.neon_cli.create_branch("test_ancestor_branch_detach_branch1",
"test_ancestor_branch_detach_parent")
ps_http = env.pageserver.http_client()
with pytest.raises(ZenithPageserverApiException,
match="Failed to detach inmem tenant timeline"):
with pytest.raises(NeonPageserverApiException, match="Failed to detach inmem tenant timeline"):
ps_http.timeline_detach(env.initial_tenant, parent_timeline_id)

View File

@@ -1,14 +1,14 @@
from contextlib import closing
from typing import Iterator
from uuid import UUID, uuid4
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
from requests.exceptions import HTTPError
import pytest
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
ps = env.pageserver
@@ -25,8 +25,8 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
ps.safe_psql("set FOO", password=tenant_token)
ps.safe_psql("set FOO", password=management_token)
new_timeline_id = env.zenith_cli.create_branch('test_pageserver_auth',
tenant_id=env.initial_tenant)
new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth',
tenant_id=env.initial_tenant)
# tenant can create branches
tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
@@ -36,7 +36,7 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
ancestor_timeline_id=new_timeline_id)
# fail to create branch using token with different tenant_id
with pytest.raises(ZenithPageserverApiException,
with pytest.raises(NeonPageserverApiException,
match='Forbidden: Tenant id mismatch. Permission denied'):
invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
ancestor_timeline_id=new_timeline_id)
@@ -46,21 +46,21 @@ def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
# fail to create tenant using tenant token
with pytest.raises(
ZenithPageserverApiException,
NeonPageserverApiException,
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
):
tenant_http_client.tenant_create()
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
zenith_env_builder.pageserver_auth_enabled = True
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
neon_env_builder.auth_enabled = True
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
env.zenith_cli.create_branch(branch)
env.neon_cli.create_branch(branch)
pg = env.postgres.create_start(branch)
with closing(pg.connect()) as conn:

View File

@@ -1,15 +1,15 @@
from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.zenith_fixtures import PgProtocol, ZenithEnvBuilder
from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.zenith_fixtures import Postgres
from fixtures.neon_fixtures import Postgres
import threading
pytest_plugins = ("fixtures.zenith_fixtures")
pytest_plugins = ("fixtures.neon_fixtures")
@contextmanager
@@ -26,7 +26,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
log.info("checks started")
with pg_cur(pg) as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
res = cur.fetchone()
@@ -93,10 +93,10 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/1587")
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.zenith_cli.create_branch('test_backpressure')
env.neon_cli.create_branch('test_backpressure')
pg = env.postgres.create_start('test_backpressure',
config_lines=['max_replication_write_lag=30MB'])

View File

@@ -1,7 +1,7 @@
import pytest
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
@@ -9,9 +9,9 @@ from fixtures.log_helper import log
# Test error handling, if the 'basebackup' command fails in the middle
# of building the tar archive.
#
def test_basebackup_error(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_basebackup_error", "empty")
def test_basebackup_error(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_basebackup_error", "empty")
# Introduce failpoint
env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return")

View File

@@ -5,26 +5,26 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
#
# Create a couple of branches off the main branch, at a historical point in time.
#
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# Use safekeeper in this test to avoid a subtle race condition.
# Without safekeeper, walreceiver reconnection can get stuck
# because of an IO deadlock.
#
# See https://github.com/zenithdb/zenith/issues/1068
zenith_env_builder.num_safekeepers = 1
# See https://github.com/neondatabase/neon/issues/1068
neon_env_builder.num_safekeepers = 1
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind')
env.neon_cli.create_branch('test_branch_behind')
pgmain = env.postgres.create_start('test_branch_behind')
log.info("postgres is running on 'test_branch_behind' branch")
@@ -61,9 +61,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 200100 rows: {lsn_b}')
# Branch at the point where only 100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind_hundred',
'test_branch_behind',
ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch('test_branch_behind_hundred',
'test_branch_behind',
ancestor_start_lsn=lsn_a)
# Insert many more rows. This generates enough WAL to fill a few segments.
main_cur.execute('''
@@ -78,9 +78,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
log.info(f'LSN after 400100 rows: {lsn_c}')
# Branch at the point where only 200100 rows were inserted
env.zenith_cli.create_branch('test_branch_behind_more',
'test_branch_behind',
ancestor_start_lsn=lsn_b)
env.neon_cli.create_branch('test_branch_behind_more',
'test_branch_behind',
ancestor_start_lsn=lsn_b)
pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
pg_more = env.postgres.create_start('test_branch_behind_more')
@@ -104,9 +104,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# Check bad lsn's for branching
# branch at segment boundary
env.zenith_cli.create_branch('test_branch_segment_boundary',
'test_branch_behind',
ancestor_start_lsn="0/3000000")
env.neon_cli.create_branch('test_branch_segment_boundary',
'test_branch_behind',
ancestor_start_lsn="0/3000000")
pg = env.postgres.create_start('test_branch_segment_boundary')
cur = pg.connect().cursor()
cur.execute('SELECT 1')
@@ -114,13 +114,13 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
# branch at pre-initdb lsn
with pytest.raises(Exception, match="invalid branch start lsn"):
env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.zenith_cli.create_branch('test_branch_preinitdb',
'test_branch_behind',
ancestor_start_lsn="0/42")
env.neon_cli.create_branch('test_branch_preinitdb',
'test_branch_behind',
ancestor_start_lsn="0/42")
# check that we cannot create branch based on garbage collected data
with closing(env.pageserver.connect()) as psconn:
@@ -132,9 +132,9 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
with pytest.raises(Exception, match="invalid branch start lsn"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.zenith_cli.create_branch('test_branch_create_fail',
'test_branch_behind',
ancestor_start_lsn=gced_lsn)
env.neon_cli.create_branch('test_branch_create_fail',
'test_branch_behind',
ancestor_start_lsn=gced_lsn)
# check that after gc everything is still there
hundred_cur.execute('SELECT count(*) FROM foo')

View File

@@ -1,22 +1,22 @@
import pytest
import concurrent.futures
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv
from fixtures.log_helper import log
import os
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# One safekeeper is enough for this test.
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
tenant_timelines = []
for n in range(4):
tenant_id_uuid, timeline_id_uuid = env.zenith_cli.create_tenant()
tenant_id_uuid, timeline_id_uuid = env.neon_cli.create_tenant()
tenant_id = tenant_id_uuid.hex
timeline_id = timeline_id_uuid.hex
@@ -81,14 +81,14 @@ def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
log.info(f'compute startup failed as expected: {err}')
def test_create_multiple_timelines_parallel(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(env.zenith_cli.create_timeline,
executor.submit(env.neon_cli.create_timeline,
f"test-create-multiple-timelines-{i}",
tenant_id) for i in range(4)
]
@@ -96,20 +96,20 @@ def test_create_multiple_timelines_parallel(zenith_simple_env: ZenithEnv):
future.result()
def test_fix_broken_timelines_on_startup(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
tenant_id, _ = env.neon_cli.create_tenant()
# Introduce failpoint when creating a new timeline
env.pageserver.safe_psql(f"failpoints before-checkpoint-new-timeline=return")
with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
_ = env.zenith_cli.create_timeline("test_fix_broken_timelines", tenant_id)
_ = env.neon_cli.create_timeline("test_fix_broken_timelines", tenant_id)
# Restart the page server
env.zenith_cli.pageserver_stop(immediate=True)
env.zenith_cli.pageserver_start()
env.neon_cli.pageserver_stop(immediate=True)
env.neon_cli.pageserver_start()
# Check that the "broken" timeline is not loaded
timelines = env.zenith_cli.list_timelines(tenant_id)
timelines = env.neon_cli.list_timelines(tenant_id)
assert len(timelines) == 1

View File

@@ -3,16 +3,16 @@ import os
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test compute node start after clog truncation
#
def test_clog_truncate(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_clog_truncate', 'empty')
def test_clog_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_clog_truncate', 'empty')
# set aggressive autovacuum to make sure that truncation will happen
config = [
@@ -62,9 +62,9 @@ def test_clog_truncate(zenith_simple_env: ZenithEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
env.zenith_cli.create_branch('test_clog_truncate_new',
'test_clog_truncate',
ancestor_start_lsn=lsn_after_truncation)
env.neon_cli.create_branch('test_clog_truncate_new',
'test_clog_truncate',
ancestor_start_lsn=lsn_after_truncation)
pg2 = env.postgres.create_start('test_clog_truncate_new')
log.info('postgres is running on test_clog_truncate_new branch')

View File

@@ -1,15 +1,15 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test starting Postgres with custom options
#
def test_config(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_config", "empty")
def test_config(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
# change config
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])

View File

@@ -2,16 +2,16 @@ import os
import pathlib
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
#
# Test CREATE DATABASE when there have been relmapper changes
#
def test_createdb(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_createdb', 'empty')
def test_createdb(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createdb', 'empty')
pg = env.postgres.create_start('test_createdb')
log.info("postgres is running on 'test_createdb' branch")
@@ -27,7 +27,7 @@ def test_createdb(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createdb2')
# Test that you can connect to the new database on both branches
@@ -40,16 +40,16 @@ def test_createdb(zenith_simple_env: ZenithEnv):
('foodb', ))
res = cur.fetchone()
# check that dbsize equals sum of all relation sizes, excluding shared ones
# This is how we define dbsize in zenith for now
# This is how we define dbsize in neon for now
assert res[0] == res[1]
#
# Test DROP DATABASE
#
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch('test_dropdb', 'empty')
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_dropdb', 'empty')
pg = env.postgres.create_start('test_dropdb')
log.info("postgres is running on 'test_dropdb' branch")
@@ -73,14 +73,14 @@ def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
lsn_after_drop = cur.fetchone()[0]
# Create two branches before and after database drop.
env.zenith_cli.create_branch('test_before_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_before_drop)
env.neon_cli.create_branch('test_before_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_before_drop)
pg_before = env.postgres.create_start('test_before_dropdb')
env.zenith_cli.create_branch('test_after_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_after_drop)
env.neon_cli.create_branch('test_after_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_after_drop)
pg_after = env.postgres.create_start('test_after_dropdb')
# Test that database exists on the branch before drop

View File

@@ -1,15 +1,15 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test CREATE USER to check shared catalog restore
#
def test_createuser(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_createuser', 'empty')
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createuser', 'empty')
pg = env.postgres.create_start('test_createuser')
log.info("postgres is running on 'test_createuser' branch")
@@ -24,7 +24,7 @@ def test_createuser(zenith_simple_env: ZenithEnv):
lsn = cur.fetchone()[0]
# Create a branch
env.zenith_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createuser2')
# Test that you can connect to new branch as a new user

View File

@@ -1,7 +1,7 @@
import asyncio
import random
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
# Test configuration
@@ -27,7 +27,7 @@ async def update_table(pg: Postgres):
# Perform aggressive GC with 0 horizon
async def gc(env: ZenithEnv, timeline: str):
async def gc(env: NeonEnv, timeline: str):
psconn = await env.pageserver.connect_async()
while updates_performed < updates_to_perform:
@@ -35,7 +35,7 @@ async def gc(env: ZenithEnv, timeline: str):
# At the same time, run UPDATEs and GC
async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: str):
workers = []
for worker_id in range(num_connections):
workers.append(asyncio.create_task(update_table(pg)))
@@ -48,14 +48,14 @@ async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
#
# Aggressively force GC, while running queries.
#
# (repro for https://github.com/zenithdb/zenith/issues/1047)
# (repro for https://github.com/neondatabase/neon/issues/1047)
#
def test_gc_aggressive(zenith_env_builder: ZenithEnvBuilder):
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_gc_aggressive", "main")
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_gc_aggressive", "main")
pg = env.postgres.create_start('test_gc_aggressive')
log.info('postgres is running on test_gc_aggressive branch')
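For context on the update_and_gc helper above: the test runs several UPDATE workers and an aggressive GC loop concurrently with asyncio. A self-contained sketch of just that concurrency shape (the worker and GC bodies are placeholders; the real ones issue UPDATEs and pageserver GC calls as shown above):

import asyncio

async def update_worker(iterations: int) -> None:
    # Placeholder for update_table(pg): each worker does its share of UPDATEs.
    for _ in range(iterations):
        await asyncio.sleep(0)

async def gc_loop(stop: asyncio.Event) -> None:
    # Placeholder for gc(env, timeline): keep forcing GC until the updates finish.
    while not stop.is_set():
        await asyncio.sleep(0)

async def update_and_gc_sketch(num_connections: int = 10) -> None:
    stop = asyncio.Event()
    workers = [asyncio.create_task(update_worker(100)) for _ in range(num_connections)]
    gc_task = asyncio.create_task(gc_loop(stop))
    await asyncio.gather(*workers)   # same shape as update_and_gc() in the file above
    stop.set()
    await gc_task

asyncio.run(update_and_gc_sketch())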


@@ -4,7 +4,7 @@ import math
from uuid import UUID
import psycopg2.extras
import psycopg2.errors
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
import time
@@ -12,11 +12,11 @@ import time
#
# Test pageserver get_lsn_by_timestamp API
#
def test_lsn_mapping(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
new_timeline_id = env.zenith_cli.create_branch('test_lsn_mapping')
new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping')
pgmain = env.postgres.create_start("test_lsn_mapping")
log.info("postgres is running on 'test_lsn_mapping' branch")


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
@@ -8,9 +8,9 @@ from fixtures.log_helper import log
# it only checks next_multixact_id field in restored pg_control,
# since we don't have functions to check multixact internals.
#
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch('test_multixact', 'empty')
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_multixact', 'empty')
pg = env.postgres.create_start('test_multixact')
log.info("postgres is running on 'test_multixact' branch")
@@ -60,7 +60,7 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.zenith_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
pg_new = env.postgres.create_start('test_multixact_new')
log.info("postgres is running on 'test_multixact_new' branch")


@@ -1,12 +1,12 @@
import uuid
import requests
from fixtures.zenith_fixtures import DEFAULT_BRANCH_NAME, ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
from typing import cast
def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpClient,
env: ZenithEnv,
def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient,
env: NeonEnv,
initial_tenant: uuid.UUID):
"""
Compare timelines list returned by CLI and directly via API.
@@ -17,65 +17,65 @@ def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpCli
map(lambda t: cast(str, t['timeline_id']),
pageserver_http_client.timeline_list(initial_tenant)))
timelines_cli = env.zenith_cli.list_timelines()
assert timelines_cli == env.zenith_cli.list_timelines(initial_tenant)
timelines_cli = env.neon_cli.list_timelines()
assert timelines_cli == env.neon_cli.list_timelines(initial_tenant)
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
assert timelines_api == cli_timeline_ids
def test_cli_timeline_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_cli_timeline_list(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
# Initial sanity check
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_main')
main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main')
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_nested',
'test_cli_branch_list_main')
nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested',
'test_cli_branch_list_main')
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
timelines_cli = [timeline_id for (_, timeline_id) in env.zenith_cli.list_timelines()]
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
assert main_timeline_id.hex in timelines_cli
assert nested_timeline_id.hex in timelines_cli
def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClient, env: ZenithEnv):
def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
res = env.zenith_cli.list_tenants()
res = env.neon_cli.list_tenants()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants_api == tenants_cli
def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_cli_tenant_list(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
# Initial sanity check
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant1, _ = env.zenith_cli.create_tenant()
tenant1, _ = env.neon_cli.create_tenant()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant2, _ = env.zenith_cli.create_tenant()
tenant2, _ = env.neon_cli.create_tenant()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
res = env.zenith_cli.list_tenants()
res = env.neon_cli.list_tenants()
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert env.initial_tenant.hex in tenants
@@ -83,18 +83,18 @@ def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
assert tenant2.hex in tenants
def test_cli_tenant_create(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
tenant_id, _ = env.zenith_cli.create_tenant()
timelines = env.zenith_cli.list_timelines(tenant_id)
def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)
# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
assert timelines[0][0] == DEFAULT_BRANCH_NAME
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Connect to sk port on v4 loopback
res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
@@ -108,17 +108,17 @@ def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
# assert res.ok
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Stop default ps/sk
env.zenith_cli.pageserver_stop()
env.zenith_cli.safekeeper_stop()
env.neon_cli.pageserver_stop()
env.neon_cli.safekeeper_stop()
# Default start
res = env.zenith_cli.raw_cli(["start"])
res = env.neon_cli.raw_cli(["start"])
res.check_returncode()
# Default stop
res = env.zenith_cli.raw_cli(["stop"])
res = env.neon_cli.raw_cli(["stop"])
res.check_returncode()


@@ -1,12 +1,12 @@
import time
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_next_xid(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')


@@ -1,9 +1,10 @@
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
import pytest
def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
tenant_id, timeline_id = env.zenith_cli.create_tenant()
def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start('main', tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = pg.safe_psql_many(queries=[
@@ -26,7 +27,8 @@ def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
pageserver_http.timeline_detach(tenant_id, timeline_id)
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
"""
Basic test:
* create new tenant with a timeline
@@ -40,8 +42,9 @@ def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
Repeat check for several tenants/timelines.
"""
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = num_safekeepers
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
for _ in range(3):
for _ in range(num_timelines):
check_tenant(env, pageserver_http)
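The @pytest.mark.parametrize decorator added above is what feeds num_timelines and num_safekeepers into the test. As a small, Neon-independent illustration, each tuple in the list becomes its own test case:

import pytest

# Collected as test_shape[3-1] and test_shape[5-3]; adding tuples adds cases.
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1), (5, 3)])
def test_shape(num_timelines: int, num_safekeepers: int):
    assert num_timelines > 0 and num_safekeepers > 0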


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
import psycopg2.extras
@@ -14,11 +14,11 @@ import psycopg2.extras
# just a hint that the page hasn't been modified since that LSN, and the page
# server should return the latest page version regardless of the LSN.
#
def test_old_request_lsn(zenith_env_builder: ZenithEnvBuilder):
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_old_request_lsn", "main")
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_old_request_lsn", "main")
pg = env.postgres.create_start('test_old_request_lsn')
log.info('postgres is running on test_old_request_lsn branch')


@@ -2,26 +2,26 @@ from typing import Optional
from uuid import uuid4, UUID
import pytest
from fixtures.utils import lsn_from_hex
from fixtures.zenith_fixtures import (
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
ZenithEnv,
ZenithEnvBuilder,
ZenithPageserverHttpClient,
ZenithPageserverApiException,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
NeonPageserverApiException,
wait_until,
)
# test that we cannot override node id
def test_pageserver_init_node_id(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init()
def test_pageserver_init_node_id(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init()
with pytest.raises(
Exception,
match="node id can only be set during pageserver init and cannot be overridden"):
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
client.check_status()
# check initial tenant is there
@@ -57,11 +57,11 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
assert local_timeline_details['timeline_state'] == 'Loaded'
def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
tenant_id, timeline_id = env.zenith_cli.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
empty_response = client.wal_receiver_get(tenant_id, timeline_id)
@@ -70,11 +70,11 @@ def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv
assert empty_response.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
tenant_id, timeline_id = env.zenith_cli.create_tenant()
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
@@ -107,15 +107,15 @@ def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
env = neon_simple_env
client = env.pageserver.http_client()
check_client(client, env.initial_tenant)
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.pageserver_auth_enabled = True
env = zenith_env_builder.init_start()
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
management_token = env.auth_keys.generate_management_token()
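The hunk is cut off here; presumably the management token is then handed to an authenticated HTTP client. A hedged sketch of that step, assuming http_client() accepts an auth_token keyword the way the NeonPageserverHttpClient constructor used later in this diff does:

def check_authenticated_client_sketch(env):
    # Assumption: env.pageserver.http_client(auth_token=...) exists, mirroring
    # NeonPageserverHttpClient(port=..., auth_token=...) seen elsewhere in this diff.
    management_token = env.auth_keys.generate_management_token()
    client = env.pageserver.http_client(auth_token=management_token)
    client.check_status()   # same smoke checks as check_client() above
    client.tenant_list()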


@@ -1,15 +1,15 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
# Test safekeeper sync and pageserver catch up
# while initial compute node is down and pageserver is lagging behind safekeepers.
# Ensure that basebackup after restart of all components is correct
# and new compute node contains all data.
def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down')
# Make shared_buffers large to ensure we won't query pageserver while it is down.
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down',
config_lines=['shared_buffers=512MB'])


@@ -1,13 +1,13 @@
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_pageserver_restart')
env.neon_cli.create_branch('test_pageserver_restart')
pg = env.postgres.create_start('test_pageserver_restart')
pg_conn = pg.connect()


@@ -1,6 +1,6 @@
from io import BytesIO
import asyncio
from fixtures.zenith_fixtures import ZenithEnv, Postgres
from fixtures.neon_fixtures import NeonEnv, Postgres
from fixtures.log_helper import log
@@ -38,9 +38,9 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
env = zenith_simple_env
env.zenith_cli.create_branch("test_parallel_copy", "empty")
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
env.neon_cli.create_branch("test_parallel_copy", "empty")
pg = env.postgres.create_start('test_parallel_copy')
log.info("postgres is running on 'test_parallel_copy' branch")


@@ -5,20 +5,20 @@ import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
#
# Check pitr_interval GC behavior.
# Insert some data, run GC and create a branch in the past.
#
def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
zenith_env_builder.num_safekeepers = 1
neon_env_builder.num_safekeepers = 1
# Set pitr interval such that we need to keep the data
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
pgmain = env.postgres.create_start('main')
log.info("postgres is running on 'main' branch")
@@ -62,7 +62,7 @@ def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.zenith_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
pg_hundred = env.postgres.create_start('test_pitr_gc_hundred')


@@ -1,12 +1,12 @@
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from psycopg2.errors import UndefinedTable
from psycopg2.errors import IoError
pytest_plugins = ("fixtures.zenith_fixtures")
pytest_plugins = ("fixtures.neon_fixtures")
extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
@@ -14,9 +14,9 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
#
# Validation of reading different page versions
#
def test_read_validation(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_read_validation", "empty")
def test_read_validation(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation", "empty")
pg = env.postgres.create_start("test_read_validation")
log.info("postgres is running on 'test_read_validation' branch")
@@ -125,9 +125,9 @@ def test_read_validation(zenith_simple_env: ZenithEnv):
log.info("Caught an expected failure: {}".format(e))
def test_read_validation_neg(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_read_validation_neg", "empty")
def test_read_validation_neg(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation_neg", "empty")
pg = env.postgres.create_start("test_read_validation_neg")
log.info("postgres is running on 'test_read_validation_neg' branch")


@@ -1,6 +1,6 @@
import pytest
from fixtures.log_helper import log
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
#
@@ -9,9 +9,9 @@ from fixtures.zenith_fixtures import ZenithEnv
# This is very similar to the 'test_branch_behind' test, but instead of
# creating branches, creates read-only nodes.
#
def test_readonly_node(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch('test_readonly_node', 'empty')
def test_readonly_node(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_readonly_node', 'empty')
pgmain = env.postgres.create_start('test_readonly_node')
log.info("postgres is running on 'test_readonly_node' branch")


@@ -4,28 +4,28 @@ import psycopg2.extras
import json
from ast import Assert
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
#
# Test pageserver recovery after crash
#
def test_pageserver_recovery(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
# Override default checkpointer settings to run it more often
zenith_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
env = zenith_env_builder.init()
env = neon_env_builder.init()
# Check if failpoints are enabled. Otherwise the test doesn't make sense
f = env.zenith_cli.pageserver_enabled_features()
f = env.neon_cli.pageserver_enabled_features()
assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test"
zenith_env_builder.start()
neon_env_builder.start()
# Create a branch for us
env.zenith_cli.create_branch("test_pageserver_recovery", "main")
env.neon_cli.create_branch("test_pageserver_recovery", "main")
pg = env.postgres.create_start('test_pageserver_recovery')
log.info("postgres is running on 'test_pageserver_recovery' branch")
@@ -45,7 +45,8 @@ def test_pageserver_recovery(zenith_env_builder: ZenithEnvBuilder):
# Configure failpoints
pscur.execute(
"failpoints checkpoint-before-sync=sleep(2000);checkpoint-after-sync=exit")
"failpoints flush-frozen-before-sync=sleep(2000);checkpoint-after-sync=exit"
)
# Do some updates until pageserver is crashed
try:


@@ -6,7 +6,7 @@ from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.zenith_fixtures import ZenithEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, lsn_to_hex
import pytest
@@ -30,12 +30,15 @@ import pytest
#
# The tests are done for all types of remote storage pageserver supports.
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
# zenith_env_builder.rust_log_override = 'debug'
def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, storage_type: str):
# Use this test to check more realistic safekeeper ids: some etcd key parsing bugs were related to them,
# and this test needs the safekeepers to write data to the pageserver, so any such issue will be visible
neon_env_builder.safekeepers_id_start = 12
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
@@ -43,7 +46,7 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder,
data_secret = 'very secret secret'
##### First start, insert secret data and upload it to the remote storage
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')
client = env.pageserver.http_client()
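The storage_type parametrization above is repeated almost verbatim in test_tenants_many and test_wal_backup later in this diff. A small illustrative helper that captures the dispatch, using only the builder methods shown here:

from fixtures.neon_fixtures import NeonEnvBuilder

def enable_remote_storage_sketch(builder: NeonEnvBuilder, storage_type: str, bucket_name: str) -> None:
    # Illustrative helper, not part of the fixtures: same dispatch as the
    # parametrized tests in this diff.
    if storage_type == 'local_fs':
        builder.enable_local_fs_remote_storage()
    elif storage_type == 'mock_s3':
        builder.enable_s3_mock_remote_storage(bucket_name)
    else:
        raise RuntimeError(f'Unknown storage type: {storage_type}')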


@@ -1,7 +1,7 @@
import pytest
from contextlib import closing
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
@@ -9,13 +9,13 @@ from fixtures.log_helper import log
# Test restarting and recreating a postgres instance
#
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
zenith_env_builder.pageserver_auth_enabled = True
def test_restart_compute(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
neon_env_builder.auth_enabled = True
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_restart_compute')
env.neon_cli.create_branch('test_restart_compute')
pg = env.postgres.create_start('test_restart_compute')
log.info("postgres is running on 'test_restart_compute' branch")


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
@@ -6,11 +6,11 @@ from fixtures.log_helper import log
#
# The pg_subxact SLRU is not preserved on restarts, and doesn't need to be
# maintained in the pageserver, so subtransactions are not very exciting for
# Zenith. They are included in the commit record though and updated in the
# Neon. They are included in the commit record though and updated in the
# CLOG.
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
env = zenith_simple_env
env.zenith_cli.create_branch("test_subxacts", "empty")
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_subxacts", "empty")
pg = env.postgres.create_start('test_subxacts')
log.info("postgres is running on 'test_subxacts' branch")


@@ -3,25 +3,25 @@ from contextlib import closing
import pytest
import psycopg2.extras
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
def test_tenant_config(zenith_env_builder: ZenithEnvBuilder):
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# set some non-default global config
zenith_env_builder.pageserver_config_override = '''
neon_env_builder.pageserver_config_override = '''
page_cache_size=444;
wait_lsn_timeout='111 s';
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
"""Test per tenant configuration"""
tenant, _ = env.zenith_cli.create_tenant(conf={
tenant, _ = env.neon_cli.create_tenant(conf={
'checkpoint_distance': '20000',
'gc_period': '30sec',
})
env.zenith_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
pg = env.postgres.create_start(
"test_tenant_conf",
"main",
@@ -66,11 +66,11 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
}.items())
# update the config and ensure that it has changed
env.zenith_cli.config_tenant(tenant_id=tenant,
conf={
'checkpoint_distance': '15000',
'gc_period': '80sec',
})
env.neon_cli.config_tenant(tenant_id=tenant,
conf={
'checkpoint_distance': '15000',
'gc_period': '80sec',
})
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur:
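To summarize the two configuration layers exercised by this test: pageserver_config_override carries global (and default per-tenant) settings as a TOML fragment, while create_tenant and config_tenant take per-tenant dicts that override those defaults. A hedged recap using only the calls shown in this hunk; the values are arbitrary:

def tenant_config_sketch(neon_env_builder):
    # Global defaults, injected into the pageserver config as TOML.
    neon_env_builder.pageserver_config_override = '''
page_cache_size=444;
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
    env = neon_env_builder.init_start()

    # Per-tenant overrides at creation time...
    tenant, _ = env.neon_cli.create_tenant(conf={'checkpoint_distance': '20000',
                                                 'gc_period': '30sec'})
    # ...and live reconfiguration afterwards.
    env.neon_cli.config_tenant(tenant_id=tenant, conf={'checkpoint_distance': '15000',
                                                       'gc_period': '80sec'})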


@@ -10,7 +10,7 @@ from typing import Optional
import signal
import pytest
from fixtures.zenith_fixtures import PgProtocol, PortDistributor, Postgres, ZenithEnvBuilder, Etcd, ZenithPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, zenith_binpath, pg_distrib_dir
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
from fixtures.utils import lsn_from_hex
@@ -26,7 +26,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
http_port: int,
broker: Optional[Etcd]):
"""
cannot use ZenithPageserver yet because it depends on zenith cli
cannot use NeonPageserver yet because it depends on neon cli
which currently lacks support for multiple pageservers
"""
cmd = [
@@ -106,21 +106,21 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
"needs to replace callmemaybe call with better idea how to migrate timelines between pageservers"
)
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
port_distributor: PortDistributor,
with_load: str):
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
tenant, _ = env.zenith_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
tenant, _ = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
log.info("tenant to relocate %s", tenant)
# attach does not download ancestor branches (should it?), just use root branch for now
env.zenith_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
env.neon_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
tenant_pg = env.postgres.create_start(branch_name='test_tenant_relocation',
node_name='test_tenant_relocation',
@@ -177,16 +177,16 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
new_pageserver_pg_port = port_distributor.get_port()
new_pageserver_http_port = port_distributor.get_port()
log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
new_pageserver_http = ZenithPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
with new_pageserver_helper(new_pageserver_dir,
pageserver_bin,
remote_storage_mock_path,
new_pageserver_pg_port,
new_pageserver_http_port,
zenith_env_builder.broker):
neon_env_builder.broker):
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
@@ -215,7 +215,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
tenant_pg.stop()
# rewrite zenith cli config to use new pageserver for basebackup to start new compute
# rewrite neon cli config to use new pageserver for basebackup to start new compute
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
@@ -258,7 +258,7 @@ def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
# bring old pageserver back for clean shutdown via zenith cli
# bring old pageserver back for clean shutdown via neon cli
# new pageserver will be shut down by the context manager
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"


@@ -2,27 +2,30 @@ from contextlib import closing
from datetime import datetime
import os
import pytest
import time
from uuid import UUID
from fixtures.zenith_fixtures import ZenithEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.metrics import parse_metrics
from fixtures.utils import lsn_to_hex
from fixtures.benchmark_fixture import MetricReport
@pytest.mark.parametrize('with_safekeepers', [False, True])
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
if with_safekeepers:
zenith_env_builder.num_safekeepers = 3
neon_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
"""Tests tenants with and without wal acceptors"""
tenant_1, _ = env.zenith_cli.create_tenant()
tenant_2, _ = env.zenith_cli.create_tenant()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_1)
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_2)
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_1)
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start(
f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
@@ -44,15 +47,65 @@ def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeep
assert cur.fetchone() == (5000050000, )
def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
def test_tenant_threads(neon_env_builder, zenbenchmark):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
env = zenith_env_builder.init_start()
tenant_1, _ = env.zenith_cli.create_tenant()
tenant_2, _ = env.zenith_cli.create_tenant()
def get_num_threads() -> int:
metrics = env.pageserver.http_client().get_metrics()
parsed = parse_metrics(metrics)
threads = parsed.query_one("process_threads").value
return threads
timeline_1 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
timeline_2 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
threads_before = get_num_threads()
zenbenchmark.record("threads_before", threads_before, "", report=MetricReport.LOWER_IS_BETTER)
tenants = env.pageserver.http_client().tenant_list()
num_tenants = len(tenants)
num_active = len([t for t in tenants if t["state"] == "Active"])
zenbenchmark.record("tenants_before", num_tenants, "", report=MetricReport.LOWER_IS_BETTER)
zenbenchmark.record("active_before", num_active, "", report=MetricReport.LOWER_IS_BETTER)
for i in range(20):
print(f"creating tenant {i}")
name = f"test_tenant_threads_{i}"
tenant, _ = env.neon_cli.create_tenant()
timeline = env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
pg.safe_psql("select 1;")
pg.stop()
env.pageserver.http_client().timeline_detach(tenant, timeline)
remaining_timelines = [
UUID(r["timeline_id"])
for r in env.pageserver.http_client().timeline_list(tenant)
]
for t in remaining_timelines:
env.pageserver.http_client().timeline_detach(tenant, t)
time.sleep(5)
threads_after = get_num_threads()
zenbenchmark.record("threads_before", threads_after, "", report=MetricReport.LOWER_IS_BETTER)
tenants = env.pageserver.http_client().tenant_list()
num_tenants = len(tenants)
num_active = len([t for t in tenants if t["state"] == "Active"])
zenbenchmark.record("tenants_after", num_tenants, "", report=MetricReport.LOWER_IS_BETTER)
zenbenchmark.record("active_after", num_active, "", report=MetricReport.LOWER_IS_BETTER)
def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2)
@@ -72,7 +125,7 @@ def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str()
for name in collected_metrics:
basepath = os.path.join(zenith_env_builder.repo_dir, f'{name}.metrics')
basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics')
with open(basepath, 'w') as stdout_f:
print(collected_metrics[name], file=stdout_f, flush=True)


@@ -12,11 +12,11 @@ from uuid import UUID
import pytest
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import lsn_from_hex
async def tenant_workload(env: ZenithEnv, pg: Postgres):
async def tenant_workload(env: NeonEnv, pg: Postgres):
pageserver_conn = await env.pageserver.connect_async()
pg_conn = await pg.connect_async()
@@ -35,7 +35,7 @@ async def tenant_workload(env: ZenithEnv, pg: Postgres):
assert res == i * 1000
async def all_tenants_workload(env: ZenithEnv, tenants_pgs):
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
workers = []
for tenant, pg in tenants_pgs:
worker = tenant_workload(env, pg)
@@ -46,28 +46,28 @@ async def all_tenants_workload(env: ZenithEnv, tenants_pgs):
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_tenants_many(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
def test_tenants_many(neon_env_builder: NeonEnvBuilder, storage_type: str):
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
tenants_pgs = []
for i in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.zenith_cli.create_tenant(
tenant, _ = env.neon_cli.create_tenant(
conf={
'checkpoint_distance': '5000000',
})
env.zenith_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
pg = env.postgres.create_start(
f'test_tenants_many',


@@ -1,15 +1,15 @@
from contextlib import closing
import psycopg2.extras
import psycopg2.errors
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, assert_local
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
from fixtures.log_helper import log
import time
def test_timeline_size(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
# Branch at the point where only 100 rows were inserted
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size', 'empty')
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
@@ -69,9 +69,9 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
time.sleep(polling_interval)
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
client = env.pageserver.http_client()
res = assert_local(client, env.initial_tenant, new_timeline_id)
@@ -86,7 +86,7 @@ def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
with closing(pgmain.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
cur.execute("CREATE TABLE foo (t text)")


@@ -1,15 +1,15 @@
import os
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#
# Test branching, when a transaction is in prepared state
#
def test_twophase(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
env.zenith_cli.create_branch("test_twophase", "empty")
def test_twophase(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_twophase", "empty")
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
log.info("postgres is running on 'test_twophase' branch")
@@ -55,7 +55,7 @@ def test_twophase(zenith_simple_env: ZenithEnv):
assert len(twophase_files) == 2
# Create a branch with the transaction in prepared state
env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")
env.neon_cli.create_branch("test_twophase_prepared", "test_twophase")
# Start compute on the new branch
pg2 = env.postgres.create_start(


@@ -1,4 +1,4 @@
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
@@ -6,10 +6,10 @@ from fixtures.log_helper import log
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
# HEAP_UPDATE record.
#
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
env = zenith_simple_env
def test_vm_bit_clear(neon_simple_env: NeonEnv):
env = neon_simple_env
env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
pg = env.postgres.create_start('test_vm_bit_clear')
log.info("postgres is running on 'test_vm_bit_clear' branch")
@@ -33,7 +33,7 @@ def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
# Branch at this point, to test that later
env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server


@@ -12,10 +12,11 @@ from contextlib import closing
from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.zenith_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
from fixtures.neon_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol
from fixtures.utils import get_dir_size, lsn_to_hex, mkdir_if_needed, lsn_from_hex
from fixtures.log_helper import log
from typing import List, Optional, Any
from uuid import uuid4
@dataclass
@@ -29,9 +30,9 @@ class TimelineMetrics:
# Run page server and multiple acceptors, and multiple compute nodes running
# against different timelines.
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_many_timelines(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
n_timelines = 3
@@ -39,15 +40,15 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
"test_safekeepers_many_timelines_{}".format(tlin) for tlin in range(n_timelines)
]
# pageserver, safekeeper operate timelines via their ids (can be represented in hex as 'ad50847381e248feaac9876cc71ae418')
# that's not really human readable, so the branch names are introduced in Zenith CLI.
# Zenith CLI stores its branch <-> timeline mapping in its internals,
# that's not really human readable, so the branch names are introduced in Neon CLI.
# Neon CLI stores its branch <-> timeline mapping in its internals,
# but we need this to collect metrics from other servers, related to the timeline.
branch_names_to_timeline_ids = {}
# start postgres on each timeline
pgs = []
for branch_name in branch_names:
new_timeline_id = env.zenith_cli.create_branch(branch_name)
new_timeline_id = env.neon_cli.create_branch(branch_name)
pgs.append(env.postgres.create_start(branch_name))
branch_names_to_timeline_ids[branch_name] = new_timeline_id
@@ -93,14 +94,14 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
# the compute node, which only happens after a consensus of safekeepers
# has confirmed the transaction. We assume majority consensus here.
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.flush_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.commit_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
timeline_metrics.append(m)
log.info(f"{message}: {timeline_metrics}")
return timeline_metrics
# TODO: https://github.com/zenithdb/zenith/issues/809
# TODO: https://github.com/neondatabase/neon/issues/809
# collect_metrics("before CREATE TABLE")
# Do everything in different loops to have actions on different timelines
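The two assertions above encode the majority rule: the pageserver's last_record_lsn is only considered durable once a strict majority of safekeepers report a flush (and commit) LSN at least that high. A tiny self-contained restatement of that predicate:

from typing import List

def reached_majority(last_record_lsn: int, sk_lsns: List[int], num_safekeepers: int) -> bool:
    # Same predicate as the assertions above, factored out: more than half of
    # the safekeepers must have caught up to last_record_lsn.
    return 2 * sum(last_record_lsn <= lsn for lsn in sk_lsns) > num_safekeepers

assert reached_majority(100, [90, 120, 130], 3)       # 2 of 3 -> majority
assert not reached_majority(100, [90, 95, 130], 3)    # 1 of 3 -> not enough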
@@ -168,15 +169,15 @@ def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
# times, with fault_probability chance of getting a wal acceptor down or up
# along the way. 2 of 3 are always alive, so the work keeps going.
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
def test_restarts(neon_env_builder: NeonEnvBuilder):
fault_probability = 0.01
n_inserts = 1000
n_acceptors = 3
zenith_env_builder.num_safekeepers = n_acceptors
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = n_acceptors
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_restarts')
env.neon_cli.create_branch('test_safekeepers_restarts')
pg = env.postgres.create_start('test_safekeepers_restarts')
# we rely upon autocommit after each statement
@@ -209,11 +210,11 @@ def delayed_safekeeper_start(wa):
# When majority of acceptors is offline, commits are expected to be frozen
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 2
env = zenith_env_builder.init_start()
def test_unavailability(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 2
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_unavailability')
env.neon_cli.create_branch('test_safekeepers_unavailability')
pg = env.postgres.create_start('test_safekeepers_unavailability')
# we rely upon autocommit after each statement
@@ -279,12 +280,12 @@ def stop_value():
# do inserts while concurrently getting up/down subsets of acceptors
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
def test_race_conditions(neon_env_builder: NeonEnvBuilder, stop_value):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_race_conditions')
env.neon_cli.create_branch('test_safekeepers_race_conditions')
pg = env.postgres.create_start('test_safekeepers_race_conditions')
# we rely upon autocommit after each statement
@@ -308,16 +309,16 @@ def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
# Test that safekeepers push their info to the broker and learn peer status from it
def test_broker(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
zenith_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
def test_broker(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
env.zenith_cli.create_branch("test_broker", "main")
env.neon_cli.create_branch("test_broker", "main")
pg = env.postgres.create_start('test_broker')
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -349,13 +350,15 @@ def test_broker(zenith_env_builder: ZenithEnvBuilder):
# Test that old WAL consumed by peers and pageserver is removed from safekeepers.
def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 2
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 2
# to advance remote_consistent_lsn
zenith_env_builder.enable_local_fs_remote_storage()
env = zenith_env_builder.init_start()
neon_env_builder.enable_local_fs_remote_storage()
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_wal_removal')
env.neon_cli.create_branch('test_safekeepers_wal_removal')
pg = env.postgres.create_start('test_safekeepers_wal_removal')
with closing(pg.connect()) as conn:
@@ -369,7 +372,10 @@ def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
# force checkpoint to advance remote_consistent_lsn
with closing(env.pageserver.connect()) as psconn:
pageserver_conn_options = {}
if auth_enabled:
pageserver_conn_options['password'] = env.auth_keys.generate_tenant_token(tenant_id)
with closing(env.pageserver.connect(**pageserver_conn_options)) as psconn:
with psconn.cursor() as pscur:
pscur.execute(f"checkpoint {tenant_id} {timeline_id}")
@@ -380,9 +386,29 @@ def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
]
assert all(os.path.exists(p) for p in first_segments)
http_cli = env.safekeepers[0].http_client()
if not auth_enabled:
http_cli = env.safekeepers[0].http_client()
else:
http_cli = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id))
http_cli_other = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
http_cli_noauth = env.safekeepers[0].http_client()
# Pretend WAL is offloaded to s3.
if auth_enabled:
old_backup_lsn = http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
assert 'FFFFFFFF/FEFFFFFF' != old_backup_lsn
for cli in [http_cli_other, http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
cli.record_safekeeper_info(tenant_id,
timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert old_backup_lsn == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
http_cli.record_safekeeper_info(tenant_id, timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert 'FFFFFFFF/FEFFFFFF' == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
# wait till first segment is removed on all safekeepers
started_at = time.time()
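The auth-enabled branch above follows a pattern that test_timeline_status and test_delete_force repeat later in this diff: one client with the correct tenant token, one with a token for an unrelated tenant, and one with no token, where the last two must be rejected. A hedged helper sketch built only from the client calls shown in this diff:

import pytest
from uuid import uuid4

def safekeeper_clients_sketch(env, safekeeper, tenant_id: str):
    # Returns (authorized, wrong-tenant, unauthenticated) HTTP clients,
    # mirroring the three-client setup used above.
    ok = safekeeper.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
    other = safekeeper.http_client(auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
    noauth = safekeeper.http_client()
    return ok, other, noauth

def expect_rejected_sketch(clients, tenant_id: str, timeline_id: str) -> None:
    # Both the wrong-tenant and the unauthenticated client should be refused.
    for cli in clients:
        with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
            cli.timeline_status(tenant_id, timeline_id)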
@@ -412,22 +438,22 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_wal_backup(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
zenith_env_builder.num_safekeepers = 3
def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
if storage_type == 'local_fs':
zenith_env_builder.enable_local_fs_remote_storage()
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
zenith_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
neon_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
zenith_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = zenith_env_builder.init_start()
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_wal_backup')
env.neon_cli.create_branch('test_safekeepers_wal_backup')
pg = env.postgres.create_start('test_safekeepers_wal_backup')
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -460,7 +486,7 @@ def test_wal_backup(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
class ProposerPostgres(PgProtocol):
"""Object for running postgres without ZenithEnv"""
"""Object for running postgres without NeonEnv"""
def __init__(self,
pgdata_dir: str,
pg_bin,
@@ -542,14 +568,14 @@ class ProposerPostgres(PgProtocol):
# insert wal in all safekeepers and run sync on proposer
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
port_distributor: PortDistributor):
# We don't really need the full environment for this test, just the
# safekeepers would be enough.
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
timeline_id = uuid.uuid4()
tenant_id = uuid.uuid4()
@@ -596,25 +622,42 @@ def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
env = zenith_env_builder.init_start()
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_timeline_status')
env.neon_cli.create_branch('test_timeline_status')
pg = env.postgres.create_start('test_timeline_status')
wa = env.safekeepers[0]
wa_http_cli = wa.http_client()
wa_http_cli.check_status()
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
if not auth_enabled:
wa_http_cli = wa.http_client()
wa_http_cli.check_status()
else:
wa_http_cli = wa.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
wa_http_cli.check_status()
wa_http_cli_bad = wa.http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
wa_http_cli_bad.check_status()
wa_http_cli_noauth = wa.http_client()
wa_http_cli_noauth.check_status()
# fetch something sensible from status
tli_status = wa_http_cli.timeline_status(tenant_id, timeline_id)
epoch = tli_status.acceptor_epoch
timeline_start_lsn = tli_status.timeline_start_lsn
if auth_enabled:
for cli in [wa_http_cli_bad, wa_http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
cli.timeline_status(tenant_id, timeline_id)
pg.safe_psql("create table t(i int)")
# ensure epoch goes up after reboot
@@ -642,7 +685,7 @@ class SafekeeperEnv:
peer_port=self.port_distributor.get_port())
self.pg_bin = pg_bin
self.num_safekeepers = num_safekeepers
self.bin_safekeeper = os.path.join(str(zenith_binpath), 'safekeeper')
self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper')
self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None
self.postgres: Optional[ProposerPostgres] = None
self.tenant_id: Optional[uuid.UUID] = None
@@ -753,8 +796,8 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
assert res == 5050
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
def safekeepers_guc(env: ZenithEnv, sk_names: List[int]) -> str:
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names])
def execute_payload(pg: Postgres):
@@ -781,9 +824,9 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
except Exception as e:
log.info(f"Safekeeper {sk.id} status error: {e}")
zenith_env_builder.num_safekeepers = 4
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch('test_replace_safekeeper')
neon_env_builder.num_safekeepers = 4
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_replace_safekeeper')
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
@@ -792,7 +835,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
pg.start()
# learn zenith timeline from compute
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -844,7 +887,7 @@ def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
# We have `wal_keep_size=0`, so postgres should trim WAL once it has been broadcast
# to all safekeepers. This test checks that the compute WAL can fit into a small number
# of WAL segments.
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
# used to calculate delta in collect_stats
last_lsn = .0
@@ -866,10 +909,10 @@ def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
def generate_wal(cur):
cur.execute("INSERT INTO t SELECT generate_series(1,300000), 'payload'")
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
env.neon_cli.create_branch('test_wal_deleted_after_broadcast')
# Adjust checkpoint config to prevent keeping old WAL segments
pg = env.postgres.create_start(
'test_wal_deleted_after_broadcast',
@@ -894,18 +937,20 @@ def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
assert wal_size_after_checkpoint < 16 * 2.5
def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 1
env = zenith_env_builder.init_start()
@pytest.mark.parametrize('auth_enabled', [False, True])
def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 1
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# Create two tenants: one will be deleted, other should be preserved.
tenant_id = env.initial_tenant.hex
timeline_id_1 = env.zenith_cli.create_branch('br1').hex # Active, delete explicitly
timeline_id_2 = env.zenith_cli.create_branch('br2').hex # Inactive, delete explicitly
timeline_id_3 = env.zenith_cli.create_branch('br3').hex # Active, delete with the tenant
timeline_id_4 = env.zenith_cli.create_branch('br4').hex # Inactive, delete with the tenant
timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly
timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly
timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant
timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant
tenant_id_other_uuid, timeline_id_other_uuid = env.zenith_cli.create_tenant()
tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant()
tenant_id_other = tenant_id_other_uuid.hex
timeline_id_other = timeline_id_other_uuid.hex
@@ -921,7 +966,14 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
cur.execute('CREATE TABLE t(key int primary key)')
sk = env.safekeepers[0]
sk_data_dir = Path(sk.data_dir())
sk_http = sk.http_client()
if not auth_enabled:
sk_http = sk.http_client()
sk_http_other = sk_http
else:
sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
sk_http_other = sk.http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id_other))
sk_http_noauth = sk.http_client()
assert (sk_data_dir / tenant_id / timeline_id_1).is_dir()
assert (sk_data_dir / tenant_id / timeline_id_2).is_dir()
assert (sk_data_dir / tenant_id / timeline_id_3).is_dir()
@@ -961,6 +1013,15 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
assert (sk_data_dir / tenant_id / timeline_id_4).is_dir()
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
if auth_enabled:
# Ensure we cannot delete the other tenant
for sk_h in [sk_http, sk_http_noauth]:
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other)
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
assert sk_h.tenant_delete_force(tenant_id_other)
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
# Remove initial tenant's br2 (inactive)
assert sk_http.timeline_delete_force(tenant_id, timeline_id_2) == {
"dir_existed": True,
@@ -1001,7 +1062,7 @@ def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
# Ensure the other tenant still works
sk_http.timeline_status(tenant_id_other, timeline_id_other)
sk_http_other.timeline_status(tenant_id_other, timeline_id_other)
with closing(pg_other.connect()) as conn:
with conn.cursor() as cur:
cur.execute('INSERT INTO t (key) VALUES (123)')


@@ -4,7 +4,7 @@ import asyncpg
import random
import time
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, Safekeeper
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.log_helper import getLogger
from fixtures.utils import lsn_from_hex, lsn_to_hex
from typing import List
@@ -136,7 +136,7 @@ async def wait_for_lsn(safekeeper: Safekeeper,
# On each iteration, one acceptor is stopped, and the two others should allow
# background workers to execute transactions. In the end, the state should remain
# consistent.
async def run_restarts_under_load(env: ZenithEnv,
async def run_restarts_under_load(env: NeonEnv,
pg: Postgres,
acceptors: List[Safekeeper],
n_workers=10,
@@ -202,11 +202,11 @@ async def run_restarts_under_load(env: ZenithEnv,
# Restart acceptors one by one, while executing and validating bank transactions
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_safekeepers_restarts_under_load')
env.neon_cli.create_branch('test_safekeepers_restarts_under_load')
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
config_lines=['max_replication_write_lag=1MB'])
@@ -217,11 +217,11 @@ def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
# Restart acceptors one by one and test that everything is working as expected
# when checkpoints are triggered frequently by max_wal_size=32MB. Because we have
# wal_keep_size=0, there will be aggressive WAL segment recycling.
def test_restarts_frequent_checkpoints(zenith_env_builder: ZenithEnvBuilder):
zenith_env_builder.num_safekeepers = 3
env = zenith_env_builder.init_start()
def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.zenith_cli.create_branch('test_restarts_frequent_checkpoints')
env.neon_cli.create_branch('test_restarts_frequent_checkpoints')
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
config_lines=[


@@ -1,26 +1,26 @@
import os
import subprocess
from fixtures.zenith_fixtures import (ZenithEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.neon_fixtures import (NeonEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.log_helper import log
def test_wal_restore(zenith_env_builder: ZenithEnvBuilder,
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir,
port_distributor: PortDistributor):
env = zenith_env_builder.init_start()
env.zenith_cli.create_branch("test_wal_restore")
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
pg = env.postgres.create_start('test_wal_restore')
pg.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
env.zenith_cli.pageserver_stop()
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = os.path.join(test_output_dir, 'pgsql.restored')
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:


@@ -1,16 +1,16 @@
import os
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, base_dir, pg_distrib_dir
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
# The isolation tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
def test_isolation(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_isolation", "empty")
env.neon_cli.create_branch("test_isolation", "empty")
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])


@@ -1,19 +1,19 @@
import os
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import (ZenithEnv,
check_restored_datadir_content,
base_dir,
pg_distrib_dir)
from fixtures.neon_fixtures import (NeonEnv,
check_restored_datadir_content,
base_dir,
pg_distrib_dir)
from fixtures.log_helper import log
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
env = zenith_simple_env
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_zenith_regress", "empty")
env.neon_cli.create_branch("test_neon_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_zenith_regress')
pg = env.postgres.create_start('test_neon_regress')
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
@@ -22,9 +22,9 @@ def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, c
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
# This test runs zenith specific tests
# This test runs neon specific tests
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'test_runner/zenith_regress')
src_path = os.path.join(base_dir, 'test_runner/neon_regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
pg_regress = os.path.join(build_path, 'pg_regress')


@@ -1,16 +1,16 @@
import os
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
# The pg_regress tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin, capsys):
env = zenith_simple_env
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: str, pg_bin, capsys):
env = neon_simple_env
env.zenith_cli.create_branch("test_pg_regress", "empty")
env.neon_cli.create_branch("test_pg_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_pg_regress')
pg.safe_psql('CREATE DATABASE regression')


@@ -1,6 +1,5 @@
pytest_plugins = (
"fixtures.zenith_fixtures",
"fixtures.benchmark_fixture",
"fixtures.compare_fixtures",
"fixtures.slow",
)
pytest_plugins = ("fixtures.neon_fixtures",
"fixtures.benchmark_fixture",
"fixtures.compare_fixtures",
"fixtures.slow",
"fixtures.pg_stats")


@@ -25,9 +25,9 @@ To use, declare the 'zenbenchmark' fixture in the test function. Run the
benchmark, and then record the result by calling zenbenchmark.record. For example:
import timeit
from fixtures.zenith_fixtures import ZenithEnv
from fixtures.neon_fixtures import NeonEnv
def test_mybench(zenith_simple_env: env, zenbenchmark):
def test_mybench(neon_simple_env: env, zenbenchmark):
# Initialize the test
...
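To make the skeleton above concrete, here is a hedged sketch of a complete benchmark test. The test body and metric name are hypothetical; the record(metric_name, value, unit, report) call shape is inferred from other call sites in this diff (e.g. record_pg_stats in compare_fixtures).

import timeit

from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnv


def test_mybench(neon_simple_env: NeonEnv, zenbenchmark: NeonBenchmarker):
    # hypothetical benchmark: time a trivial read-only query on a fresh branch
    env = neon_simple_env
    env.neon_cli.create_branch('test_mybench', 'empty')
    pg = env.postgres.create_start('test_mybench')

    elapsed = timeit.timeit(
        lambda: pg.safe_psql('select count(*) from generate_series(1, 100000)'),
        number=1)

    # record(metric_name, value, unit, report); the call shape follows other
    # call sites in this diff (e.g. record_pg_stats in compare_fixtures)
    zenbenchmark.record('query_duration', elapsed, 's', MetricReport.LOWER_IS_BETTER)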
@@ -142,7 +142,7 @@ class MetricReport(str, enum.Enum): # str is a hack to make it json serializabl
LOWER_IS_BETTER = 'lower_is_better'
class ZenithBenchmarker:
class NeonBenchmarker:
"""
An object for recording benchmark results. This is created for each test
function by the zenbenchmark fixture
@@ -163,7 +163,7 @@ class ZenithBenchmarker:
Record a benchmark result.
"""
# just to namespace the value
name = f"zenith_benchmarker_{metric_name}"
name = f"neon_benchmarker_{metric_name}"
self.property_recorder(
name,
{
@@ -289,12 +289,12 @@ class ZenithBenchmarker:
@pytest.fixture(scope="function")
def zenbenchmark(record_property) -> Iterator[ZenithBenchmarker]:
def zenbenchmark(record_property) -> Iterator[NeonBenchmarker]:
"""
This is a pytest fixture for benchmark tests. It provides functions for
recording measurements, and prints them out at the end.
"""
benchmarker = ZenithBenchmarker(record_property)
benchmarker = NeonBenchmarker(record_property)
yield benchmarker


@@ -1,18 +1,19 @@
import pytest
from contextlib import contextmanager
from abc import ABC, abstractmethod
from fixtures.pg_stats import PgStatTable
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, ZenithEnv
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
# Type-related stuff
from typing import Iterator
from typing import Dict, List
class PgCompare(ABC):
"""Common interface of all postgres implementations, useful for benchmarks.
This class is a helper class for the zenith_with_baseline fixture. See its documentation
This class is a helper class for the neon_with_baseline fixture. See its documentation
for more details.
"""
@property
@@ -26,7 +27,7 @@ class PgCompare(ABC):
pass
@property
def zenbenchmark(self) -> ZenithBenchmarker:
def zenbenchmark(self) -> NeonBenchmarker:
pass
@abstractmethod
@@ -51,20 +52,45 @@ class PgCompare(ABC):
def record_duration(self, out_name):
pass
@contextmanager
def record_pg_stats(self, pg_stats: List[PgStatTable]):
init_data = self._retrieve_pg_stats(pg_stats)
class ZenithCompare(PgCompare):
"""PgCompare interface for the zenith stack."""
yield
data = self._retrieve_pg_stats(pg_stats)
for k in set(init_data) & set(data):
self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER)
def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]:
results: Dict[str, int] = {}
with self.pg.connect().cursor() as cur:
for pg_stat in pg_stats:
cur.execute(pg_stat.query)
row = cur.fetchone()
assert len(row) == len(pg_stat.columns)
for col, val in zip(pg_stat.columns, row):
results[f"{pg_stat.table}.{col}"] = int(val)
return results
class NeonCompare(PgCompare):
"""PgCompare interface for the neon stack."""
def __init__(self,
zenbenchmark: ZenithBenchmarker,
zenith_simple_env: ZenithEnv,
zenbenchmark: NeonBenchmarker,
neon_simple_env: NeonEnv,
pg_bin: PgBin,
branch_name):
self.env = zenith_simple_env
self.env = neon_simple_env
self._zenbenchmark = zenbenchmark
self._pg_bin = pg_bin
# We only use one branch and one timeline
self.env.zenith_cli.create_branch(branch_name, 'empty')
self.env.neon_cli.create_branch(branch_name, 'empty')
self._pg = self.env.postgres.create_start(branch_name)
self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0]
@@ -221,9 +247,9 @@ class RemoteCompare(PgCompare):
@pytest.fixture(scope='function')
def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare:
branch_name = request.node.name
return ZenithCompare(zenbenchmark, zenith_simple_env, pg_bin, branch_name)
return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name)
@pytest.fixture(scope='function')
@@ -236,13 +262,13 @@ def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare:
return RemoteCompare(zenbenchmark, remote_pg)
@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
def zenith_with_baseline(request) -> PgCompare:
"""Parameterized fixture that helps compare zenith against vanilla postgres.
@pytest.fixture(params=["vanilla_compare", "neon_compare"], ids=["vanilla", "neon"])
def neon_with_baseline(request) -> PgCompare:
"""Parameterized fixture that helps compare neon against vanilla postgres.
A test that uses this fixture turns into a parameterized test that runs against:
1. A vanilla postgres instance
2. A simple zenith env (see zenith_simple_env)
2. A simple neon env (see neon_simple_env)
3. Possibly other postgres protocol implementations.
The main goal of this fixture is to make it easier for people to read and write
@@ -254,7 +280,7 @@ def zenith_with_baseline(request) -> PgCompare:
of that.
If a test requires some one-off special implementation-specific logic, use of
isinstance(zenith_with_baseline, ZenithCompare) is encouraged. Though if that
isinstance(neon_with_baseline, NeonCompare) is encouraged. Though if that
implementation-specific logic is widely useful across multiple tests, it might
make sense to add methods to the PgCompare class.
"""


@@ -29,7 +29,7 @@ from dataclasses import dataclass
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import make_dsn, parse_dsn
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing_extensions import Literal
import requests
@@ -81,7 +81,7 @@ def pytest_addoption(parser):
# These are set in pytest_configure()
base_dir = ""
zenith_binpath = ""
neon_binpath = ""
pg_distrib_dir = ""
top_output_dir = ""
@@ -100,7 +100,7 @@ def check_interferring_processes(config):
# result of the test.
# NOTE this shows as an internal pytest error, there might be a better way
raise Exception(
'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
'Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
)
@@ -146,25 +146,25 @@ def pytest_configure(config):
raise Exception('postgres not found at "{}"'.format(pg_distrib_dir))
if os.getenv("REMOTE_ENV"):
# we are in remote env and do not have zenith binaries locally
# we are in remote env and do not have neon binaries locally
# this is the case for benchmarks run on self-hosted runner
return
# Find the zenith binaries.
global zenith_binpath
env_zenith_bin = os.environ.get('ZENITH_BIN')
if env_zenith_bin:
zenith_binpath = env_zenith_bin
# Find the neon binaries.
global neon_binpath
env_neon_bin = os.environ.get('ZENITH_BIN')
if env_neon_bin:
neon_binpath = env_neon_bin
else:
zenith_binpath = os.path.join(base_dir, 'target/debug')
log.info(f'zenith_binpath is {zenith_binpath}')
if not os.path.exists(os.path.join(zenith_binpath, 'pageserver')):
raise Exception('zenith binaries not found at "{}"'.format(zenith_binpath))
neon_binpath = os.path.join(base_dir, 'target/debug')
log.info(f'neon_binpath is {neon_binpath}')
if not os.path.exists(os.path.join(neon_binpath, 'pageserver')):
raise Exception('neon binaries not found at "{}"'.format(neon_binpath))
def profiling_supported():
"""Return True if the pageserver was compiled with the 'profiling' feature
"""
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
res = subprocess.run([bin_pageserver, '--version'],
check=True,
universal_newlines=True,
@@ -223,7 +223,7 @@ def can_bind(host: str, port: int) -> bool:
# TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
# moment. If that changes, we should start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/zenithdb/zenith/issues/801
# See https://github.com/neondatabase/neon/issues/801
#sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
@@ -479,27 +479,30 @@ class RemoteStorageUsers(Flag):
SAFEKEEPER = auto()
class ZenithEnvBuilder:
class NeonEnvBuilder:
"""
Builder object to create a Zenith runtime environment
Builder object to create a Neon runtime environment
You should use the `zenith_env_builder` or `zenith_simple_env` pytest
fixture to create the ZenithEnv object. That way, the repository is
You should use the `neon_env_builder` or `neon_simple_env` pytest
fixture to create the NeonEnv object. That way, the repository is
created in the right directory, based on the test name, and it's properly
cleaned up after the test has finished.
"""
def __init__(self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
pageserver_auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
def __init__(
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
self.port_distributor = port_distributor
@@ -509,20 +512,21 @@ class ZenithEnvBuilder:
self.mock_s3_server = mock_s3_server
self.pageserver_config_override = pageserver_config_override
self.num_safekeepers = num_safekeepers
self.pageserver_auth_enabled = pageserver_auth_enabled
self.safekeepers_id_start = safekeepers_id_start
self.auth_enabled = auth_enabled
self.default_branch_name = default_branch_name
self.env: Optional[ZenithEnv] = None
self.env: Optional[NeonEnv] = None
def init(self) -> ZenithEnv:
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
assert self.env is None, "environment already initialized"
self.env = ZenithEnv(self)
self.env = NeonEnv(self)
return self.env
def start(self):
self.env.start()
def init_start(self) -> ZenithEnv:
def init_start(self) -> NeonEnv:
env = self.init()
self.start()
return env
@@ -571,12 +575,12 @@ class ZenithEnvBuilder:
self.env.pageserver.stop(immediate=True)
class ZenithEnv:
class NeonEnv:
"""
An object representing the Zenith runtime environment. It consists of
An object representing the Neon runtime environment. It consists of
the page server, 0-N safekeepers, and the compute nodes.
ZenithEnv contains functions for stopping/starting nodes in the
NeonEnv contains functions for stopping/starting nodes in the
environment, checking their status, creating tenants, connecting to the
nodes, creating and destroying compute nodes, etc. The page server and
the safekeepers are considered fixed in the environment, you cannot
@@ -584,7 +588,7 @@ class ZenithEnv:
likely change in the future, as we start supporting multiple page
servers and adding/removing safekeepers on the fly).
Some notable functions and fields in ZenithEnv:
Some notable functions and fields in NeonEnv:
postgres - A factory object for creating postgres compute nodes.
@@ -598,24 +602,24 @@ class ZenithEnv:
initial_tenant - tenant ID of the initial tenant created in the repository
zenith_cli - can be used to run the 'zenith' CLI tool
neon_cli - can be used to run the 'neon' CLI tool
create_tenant() - initializes a new tenant in the page server, returns
the tenant id
"""
def __init__(self, config: ZenithEnvBuilder):
def __init__(self, config: NeonEnvBuilder):
self.repo_dir = config.repo_dir
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.zenith_cli = ZenithCli(env=self)
self.neon_cli = NeonCli(env=self)
self.postgres = PostgresFactory(self)
self.safekeepers: List[Safekeeper] = []
self.broker = config.broker
self.remote_storage = config.remote_storage
self.remote_storage_users = config.remote_storage_users
# generate initial tenant ID here instead of letting 'zenith init' generate it,
# generate initial tenant ID here instead of letting 'neon init' generate it,
# so that we don't need to dig it out of the config file afterwards.
self.initial_tenant = uuid.uuid4()
@@ -635,7 +639,7 @@ class ZenithEnv:
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
pageserver_auth_type = "ZenithJWT" if config.pageserver_auth_enabled else "Trust"
pageserver_auth_type = "ZenithJWT" if config.auth_enabled else "Trust"
toml += textwrap.dedent(f"""
[pageserver]
@@ -645,10 +649,10 @@ class ZenithEnv:
auth_type = '{pageserver_auth_type}'
""")
# Create a corresponding ZenithPageserver object
self.pageserver = ZenithPageserver(self,
port=pageserver_port,
config_override=config.pageserver_config_override)
# Create a corresponding NeonPageserver object
self.pageserver = NeonPageserver(self,
port=pageserver_port,
config_override=config.pageserver_config_override)
# Create config and a Safekeeper object for each safekeeper
for i in range(1, config.num_safekeepers + 1):
@@ -656,13 +660,17 @@ class ZenithEnv:
pg=self.port_distributor.get_port(),
http=self.port_distributor.get_port(),
)
id = i # assign ids sequentially
id = config.safekeepers_id_start + i # assign ids sequentially
toml += textwrap.dedent(f"""
[[safekeepers]]
id = {id}
pg_port = {port.pg}
http_port = {port.http}
sync = false # Disable fsyncs to make the tests go faster""")
if config.auth_enabled:
toml += textwrap.dedent(f"""
auth_enabled = true
""")
if bool(self.remote_storage_users
& RemoteStorageUsers.SAFEKEEPER) and self.remote_storage is not None:
toml += textwrap.dedent(f"""
@@ -672,7 +680,7 @@ class ZenithEnv:
self.safekeepers.append(safekeeper)
log.info(f"Config: {toml}")
self.zenith_cli.init(toml)
self.neon_cli.init(toml)
def start(self):
# Start up broker, pageserver and all safekeepers
@@ -697,10 +705,10 @@ class ZenithEnv:
def _shared_simple_env(request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[ZenithEnv]:
default_broker: Etcd) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `zenith_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `zenith_simple_env`.
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
"""
if os.environ.get('TEST_SHARED_FIXTURES') is None:
@@ -711,23 +719,23 @@ def _shared_simple_env(request: Any,
repo_dir = os.path.join(str(top_output_dir), "shared_repo")
shutil.rmtree(repo_dir, ignore_errors=True)
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
env.zenith_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
env.neon_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
yield env
@pytest.fixture(scope='function')
def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
Simple Zenith environment, with no authentication and no safekeepers.
Simple Neon environment, with no authentication and no safekeepers.
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
environment for all tests that use 'zenith_simple_env', keeping the
environment for all tests that use 'neon_simple_env', keeping the
page server and safekeepers running. Any compute nodes are stopped after
each test, however.
"""
@@ -737,17 +745,17 @@ def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
@pytest.fixture(scope='function')
def zenith_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[ZenithEnvBuilder]:
def neon_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Zenith environment for test.
Fixture to create a Neon environment for test.
To use, define 'zenith_env_builder' fixture in your test to get access to the
To use, define 'neon_env_builder' fixture in your test to get access to the
builder object. Set properties on it to describe the environment.
Finally, initialize and start up the environment by calling
zenith_env_builder.init_start().
neon_env_builder.init_start().
After the initialization, you can launch compute nodes by calling
the functions in the 'env.postgres' factory object, stop/start the
@@ -758,16 +766,16 @@ def zenith_env_builder(test_output_dir,
repo_dir = os.path.join(test_output_dir, "repo")
# Return the builder to the caller
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
yield builder
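The fixture docstring above describes the typical flow; the following hedged sketch (a hypothetical test that uses only builder fields and methods visible in this diff) shows that flow end to end.

from fixtures.neon_fixtures import NeonEnvBuilder


def test_builder_flow(neon_env_builder: NeonEnvBuilder):
    # describe the environment before starting it
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.auth_enabled = True

    env = neon_env_builder.init_start()

    # create a branch, launch a compute node on it, and run a query
    env.neon_cli.create_branch('test_builder_flow')
    pg = env.postgres.create_start('test_builder_flow')
    pg.safe_psql('select 1')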
class ZenithPageserverApiException(Exception):
class NeonPageserverApiException(Exception):
pass
class ZenithPageserverHttpClient(requests.Session):
class NeonPageserverHttpClient(requests.Session):
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
@@ -784,7 +792,7 @@ class ZenithPageserverHttpClient(requests.Session):
msg = res.json()['msg']
except:
msg = ''
raise ZenithPageserverApiException(msg) from e
raise NeonPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
@@ -891,12 +899,12 @@ TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline
re.MULTILINE)
class ZenithCli:
class NeonCli:
"""
A typed wrapper around the `zenith` CLI tool.
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run an arbitrary command directly via the CLI.
"""
def __init__(self, env: ZenithEnv):
def __init__(self, env: NeonEnv):
self.env = env
pass
@@ -982,7 +990,7 @@ class ZenithCli:
created_timeline_id = matches.group('timeline_id')
if created_timeline_id is None:
raise Exception('could not find timeline id after `zenith timeline create` invocation')
raise Exception('could not find timeline id after `neon timeline create` invocation')
else:
return uuid.UUID(created_timeline_id)
@@ -1014,13 +1022,13 @@ class ZenithCli:
created_timeline_id = matches.group('timeline_id')
if created_timeline_id is None:
raise Exception('could not find timeline id after `zenith timeline create` invocation')
raise Exception('could not find timeline id after `neon timeline create` invocation')
else:
return uuid.UUID(created_timeline_id)
def list_timelines(self, tenant_id: Optional[uuid.UUID] = None) -> List[Tuple[str, str]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `zenith timeline list` CLI output.
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# (L) main [b49f7954224a0ad25cc0013ea107b54b]
@@ -1053,7 +1061,7 @@ class ZenithCli:
return res
def pageserver_enabled_features(self) -> Any:
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
args = [bin_pageserver, '--enabled-features']
log.info('Running command "{}"'.format(' '.join(args)))
@@ -1093,7 +1101,7 @@ class ZenithCli:
immediate=False) -> 'subprocess.CompletedProcess[str]':
args = ['safekeeper', 'stop']
if id is not None:
args.extend(str(id))
args.append(str(id))
if immediate:
args.extend(['-m', 'immediate'])
return self.raw_cli(args)
@@ -1173,27 +1181,27 @@ class ZenithCli:
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
"""
Run "zenith" with the specified arguments.
Run "neon" with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.zenith_cli.raw_cli(...)
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
"""
assert type(arguments) == list
bin_zenith = os.path.join(str(zenith_binpath), 'neon_local')
bin_neon = os.path.join(str(neon_binpath), 'neon_local')
args = [bin_zenith] + arguments
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.env.repo_dir}"')
env_vars = os.environ.copy()
env_vars['ZENITH_REPO_DIR'] = str(self.env.repo_dir)
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars['RUST_LOG'] = self.env.rust_log_override
@@ -1231,20 +1239,20 @@ class ZenithCli:
return res
class ZenithPageserver(PgProtocol):
class NeonPageserver(PgProtocol):
"""
An object representing a running pageserver.
Initializes the repository via `zenith init`.
Initializes the repository via `neon init`.
"""
def __init__(self, env: ZenithEnv, port: PageserverPort, config_override: Optional[str] = None):
def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None):
super().__init__(host='localhost', port=port.pg, user='cloud_admin')
self.env = env
self.running = False
self.service_port = port
self.config_override = config_override
def start(self, overrides=()) -> 'ZenithPageserver':
def start(self, overrides=()) -> 'NeonPageserver':
"""
Start the page server.
`overrides` allows adding extra config for this pageserver start.
@@ -1252,17 +1260,17 @@ class ZenithPageserver(PgProtocol):
"""
assert self.running == False
self.env.zenith_cli.pageserver_start(overrides=overrides)
self.env.neon_cli.pageserver_start(overrides=overrides)
self.running = True
return self
def stop(self, immediate=False) -> 'ZenithPageserver':
def stop(self, immediate=False) -> 'NeonPageserver':
"""
Stop the page server.
Returns self.
"""
if self.running:
self.env.zenith_cli.pageserver_stop(immediate)
self.env.neon_cli.pageserver_stop(immediate)
self.running = False
return self
@@ -1272,8 +1280,8 @@ class ZenithPageserver(PgProtocol):
def __exit__(self, exc_type, exc, tb):
self.stop(True)
def http_client(self, auth_token: Optional[str] = None) -> ZenithPageserverHttpClient:
return ZenithPageserverHttpClient(
def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
return NeonPageserverHttpClient(
port=self.service_port.http,
auth_token=auth_token,
)
@@ -1371,6 +1379,7 @@ class VanillaPostgres(PgProtocol):
self.pg_bin = pg_bin
self.running = False
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
"""Append lines into postgresql.conf file."""
@@ -1405,10 +1414,12 @@ class VanillaPostgres(PgProtocol):
@pytest.fixture(scope='function')
def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]:
def vanilla_pg(test_output_dir: str,
port_distributor: PortDistributor) -> Iterator[VanillaPostgres]:
pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
pg_bin = PgBin(test_output_dir)
with VanillaPostgres(pgdatadir, pg_bin, 5432) as vanilla_pg:
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
yield vanilla_pg
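Since vanilla_pg now takes its port from port_distributor instead of hard-coding 5432, callers that need the port can read it back from the server options, as the proxy fixture below does. A minimal hedged sketch with a hypothetical test name:

from fixtures.log_helper import log
from fixtures.neon_fixtures import VanillaPostgres


def test_vanilla_pg_port(vanilla_pg: VanillaPostgres):
    vanilla_pg.start()
    # the port is assigned by port_distributor rather than hard-coded to 5432
    port = vanilla_pg.default_options['port']
    log.info(f"vanilla postgres is listening on port {port}")
    assert vanilla_pg.safe_psql('select 1')[0][0] == 1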
@@ -1453,8 +1464,8 @@ def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
yield remote_pg
class ZenithProxy(PgProtocol):
def __init__(self, port: int):
class NeonProxy(PgProtocol):
def __init__(self, port: int, pg_port: int):
super().__init__(host="127.0.0.1",
user="proxy_user",
password="pytest2",
@@ -1463,18 +1474,20 @@ class ZenithProxy(PgProtocol):
self.http_port = 7001
self.host = "127.0.0.1"
self.port = port
self.pg_port = pg_port
self._popen: Optional[subprocess.Popen[bytes]] = None
def start_static(self, addr="127.0.0.1:5432") -> None:
def start(self) -> None:
assert self._popen is None
# Start proxy
bin_proxy = os.path.join(str(zenith_binpath), 'proxy')
bin_proxy = os.path.join(str(neon_binpath), 'proxy')
args = [bin_proxy]
args.extend(["--http", f"{self.host}:{self.http_port}"])
args.extend(["--proxy", f"{self.host}:{self.port}"])
args.extend(["--auth-backend", "postgres"])
args.extend(["--auth-endpoint", "postgres://proxy_auth:pytest1@localhost:5432/postgres"])
args.extend(
["--auth-endpoint", f"postgres://proxy_auth:pytest1@localhost:{self.pg_port}/postgres"])
self._popen = subprocess.Popen(args)
self._wait_until_ready()
@@ -1493,20 +1506,22 @@ class ZenithProxy(PgProtocol):
@pytest.fixture(scope='function')
def static_proxy(vanilla_pg) -> Iterator[ZenithProxy]:
"""Zenith proxy that routes directly to vanilla postgres."""
def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]:
"""Neon proxy that routes directly to vanilla postgres."""
vanilla_pg.start()
vanilla_pg.safe_psql("create user proxy_auth with password 'pytest1' superuser")
vanilla_pg.safe_psql("create user proxy_user with password 'pytest2'")
with ZenithProxy(4432) as proxy:
proxy.start_static()
port = port_distributor.get_port()
pg_port = vanilla_pg.default_options['port']
with NeonProxy(port, pg_port) as proxy:
proxy.start()
yield proxy
class Postgres(PgProtocol):
""" An object representing a running postgres daemon. """
def __init__(self, env: ZenithEnv, tenant_id: uuid.UUID, port: int):
def __init__(self, env: NeonEnv, tenant_id: uuid.UUID, port: int):
super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres')
self.env = env
self.running = False
@@ -1532,11 +1547,11 @@ class Postgres(PgProtocol):
config_lines = []
self.node_name = node_name or f'{branch_name}_pg_node'
self.env.zenith_cli.pg_create(branch_name,
node_name=self.node_name,
tenant_id=self.tenant_id,
lsn=lsn,
port=self.port)
self.env.neon_cli.pg_create(branch_name,
node_name=self.node_name,
tenant_id=self.tenant_id,
lsn=lsn,
port=self.port)
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
@@ -1560,9 +1575,9 @@ class Postgres(PgProtocol):
log.info(f"Starting postgres node {self.node_name}")
run_result = self.env.zenith_cli.pg_start(self.node_name,
tenant_id=self.tenant_id,
port=self.port)
run_result = self.env.neon_cli.pg_start(self.node_name,
tenant_id=self.tenant_id,
port=self.port)
self.running = True
log.info(f"stdout: {run_result.stdout}")
@@ -1630,7 +1645,7 @@ class Postgres(PgProtocol):
if self.running:
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id)
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id)
self.running = False
return self
@@ -1642,7 +1657,7 @@ class Postgres(PgProtocol):
"""
assert self.node_name is not None
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, True)
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id, True)
self.node_name = None
self.running = False
@@ -1679,7 +1694,7 @@ class Postgres(PgProtocol):
class PostgresFactory:
""" An object representing multiple running postgres daemons. """
def __init__(self, env: ZenithEnv):
def __init__(self, env: NeonEnv):
self.env = env
self.num_instances = 0
self.instances: List[Postgres] = []
@@ -1750,15 +1765,14 @@ class SafekeeperPort:
@dataclass
class Safekeeper:
""" An object representing a running safekeeper daemon. """
env: ZenithEnv
env: NeonEnv
port: SafekeeperPort
id: int
auth_token: Optional[str] = None
running: bool = False
def start(self) -> 'Safekeeper':
assert self.running == False
self.env.zenith_cli.safekeeper_start(self.id)
self.env.neon_cli.safekeeper_start(self.id)
self.running = True
# wait for wal acceptor start by checking its status
started_at = time.time()
@@ -1778,7 +1792,7 @@ class Safekeeper:
def stop(self, immediate=False) -> 'Safekeeper':
log.info('Stopping safekeeper {}'.format(self.id))
self.env.zenith_cli.safekeeper_stop(self.id, immediate)
self.env.neon_cli.safekeeper_stop(self.id, immediate)
self.running = False
return self
@@ -1809,8 +1823,8 @@ class Safekeeper:
assert isinstance(res, dict)
return res
def http_client(self) -> SafekeeperHttpClient:
return SafekeeperHttpClient(port=self.port.http)
def http_client(self, auth_token: Optional[str] = None) -> SafekeeperHttpClient:
return SafekeeperHttpClient(port=self.port.http, auth_token=auth_token)
def data_dir(self) -> str:
return os.path.join(self.env.repo_dir, "safekeepers", f"sk{self.id}")
@@ -1834,9 +1848,15 @@ class SafekeeperMetrics:
class SafekeeperHttpClient(requests.Session):
def __init__(self, port: int):
HTTPError = requests.HTTPError
def __init__(self, port: int, auth_token: Optional[str] = None):
super().__init__()
self.port = port
self.auth_token = auth_token
if auth_token is not None:
self.headers['Authorization'] = f'Bearer {auth_token}'
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
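For reference, a hedged sketch of how a test obtains authenticated and mis-authenticated clients, mirroring the pattern of test_timeline_status earlier in this diff. The helper name is hypothetical; http_client(auth_token=...) attaches the 'Authorization: Bearer <token>' header shown above.

import pytest
from uuid import uuid4

from fixtures.neon_fixtures import NeonEnv


def check_safekeeper_auth(env: NeonEnv, tenant_id: str, timeline_id: str):
    # hypothetical helper: the safekeeper HTTP API is gated by per-tenant JWTs
    sk = env.safekeepers[0]

    good = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
    good.timeline_status(tenant_id, timeline_id)  # token for this tenant: allowed

    bad = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
    with pytest.raises(bad.HTTPError, match='Forbidden|Unauthorized'):
        bad.timeline_status(tenant_id, timeline_id)  # token for another tenant: rejected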
@@ -1966,7 +1986,7 @@ def get_test_output_dir(request: Any) -> str:
# This is autouse, so the test output directory always gets created, even
# if a test doesn't put anything there. It also solves a problem with the
# zenith_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# creates the repo in the test output directory. But it cannot depend on
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
# it has 'session' scope and cannot access fixtures with 'function'
@@ -2044,7 +2064,7 @@ def list_files_to_compare(pgdata_dir: str):
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(test_output_dir: str, env: ZenithEnv, pg: Postgres):
def check_restored_datadir_content(test_output_dir: str, env: NeonEnv, pg: Postgres):
# Get the timeline ID. We need it for the 'basebackup' command
with closing(pg.connect()) as conn:
@@ -2134,7 +2154,7 @@ def wait_until(number_of_iterations: int, interval: int, func):
raise Exception("timed out while waiting for %s" % func) from last_exception
def assert_local(pageserver_http_client: ZenithPageserverHttpClient,
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2142,7 +2162,7 @@ def assert_local(pageserver_http_client: ZenithPageserverHttpClient,
return timeline_detail
def remote_consistent_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2158,7 +2178,7 @@ def remote_consistent_lsn(pageserver_http_client: ZenithPageserverHttpClient,
return lsn_from_hex(lsn_str)
def wait_for_upload(pageserver_http_client: ZenithPageserverHttpClient,
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):
@@ -2174,7 +2194,7 @@ def wait_for_upload(pageserver_http_client: ZenithPageserverHttpClient,
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
def last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
@@ -2184,7 +2204,7 @@ def last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
return lsn_from_hex(lsn_str)
def wait_for_last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):


@@ -0,0 +1,52 @@
from typing import List
import pytest
class PgStatTable:
table: str
columns: List[str]
additional_query: str
def __init__(self, table: str, columns: List[str], filter_query: str = ""):
self.table = table
self.columns = columns
self.additional_query = filter_query
@property
def query(self) -> str:
return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}"
@pytest.fixture(scope='function')
def pg_stats_rw() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database",
["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_ro() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_wo() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
]
@pytest.fixture(scope='function')
def pg_stats_wal() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_wal",
["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"],
"")
]
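These fixtures are intended to be combined with PgCompare.record_pg_stats from fixtures/compare_fixtures.py shown earlier in this diff; below is a hedged sketch of such a test (hypothetical name and workload).

from typing import List

from fixtures.compare_fixtures import PgCompare
from fixtures.pg_stats import PgStatTable


def test_write_throughput(neon_with_baseline: PgCompare, pg_stats_wo: List[PgStatTable]):
    env = neon_with_baseline
    env.pg.safe_psql('create table t(i int)')

    # deltas of the selected pg_stat_database counters are recorded as benchmark metrics
    with env.record_pg_stats(pg_stats_wo):
        env.pg.safe_psql('insert into t select generate_series(1, 100000)')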

Some files were not shown because too many files have changed in this diff.