mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-16 11:50:36 +00:00
Compare commits
33 Commits
ps-snapsho
...
added-proj
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2bfef5514e | ||
|
|
9ec312ce98 | ||
|
|
6ec80c0015 | ||
|
|
bcf5cd908e | ||
|
|
5f9924b7f6 | ||
|
|
ec483d705d | ||
|
|
a76fe9bf8a | ||
|
|
0bc9ff571b | ||
|
|
77366148ee | ||
|
|
9aadbc316d | ||
|
|
619515d935 | ||
|
|
b763adaf8a | ||
|
|
1314bb483f | ||
|
|
e1eb53ac59 | ||
|
|
837aeb77ac | ||
|
|
c46bf93808 | ||
|
|
f5e6b1c525 | ||
|
|
a15470e3d6 | ||
|
|
96bda79092 | ||
|
|
98a1a2b3cd | ||
|
|
bf6428971e | ||
|
|
b85d284f08 | ||
|
|
164f8f8124 | ||
|
|
45792c25be | ||
|
|
cffea24d20 | ||
|
|
fc0b51819c | ||
|
|
1d18b813b2 | ||
|
|
fedcc71c01 | ||
|
|
a3238cd69d | ||
|
|
f12fa69c9f | ||
|
|
bbe7bc4dc1 | ||
|
|
c5f3c9bbc7 | ||
|
|
e74d00feb6 |
@@ -750,6 +750,7 @@ workflows:
|
||||
- build-postgres-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: pg_regress-tests-<< matrix.build_type >>
|
||||
context: PERF_TEST_RESULT_CONNSTR
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
|
||||
4
.github/workflows/testing.yml
vendored
4
.github/workflows/testing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
matrix:
|
||||
# If we want to duplicate this job for different
|
||||
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
|
||||
rust_toolchain: [1.58]
|
||||
rust_toolchain: [stable]
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
timeout-minutes: 30
|
||||
name: run regression test suite
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
target
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
|
||||
|
||||
- name: Run cargo clippy
|
||||
run: ./run_clippy.sh
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,7 +5,6 @@
|
||||
__pycache__/
|
||||
test_output/
|
||||
.vscode
|
||||
.idea
|
||||
/.zenith
|
||||
/integration_tests/.zenith
|
||||
|
||||
|
||||
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -811,7 +811,6 @@ name = "etcd_broker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"etcd-client",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
||||
@@ -188,7 +188,7 @@ Not currently committed but proposed:
|
||||
3. Prefetching
|
||||
- Why?
|
||||
As far as pages in Zenith are loaded on demand, to reduce node startup time
|
||||
and also speedup some massive queries we need some mechanism for bulk loading to
|
||||
and also sppedup some massive queries we need some mechanism for bulk loading to
|
||||
reduce page request round-trip overhead.
|
||||
|
||||
Currently Postgres is supporting prefetching only for bitmap scan.
|
||||
|
||||
@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and
|
||||
|
||||
### **Checkpointing**
|
||||
|
||||
No such mechanism is needed. Or we may look at the storage node as at kind of continuous checkpointer.
|
||||
No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
|
||||
|
||||
### **Full page writes (torn page protection)**
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.8.0"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -6,7 +6,6 @@ use std::{
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::{Captures, Regex};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
@@ -57,15 +56,15 @@ pub struct SkTimelineInfo {
|
||||
pub peer_horizon_lsn: Option<Lsn>,
|
||||
#[serde(default)]
|
||||
pub safekeeper_connstr: Option<String>,
|
||||
#[serde(default)]
|
||||
pub pageserver_connstr: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum BrokerError {
|
||||
#[error("Etcd client error: {0}. Context: {1}")]
|
||||
EtcdClient(etcd_client::Error, String),
|
||||
#[error("Error during parsing etcd key: {0}")]
|
||||
InvalidKey(String),
|
||||
#[error("Error during parsing etcd value: {0}")]
|
||||
#[error("Error during parsing etcd data: {0}")]
|
||||
ParsingError(String),
|
||||
#[error("Internal error: {0}")]
|
||||
InternalError(String),
|
||||
@@ -137,6 +136,29 @@ impl SkTimelineSubscriptionKind {
|
||||
}
|
||||
}
|
||||
|
||||
fn watch_regex(&self) -> Regex {
|
||||
match self.kind {
|
||||
SubscriptionKind::All => Regex::new(&format!(
|
||||
r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
|
||||
self.broker_etcd_prefix
|
||||
))
|
||||
.expect("wrong regex for 'everything' subscription"),
|
||||
SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
|
||||
r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
|
||||
self.broker_etcd_prefix
|
||||
))
|
||||
.expect("wrong regex for 'tenant' subscription"),
|
||||
SubscriptionKind::Timeline(ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
}) => Regex::new(&format!(
|
||||
r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
|
||||
self.broker_etcd_prefix
|
||||
))
|
||||
.expect("wrong regex for 'timeline' subscription"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Etcd key to use for watching a certain timeline updates from safekeepers.
|
||||
pub fn watch_key(&self) -> String {
|
||||
match self.kind {
|
||||
@@ -174,7 +196,6 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
subscription: SkTimelineSubscriptionKind,
|
||||
) -> Result<SkTimelineSubscription, BrokerError> {
|
||||
info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
|
||||
let kind = subscription.clone();
|
||||
|
||||
let (watcher, mut stream) = client
|
||||
.watch(
|
||||
@@ -190,9 +211,12 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
})?;
|
||||
|
||||
let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
|
||||
|
||||
let subscription_kind = subscription.kind;
|
||||
let regex = subscription.watch_regex();
|
||||
let watcher_handle = tokio::spawn(async move {
|
||||
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
|
||||
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", subscription.kind
|
||||
"Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
|
||||
)))? {
|
||||
if resp.canceled() {
|
||||
info!("Watch for timeline updates subscription was canceled, exiting");
|
||||
@@ -213,16 +237,9 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
if EventType::Put == event.event_type() {
|
||||
if let Some(new_etcd_kv) = event.kv() {
|
||||
let new_kv_version = new_etcd_kv.version();
|
||||
let (key_str, value_str) = match extract_key_value_str(new_etcd_kv) {
|
||||
Ok(strs) => strs,
|
||||
Err(e) => {
|
||||
error!("Failed to represent etcd KV {new_etcd_kv:?} as pair of str: {e}");
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
match parse_safekeeper_timeline(&subscription, key_str, value_str) {
|
||||
Ok((zttid, timeline)) => {
|
||||
match parse_etcd_key_value(subscription_kind, ®ex, new_etcd_kv) {
|
||||
Ok(Some((zttid, timeline))) => {
|
||||
match timeline_updates
|
||||
.entry(zttid)
|
||||
.or_default()
|
||||
@@ -233,8 +250,6 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
if old_etcd_kv_version < new_kv_version {
|
||||
o.insert(timeline.info);
|
||||
timeline_etcd_versions.insert(zttid,new_kv_version);
|
||||
} else {
|
||||
debug!("Skipping etcd timeline update due to older version compared to one that's already stored");
|
||||
}
|
||||
}
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
@@ -243,8 +258,7 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
}
|
||||
}
|
||||
}
|
||||
// it is normal to get other keys when we subscribe to everything
|
||||
Err(BrokerError::InvalidKey(e)) => debug!("Unexpected key for timeline update: {e}"),
|
||||
Ok(None) => {}
|
||||
Err(e) => error!("Failed to parse timeline update: {e}"),
|
||||
};
|
||||
}
|
||||
@@ -258,72 +272,64 @@ pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}.instrument(info_span!("etcd_broker")));
|
||||
});
|
||||
|
||||
Ok(SkTimelineSubscription {
|
||||
kind,
|
||||
kind: subscription,
|
||||
safekeeper_timeline_updates,
|
||||
watcher_handle,
|
||||
watcher,
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_key_value_str(kv: &KeyValue) -> Result<(&str, &str), BrokerError> {
|
||||
let key = kv.key_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
|
||||
})?;
|
||||
let value = kv.value_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
|
||||
})?;
|
||||
Ok((key, value))
|
||||
}
|
||||
|
||||
static SK_TIMELINE_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]]+)$")
|
||||
.expect("wrong regex for safekeeper timeline etcd key")
|
||||
});
|
||||
|
||||
fn parse_safekeeper_timeline(
|
||||
subscription: &SkTimelineSubscriptionKind,
|
||||
key_str: &str,
|
||||
value_str: &str,
|
||||
) -> Result<(ZTenantTimelineId, SafekeeperTimeline), BrokerError> {
|
||||
let broker_prefix = subscription.broker_etcd_prefix.as_str();
|
||||
if !key_str.starts_with(broker_prefix) {
|
||||
return Err(BrokerError::InvalidKey(format!(
|
||||
"KV has unexpected key '{key_str}' that does not start with broker prefix {broker_prefix}"
|
||||
)));
|
||||
}
|
||||
|
||||
let key_part = &key_str[broker_prefix.len()..];
|
||||
let key_captures = match SK_TIMELINE_KEY_REGEX.captures(key_part) {
|
||||
Some(captures) => captures,
|
||||
None => {
|
||||
return Err(BrokerError::InvalidKey(format!(
|
||||
"KV has unexpected key part '{key_part}' that does not match required regex {}",
|
||||
SK_TIMELINE_KEY_REGEX.as_str()
|
||||
)));
|
||||
}
|
||||
fn parse_etcd_key_value(
|
||||
subscription_kind: SubscriptionKind,
|
||||
regex: &Regex,
|
||||
kv: &KeyValue,
|
||||
) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
|
||||
let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
|
||||
})?) {
|
||||
caps
|
||||
} else {
|
||||
return Ok(None);
|
||||
};
|
||||
let info = serde_json::from_str(value_str).map_err(|e| {
|
||||
BrokerError::ParsingError(format!(
|
||||
"Failed to parse '{value_str}' as safekeeper timeline info: {e}"
|
||||
))
|
||||
|
||||
let (zttid, safekeeper_id) = match subscription_kind {
|
||||
SubscriptionKind::All => (
|
||||
ZTenantTimelineId::new(
|
||||
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
|
||||
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
|
||||
),
|
||||
NodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
SubscriptionKind::Tenant(tenant_id) => (
|
||||
ZTenantTimelineId::new(
|
||||
tenant_id,
|
||||
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
|
||||
),
|
||||
NodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
SubscriptionKind::Timeline(zttid) => (
|
||||
zttid,
|
||||
NodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
};
|
||||
|
||||
let info_str = kv.value_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
|
||||
})?;
|
||||
|
||||
let zttid = ZTenantTimelineId::new(
|
||||
parse_capture(&key_captures, 1).map_err(BrokerError::ParsingError)?,
|
||||
parse_capture(&key_captures, 2).map_err(BrokerError::ParsingError)?,
|
||||
);
|
||||
let safekeeper_id = NodeId(parse_capture(&key_captures, 3).map_err(BrokerError::ParsingError)?);
|
||||
|
||||
Ok((
|
||||
Ok(Some((
|
||||
zttid,
|
||||
SafekeeperTimeline {
|
||||
safekeeper_id,
|
||||
info,
|
||||
info: serde_json::from_str(info_str).map_err(|e| {
|
||||
BrokerError::ParsingError(format!(
|
||||
"Failed to parse '{info_str}' as safekeeper timeline info: {e}"
|
||||
))
|
||||
})?,
|
||||
},
|
||||
))
|
||||
)))
|
||||
}
|
||||
|
||||
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
|
||||
@@ -342,53 +348,3 @@ where
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use utils::zid::ZTimelineId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn typical_etcd_prefix_should_be_parsed() {
|
||||
let prefix = "neon";
|
||||
let tenant_id = ZTenantId::generate();
|
||||
let timeline_id = ZTimelineId::generate();
|
||||
let all_subscription = SkTimelineSubscriptionKind {
|
||||
broker_etcd_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::All,
|
||||
};
|
||||
let tenant_subscription = SkTimelineSubscriptionKind {
|
||||
broker_etcd_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::Tenant(tenant_id),
|
||||
};
|
||||
let timeline_subscription = SkTimelineSubscriptionKind {
|
||||
broker_etcd_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::Timeline(ZTenantTimelineId::new(tenant_id, timeline_id)),
|
||||
};
|
||||
|
||||
let typical_etcd_kv_strs = [
|
||||
(
|
||||
format!("{prefix}/{tenant_id}/{timeline_id}/safekeeper/1"),
|
||||
r#"{"last_log_term":231,"flush_lsn":"0/241BB70","commit_lsn":"0/241BB70","backup_lsn":"0/2000000","remote_consistent_lsn":"0/0","peer_horizon_lsn":"0/16960E8","safekeeper_connstr":"something.local:1234","pageserver_connstr":"postgresql://(null):@somethine.else.local:3456"}"#,
|
||||
),
|
||||
(
|
||||
format!("{prefix}/{tenant_id}/{timeline_id}/safekeeper/13"),
|
||||
r#"{"last_log_term":231,"flush_lsn":"0/241BB70","commit_lsn":"0/241BB70","backup_lsn":"0/2000000","remote_consistent_lsn":"0/0","peer_horizon_lsn":"0/16960E8","safekeeper_connstr":"something.local:1234","pageserver_connstr":"postgresql://(null):@somethine.else.local:3456"}"#,
|
||||
),
|
||||
];
|
||||
|
||||
for (key_string, value_str) in typical_etcd_kv_strs {
|
||||
for subscription in [
|
||||
&all_subscription,
|
||||
&tenant_subscription,
|
||||
&timeline_subscription,
|
||||
] {
|
||||
let (id, _timeline) =
|
||||
parse_safekeeper_timeline(subscription, &key_string, value_str)
|
||||
.unwrap_or_else(|e| panic!("Should be able to parse etcd key string '{key_string}' and etcd value string '{value_str}' for subscription {subscription:?}, but got: {e}"));
|
||||
assert_eq!(id, ZTenantTimelineId::new(tenant_id, timeline_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,18 +269,15 @@ impl FeStartupPacket {
|
||||
.next()
|
||||
.context("expected even number of params in StartupMessage")?;
|
||||
if name == "options" {
|
||||
// parsing options arguments "...&options=<var0>%3D<val0>+<var1>=<var1>..."
|
||||
// '%3D' is '=' and '+' is ' '
|
||||
|
||||
// Note: we allow users that don't have SNI capabilities,
|
||||
// to pass a special keyword argument 'project'
|
||||
// to be used to determine the cluster name by the proxy.
|
||||
|
||||
//TODO: write unit test for this and refactor in its own function.
|
||||
for cmdopt in value.split(' ') {
|
||||
let nameval: Vec<&str> = cmdopt.split('=').collect();
|
||||
//parsing options arguments "..&options=<var>:<val>,.."
|
||||
//extended example and set of options:
|
||||
//https://github.com/neondatabase/neon/blob/main/docs/rfcs/016-connection-routing.md#connection-url
|
||||
for cmdopt in value.split(',') {
|
||||
let nameval: Vec<&str> = cmdopt.split(':').collect();
|
||||
if nameval.len() == 2 {
|
||||
params.insert(nameval[0].to_string(), nameval[1].to_string());
|
||||
} else {
|
||||
//todo: inform user / throw error message if options format is wrong.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -1727,7 +1727,9 @@ impl LayeredTimeline {
|
||||
new_delta_path.clone(),
|
||||
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
])?;
|
||||
fail_point!("flush-frozen-before-sync");
|
||||
fail_point!("checkpoint-before-sync");
|
||||
|
||||
fail_point!("flush-frozen");
|
||||
|
||||
// Finally, replace the frozen in-memory layer with the new on-disk layer
|
||||
{
|
||||
|
||||
@@ -409,7 +409,7 @@ removed because there is no newer layer file for the table.
|
||||
|
||||
Things get slightly more complicated with multiple branches. All of
|
||||
the above still holds, but in addition to recent files we must also
|
||||
retain older snapshot files that are still needed by child branches.
|
||||
retain older shapshot files that are still needed by child branches.
|
||||
For example, if child branch is created at LSN 150, and the 'customers'
|
||||
table is updated on the branch, you would have these files:
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
//! assign a buffer for a page, you must hold the mapping lock and the lock on
|
||||
//! the slot at the same time.
|
||||
//!
|
||||
//! Whenever you need to hold both locks simultaneously, the slot lock must be
|
||||
//! Whenever you need to hold both locks simultenously, the slot lock must be
|
||||
//! acquired first. This consistent ordering avoids deadlocks. To look up a page
|
||||
//! in the cache, you would first look up the mapping, while holding the mapping
|
||||
//! lock, and then lock the slot. You must release the mapping lock in between,
|
||||
|
||||
223
pageserver/src/remote_storage/storage_sync/delete.rs
Normal file
223
pageserver/src/remote_storage/storage_sync/delete.rs
Normal file
@@ -0,0 +1,223 @@
|
||||
//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.
|
||||
|
||||
use anyhow::Context;
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use tracing::{debug, error, info};
|
||||
use utils::zid::ZTenantTimelineId;
|
||||
|
||||
use crate::remote_storage::{
|
||||
storage_sync::{SyncQueue, SyncTask},
|
||||
RemoteStorage,
|
||||
};
|
||||
|
||||
use super::{LayersDeletion, SyncData};
|
||||
|
||||
/// Attempts to remove the timleline layers from the remote storage.
|
||||
/// If the task had not adjusted the metadata before, the deletion will fail.
|
||||
pub(super) async fn delete_timeline_layers<'a, P, S>(
|
||||
storage: &'a S,
|
||||
sync_queue: &SyncQueue,
|
||||
sync_id: ZTenantTimelineId,
|
||||
mut delete_data: SyncData<LayersDeletion>,
|
||||
) -> bool
|
||||
where
|
||||
P: std::fmt::Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||
{
|
||||
if !delete_data.data.deletion_registered {
|
||||
error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
|
||||
delete_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
return false;
|
||||
}
|
||||
|
||||
if delete_data.data.layers_to_delete.is_empty() {
|
||||
info!("No layers to delete, skipping");
|
||||
return true;
|
||||
}
|
||||
|
||||
let layers_to_delete = delete_data
|
||||
.data
|
||||
.layers_to_delete
|
||||
.drain()
|
||||
.collect::<Vec<_>>();
|
||||
debug!("Layers to delete: {layers_to_delete:?}");
|
||||
info!("Deleting {} timeline layers", layers_to_delete.len());
|
||||
|
||||
let mut delete_tasks = layers_to_delete
|
||||
.into_iter()
|
||||
.map(|local_layer_path| async {
|
||||
let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
local_layer_path.display()
|
||||
)
|
||||
}) {
|
||||
Ok(path) => path,
|
||||
Err(e) => return Err((e, local_layer_path)),
|
||||
};
|
||||
|
||||
match storage.delete(&storage_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to delete remote layer from storage at '{:?}'",
|
||||
storage_path
|
||||
)
|
||||
}) {
|
||||
Ok(()) => Ok(local_layer_path),
|
||||
Err(e) => Err((e, local_layer_path)),
|
||||
}
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut errored = false;
|
||||
while let Some(deletion_result) = delete_tasks.next().await {
|
||||
match deletion_result {
|
||||
Ok(local_layer_path) => {
|
||||
debug!(
|
||||
"Successfully deleted layer {} for timeline {sync_id}",
|
||||
local_layer_path.display()
|
||||
);
|
||||
delete_data.data.deleted_layers.insert(local_layer_path);
|
||||
}
|
||||
Err((e, local_layer_path)) => {
|
||||
errored = true;
|
||||
error!(
|
||||
"Failed to delete layer {} for timeline {sync_id}: {e:?}",
|
||||
local_layer_path.display()
|
||||
);
|
||||
delete_data.data.layers_to_delete.insert(local_layer_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if errored {
|
||||
debug!("Reenqueuing failed delete task for timeline {sync_id}");
|
||||
delete_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
}
|
||||
errored
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{collections::HashSet, num::NonZeroUsize};
|
||||
|
||||
use itertools::Itertools;
|
||||
use tempfile::tempdir;
|
||||
use tokio::fs;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::{
|
||||
remote_storage::{
|
||||
storage_sync::test_utils::{create_local_timeline, dummy_metadata},
|
||||
LocalFs,
|
||||
},
|
||||
repository::repo_harness::{RepoHarness, TIMELINE_ID},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn delete_timeline_negative() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("delete_timeline_negative")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
|
||||
|
||||
let deleted = delete_timeline_layers(
|
||||
&storage,
|
||||
&sync_queue,
|
||||
sync_id,
|
||||
SyncData {
|
||||
retries: 1,
|
||||
data: LayersDeletion {
|
||||
deleted_layers: HashSet::new(),
|
||||
layers_to_delete: HashSet::new(),
|
||||
deletion_registered: false,
|
||||
},
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
!deleted,
|
||||
"Should not start the deletion for task with delete metadata unregistered"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn delete_timeline() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("delete_timeline")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let layer_files = ["a", "b", "c", "d"];
|
||||
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
|
||||
let current_retries = 3;
|
||||
let metadata = dummy_metadata(Lsn(0x30));
|
||||
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
|
||||
let timeline_upload =
|
||||
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
|
||||
for local_path in timeline_upload.layers_to_upload {
|
||||
let remote_path = storage.storage_path(&local_path)?;
|
||||
let remote_parent_dir = remote_path.parent().unwrap();
|
||||
if !remote_parent_dir.exists() {
|
||||
fs::create_dir_all(&remote_parent_dir).await?;
|
||||
}
|
||||
fs::copy(&local_path, &remote_path).await?;
|
||||
}
|
||||
assert_eq!(
|
||||
storage
|
||||
.list()
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|remote_path| storage.local_path(&remote_path).unwrap())
|
||||
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
|
||||
.sorted()
|
||||
.collect::<Vec<_>>(),
|
||||
layer_files
|
||||
.iter()
|
||||
.map(|layer_str| layer_str.to_string())
|
||||
.sorted()
|
||||
.collect::<Vec<_>>(),
|
||||
"Expect to have all layer files remotely before deletion"
|
||||
);
|
||||
|
||||
let deleted = delete_timeline_layers(
|
||||
&storage,
|
||||
&sync_queue,
|
||||
sync_id,
|
||||
SyncData {
|
||||
retries: current_retries,
|
||||
data: LayersDeletion {
|
||||
deleted_layers: HashSet::new(),
|
||||
layers_to_delete: HashSet::from([
|
||||
local_timeline_path.join("a"),
|
||||
local_timeline_path.join("c"),
|
||||
local_timeline_path.join("something_different"),
|
||||
]),
|
||||
deletion_registered: true,
|
||||
},
|
||||
},
|
||||
)
|
||||
.await;
|
||||
assert!(deleted, "Should be able to delete timeline files");
|
||||
|
||||
assert_eq!(
|
||||
storage
|
||||
.list()
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|remote_path| storage.local_path(&remote_path).unwrap())
|
||||
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
|
||||
.sorted()
|
||||
.collect::<Vec<_>>(),
|
||||
vec!["b".to_string(), "d".to_string()],
|
||||
"Expect to have only non-deleted files remotely"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -195,7 +195,6 @@ impl Display for TimelineSyncStatusUpdate {
|
||||
f.write_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// A repository corresponds to one .zenith directory. One repository holds multiple
|
||||
/// timelines, forked off from the same initial call to 'initdb'.
|
||||
@@ -243,7 +242,7 @@ pub trait Repository: Send + Sync {
|
||||
///
|
||||
/// 'timelineid' specifies the timeline to GC, or None for all.
|
||||
/// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
|
||||
/// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
|
||||
/// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
|
||||
/// to make tests more deterministic.
|
||||
/// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
|
||||
fn gc_iteration(
|
||||
|
||||
@@ -892,7 +892,7 @@ fn storage_sync_loop<P, S>(
|
||||
|
||||
REMAINING_SYNC_ITEMS.set(remaining_queue_length as i64);
|
||||
if remaining_queue_length > 0 || !batched_tasks.is_empty() {
|
||||
debug!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
|
||||
info!("Processing tasks for {} timelines in batch, more tasks left to process: {remaining_queue_length}", batched_tasks.len());
|
||||
} else {
|
||||
debug!("No tasks to process");
|
||||
continue;
|
||||
@@ -1186,7 +1186,7 @@ async fn update_local_metadata(
|
||||
let remote_metadata = match remote_timeline {
|
||||
Some(timeline) => &timeline.metadata,
|
||||
None => {
|
||||
debug!("No remote timeline to update local metadata from, skipping the update");
|
||||
info!("No remote timeline to update local metadata from, skipping the update");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -37,7 +37,7 @@ pub mod defaults {
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
|
||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
|
||||
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1_000_000;
|
||||
}
|
||||
|
||||
/// Per-tenant configuration options
|
||||
|
||||
@@ -468,21 +468,14 @@ async fn timeline_wal_broker_loop_step(
|
||||
// finally, if no other tasks are completed, get another broker update and possibly reconnect
|
||||
updates = broker_subscription.fetch_data() => match updates {
|
||||
Some(mut all_timeline_updates) => {
|
||||
match all_timeline_updates.remove(&id) {
|
||||
Some(subscribed_timeline_updates) => {
|
||||
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
|
||||
Some(candidate) => {
|
||||
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
|
||||
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
|
||||
},
|
||||
None => debug!("No connection candidate was selected for timeline"),
|
||||
}
|
||||
if let Some(subscribed_timeline_updates) = all_timeline_updates.remove(&id) {
|
||||
match wal_connection_manager.select_connection_candidate(subscribed_timeline_updates) {
|
||||
Some(candidate) => {
|
||||
info!("Switching to different safekeeper {} for timeline {id}, reason: {:?}", candidate.safekeeper_id, candidate.reason);
|
||||
wal_connection_manager.change_connection(candidate.safekeeper_id, candidate.wal_producer_connstr).await;
|
||||
},
|
||||
None => {}
|
||||
}
|
||||
// XXX: If we subscribe for a certain timeline, we expect only its data to come.
|
||||
// But somebody could propagate a new etcd key, that has the same prefix as the subscribed one, then we'll get odd data.
|
||||
// This is an error, we don't want to have overlapping prefixes for timelines, but we can complain and thow those away instead of panicking,
|
||||
// since the next poll might bring the correct data.
|
||||
None => error!("Timeline has an active broker subscription, but got no updates. Other data length: {}", all_timeline_updates.len()),
|
||||
}
|
||||
},
|
||||
None => {
|
||||
@@ -629,28 +622,18 @@ impl WalConnectionManager {
|
||||
/// Checks current state against every fetched safekeeper state of a given timeline.
|
||||
/// Returns a new candidate, if the current state is somewhat lagging, or `None` otherwise.
|
||||
/// The current rules for approving new candidates:
|
||||
/// * pick from the input data from etcd for currently connected safekeeper (if any)
|
||||
/// * out of the rest input entries, pick one with biggest `commit_lsn` that's after than pageserver's latest Lsn for the timeline
|
||||
/// * if there's no such entry, no new candidate found, abort
|
||||
/// * otherwise, check if etcd updates contain currently connected safekeeper
|
||||
/// * if not, that means no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection)
|
||||
/// Reconnect if the time exceeds the threshold.
|
||||
/// * if there's one, compare its Lsn with the other candidate's, reconnect if candidate's over threshold
|
||||
/// * pick the safekeeper with biggest `commit_lsn` that's after than pageserver's latest Lsn for the timeline
|
||||
/// * if the leader is a different SK and either
|
||||
/// * no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection) — reconnect
|
||||
/// * same time amount had passed since the connection, WAL updates happened recently, but the new leader SK has timeline Lsn way ahead of the old one — reconnect
|
||||
///
|
||||
/// This way we ensure to keep up with the most up-to-date safekeeper and don't try to jump from one safekeeper to another too frequently.
|
||||
/// Both thresholds are configured per tenant.
|
||||
fn select_connection_candidate(
|
||||
&self,
|
||||
mut safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
|
||||
safekeeper_timelines: HashMap<NodeId, SkTimelineInfo>,
|
||||
) -> Option<NewWalConnectionCandidate> {
|
||||
let current_sk_data_updated =
|
||||
self.wal_connection_data
|
||||
.as_ref()
|
||||
.and_then(|connection_data| {
|
||||
safekeeper_timelines.remove(&connection_data.safekeeper_id)
|
||||
});
|
||||
|
||||
let candidate_sk_data = safekeeper_timelines
|
||||
let (&new_sk_id, new_sk_timeline, new_wal_producer_connstr) = safekeeper_timelines
|
||||
.iter()
|
||||
.filter(|(_, info)| {
|
||||
info.commit_lsn > Some(self.timeline.tline.get_last_record_lsn())
|
||||
@@ -659,6 +642,7 @@ impl WalConnectionManager {
|
||||
match wal_stream_connection_string(
|
||||
self.id,
|
||||
info.safekeeper_connstr.as_deref()?,
|
||||
info.pageserver_connstr.as_deref()?,
|
||||
) {
|
||||
Ok(connstr) => Some((sk_id, info, connstr)),
|
||||
Err(e) => {
|
||||
@@ -667,78 +651,68 @@ impl WalConnectionManager {
|
||||
}
|
||||
}
|
||||
})
|
||||
.max_by_key(|(_, info, _)| info.commit_lsn);
|
||||
.max_by_key(|(_, info, _)| info.commit_lsn)?;
|
||||
|
||||
match (current_sk_data_updated, candidate_sk_data) {
|
||||
// No better candidate than one we're already connected to:
|
||||
// whatever data update comes for the connected one, we don't have a better candidate
|
||||
(_, None) => None,
|
||||
|
||||
// No updates from the old SK in this batch, but some candidate is available:
|
||||
// check how long time ago did we receive updates from the current SK, switch connections in case it's over the threshold
|
||||
(None, Some((&new_sk_id, _, new_wal_producer_connstr))) => {
|
||||
match self.wal_connection_data.as_ref() {
|
||||
Some(current_connection) => {
|
||||
let last_sk_interaction_time =
|
||||
match current_connection.last_wal_receiver_data.as_ref() {
|
||||
Some((_, data_submission_time)) => *data_submission_time,
|
||||
None => current_connection.connection_init_time,
|
||||
};
|
||||
|
||||
let now = Utc::now().naive_utc();
|
||||
match (now - last_sk_interaction_time).to_std() {
|
||||
Ok(last_interaction) => {
|
||||
if last_interaction > self.lagging_wal_timeout {
|
||||
return Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_producer_connstr: new_wal_producer_connstr,
|
||||
reason: ReconnectReason::NoWalTimeout {
|
||||
last_wal_interaction: last_sk_interaction_time,
|
||||
check_time: now,
|
||||
threshold: self.lagging_wal_timeout,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
Err(_e) => {
|
||||
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}", current_connection.safekeeper_id);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
None => Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_producer_connstr: new_wal_producer_connstr,
|
||||
reason: ReconnectReason::NoExistingConnection,
|
||||
}),
|
||||
match self.wal_connection_data.as_ref() {
|
||||
None => Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_producer_connstr: new_wal_producer_connstr,
|
||||
reason: ReconnectReason::NoExistingConnection,
|
||||
}),
|
||||
Some(current_connection) => {
|
||||
if current_connection.safekeeper_id == new_sk_id {
|
||||
None
|
||||
} else {
|
||||
self.reason_to_reconnect(current_connection, new_sk_timeline)
|
||||
.map(|reason| NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_producer_connstr: new_wal_producer_connstr,
|
||||
reason,
|
||||
})
|
||||
}
|
||||
}
|
||||
// Both current SK got updated via etcd and there's another candidate with suitable Lsn:
|
||||
// check how bigger the new SK Lsn is in the future compared to the current SK, switch connections in case it's over the threshold
|
||||
(
|
||||
Some(current_sk_timeline),
|
||||
Some((&new_sk_id, new_sk_timeline, new_wal_producer_connstr)),
|
||||
) => {
|
||||
let new_lsn = new_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
|
||||
let current_lsn = current_sk_timeline.commit_lsn.unwrap_or(Lsn(0));
|
||||
match new_lsn.0.checked_sub(current_lsn.0)
|
||||
}
|
||||
}
|
||||
|
||||
fn reason_to_reconnect(
|
||||
&self,
|
||||
current_connection: &WalConnectionData,
|
||||
new_sk_timeline: &SkTimelineInfo,
|
||||
) -> Option<ReconnectReason> {
|
||||
let last_sk_interaction_time = match current_connection.last_wal_receiver_data.as_ref() {
|
||||
Some((last_wal_receiver_data, data_submission_time)) => {
|
||||
let new_lsn = new_sk_timeline.commit_lsn?;
|
||||
match new_lsn.0.checked_sub(last_wal_receiver_data.ps_writelsn)
|
||||
{
|
||||
Some(new_sk_lsn_advantage) => {
|
||||
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
|
||||
return Some(
|
||||
NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_producer_connstr: new_wal_producer_connstr,
|
||||
reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
|
||||
});
|
||||
Some(sk_lsn_advantage) => {
|
||||
if sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
|
||||
return Some(ReconnectReason::LaggingWal { current_lsn: Lsn(last_wal_receiver_data.ps_writelsn), new_lsn, threshold: self.max_lsn_wal_lag });
|
||||
}
|
||||
}
|
||||
None => debug!("Best SK candidate has its commit Lsn behind the current timeline's latest consistent Lsn"),
|
||||
}
|
||||
*data_submission_time
|
||||
}
|
||||
None => current_connection.connection_init_time,
|
||||
};
|
||||
|
||||
None
|
||||
let now = Utc::now().naive_utc();
|
||||
match (now - last_sk_interaction_time).to_std() {
|
||||
Ok(last_interaction) => {
|
||||
if last_interaction > self.lagging_wal_timeout {
|
||||
return Some(ReconnectReason::NoWalTimeout {
|
||||
last_wal_interaction: last_sk_interaction_time,
|
||||
check_time: now,
|
||||
threshold: self.lagging_wal_timeout,
|
||||
});
|
||||
}
|
||||
}
|
||||
Err(_e) => {
|
||||
warn!("Last interaction with safekeeper {} happened in the future, ignoring the candidate. Interaction time: {last_sk_interaction_time}, now: {now}",
|
||||
current_connection.safekeeper_id);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -748,6 +722,7 @@ fn wal_stream_connection_string(
|
||||
timeline_id,
|
||||
}: ZTenantTimelineId,
|
||||
listen_pg_addr_str: &str,
|
||||
pageserver_connstr: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
let sk_connstr = format!("postgresql://no_user@{listen_pg_addr_str}/no_db");
|
||||
let me_conf = sk_connstr
|
||||
@@ -757,7 +732,7 @@ fn wal_stream_connection_string(
|
||||
})?;
|
||||
let (host, port) = utils::connstring::connection_host_port(&me_conf);
|
||||
Ok(format!(
|
||||
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id}'"
|
||||
"host={host} port={port} options='-c ztimelineid={timeline_id} ztenantid={tenant_id} pageserver_connstr={pageserver_connstr}'",
|
||||
))
|
||||
}
|
||||
|
||||
@@ -790,6 +765,20 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: None,
|
||||
pageserver_connstr: Some("no safekeeper_connstr".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
NodeId(1),
|
||||
SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(Lsn(1)),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("no pageserver_connstr".to_string()),
|
||||
pageserver_connstr: None,
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -802,6 +791,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("no commit_lsn".to_string()),
|
||||
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -814,6 +804,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("no commit_lsn".to_string()),
|
||||
pageserver_connstr: Some("no commit_lsn (p)".to_string()),
|
||||
},
|
||||
),
|
||||
]));
|
||||
@@ -869,6 +860,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -881,6 +873,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("not advanced Lsn".to_string()),
|
||||
pageserver_connstr: Some("not advanced Lsn (p)".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -895,6 +888,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("not enough advanced Lsn".to_string()),
|
||||
pageserver_connstr: Some("not enough advanced Lsn (p)".to_string()),
|
||||
},
|
||||
),
|
||||
]));
|
||||
@@ -926,6 +920,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
|
||||
},
|
||||
)]))
|
||||
.expect("Expected one candidate selected out of the only data option, but got none");
|
||||
@@ -938,6 +933,9 @@ mod tests {
|
||||
assert!(only_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_SAFEKEEPER_CONNSTR));
|
||||
assert!(only_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_PAGESERVER_CONNSTR));
|
||||
|
||||
let selected_lsn = 100_000;
|
||||
let biggest_wal_candidate = data_manager_with_no_connection
|
||||
@@ -952,6 +950,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("smaller commit_lsn".to_string()),
|
||||
pageserver_connstr: Some("smaller commit_lsn (p)".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -964,6 +963,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -976,6 +976,9 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: None,
|
||||
pageserver_connstr: Some(
|
||||
"no safekeeper_connstr despite bigger commit_lsn".to_string(),
|
||||
),
|
||||
},
|
||||
),
|
||||
]))
|
||||
@@ -992,6 +995,9 @@ mod tests {
|
||||
assert!(biggest_wal_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_SAFEKEEPER_CONNSTR));
|
||||
assert!(biggest_wal_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_PAGESERVER_CONNSTR));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1008,7 +1014,7 @@ mod tests {
|
||||
|
||||
let mut data_manager_with_connection = dummy_wal_connection_manager(&harness);
|
||||
let connected_sk_id = NodeId(0);
|
||||
let mut dummy_connection_data = dummy_connection_data(id, connected_sk_id).await;
|
||||
let mut dummy_connection_data = dummy_connection_data(id, NodeId(0)).await;
|
||||
let lagging_wal_timeout =
|
||||
chrono::Duration::from_std(data_manager_with_connection.lagging_wal_timeout)?;
|
||||
let time_over_threshold =
|
||||
@@ -1038,6 +1044,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
@@ -1050,6 +1057,7 @@ mod tests {
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("advanced by Lsn safekeeper".to_string()),
|
||||
pageserver_connstr: Some("advanced by Lsn safekeeper (p)".to_string()),
|
||||
},
|
||||
),
|
||||
]);
|
||||
@@ -1073,13 +1081,16 @@ mod tests {
|
||||
assert!(over_threshcurrent_candidate
|
||||
.wal_producer_connstr
|
||||
.contains("advanced by Lsn safekeeper"));
|
||||
assert!(over_threshcurrent_candidate
|
||||
.wal_producer_connstr
|
||||
.contains("advanced by Lsn safekeeper (p)"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn timeout_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_wal_over_threshhold_current_candidate")?;
|
||||
async fn timeout_wal_over_threshcurrent_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_wal_over_threshcurrent_candidate")?;
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
|
||||
let id = ZTenantTimelineId {
|
||||
@@ -1097,19 +1108,36 @@ mod tests {
|
||||
dummy_connection_data.connection_init_time = time_over_threshold;
|
||||
data_manager_with_connection.wal_connection_data = Some(dummy_connection_data);
|
||||
|
||||
let new_lsn = Lsn(current_lsn.0 + data_manager_with_connection.max_lsn_wal_lag.get() + 1);
|
||||
let over_threshcurrent_candidate = data_manager_with_connection
|
||||
.select_connection_candidate(HashMap::from([(
|
||||
NodeId(0),
|
||||
SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(current_lsn),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
},
|
||||
)]))
|
||||
.select_connection_candidate(HashMap::from([
|
||||
(
|
||||
NodeId(0),
|
||||
SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(new_lsn),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
pageserver_connstr: Some(DUMMY_PAGESERVER_CONNSTR.to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
NodeId(1),
|
||||
SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(current_lsn),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some("not advanced by Lsn safekeeper".to_string()),
|
||||
pageserver_connstr: Some("not advanced by Lsn safekeeper".to_string()),
|
||||
},
|
||||
),
|
||||
]))
|
||||
.expect(
|
||||
"Expected one candidate selected out of multiple valid data options, but got none",
|
||||
);
|
||||
@@ -1129,6 +1157,9 @@ mod tests {
|
||||
assert!(over_threshcurrent_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_SAFEKEEPER_CONNSTR));
|
||||
assert!(over_threshcurrent_candidate
|
||||
.wal_producer_connstr
|
||||
.contains(DUMMY_PAGESERVER_CONNSTR));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1155,6 +1186,7 @@ mod tests {
|
||||
}
|
||||
|
||||
const DUMMY_SAFEKEEPER_CONNSTR: &str = "safekeeper_connstr";
|
||||
const DUMMY_PAGESERVER_CONNSTR: &str = "pageserver_connstr";
|
||||
|
||||
// the function itself does not need async, but it spawns a tokio::task underneath, hence it needs
|
||||
// a runtime in order not to panic
|
||||
@@ -1162,8 +1194,9 @@ mod tests {
|
||||
id: ZTenantTimelineId,
|
||||
safekeeper_id: NodeId,
|
||||
) -> WalConnectionData {
|
||||
let dummy_connstr = wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR)
|
||||
.expect("Failed to construct dummy wal producer connstr");
|
||||
let dummy_connstr =
|
||||
wal_stream_connection_string(id, DUMMY_SAFEKEEPER_CONNSTR, DUMMY_PAGESERVER_CONNSTR)
|
||||
.expect("Failed to construct dummy wal producer connstr");
|
||||
WalConnectionData {
|
||||
safekeeper_id,
|
||||
connection: WalReceiverConnection::open(
|
||||
|
||||
@@ -27,10 +27,9 @@ pub struct ClientCredentials {
|
||||
// Other Auth backends don't need it.
|
||||
pub sni_data: Option<String>,
|
||||
|
||||
// project_name is passed as argument from options from url.
|
||||
// In case sni_data is missing: project_name is used to determine cluster name.
|
||||
// In case sni_data is available: project_name and sni_data should match (otherwise throws an error).
|
||||
pub project_name: Option<String>,
|
||||
// cluster_option is passed as argument from options from url.
|
||||
// To be used to determine cluster name in case sni_data is missing.
|
||||
pub project_option: Option<String>,
|
||||
}
|
||||
|
||||
impl ClientCredentials {
|
||||
@@ -42,42 +41,29 @@ impl ClientCredentials {
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ProjectNameError {
|
||||
#[error("SNI is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '...&options=project%3D<project-name>...'.")]
|
||||
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass the project name as a parameter: '..&options=project:<project name>..'.")]
|
||||
Missing,
|
||||
|
||||
#[error("SNI is malformed.")]
|
||||
Bad,
|
||||
|
||||
#[error("Inconsistent project name inferred from SNI and project option. String from SNI: '{0}', String from project option: '{1}'")]
|
||||
Inconsistent(String, String),
|
||||
}
|
||||
|
||||
impl UserFacingError for ProjectNameError {}
|
||||
|
||||
impl ClientCredentials {
|
||||
/// Determine project name from SNI or from project_name parameter from options argument.
|
||||
/// Determine project name from SNI.
|
||||
pub fn project_name(&self) -> Result<&str, ProjectNameError> {
|
||||
// Checking that if both sni_data and project_name are set, then they should match
|
||||
// otherwise, throws a ProjectNameError::Inconsistent error.
|
||||
if let Some(sni_data) = &self.sni_data {
|
||||
let project_name_from_sni_data =
|
||||
sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0;
|
||||
if let Some(project_name_from_options) = &self.project_name {
|
||||
if !project_name_from_options.eq(project_name_from_sni_data) {
|
||||
return Err(ProjectNameError::Inconsistent(
|
||||
project_name_from_sni_data.to_string(),
|
||||
project_name_from_options.to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
// determine the project name from self.sni_data if it exists, otherwise from self.project_name.
|
||||
let ret = match &self.sni_data {
|
||||
// if sni_data exists, use it to determine project name
|
||||
Some(sni_data) => sni_data.split_once('.').ok_or(ProjectNameError::Bad)?.0,
|
||||
// otherwise use project_option if it was manually set through the options parameter.
|
||||
//if sni_data exists, use it to determine project name
|
||||
Some(sni_data) => {
|
||||
sni_data
|
||||
.split_once('.')
|
||||
.ok_or(ProjectNameError::Bad)?
|
||||
.0
|
||||
}
|
||||
// otherwise use project_option if it was manually set through the ..&options=project:<name> parameter
|
||||
None => self
|
||||
.project_name
|
||||
.project_option
|
||||
.as_ref()
|
||||
.ok_or(ProjectNameError::Missing)?
|
||||
.as_str(),
|
||||
@@ -98,13 +84,17 @@ impl TryFrom<HashMap<String, String>> for ClientCredentials {
|
||||
|
||||
let user = get_param("user")?;
|
||||
let dbname = get_param("database")?;
|
||||
let project_name = get_param("project").ok();
|
||||
let project = get_param("project");
|
||||
let project_option = match project {
|
||||
Ok(project) => Some(project),
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
user,
|
||||
dbname,
|
||||
sni_data: None,
|
||||
project_name,
|
||||
project_option,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
251
proxy/src/auth_backend/console.rs
Normal file
251
proxy/src/auth_backend/console.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
//! Declaration of Cloud API V2.
|
||||
|
||||
use crate::{
|
||||
auth::{self, AuthFlow},
|
||||
compute, scram,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::auth::ClientCredentials;
|
||||
use crate::stream::PqStream;
|
||||
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
|
||||
|
||||
/// Errors that can occur while talking to the console (Cloud API V2) during
/// authentication: resolving the cluster name, fetching the role secret and
/// waking the compute node.
#[derive(Debug, Error)]
|
||||
pub enum ConsoleAuthError {
|
||||
// We shouldn't include the actual secret here.
/// The role secret returned by the console could not be parsed by
/// `scram::ServerSecret::parse` (see `get_auth_info`).
|
||||
#[error("Bad authentication secret")]
|
||||
BadSecret,
|
||||
|
||||
/// The client credentials were rejected; the message prints them with `Debug`.
#[error("Bad client credentials: {0:?}")]
|
||||
BadCredentials(crate::auth::ClientCredentials),
|
||||
|
||||
/// Neither SNI data nor the cluster option was supplied, so `handle_user`
/// cannot determine which cluster the client wants.
#[error("SNI info is missing. EITHER please upgrade the postgres client library OR pass ..&options=cluster:<project name>.. parameter")]
|
||||
SniMissingAndProjectNameMissing,
|
||||
|
||||
/// SNI data was supplied but contains no `.` to split the cluster name from.
#[error("Unexpected SNI content")]
|
||||
SniWrong,
|
||||
|
||||
/// The request URL built from the auth endpoint failed to parse.
#[error(transparent)]
|
||||
BadUrl(#[from] url::ParseError),
|
||||
|
||||
/// Wrapped I/O error.
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
/// Non-success HTTP status returned by the console (the callers check `StatusCode::is_success`).
|
||||
#[error("Console responded with an HTTP status: {0}")]
|
||||
HttpStatus(reqwest::StatusCode),
|
||||
|
||||
/// The HTTP request itself, or reading the response body, failed.
#[error(transparent)]
|
||||
Transport(#[from] reqwest::Error),
|
||||
|
||||
/// The response body could not be deserialized into the expected JSON shape.
#[error("Console responded with a malformed JSON: '{0}'")]
|
||||
MalformedResponse(#[from] serde_json::Error),
|
||||
|
||||
/// The compute address returned by the console is not a `host:port` pair
/// with a `u16` port (see `wake_compute`). Carries the offending address.
#[error("Console responded with a malformed compute address: '{0}'")]
|
||||
MalformedComputeAddress(String),
|
||||
}
|
||||
|
||||
/// JSON body that `get_auth_info` deserializes from the console's
/// `proxy_get_role_secret` response.
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct GetRoleSecretResponse {
|
||||
/// Textual role secret; `get_auth_info` passes it to `scram::ServerSecret::parse`.
role_secret: String,
|
||||
}
|
||||
|
||||
/// JSON body that `wake_compute` deserializes from the console's
/// `proxy_wake_compute` response.
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct GetWakeComputeResponse {
|
||||
/// Compute node address; `wake_compute` splits it at a `:` into a host and a `u16` port.
address: String,
|
||||
}
|
||||
|
||||
/// Auth secret which is managed by the cloud.
///
/// In this file `get_auth_info` only ever constructs the `Scram` variant, and
/// `handle_user` rejects `Md5` with an "MD5 is not supported" auth failure.
|
||||
pub enum AuthInfo {
|
||||
/// Md5 hash of user's password.
|
||||
Md5([u8; 16]),
|
||||
/// [SCRAM](crate::scram) authentication info.
|
||||
Scram(scram::ServerSecret),
|
||||
}
|
||||
|
||||
/// Compute node connection params provided by the cloud.
|
||||
/// Note how it implements serde traits, since we receive it over the wire.
///
/// `Debug` is implemented by hand further down in this file and prints only
/// `host` and `port`.
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub struct DatabaseInfo {
|
||||
/// Host of the compute node.
pub host: String,
|
||||
/// Port of the compute node.
pub port: u16,
|
||||
/// Database name to connect to.
pub dbname: String,
|
||||
/// Role (user) name to connect as.
pub user: String,
|
||||
|
||||
/// [Cloud API V1](super::legacy) returns cleartext password,
|
||||
/// but [Cloud API V2](super::api) implements [SCRAM](crate::scram)
|
||||
/// authentication, so we can leverage this method and cope without password.
///
/// `handle_user` in this file always sets it to `None`.
|
||||
pub password: Option<String>,
|
||||
}
|
||||
|
||||
// Manually implement debug to omit personal and sensitive info.
|
||||
impl std::fmt::Debug for DatabaseInfo {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
fmt.debug_struct("DatabaseInfo")
|
||||
.field("host", &self.host)
|
||||
.field("port", &self.port)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_auth_info(
|
||||
auth_endpoint: &str,
|
||||
user: &str,
|
||||
cluster: &str,
|
||||
) -> Result<AuthInfo, ConsoleAuthError> {
|
||||
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_get_role_secret"))?;
|
||||
|
||||
url.query_pairs_mut()
|
||||
.append_pair("project", cluster)
|
||||
.append_pair("role", user);
|
||||
|
||||
// TODO: use a proper logger
|
||||
println!("cplane request: {}", url);
|
||||
|
||||
let resp = reqwest::get(url).await?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(ConsoleAuthError::HttpStatus(resp.status()));
|
||||
}
|
||||
|
||||
let response: GetRoleSecretResponse = serde_json::from_str(resp.text().await?.as_str())?;
|
||||
|
||||
scram::ServerSecret::parse(response.role_secret.as_str())
|
||||
.map(AuthInfo::Scram)
|
||||
.ok_or(ConsoleAuthError::BadSecret)
|
||||
}
|
||||
|
||||
/// Wake up the compute node and return the corresponding connection info.
|
||||
async fn wake_compute(
|
||||
auth_endpoint: &str,
|
||||
cluster: &str,
|
||||
) -> Result<(String, u16), ConsoleAuthError> {
|
||||
let mut url = reqwest::Url::parse(&format!("{auth_endpoint}/proxy_wake_compute"))?;
|
||||
url.query_pairs_mut().append_pair("project", cluster);
|
||||
|
||||
// TODO: use a proper logger
|
||||
println!("cplane request: {}", url);
|
||||
|
||||
let resp = reqwest::get(url).await?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(ConsoleAuthError::HttpStatus(resp.status()));
|
||||
}
|
||||
|
||||
let response: GetWakeComputeResponse = serde_json::from_str(resp.text().await?.as_str())?;
|
||||
let (host, port) = response
|
||||
.address
|
||||
.split_once(':')
|
||||
.ok_or_else(|| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
|
||||
let port: u16 = port
|
||||
.parse()
|
||||
.map_err(|_| ConsoleAuthError::MalformedComputeAddress(response.address.clone()))?;
|
||||
|
||||
Ok((host.to_string(), port))
|
||||
}
|
||||
|
||||
pub async fn handle_user(
|
||||
auth_endpoint: &str,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
creds: &ClientCredentials,
|
||||
) -> Result<compute::NodeInfo, crate::auth::AuthError> {
|
||||
// Determine cluster name from SNI (creds.sni_data) or from creds.cluster_option.
|
||||
let cluster = match &creds.sni_data {
|
||||
//if sni_data exists, use it
|
||||
Some(sni_data) => {
|
||||
sni_data
|
||||
.split_once('.')
|
||||
.ok_or(ConsoleAuthError::SniWrong)?
|
||||
.0
|
||||
}
|
||||
//otherwise use cluster_option if it was manually set thought ..&options=cluster:<name> parameter
|
||||
None => creds
|
||||
.cluster_option
|
||||
.as_ref()
|
||||
.ok_or(ConsoleAuthError::SniMissingAndProjectNameMissing)?
|
||||
.as_str(),
|
||||
};
|
||||
|
||||
let user = creds.user.as_str();
|
||||
|
||||
// Step 1: get the auth secret
|
||||
let auth_info = get_auth_info(auth_endpoint, user, cluster).await?;
|
||||
|
||||
let flow = AuthFlow::new(client);
|
||||
let scram_keys = match auth_info {
|
||||
AuthInfo::Md5(_) => {
|
||||
// TODO: decide if we should support MD5 in api v2
|
||||
return Err(crate::auth::AuthErrorImpl::auth_failed("MD5 is not supported").into());
|
||||
}
|
||||
AuthInfo::Scram(secret) => {
|
||||
let scram = auth::Scram(&secret);
|
||||
Some(compute::ScramKeys {
|
||||
client_key: flow.begin(scram).await?.authenticate().await?.as_bytes(),
|
||||
server_key: secret.server_key.as_bytes(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
client
|
||||
.write_message_noflush(&Be::AuthenticationOk)?
|
||||
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
|
||||
|
||||
// Step 2: wake compute
|
||||
let (host, port) = wake_compute(auth_endpoint, cluster).await?;
|
||||
|
||||
Ok(compute::NodeInfo {
|
||||
db_info: DatabaseInfo {
|
||||
host,
|
||||
port,
|
||||
dbname: creds.dbname.clone(),
|
||||
user: creds.user.clone(),
|
||||
password: None,
|
||||
},
|
||||
scram_keys,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// `DatabaseInfo` must deserialize both with and without the optional
    /// `password` key, and the parsed fields must carry the values from the JSON.
    #[test]
    fn parse_db_info() -> anyhow::Result<()> {
        let with_password: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
            "password": "password",
        }))?;
        assert_eq!(with_password.host, "localhost");
        assert_eq!(with_password.port, 5432);
        assert_eq!(with_password.dbname, "postgres");
        assert_eq!(with_password.user, "john_doe");
        assert_eq!(with_password.password.as_deref(), Some("password"));

        let without_password: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
        }))?;
        assert_eq!(without_password.host, "localhost");
        assert_eq!(without_password.port, 5432);
        assert_eq!(without_password.dbname, "postgres");
        assert_eq!(without_password.user, "john_doe");
        // A missing key must come out as `None`, not as an error.
        assert_eq!(without_password.password, None);

        Ok(())
    }
}
|
||||
@@ -144,15 +144,19 @@ async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
|
||||
}
|
||||
|
||||
pub fn get_campaign_name(
|
||||
election_name: &str,
|
||||
broker_prefix: &str,
|
||||
id: ZTenantTimelineId,
|
||||
election_name: String,
|
||||
broker_prefix: String,
|
||||
timeline_id: &ZTenantTimelineId,
|
||||
) -> String {
|
||||
format!("{broker_prefix}/{id}/{election_name}")
|
||||
return format!(
|
||||
"{}/{}",
|
||||
SkTimelineSubscriptionKind::timeline(broker_prefix, *timeline_id).watch_key(),
|
||||
election_name
|
||||
);
|
||||
}
|
||||
|
||||
pub fn get_candiate_name(system_id: NodeId) -> String {
|
||||
format!("id_{system_id}")
|
||||
format!("id_{}", system_id)
|
||||
}
|
||||
|
||||
/// Push once in a while data about all active timelines to the broker.
|
||||
|
||||
305
safekeeper/src/callmemaybe.rs
Normal file
305
safekeeper/src/callmemaybe.rs
Normal file
@@ -0,0 +1,305 @@
|
||||
//!
|
||||
//! Callmemaybe module is responsible for periodically requesting
|
||||
//! pageserver to initiate wal streaming.
|
||||
//!
|
||||
//! Other threads can use CallmeEvent messages to subscribe or unsubscribe
|
||||
//! from the call list.
|
||||
//!
|
||||
use crate::SafeKeeperConf;
|
||||
use anyhow::{Context, Result};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio::runtime;
|
||||
use tokio::sync::mpsc::UnboundedReceiver;
|
||||
use tokio::task;
|
||||
use tokio_postgres::NoTls;
|
||||
use tracing::*;
|
||||
use utils::{
|
||||
connstring::connection_host_port,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
async fn request_callback(
|
||||
pageserver_connstr: String,
|
||||
listen_pg_addr_str: String,
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
) -> Result<()> {
|
||||
info!(
|
||||
"callmemaybe request_callback Connecting to pageserver {}",
|
||||
&pageserver_connstr
|
||||
);
|
||||
let (client, connection) = tokio_postgres::connect(&pageserver_connstr, NoTls).await?;
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
error!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
// use Config parsing because SockAddr parsing doesn't allow to use host names instead of ip addresses
|
||||
let me_connstr = format!("postgresql://no_user@{}/no_db", listen_pg_addr_str);
|
||||
let me_conf: postgres::config::Config = me_connstr.parse().unwrap();
|
||||
let (host, port) = connection_host_port(&me_conf);
|
||||
|
||||
// pageserver connstr is needed to be able to distinguish between different pageservers
|
||||
// it is required to correctly manage callmemaybe subscriptions when more than one pageserver is involved
|
||||
// TODO it is better to use some sort of a unique id instead of connection string, see https://github.com/zenithdb/zenith/issues/1105
|
||||
let callme = format!(
|
||||
"callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={} pageserver_connstr={}'",
|
||||
tenantid, timelineid, host, port, timelineid, tenantid, pageserver_connstr,
|
||||
);
|
||||
|
||||
let _ = client.simple_query(&callme).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn thread_main(conf: SafeKeeperConf, rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
|
||||
let runtime = runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
runtime.block_on(main_loop(conf, rx))
|
||||
}
|
||||
|
||||
/// Identifies one callmemaybe subscription; every `CallmeEvent` variant carries one.
///
/// Two keys are equal only if tenant, timeline and pageserver connection string all match.
// NOTE(review): presumably the key of the subscriptions map kept by `main_loop`
// (the type derives `Eq` + `Hash`) — `main_loop` is not visible here, confirm.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
pub struct SubscriptionStateKey {
|
||||
/// Tenant the subscription belongs to.
tenant_id: ZTenantId,
|
||||
/// Timeline the subscription belongs to.
timeline_id: ZTimelineId,
|
||||
/// Connection string of the pageserver that should be called back.
pageserver_connstr: String,
|
||||
}
|
||||
|
||||
impl SubscriptionStateKey {
|
||||
pub fn new(tenant_id: ZTenantId, timeline_id: ZTimelineId, pageserver_connstr: String) -> Self {
|
||||
Self {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
pageserver_connstr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Messages to the callmemaybe thread
|
||||
#[derive(Debug)]
|
||||
pub enum CallmeEvent {
|
||||
/// Add a new subscription to the list.
|
||||
Subscribe(SubscriptionStateKey),
|
||||
/// Remove the subscription from the list.
|
||||
Unsubscribe(SubscriptionStateKey),
|
||||
/// Don't serve this subscription, but keep it in the list.
|
||||
Pause(SubscriptionStateKey),
|
||||
/// Resume this subscription, if it exists,
|
||||
/// but don't create a new one if it is gone.
|
||||
Resume(SubscriptionStateKey),
|
||||
// TODO how do we delete from subscriptions?
|
||||
}
|
||||
|
||||
/// State kept for a single callmemaybe subscription.
#[derive(Debug)]
|
||||
struct SubscriptionState {
|
||||
/// Tenant whose timeline should be streamed.
tenantid: ZTenantId,
|
||||
/// Timeline that should be streamed.
timelineid: ZTimelineId,
|
||||
/// Connection string of the pageserver to call back.
pageserver_connstr: String,
|
||||
/// Handle of the most recently spawned callback task, if any;
/// taken and aborted by `abort_handle`.
handle: Option<task::JoinHandle<()>>,
|
||||
/// Set to the creation time in `new`; `call` compares its elapsed time
/// against the recall period to decide whether it is too early to call again.
last_call_time: Instant,
|
||||
/// While `true`, `call` returns without issuing a request.
/// Toggled by `pause` and `resume`.
paused: bool,
|
||||
}
|
||||
|
||||
impl SubscriptionState {
|
||||
fn new(
|
||||
tenantid: ZTenantId,
|
||||
timelineid: ZTimelineId,
|
||||
pageserver_connstr: String,
|
||||
) -> SubscriptionState {
|
||||
SubscriptionState {
|
||||
tenantid,
|
||||
timelineid,
|
||||
pageserver_connstr,
|
||||
handle: None,
|
||||
last_call_time: Instant::now(),
|
||||
paused: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Marks the subscription as paused and aborts the callback task that is
/// currently tracked, if there is one. While paused, `call` does nothing.
fn pause(&mut self) {
|
||||
self.paused = true;
|
||||
self.abort_handle();
|
||||
}
|
||||
|
||||
/// Clears the paused flag so that later `call`s are served again.
/// It does not issue a call itself.
fn resume(&mut self) {
|
||||
self.paused = false;
|
||||
}
|
||||
|
||||
// Most likely, the task have already successfully completed
|
||||
// and abort() won't have any effect.
|
||||
fn abort_handle(&mut self) {
|
||||
if let Some(handle) = self.handle.take() {
|
||||
handle.abort();
|
||||
|
||||
let timelineid = self.timelineid;
|
||||
let tenantid = self.tenantid;
|
||||
let pageserver_connstr = self.pageserver_connstr.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = handle.await {
|
||||
if err.is_cancelled() {
|
||||
warn!("callback task for timelineid={} tenantid={} was cancelled before spawning a new one",
|
||||
timelineid, tenantid);
|
||||
} else {
|
||||
error!(
|
||||
"callback task for timelineid={} tenantid={} pageserver_connstr={} failed: {}",
|
||||
timelineid, tenantid, pageserver_connstr, err
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn call(&mut self, recall_period: Duration, listen_pg_addr: String) {
|
||||
// Ignore call request if this subscription is paused
|
||||
if self.paused {
|
||||
debug!(
|
||||
"ignore call request for paused subscription \
|
||||
tenantid: {}, timelineid: {}",
|
||||
self.tenantid, self.timelineid
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if it too early to recall
|
||||
if self.handle.is_some() && self.last_call_time.elapsed() < recall_period {
|
||||
debug!(
|
||||
"too early to recall. self.last_call_time.elapsed: {:?}, recall_period: {:?} \
|
||||
tenantid: {}, timelineid: {}",
|
||||
self.last_call_time, recall_period, self.tenantid, self.timelineid
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// If previous task didn't complete in recall_period, it must be hanging,
|
||||
// so don't wait for it forever, just abort it and try again.
|
||||
self.abort_handle();
|
||||
|
||||
let timelineid = self.timelineid;
|
||||
let tenantid = self.tenantid;
|
||||
let pageserver_connstr = self.pageserver_connstr.clone();
|
||||
self.handle = Some(tokio::spawn(async move {
|
||||
request_callback(pageserver_connstr, listen_pg_addr, timelineid, tenantid)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
error!(
|
||||
"callback task for timelineid={} tenantid={} failed: {}",
|
||||
timelineid, tenantid, e
|
||||
)
|
||||
});
|
||||
}));
|
||||
|
||||
// Update last_call_time
|
||||
self.last_call_time = Instant::now();
|
||||
info!(
|
||||
"new call spawned. last call time {:?} tenantid: {}, timelineid: {}",
|
||||
self.last_call_time, self.tenantid, self.timelineid
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for SubscriptionState {
|
||||
fn drop(&mut self) {
|
||||
self.abort_handle();
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn main_loop(conf: SafeKeeperConf, mut rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
|
||||
let subscriptions: Mutex<HashMap<SubscriptionStateKey, SubscriptionState>> =
|
||||
Mutex::new(HashMap::new());
|
||||
|
||||
let mut ticker = tokio::time::interval(conf.recall_period);
|
||||
loop {
|
||||
tokio::select! {
|
||||
request = rx.recv() =>
|
||||
{
|
||||
match request.context("done")?
|
||||
{
|
||||
CallmeEvent::Subscribe(key) =>
|
||||
{
|
||||
let _enter = info_span!("callmemaybe: subscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
|
||||
let mut subscriptions = subscriptions.lock().unwrap();
|
||||
// XXX this clone is ugly, is there a way to use the trick with Borrow trait with entry API?
|
||||
// when we switch to node id instead of the connection string key will be Copy and there will be no need to clone
|
||||
match subscriptions.entry(key.clone()) {
|
||||
Entry::Occupied(_) => {
|
||||
// Do nothing if subscription already exists
|
||||
// If it is paused it means that there is already established replication connection.
|
||||
// If it is not paused it will be polled with other subscriptions when timeout expires.
|
||||
// This can occur when replication channel is established before subscription is added.
|
||||
info!(
|
||||
"subscription already exists",
|
||||
);
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let subscription = entry.insert(SubscriptionState::new(
|
||||
key.tenant_id,
|
||||
key.timeline_id,
|
||||
key.pageserver_connstr,
|
||||
));
|
||||
subscription.call(conf.recall_period, conf.listen_pg_addr.clone());
|
||||
}
|
||||
}
|
||||
},
|
||||
CallmeEvent::Unsubscribe(key) => {
|
||||
let _enter = debug_span!("callmemaybe: unsubscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
|
||||
debug!("unsubscribe");
|
||||
let mut subscriptions = subscriptions.lock().unwrap();
|
||||
subscriptions.remove(&key);
|
||||
|
||||
},
|
||||
CallmeEvent::Pause(key) => {
|
||||
let _enter = debug_span!("callmemaybe: pause", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
|
||||
let mut subscriptions = subscriptions.lock().unwrap();
|
||||
// If pause received when no corresponding subscription exists it means that someone started replication
|
||||
// without using callmemaybe. So we create subscription and pause it.
|
||||
// In tenant relocation scenario subscribe call will be executed after pause when compute is restarted.
|
||||
// In that case there is no need to create new/unpause existing subscription.
|
||||
match subscriptions.entry(key.clone()) {
|
||||
Entry::Occupied(mut sub) => {
|
||||
debug!("pause existing");
|
||||
sub.get_mut().pause();
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
debug!("create paused");
|
||||
let subscription = entry.insert(SubscriptionState::new(
|
||||
key.tenant_id,
|
||||
key.timeline_id,
|
||||
key.pageserver_connstr,
|
||||
));
|
||||
subscription.pause();
|
||||
}
|
||||
}
|
||||
},
|
||||
CallmeEvent::Resume(key) => {
|
||||
debug!(
|
||||
"callmemaybe. thread_main. resume callback request for timelineid={} tenantid={} pageserver_connstr={}",
|
||||
key.timeline_id, key.tenant_id, key.pageserver_connstr,
|
||||
);
|
||||
let mut subscriptions = subscriptions.lock().unwrap();
|
||||
if let Some(sub) = subscriptions.get_mut(&key)
|
||||
{
|
||||
sub.resume();
|
||||
};
|
||||
},
|
||||
}
|
||||
},
|
||||
_ = ticker.tick() => {
|
||||
let _enter = debug_span!("callmemaybe: tick").entered();
|
||||
let mut subscriptions = subscriptions.lock().unwrap();
|
||||
|
||||
for (_, state) in subscriptions.iter_mut() {
|
||||
state.call(conf.recall_period, conf.listen_pg_addr.clone());
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -29,11 +29,12 @@ pub struct SafekeeperPostgresHandler {
|
||||
pub ztenantid: Option<ZTenantId>,
|
||||
pub ztimelineid: Option<ZTimelineId>,
|
||||
pub timeline: Option<Arc<Timeline>>,
|
||||
pageserver_connstr: Option<String>,
|
||||
}
|
||||
|
||||
/// Parsed Postgres command.
|
||||
enum SafekeeperPostgresCommand {
|
||||
StartWalPush,
|
||||
StartWalPush { pageserver_connstr: Option<String> },
|
||||
StartReplication { start_lsn: Lsn },
|
||||
IdentifySystem,
|
||||
JSONCtrl { cmd: AppendLogicalMessage },
|
||||
@@ -41,7 +42,11 @@ enum SafekeeperPostgresCommand {
|
||||
|
||||
fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
|
||||
if cmd.starts_with("START_WAL_PUSH") {
|
||||
Ok(SafekeeperPostgresCommand::StartWalPush)
|
||||
let re = Regex::new(r"START_WAL_PUSH(?: (.+))?").unwrap();
|
||||
|
||||
let caps = re.captures(cmd).unwrap();
|
||||
let pageserver_connstr = caps.get(1).map(|m| m.as_str().to_owned());
|
||||
Ok(SafekeeperPostgresCommand::StartWalPush { pageserver_connstr })
|
||||
} else if cmd.starts_with("START_REPLICATION") {
|
||||
let re =
|
||||
Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap();
|
||||
@@ -81,6 +86,8 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
|
||||
self.appname = Some(app_name.clone());
|
||||
}
|
||||
|
||||
self.pageserver_connstr = params.get("pageserver_connstr").cloned();
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("Safekeeper received unexpected initial message: {:?}", sm);
|
||||
@@ -106,14 +113,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
|
||||
}
|
||||
|
||||
match cmd {
|
||||
SafekeeperPostgresCommand::StartWalPush => {
|
||||
ReceiveWalConn::new(pgb)
|
||||
SafekeeperPostgresCommand::StartWalPush { pageserver_connstr } => {
|
||||
ReceiveWalConn::new(pgb, pageserver_connstr)
|
||||
.run(self)
|
||||
.context("failed to run ReceiveWalConn")?;
|
||||
}
|
||||
SafekeeperPostgresCommand::StartReplication { start_lsn } => {
|
||||
ReplicationConn::new(pgb)
|
||||
.run(self, pgb, start_lsn)
|
||||
.run(self, pgb, start_lsn, self.pageserver_connstr.clone())
|
||||
.context("failed to run ReplicationConn")?;
|
||||
}
|
||||
SafekeeperPostgresCommand::IdentifySystem => {
|
||||
@@ -135,6 +142,7 @@ impl SafekeeperPostgresHandler {
|
||||
ztenantid: None,
|
||||
ztimelineid: None,
|
||||
timeline: None,
|
||||
pageserver_connstr: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,14 +32,22 @@ pub struct ReceiveWalConn<'pg> {
|
||||
pg_backend: &'pg mut PostgresBackend,
|
||||
/// The cached result of `pg_backend.socket().peer_addr()` (roughly)
|
||||
peer_addr: SocketAddr,
|
||||
/// Pageserver connection string forwarded from compute
|
||||
/// NOTE that it is allowed to operate without a pageserver.
|
||||
/// So if compute has no pageserver configured do not use it.
|
||||
pageserver_connstr: Option<String>,
|
||||
}
|
||||
|
||||
impl<'pg> ReceiveWalConn<'pg> {
|
||||
pub fn new(pg: &'pg mut PostgresBackend) -> ReceiveWalConn<'pg> {
|
||||
pub fn new(
|
||||
pg: &'pg mut PostgresBackend,
|
||||
pageserver_connstr: Option<String>,
|
||||
) -> ReceiveWalConn<'pg> {
|
||||
let peer_addr = *pg.get_peer_addr();
|
||||
ReceiveWalConn {
|
||||
pg_backend: pg,
|
||||
peer_addr,
|
||||
pageserver_connstr,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +120,9 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
// Register the connection and defer unregister. Do that only
|
||||
// after processing first message, as it sets wal_seg_size,
|
||||
// wanted by many.
|
||||
spg.timeline.get().on_compute_connect()?;
|
||||
spg.timeline
|
||||
.get()
|
||||
.on_compute_connect(self.pageserver_connstr.as_ref())?;
|
||||
_guard = Some(ComputeConnectionGuard {
|
||||
timeline: Arc::clone(spg.timeline.get()),
|
||||
});
|
||||
|
||||
@@ -162,8 +162,9 @@ impl ReplicationConn {
|
||||
spg: &mut SafekeeperPostgresHandler,
|
||||
pgb: &mut PostgresBackend,
|
||||
mut start_pos: Lsn,
|
||||
pageserver_connstr: Option<String>,
|
||||
) -> Result<()> {
|
||||
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap()).entered();
|
||||
let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap(), pageserver_connstr = %pageserver_connstr.as_deref().unwrap_or_default()).entered();
|
||||
|
||||
// spawn the background thread which receives HotStandbyFeedback messages.
|
||||
let bg_timeline = Arc::clone(spg.timeline.get());
|
||||
|
||||
@@ -95,6 +95,7 @@ struct SharedState {
|
||||
/// when tli is inactive instead of having this flag.
|
||||
active: bool,
|
||||
num_computes: u32,
|
||||
pageserver_connstr: Option<String>,
|
||||
last_removed_segno: XLogSegNo,
|
||||
}
|
||||
|
||||
@@ -118,6 +119,7 @@ impl SharedState {
|
||||
wal_backup_active: false,
|
||||
active: false,
|
||||
num_computes: 0,
|
||||
pageserver_connstr: None,
|
||||
last_removed_segno: 0,
|
||||
})
|
||||
}
|
||||
@@ -137,6 +139,7 @@ impl SharedState {
|
||||
wal_backup_active: false,
|
||||
active: false,
|
||||
num_computes: 0,
|
||||
pageserver_connstr: None,
|
||||
last_removed_segno: 0,
|
||||
})
|
||||
}
|
||||
@@ -187,6 +190,35 @@ impl SharedState {
|
||||
self.wal_backup_active
|
||||
}
|
||||
|
||||
/// Activate timeline's walsender: start/change timeline information propagated into etcd for further pageserver connections.
|
||||
fn activate_walsender(
|
||||
&mut self,
|
||||
zttid: &ZTenantTimelineId,
|
||||
new_pageserver_connstr: Option<String>,
|
||||
) {
|
||||
if self.pageserver_connstr != new_pageserver_connstr {
|
||||
self.deactivate_walsender(zttid);
|
||||
|
||||
if new_pageserver_connstr.is_some() {
|
||||
info!(
|
||||
"timeline {} has activated its walsender with connstr {new_pageserver_connstr:?}",
|
||||
zttid.timeline_id,
|
||||
);
|
||||
}
|
||||
self.pageserver_connstr = new_pageserver_connstr;
|
||||
}
|
||||
}
|
||||
|
||||
/// Deactivate the timeline: stop sending the timeline data into etcd, so no pageserver can connect for WAL streaming.
|
||||
fn deactivate_walsender(&mut self, zttid: &ZTenantTimelineId) {
|
||||
if let Some(pageserver_connstr) = self.pageserver_connstr.take() {
|
||||
info!(
|
||||
"timeline {} had deactivated its wallsender with connstr {pageserver_connstr:?}",
|
||||
zttid.timeline_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_wal_seg_size(&self) -> usize {
|
||||
self.sk.state.server.wal_seg_size as usize
|
||||
}
|
||||
@@ -286,12 +318,17 @@ impl Timeline {
|
||||
/// Register compute connection, starting timeline-related activity if it is
|
||||
/// not running yet.
|
||||
/// Can fail only if channel to a static thread got closed, which is not normal at all.
|
||||
pub fn on_compute_connect(&self) -> Result<()> {
|
||||
pub fn on_compute_connect(&self, pageserver_connstr: Option<&String>) -> Result<()> {
|
||||
let is_wal_backup_action_pending: bool;
|
||||
{
|
||||
let mut shared_state = self.mutex.lock().unwrap();
|
||||
shared_state.num_computes += 1;
|
||||
is_wal_backup_action_pending = shared_state.update_status();
|
||||
// FIXME: currently we always adopt latest pageserver connstr, but we
|
||||
// should have kind of generations assigned by compute to distinguish
|
||||
// the latest one or even pass it through consensus to reliably deliver
|
||||
// to all safekeepers.
|
||||
shared_state.activate_walsender(&self.zttid, pageserver_connstr.cloned());
|
||||
}
|
||||
// Wake up wal backup launcher, if offloading not started yet.
|
||||
if is_wal_backup_action_pending {
|
||||
@@ -327,7 +364,7 @@ impl Timeline {
|
||||
(replica_state.remote_consistent_lsn != Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
|
||||
replica_state.remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
|
||||
if stop {
|
||||
shared_state.update_status();
|
||||
shared_state.deactivate_walsender(&self.zttid);
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
@@ -488,6 +525,7 @@ impl Timeline {
|
||||
)),
|
||||
peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
|
||||
safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
|
||||
pageserver_connstr: shared_state.pageserver_connstr.clone(),
|
||||
backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -99,9 +99,9 @@ async fn wal_backup_launcher_main_loop(
|
||||
|
||||
// TODO: decide who should offload in launcher itself by simply checking current state
|
||||
let election_name = broker::get_campaign_name(
|
||||
BACKUP_ELECTION_NAME,
|
||||
&conf.broker_etcd_prefix,
|
||||
zttid,
|
||||
BACKUP_ELECTION_NAME.to_string(),
|
||||
conf.broker_etcd_prefix.clone(),
|
||||
&zttid,
|
||||
);
|
||||
let my_candidate_name = broker::get_candiate_name(conf.my_id);
|
||||
let election = broker::Election::new(
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
## Neon test runner
|
||||
## Zenith test runner
|
||||
|
||||
This directory contains integration tests.
|
||||
|
||||
Prerequisites:
|
||||
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
|
||||
- Neon and Postgres binaries
|
||||
- Zenith and Postgres binaries
|
||||
- See the root [README.md](/README.md) for build directions
|
||||
- Tests can be run from the git tree; or see the environment variables
|
||||
below to run from other directories.
|
||||
- The neon git repo, including the postgres submodule
|
||||
- The zenith git repo, including the postgres submodule
|
||||
(for some tests, e.g. `pg_regress`)
|
||||
- Some tests (involving storage nodes coordination) require etcd installed. Follow
|
||||
[`the guide`](https://etcd.io/docs/v3.5/install/) to obtain it.
|
||||
@@ -51,8 +51,8 @@ Useful environment variables:
|
||||
should go.
|
||||
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
|
||||
`ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
|
||||
`--pageserver-config-override=${value}` parameter values when neon_local cli is invoked
|
||||
`RUST_LOG`: logging configuration to pass into Neon CLI
|
||||
`--pageserver-config-override=${value}` parameter values when zenith cli is invoked
|
||||
`RUST_LOG`: logging configuration to pass into Zenith CLI
|
||||
|
||||
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
|
||||
`./scripts/pytest -s --log-cli-level=INFO ...`
|
||||
@@ -65,32 +65,32 @@ Exit after the first test failure:
|
||||
|
||||
### Writing a test
|
||||
|
||||
Every test needs a Neon Environment, or NeonEnv to operate in. A Neon Environment
|
||||
Every test needs a Zenith Environment, or ZenithEnv to operate in. A Zenith Environment
|
||||
is like a little cloud-in-a-box, and consists of a Pageserver, 0-N Safekeepers, and
|
||||
compute Postgres nodes. The connections between them can be configured to use JWT
|
||||
authentication tokens, and some other configuration options can be tweaked too.
|
||||
|
||||
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
|
||||
The easiest way to get access to a Zenith Environment is by using the `zenith_simple_env`
|
||||
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
|
||||
or make other destructive changes in that environment. Also don't assume that
|
||||
there are no tenants or branches or data in the cluster. For convenience, there is a
|
||||
branch called `empty`, though. The convention is to create a test-specific branch of
|
||||
that and load any test data there, instead of the 'main' branch.
|
||||
|
||||
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
|
||||
For more complicated cases, you can build a custom Zenith Environment, with the `zenith_env`
|
||||
fixture:
|
||||
|
||||
```python
|
||||
def test_foobar(neon_env_builder: NeonEnvBuilder):
|
||||
def test_foobar(zenith_env_builder: ZenithEnvBuilder):
|
||||
# Prescribe the environment.
|
||||
# We want to have 3 safekeeper nodes, and use JWT authentication in the
|
||||
# connections to the page server
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.set_pageserver_auth(True)
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
zenith_env_builder.set_pageserver_auth(True)
|
||||
|
||||
# Now create the environment. This initializes the repository, and starts
|
||||
# up the page server and the safekeepers
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Run the test
|
||||
...
|
||||
|
||||
@@ -3,18 +3,18 @@ from contextlib import closing
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverApiException
|
||||
|
||||
|
||||
#
|
||||
# Create ancestor branches off the main branch.
|
||||
#
|
||||
def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_ancestor_branch(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Override defaults, 1M gc_horizon and 4M checkpoint_distance.
|
||||
# Extend compaction_period and gc_period to disable background compaction and gc.
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
tenant, _ = env.zenith_cli.create_tenant(
|
||||
conf={
|
||||
'gc_period': '10 m',
|
||||
'gc_horizon': '1048576',
|
||||
@@ -24,7 +24,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
'compaction_target_size': '4194304',
|
||||
})
|
||||
|
||||
env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)")
|
||||
env.pageserver.safe_psql("failpoints flush-frozen=sleep(10000)")
|
||||
|
||||
pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
|
||||
branch0_cur = pg_branch0.connect().cursor()
|
||||
@@ -48,7 +48,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f'LSN after 100k rows: {lsn_100}')
|
||||
|
||||
# Create branch1.
|
||||
env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
|
||||
env.zenith_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
|
||||
pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
|
||||
log.info("postgres is running on 'branch1' branch")
|
||||
|
||||
@@ -72,7 +72,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f'LSN after 200k rows: {lsn_200}')
|
||||
|
||||
# Create branch2.
|
||||
env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
|
||||
env.zenith_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
|
||||
pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
|
||||
log.info("postgres is running on 'branch2' branch")
|
||||
branch2_cur = pg_branch2.connect().cursor()
|
||||
@@ -110,14 +110,15 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
||||
assert branch2_cur.fetchone() == (300000, )
|
||||
|
||||
|
||||
def test_ancestor_branch_detach(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_ancestor_branch_detach(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
|
||||
parent_timeline_id = env.zenith_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
|
||||
|
||||
env.neon_cli.create_branch("test_ancestor_branch_detach_branch1",
|
||||
"test_ancestor_branch_detach_parent")
|
||||
env.zenith_cli.create_branch("test_ancestor_branch_detach_branch1",
|
||||
"test_ancestor_branch_detach_parent")
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
with pytest.raises(NeonPageserverApiException, match="Failed to detach inmem tenant timeline"):
|
||||
with pytest.raises(ZenithPageserverApiException,
|
||||
match="Failed to detach inmem tenant timeline"):
|
||||
ps_http.timeline_detach(env.initial_tenant, parent_timeline_id)
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
from contextlib import closing
|
||||
from typing import Iterator
|
||||
from uuid import UUID, uuid4
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithPageserverApiException
|
||||
from requests.exceptions import HTTPError
|
||||
import pytest
|
||||
|
||||
|
||||
def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_auth_enabled = True
|
||||
env = neon_env_builder.init_start()
|
||||
def test_pageserver_auth(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
ps = env.pageserver
|
||||
|
||||
@@ -25,8 +25,8 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
ps.safe_psql("set FOO", password=tenant_token)
|
||||
ps.safe_psql("set FOO", password=management_token)
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth',
|
||||
tenant_id=env.initial_tenant)
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_pageserver_auth',
|
||||
tenant_id=env.initial_tenant)
|
||||
|
||||
# tenant can create branches
|
||||
tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
|
||||
@@ -36,7 +36,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
ancestor_timeline_id=new_timeline_id)
|
||||
|
||||
# fail to create branch using token with different tenant_id
|
||||
with pytest.raises(NeonPageserverApiException,
|
||||
with pytest.raises(ZenithPageserverApiException,
|
||||
match='Forbidden: Tenant id mismatch. Permission denied'):
|
||||
invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id)
|
||||
@@ -46,21 +46,21 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# fail to create tenant using tenant token
|
||||
with pytest.raises(
|
||||
NeonPageserverApiException,
|
||||
ZenithPageserverApiException,
|
||||
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
|
||||
):
|
||||
tenant_http_client.tenant_create()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_safekeepers', [False, True])
|
||||
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
|
||||
neon_env_builder.pageserver_auth_enabled = True
|
||||
def test_compute_auth_to_pageserver(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
if with_safekeepers:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
|
||||
env.neon_cli.create_branch(branch)
|
||||
env.zenith_cli.create_branch(branch)
|
||||
pg = env.postgres.create_start(branch)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from contextlib import closing, contextmanager
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import PgProtocol, ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
import os
|
||||
import time
|
||||
import asyncpg
|
||||
from fixtures.neon_fixtures import Postgres
|
||||
from fixtures.zenith_fixtures import Postgres
|
||||
import threading
|
||||
|
||||
pytest_plugins = ("fixtures.neon_fixtures")
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
@contextmanager
|
||||
@@ -26,7 +26,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
|
||||
log.info("checks started")
|
||||
|
||||
with pg_cur(pg) as cur:
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
|
||||
|
||||
cur.execute("select pg_size_bytes(current_setting('max_replication_write_lag'))")
|
||||
res = cur.fetchone()
|
||||
@@ -93,10 +93,10 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
|
||||
|
||||
|
||||
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/1587")
|
||||
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_backpressure_received_lsn_lag(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
# Create a branch for us
|
||||
env.neon_cli.create_branch('test_backpressure')
|
||||
env.zenith_cli.create_branch('test_backpressure')
|
||||
|
||||
pg = env.postgres.create_start('test_backpressure',
|
||||
config_lines=['max_replication_write_lag=30MB'])
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@ from fixtures.log_helper import log
|
||||
# Test error handling, if the 'basebackup' command fails in the middle
|
||||
# of building the tar archive.
|
||||
#
|
||||
def test_basebackup_error(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_basebackup_error", "empty")
|
||||
def test_basebackup_error(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_basebackup_error", "empty")
|
||||
|
||||
# Introduce failpoint
|
||||
env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return")
|
||||
|
||||
@@ -5,26 +5,26 @@ import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import print_gc_result
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
|
||||
#
|
||||
# Create a couple of branches off the main branch, at a historical point in time.
|
||||
#
|
||||
def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
# Use safekeeper in this test to avoid a subtle race condition.
|
||||
# Without safekeeper, walreceiver reconnection can get stuck
|
||||
# because of IO deadlock.
|
||||
#
|
||||
# See https://github.com/neondatabase/neon/issues/1068
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
# See https://github.com/zenithdb/zenith/issues/1068
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.neon_cli.create_branch('test_branch_behind')
|
||||
env.zenith_cli.create_branch('test_branch_behind')
|
||||
pgmain = env.postgres.create_start('test_branch_behind')
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
@@ -61,9 +61,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f'LSN after 200100 rows: {lsn_b}')
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.neon_cli.create_branch('test_branch_behind_hundred',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_a)
|
||||
env.zenith_cli.create_branch('test_branch_behind_hundred',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_a)
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
main_cur.execute('''
|
||||
@@ -78,9 +78,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f'LSN after 400100 rows: {lsn_c}')
|
||||
|
||||
# Branch at the point where only 200100 rows were inserted
|
||||
env.neon_cli.create_branch('test_branch_behind_more',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_b)
|
||||
env.zenith_cli.create_branch('test_branch_behind_more',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=lsn_b)
|
||||
|
||||
pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
|
||||
pg_more = env.postgres.create_start('test_branch_behind_more')
|
||||
@@ -104,9 +104,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
# Check bad lsn's for branching
|
||||
|
||||
# branch at segment boundary
|
||||
env.neon_cli.create_branch('test_branch_segment_boundary',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/3000000")
|
||||
env.zenith_cli.create_branch('test_branch_segment_boundary',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/3000000")
|
||||
pg = env.postgres.create_start('test_branch_segment_boundary')
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
@@ -114,13 +114,13 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
with pytest.raises(Exception, match="invalid branch start lsn"):
|
||||
env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
|
||||
env.zenith_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
|
||||
|
||||
# branch at pre-ancestor lsn
|
||||
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
|
||||
env.neon_cli.create_branch('test_branch_preinitdb',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/42")
|
||||
env.zenith_cli.create_branch('test_branch_preinitdb',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn="0/42")
|
||||
|
||||
# check that we cannot create branch based on garbage collected data
|
||||
with closing(env.pageserver.connect()) as psconn:
|
||||
@@ -132,9 +132,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
with pytest.raises(Exception, match="invalid branch start lsn"):
|
||||
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
|
||||
env.neon_cli.create_branch('test_branch_create_fail',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=gced_lsn)
|
||||
env.zenith_cli.create_branch('test_branch_create_fail',
|
||||
'test_branch_behind',
|
||||
ancestor_start_lsn=gced_lsn)
|
||||
|
||||
# check that after gc everything is still there
|
||||
hundred_cur.execute('SELECT count(*) FROM foo')
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
import pytest
|
||||
import concurrent.futures
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
import os
|
||||
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
# running.
|
||||
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
def test_broken_timeline(zenith_env_builder: ZenithEnvBuilder):
|
||||
# One safekeeper is enough for this test.
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
tenant_timelines = []
|
||||
|
||||
for n in range(4):
|
||||
tenant_id_uuid, timeline_id_uuid = env.neon_cli.create_tenant()
|
||||
tenant_id_uuid, timeline_id_uuid = env.zenith_cli.create_tenant()
|
||||
tenant_id = tenant_id_uuid.hex
|
||||
timeline_id = timeline_id_uuid.hex
|
||||
|
||||
@@ -81,14 +81,14 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
|
||||
log.info(f'compute startup failed as expected: {err}')
|
||||
|
||||
|
||||
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_create_multiple_timelines_parallel(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
tenant_id, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
|
||||
futures = [
|
||||
executor.submit(env.neon_cli.create_timeline,
|
||||
executor.submit(env.zenith_cli.create_timeline,
|
||||
f"test-create-multiple-timelines-{i}",
|
||||
tenant_id) for i in range(4)
|
||||
]
|
||||
@@ -96,20 +96,20 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
|
||||
future.result()
|
||||
|
||||
|
||||
def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_fix_broken_timelines_on_startup(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
tenant_id, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
# Introduce failpoint when creating a new timeline
|
||||
env.pageserver.safe_psql(f"failpoints before-checkpoint-new-timeline=return")
|
||||
with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
|
||||
_ = env.neon_cli.create_timeline("test_fix_broken_timelines", tenant_id)
|
||||
_ = env.zenith_cli.create_timeline("test_fix_broken_timelines", tenant_id)
|
||||
|
||||
# Restart the page server
|
||||
env.neon_cli.pageserver_stop(immediate=True)
|
||||
env.neon_cli.pageserver_start()
|
||||
env.zenith_cli.pageserver_stop(immediate=True)
|
||||
env.zenith_cli.pageserver_start()
|
||||
|
||||
# Check that the "broken" timeline is not loaded
|
||||
timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
timelines = env.zenith_cli.list_timelines(tenant_id)
|
||||
assert len(timelines) == 1
|
||||
|
||||
@@ -3,16 +3,16 @@ import os
|
||||
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test compute node start after clog truncation
|
||||
#
|
||||
def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_clog_truncate', 'empty')
|
||||
def test_clog_truncate(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_clog_truncate', 'empty')
|
||||
|
||||
# set aggressive autovacuum to make sure that truncation will happen
|
||||
config = [
|
||||
@@ -62,9 +62,9 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
|
||||
env.neon_cli.create_branch('test_clog_truncate_new',
|
||||
'test_clog_truncate',
|
||||
ancestor_start_lsn=lsn_after_truncation)
|
||||
env.zenith_cli.create_branch('test_clog_truncate_new',
|
||||
'test_clog_truncate',
|
||||
ancestor_start_lsn=lsn_after_truncation)
|
||||
pg2 = env.postgres.create_start('test_clog_truncate_new')
|
||||
log.info('postgres is running on test_clog_truncate_new branch')
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test starting Postgres with custom options
|
||||
#
|
||||
def test_config(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_config", "empty")
|
||||
def test_config(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_config", "empty")
|
||||
|
||||
# change config
|
||||
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
|
||||
|
||||
@@ -2,16 +2,16 @@ import os
|
||||
import pathlib
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test CREATE DATABASE when there have been relmapper changes
|
||||
#
|
||||
def test_createdb(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_createdb', 'empty')
|
||||
def test_createdb(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_createdb', 'empty')
|
||||
|
||||
pg = env.postgres.create_start('test_createdb')
|
||||
log.info("postgres is running on 'test_createdb' branch")
|
||||
@@ -27,7 +27,7 @@ def test_createdb(neon_simple_env: NeonEnv):
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
|
||||
pg2 = env.postgres.create_start('test_createdb2')
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
@@ -40,16 +40,16 @@ def test_createdb(neon_simple_env: NeonEnv):
|
||||
('foodb', ))
|
||||
res = cur.fetchone()
|
||||
# check that dbsize equals sum of all relation sizes, excluding shared ones
|
||||
# This is how we define dbsize in neon for now
|
||||
# This is how we define dbsize in zenith for now
|
||||
assert res[0] == res[1]
|
||||
|
||||
|
||||
#
|
||||
# Test DROP DATABASE
|
||||
#
|
||||
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_dropdb', 'empty')
|
||||
def test_dropdb(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_dropdb', 'empty')
|
||||
pg = env.postgres.create_start('test_dropdb')
|
||||
log.info("postgres is running on 'test_dropdb' branch")
|
||||
|
||||
@@ -73,14 +73,14 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
lsn_after_drop = cur.fetchone()[0]
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
env.neon_cli.create_branch('test_before_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_before_drop)
|
||||
env.zenith_cli.create_branch('test_before_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_before_drop)
|
||||
pg_before = env.postgres.create_start('test_before_dropdb')
|
||||
|
||||
env.neon_cli.create_branch('test_after_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_after_drop)
|
||||
env.zenith_cli.create_branch('test_after_dropdb',
|
||||
'test_dropdb',
|
||||
ancestor_start_lsn=lsn_after_drop)
|
||||
pg_after = env.postgres.create_start('test_after_dropdb')
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test CREATE USER to check shared catalog restore
|
||||
#
|
||||
def test_createuser(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_createuser', 'empty')
|
||||
def test_createuser(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_createuser', 'empty')
|
||||
pg = env.postgres.create_start('test_createuser')
|
||||
log.info("postgres is running on 'test_createuser' branch")
|
||||
|
||||
@@ -24,7 +24,7 @@ def test_createuser(neon_simple_env: NeonEnv):
|
||||
lsn = cur.fetchone()[0]
|
||||
|
||||
# Create a branch
|
||||
env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
|
||||
pg2 = env.postgres.create_start('test_createuser2')
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import asyncio
|
||||
import random
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
|
||||
from fixtures.log_helper import log
|
||||
|
||||
# Test configuration
|
||||
@@ -27,7 +27,7 @@ async def update_table(pg: Postgres):
|
||||
|
||||
|
||||
# Perform aggressive GC with 0 horizon
|
||||
async def gc(env: NeonEnv, timeline: str):
|
||||
async def gc(env: ZenithEnv, timeline: str):
|
||||
psconn = await env.pageserver.connect_async()
|
||||
|
||||
while updates_performed < updates_to_perform:
|
||||
@@ -35,7 +35,7 @@ async def gc(env: NeonEnv, timeline: str):
|
||||
|
||||
|
||||
# At the same time, run UPDATEs and GC
|
||||
async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: str):
|
||||
async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
|
||||
workers = []
|
||||
for worker_id in range(num_connections):
|
||||
workers.append(asyncio.create_task(update_table(pg)))
|
||||
@@ -48,14 +48,14 @@ async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: str):
|
||||
#
|
||||
# Aggressively force GC, while running queries.
|
||||
#
|
||||
# (repro for https://github.com/neondatabase/neon/issues/1047)
|
||||
# (repro for https://github.com/zenithdb/zenith/issues/1047)
|
||||
#
|
||||
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
|
||||
def test_gc_aggressive(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_gc_aggressive", "main")
|
||||
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch("test_gc_aggressive", "main")
|
||||
pg = env.postgres.create_start('test_gc_aggressive')
|
||||
log.info('postgres is running on test_gc_aggressive branch')
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import math
|
||||
from uuid import UUID
|
||||
import psycopg2.extras
|
||||
import psycopg2.errors
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
|
||||
from fixtures.log_helper import log
|
||||
import time
|
||||
|
||||
@@ -12,11 +12,11 @@ import time
|
||||
#
|
||||
# Test pageserver get_lsn_by_timestamp API
|
||||
#
|
||||
def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
env = neon_env_builder.init_start()
|
||||
def test_lsn_mapping(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping')
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_lsn_mapping')
|
||||
pgmain = env.postgres.create_start("test_lsn_mapping")
|
||||
log.info("postgres is running on 'test_lsn_mapping' branch")
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -8,9 +8,9 @@ from fixtures.log_helper import log
|
||||
# it only checks next_multixact_id field in restored pg_control,
|
||||
# since we don't have functions to check multixact internals.
|
||||
#
|
||||
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_multixact', 'empty')
|
||||
def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_multixact', 'empty')
|
||||
pg = env.postgres.create_start('test_multixact')
|
||||
|
||||
log.info("postgres is running on 'test_multixact' branch")
|
||||
@@ -60,7 +60,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
assert int(next_multixact_id) > int(next_multixact_id_old)
|
||||
|
||||
# Branch at this point
|
||||
env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
|
||||
env.zenith_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
|
||||
pg_new = env.postgres.create_start('test_multixact_new')
|
||||
|
||||
log.info("postgres is running on 'test_multixact_new' branch")
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import time
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
# running.
|
||||
def test_next_xid(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
pg = env.postgres.create_start('main')
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
|
||||
import pytest
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
|
||||
|
||||
|
||||
def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
def check_tenant(env: ZenithEnv, pageserver_http: ZenithPageserverHttpClient):
|
||||
tenant_id, timeline_id = env.zenith_cli.create_tenant()
|
||||
pg = env.postgres.create_start('main', tenant_id=tenant_id)
|
||||
# we rely upon autocommit after each statement
|
||||
res_1 = pg.safe_psql_many(queries=[
|
||||
@@ -27,8 +26,7 @@ def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
|
||||
pageserver_http.timeline_detach(tenant_id, timeline_id)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
|
||||
def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
|
||||
def test_normal_work(zenith_env_builder: ZenithEnvBuilder):
|
||||
"""
|
||||
Basic test:
|
||||
* create new tenant with a timeline
|
||||
@@ -42,9 +40,8 @@ def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_s
|
||||
Repeat check for several tenants/timelines.
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
neon_env_builder.num_safekeepers = num_safekeepers
|
||||
env = zenith_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
for _ in range(num_timelines):
|
||||
for _ in range(3):
|
||||
check_tenant(env, pageserver_http)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import print_gc_result
|
||||
import psycopg2.extras
|
||||
@@ -14,11 +14,11 @@ import psycopg2.extras
|
||||
# just a hint that the page hasn't been modified since that LSN, and the page
|
||||
# server should return the latest page version regardless of the LSN.
|
||||
#
|
||||
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
|
||||
def test_old_request_lsn(zenith_env_builder: ZenithEnvBuilder):
|
||||
# Disable pitr, because here we want to test branch creation after GC
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_old_request_lsn", "main")
|
||||
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch("test_old_request_lsn", "main")
|
||||
pg = env.postgres.create_start('test_old_request_lsn')
|
||||
log.info('postgres is running on test_old_request_lsn branch')
|
||||
|
||||
|
||||
@@ -2,26 +2,26 @@ from typing import Optional
|
||||
from uuid import uuid4, UUID
|
||||
import pytest
|
||||
from fixtures.utils import lsn_from_hex
|
||||
from fixtures.neon_fixtures import (
|
||||
from fixtures.zenith_fixtures import (
|
||||
DEFAULT_BRANCH_NAME,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
NeonPageserverHttpClient,
|
||||
NeonPageserverApiException,
|
||||
ZenithEnv,
|
||||
ZenithEnvBuilder,
|
||||
ZenithPageserverHttpClient,
|
||||
ZenithPageserverApiException,
|
||||
wait_until,
|
||||
)
|
||||
|
||||
|
||||
# test that we cannot override node id
|
||||
def test_pageserver_init_node_id(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init()
|
||||
def test_pageserver_init_node_id(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init()
|
||||
with pytest.raises(
|
||||
Exception,
|
||||
match="node id can only be set during pageserver init and cannot be overridden"):
|
||||
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
|
||||
|
||||
|
||||
def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
|
||||
def check_client(client: ZenithPageserverHttpClient, initial_tenant: UUID):
|
||||
client.check_status()
|
||||
|
||||
# check initial tenant is there
|
||||
@@ -57,11 +57,11 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
|
||||
assert local_timeline_details['timeline_state'] == 'Loaded'
|
||||
|
||||
|
||||
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_pageserver_http_get_wal_receiver_not_found(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
tenant_id, timeline_id = env.zenith_cli.create_tenant()
|
||||
|
||||
empty_response = client.wal_receiver_get(tenant_id, timeline_id)
|
||||
|
||||
@@ -70,11 +70,11 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
|
||||
assert empty_response.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
|
||||
|
||||
|
||||
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_pageserver_http_get_wal_receiver_success(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
tenant_id, timeline_id = env.zenith_cli.create_tenant()
|
||||
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
|
||||
|
||||
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
|
||||
@@ -107,15 +107,15 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
|
||||
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
|
||||
|
||||
|
||||
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_pageserver_http_api_client(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
client = env.pageserver.http_client()
|
||||
check_client(client, env.initial_tenant)
|
||||
|
||||
|
||||
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.pageserver_auth_enabled = True
|
||||
env = neon_env_builder.init_start()
|
||||
def test_pageserver_http_api_client_auth_enabled(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
management_token = env.auth_keys.generate_management_token()
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
|
||||
# Test safekeeper sync and pageserver catch up
|
||||
# while initial compute node is down and pageserver is lagging behind safekeepers.
|
||||
# Ensure that basebackup after restart of all components is correct
|
||||
# and new compute node contains all data.
|
||||
def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
def test_pageserver_catchup_while_compute_down(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down')
|
||||
env.zenith_cli.create_branch('test_pageserver_catchup_while_compute_down')
|
||||
# Make shared_buffers large to ensure we won't query pageserver while it is down.
|
||||
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down',
|
||||
config_lines=['shared_buffers=512MB'])
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
# running.
|
||||
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_pageserver_restart(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_pageserver_restart')
|
||||
env.zenith_cli.create_branch('test_pageserver_restart')
|
||||
pg = env.postgres.create_start('test_pageserver_restart')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from io import BytesIO
|
||||
import asyncio
|
||||
from fixtures.neon_fixtures import NeonEnv, Postgres
|
||||
from fixtures.zenith_fixtures import ZenithEnv, Postgres
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -38,9 +38,9 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
|
||||
|
||||
|
||||
# Load data into one table with COPY TO from 5 parallel connections
|
||||
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_parallel_copy", "empty")
|
||||
def test_parallel_copy(zenith_simple_env: ZenithEnv, n_parallel=5):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_parallel_copy", "empty")
|
||||
pg = env.postgres.create_start('test_parallel_copy')
|
||||
log.info("postgres is running on 'test_parallel_copy' branch")
|
||||
|
||||
|
||||
@@ -5,20 +5,20 @@ import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import print_gc_result
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
|
||||
#
|
||||
# Check pitr_interval GC behavior.
|
||||
# Insert some data, run GC and create a branch in the past.
|
||||
#
|
||||
def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
|
||||
def test_pitr_gc(zenith_env_builder: ZenithEnvBuilder):
|
||||
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
# Set pitr interval such that we need to keep the data
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
|
||||
zenith_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
pgmain = env.postgres.create_start('main')
|
||||
log.info("postgres is running on 'main' branch")
|
||||
|
||||
@@ -62,7 +62,7 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
# It must have been preserved by PITR setting
|
||||
env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
|
||||
env.zenith_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
|
||||
|
||||
pg_hundred = env.postgres.create_start('test_pitr_gc_hundred')
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from psycopg2.errors import UndefinedTable
|
||||
from psycopg2.errors import IoError
|
||||
|
||||
pytest_plugins = ("fixtures.neon_fixtures")
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
|
||||
|
||||
@@ -14,9 +14,9 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
|
||||
#
|
||||
# Validation of reading different page versions
|
||||
#
|
||||
def test_read_validation(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation", "empty")
|
||||
def test_read_validation(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_read_validation", "empty")
|
||||
|
||||
pg = env.postgres.create_start("test_read_validation")
|
||||
log.info("postgres is running on 'test_read_validation' branch")
|
||||
@@ -125,9 +125,9 @@ def test_read_validation(neon_simple_env: NeonEnv):
|
||||
log.info("Caught an expected failure: {}".format(e))
|
||||
|
||||
|
||||
def test_read_validation_neg(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation_neg", "empty")
|
||||
def test_read_validation_neg(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_read_validation_neg", "empty")
|
||||
|
||||
pg = env.postgres.create_start("test_read_validation_neg")
|
||||
log.info("postgres is running on 'test_read_validation_neg' branch")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
|
||||
|
||||
#
|
||||
@@ -9,9 +9,9 @@ from fixtures.neon_fixtures import NeonEnv
|
||||
# This is very similar to the 'test_branch_behind' test, but instead of
|
||||
# creating branches, creates read-only nodes.
|
||||
#
|
||||
def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch('test_readonly_node', 'empty')
|
||||
def test_readonly_node(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch('test_readonly_node', 'empty')
|
||||
pgmain = env.postgres.create_start('test_readonly_node')
|
||||
log.info("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
|
||||
@@ -4,28 +4,28 @@ import psycopg2.extras
|
||||
import json
|
||||
from ast import Assert
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test pageserver recovery after crash
|
||||
#
|
||||
def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
def test_pageserver_recovery(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
# Override default checkpointer settings to run it more often
|
||||
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
|
||||
zenith_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"
|
||||
|
||||
env = neon_env_builder.init()
|
||||
env = zenith_env_builder.init()
|
||||
|
||||
# Check if failpoints enables. Otherwise the test doesn't make sense
|
||||
f = env.neon_cli.pageserver_enabled_features()
|
||||
f = env.zenith_cli.pageserver_enabled_features()
|
||||
|
||||
assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test"
|
||||
neon_env_builder.start()
|
||||
zenith_env_builder.start()
|
||||
|
||||
# Create a branch for us
|
||||
env.neon_cli.create_branch("test_pageserver_recovery", "main")
|
||||
env.zenith_cli.create_branch("test_pageserver_recovery", "main")
|
||||
|
||||
pg = env.postgres.create_start('test_pageserver_recovery')
|
||||
log.info("postgres is running on 'test_pageserver_recovery' branch")
|
||||
@@ -45,8 +45,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Configure failpoints
|
||||
pscur.execute(
|
||||
"failpoints flush-frozen-before-sync=sleep(2000);checkpoint-after-sync=exit"
|
||||
)
|
||||
"failpoints checkpoint-before-sync=sleep(2000);checkpoint-after-sync=exit")
|
||||
|
||||
# Do some updates until pageserver is crashed
|
||||
try:
|
||||
|
||||
@@ -6,7 +6,7 @@ from contextlib import closing
|
||||
from pathlib import Path
|
||||
import time
|
||||
from uuid import UUID
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||
import pytest
|
||||
@@ -30,15 +30,12 @@ import pytest
|
||||
#
|
||||
# The tests are done for all types of remote storage pageserver supports.
|
||||
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
|
||||
def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, storage_type: str):
|
||||
# Use this test to check more realistic SK ids: some etcd key parsing bugs were related,
|
||||
# and this test needs SK to write data to pageserver, so it will be visible
|
||||
neon_env_builder.safekeepers_id_start = 12
|
||||
|
||||
def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
|
||||
# zenith_env_builder.rust_log_override = 'debug'
|
||||
if storage_type == 'local_fs':
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
elif storage_type == 'mock_s3':
|
||||
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
|
||||
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
|
||||
else:
|
||||
raise RuntimeError(f'Unknown storage type: {storage_type}')
|
||||
|
||||
@@ -46,7 +43,7 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
|
||||
data_secret = 'very secret secret'
|
||||
|
||||
##### First start, insert secret data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
pg = env.postgres.create_start('main')
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -9,13 +9,13 @@ from fixtures.log_helper import log
|
||||
# Test restarting and recreating a postgres instance
|
||||
#
|
||||
@pytest.mark.parametrize('with_safekeepers', [False, True])
|
||||
def test_restart_compute(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
|
||||
neon_env_builder.pageserver_auth_enabled = True
|
||||
def test_restart_compute(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
|
||||
zenith_env_builder.pageserver_auth_enabled = True
|
||||
if with_safekeepers:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_restart_compute')
|
||||
env.zenith_cli.create_branch('test_restart_compute')
|
||||
pg = env.postgres.create_start('test_restart_compute')
|
||||
log.info("postgres is running on 'test_restart_compute' branch")
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -6,11 +6,11 @@ from fixtures.log_helper import log
|
||||
#
|
||||
# The pg_subxact SLRU is not preserved on restarts, and doesn't need to be
|
||||
# maintained in the pageserver, so subtransactions are not very exciting for
|
||||
# Neon. They are included in the commit record though and updated in the
|
||||
# Zenith. They are included in the commit record though and updated in the
|
||||
# CLOG.
|
||||
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_subxacts", "empty")
|
||||
def test_subxacts(zenith_simple_env: ZenithEnv, test_output_dir):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_subxacts", "empty")
|
||||
pg = env.postgres.create_start('test_subxacts')
|
||||
|
||||
log.info("postgres is running on 'test_subxacts' branch")
|
||||
|
||||
@@ -3,25 +3,25 @@ from contextlib import closing
|
||||
import pytest
|
||||
import psycopg2.extras
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
|
||||
def test_tenant_config(zenith_env_builder: ZenithEnvBuilder):
|
||||
# set some non-default global config
|
||||
neon_env_builder.pageserver_config_override = '''
|
||||
zenith_env_builder.pageserver_config_override = '''
|
||||
page_cache_size=444;
|
||||
wait_lsn_timeout='111 s';
|
||||
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
"""Test per tenant configuration"""
|
||||
tenant, _ = env.neon_cli.create_tenant(conf={
|
||||
tenant, _ = env.zenith_cli.create_tenant(conf={
|
||||
'checkpoint_distance': '20000',
|
||||
'gc_period': '30sec',
|
||||
})
|
||||
|
||||
env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
|
||||
env.zenith_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
|
||||
pg = env.postgres.create_start(
|
||||
"test_tenant_conf",
|
||||
"main",
|
||||
@@ -66,11 +66,11 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
|
||||
}.items())
|
||||
|
||||
# update the config and ensure that it has changed
|
||||
env.neon_cli.config_tenant(tenant_id=tenant,
|
||||
conf={
|
||||
'checkpoint_distance': '15000',
|
||||
'gc_period': '80sec',
|
||||
})
|
||||
env.zenith_cli.config_tenant(tenant_id=tenant,
|
||||
conf={
|
||||
'checkpoint_distance': '15000',
|
||||
'gc_period': '80sec',
|
||||
})
|
||||
|
||||
with closing(env.pageserver.connect()) as psconn:
|
||||
with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur:
|
||||
|
||||
@@ -10,7 +10,7 @@ from typing import Optional
|
||||
import signal
|
||||
import pytest
|
||||
|
||||
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
|
||||
from fixtures.zenith_fixtures import PgProtocol, PortDistributor, Postgres, ZenithEnvBuilder, Etcd, ZenithPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, zenith_binpath, pg_distrib_dir
|
||||
from fixtures.utils import lsn_from_hex
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
|
||||
http_port: int,
|
||||
broker: Optional[Etcd]):
|
||||
"""
|
||||
cannot use NeonPageserver yet because it depends on neon cli
|
||||
cannot use ZenithPageserver yet because it depends on zenith cli
|
||||
which currently lacks support for multiple pageservers
|
||||
"""
|
||||
cmd = [
|
||||
@@ -106,21 +106,21 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
|
||||
"needs to replace callmemaybe call with better idea how to migrate timelines between pageservers"
|
||||
)
|
||||
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
|
||||
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
||||
def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
|
||||
port_distributor: PortDistributor,
|
||||
with_load: str):
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# create folder for remote storage mock
|
||||
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
|
||||
|
||||
tenant, _ = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
||||
tenant, _ = env.zenith_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
||||
log.info("tenant to relocate %s", tenant)
|
||||
|
||||
# attach does not download ancestor branches (should it?), just use root branch for now
|
||||
env.neon_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
|
||||
env.zenith_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
|
||||
|
||||
tenant_pg = env.postgres.create_start(branch_name='test_tenant_relocation',
|
||||
node_name='test_tenant_relocation',
|
||||
@@ -177,16 +177,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
||||
new_pageserver_pg_port = port_distributor.get_port()
|
||||
new_pageserver_http_port = port_distributor.get_port()
|
||||
log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
|
||||
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
|
||||
pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'
|
||||
|
||||
new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
|
||||
new_pageserver_http = ZenithPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
|
||||
|
||||
with new_pageserver_helper(new_pageserver_dir,
|
||||
pageserver_bin,
|
||||
remote_storage_mock_path,
|
||||
new_pageserver_pg_port,
|
||||
new_pageserver_http_port,
|
||||
neon_env_builder.broker):
|
||||
zenith_env_builder.broker):
|
||||
|
||||
# call to attach timeline to new pageserver
|
||||
new_pageserver_http.timeline_attach(tenant, timeline)
|
||||
@@ -215,7 +215,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
||||
|
||||
tenant_pg.stop()
|
||||
|
||||
# rewrite neon cli config to use new pageserver for basebackup to start new compute
|
||||
# rewrite zenith cli config to use new pageserver for basebackup to start new compute
|
||||
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
||||
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
|
||||
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
|
||||
@@ -258,7 +258,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
||||
|
||||
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
|
||||
|
||||
# bring old pageserver back for clean shutdown via neon cli
|
||||
# bring old pageserver back for clean shutdown via zenith cli
|
||||
# new pageserver will be shut down by the context manager
|
||||
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
||||
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
|
||||
|
||||
@@ -3,26 +3,26 @@ from datetime import datetime
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
from fixtures.utils import lsn_to_hex
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_safekeepers', [False, True])
|
||||
def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
|
||||
def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeepers: bool):
|
||||
if with_safekeepers:
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
"""Tests tenants with and without wal acceptors"""
|
||||
tenant_1, _ = env.neon_cli.create_tenant()
|
||||
tenant_2, _ = env.neon_cli.create_tenant()
|
||||
tenant_1, _ = env.zenith_cli.create_tenant()
|
||||
tenant_2, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
|
||||
tenant_id=tenant_1)
|
||||
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
|
||||
tenant_id=tenant_2)
|
||||
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
|
||||
tenant_id=tenant_1)
|
||||
env.zenith_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
|
||||
tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start(
|
||||
f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
|
||||
@@ -44,15 +44,15 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers:
|
||||
assert cur.fetchone() == (5000050000, )
|
||||
|
||||
|
||||
def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
tenant_1, _ = env.neon_cli.create_tenant()
|
||||
tenant_2, _ = env.neon_cli.create_tenant()
|
||||
env = zenith_env_builder.init_start()
|
||||
tenant_1, _ = env.zenith_cli.create_tenant()
|
||||
tenant_2, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
|
||||
timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
|
||||
timeline_1 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
|
||||
timeline_2 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
|
||||
|
||||
pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1)
|
||||
pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2)
|
||||
@@ -72,7 +72,7 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str()
|
||||
|
||||
for name in collected_metrics:
|
||||
basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics')
|
||||
basepath = os.path.join(zenith_env_builder.repo_dir, f'{name}.metrics')
|
||||
|
||||
with open(basepath, 'w') as stdout_f:
|
||||
print(collected_metrics[name], file=stdout_f, flush=True)
|
||||
|
||||
@@ -12,11 +12,11 @@ from uuid import UUID
|
||||
|
||||
import pytest
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder, ZenithEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.utils import lsn_from_hex
|
||||
|
||||
|
||||
async def tenant_workload(env: NeonEnv, pg: Postgres):
|
||||
async def tenant_workload(env: ZenithEnv, pg: Postgres):
|
||||
pageserver_conn = await env.pageserver.connect_async()
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
@@ -35,7 +35,7 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
|
||||
assert res == i * 1000
|
||||
|
||||
|
||||
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
|
||||
async def all_tenants_workload(env: ZenithEnv, tenants_pgs):
|
||||
workers = []
|
||||
for tenant, pg in tenants_pgs:
|
||||
worker = tenant_workload(env, pg)
|
||||
@@ -46,28 +46,28 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs):
|
||||
|
||||
|
||||
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
|
||||
def test_tenants_many(neon_env_builder: NeonEnvBuilder, storage_type: str):
|
||||
def test_tenants_many(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
|
||||
|
||||
if storage_type == 'local_fs':
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
elif storage_type == 'mock_s3':
|
||||
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
|
||||
zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
|
||||
else:
|
||||
raise RuntimeError(f'Unknown storage type: {storage_type}')
|
||||
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
tenants_pgs = []
|
||||
|
||||
for i in range(1, 5):
|
||||
# Use a tiny checkpoint distance, to create a lot of layers quickly
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
tenant, _ = env.zenith_cli.create_tenant(
|
||||
conf={
|
||||
'checkpoint_distance': '5000000',
|
||||
})
|
||||
env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
|
||||
env.zenith_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
|
||||
|
||||
pg = env.postgres.create_start(
|
||||
f'test_tenants_many',
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from contextlib import closing
|
||||
import psycopg2.extras
|
||||
import psycopg2.errors
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, assert_local
|
||||
from fixtures.log_helper import log
|
||||
import time
|
||||
|
||||
|
||||
def test_timeline_size(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_timeline_size(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size', 'empty')
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||
@@ -69,9 +69,9 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
|
||||
time.sleep(polling_interval)
|
||||
|
||||
|
||||
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
|
||||
def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
new_timeline_id = env.zenith_cli.create_branch('test_timeline_size_quota')
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||
@@ -86,7 +86,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to neon_fixtures?
|
||||
cur.execute("CREATE EXTENSION neon") # TODO move it to zenith_fixtures?
|
||||
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
import os
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
#
|
||||
# Test branching, when a transaction is in prepared state
|
||||
#
|
||||
def test_twophase(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_twophase", "empty")
|
||||
def test_twophase(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_twophase", "empty")
|
||||
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
|
||||
log.info("postgres is running on 'test_twophase' branch")
|
||||
|
||||
@@ -55,7 +55,7 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
env.neon_cli.create_branch("test_twophase_prepared", "test_twophase")
|
||||
env.zenith_cli.create_branch("test_twophase_prepared", "test_twophase")
|
||||
|
||||
# Start compute on the new branch
|
||||
pg2 = env.postgres.create_start(
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -6,10 +6,10 @@ from fixtures.log_helper import log
|
||||
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
|
||||
# HEAP_UPDATE record.
|
||||
#
|
||||
def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_vm_bit_clear(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
|
||||
env.zenith_cli.create_branch("test_vm_bit_clear", "empty")
|
||||
pg = env.postgres.create_start('test_vm_bit_clear')
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear' branch")
|
||||
@@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
|
||||
|
||||
# Branch at this point, to test that later
|
||||
env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
|
||||
env.zenith_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
|
||||
|
||||
# Clear the buffer cache, to force the VM page to be re-fetched from
|
||||
# the page server
|
||||
|
||||
@@ -12,7 +12,7 @@ from contextlib import closing
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing import Process, Value
|
||||
from pathlib import Path
|
||||
from fixtures.neon_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol
|
||||
from fixtures.zenith_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
|
||||
from fixtures.utils import get_dir_size, lsn_to_hex, mkdir_if_needed, lsn_from_hex
|
||||
from fixtures.log_helper import log
|
||||
from typing import List, Optional, Any
|
||||
@@ -29,9 +29,9 @@ class TimelineMetrics:
|
||||
|
||||
# Run page server and multiple acceptors, and multiple compute nodes running
|
||||
# against different timelines.
|
||||
def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
def test_many_timelines(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
n_timelines = 3
|
||||
|
||||
@@ -39,15 +39,15 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
"test_safekeepers_many_timelines_{}".format(tlin) for tlin in range(n_timelines)
|
||||
]
|
||||
# pageserver, safekeeper operate timelines via their ids (can be represented in hex as 'ad50847381e248feaac9876cc71ae418')
|
||||
# that's not really human readable, so the branch names are introduced in Neon CLI.
|
||||
# Neon CLI stores its branch <-> timeline mapping in its internals,
|
||||
# that's not really human readable, so the branch names are introduced in Zenith CLI.
|
||||
# Zenith CLI stores its branch <-> timeline mapping in its internals,
|
||||
# but we need this to collect metrics from other servers, related to the timeline.
|
||||
branch_names_to_timeline_ids = {}
|
||||
|
||||
# start postgres on each timeline
|
||||
pgs = []
|
||||
for branch_name in branch_names:
|
||||
new_timeline_id = env.neon_cli.create_branch(branch_name)
|
||||
new_timeline_id = env.zenith_cli.create_branch(branch_name)
|
||||
pgs.append(env.postgres.create_start(branch_name))
|
||||
branch_names_to_timeline_ids[branch_name] = new_timeline_id
|
||||
|
||||
@@ -93,14 +93,14 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
# the compute node, which only happens after a consensus of safekeepers
|
||||
# has confirmed the transaction. We assume majority consensus here.
|
||||
assert (2 * sum(m.last_record_lsn <= lsn
|
||||
for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
|
||||
for lsn in m.flush_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
|
||||
assert (2 * sum(m.last_record_lsn <= lsn
|
||||
for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
|
||||
for lsn in m.commit_lsns) > zenith_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
|
||||
timeline_metrics.append(m)
|
||||
log.info(f"{message}: {timeline_metrics}")
|
||||
return timeline_metrics
|
||||
|
||||
# TODO: https://github.com/neondatabase/neon/issues/809
|
||||
# TODO: https://github.com/zenithdb/zenith/issues/809
|
||||
# collect_metrics("before CREATE TABLE")
|
||||
|
||||
# Do everything in different loops to have actions on different timelines
|
||||
@@ -168,15 +168,15 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
# times, with fault_probability chance of getting a wal acceptor down or up
|
||||
# along the way. 2 of 3 are always alive, so the work keeps going.
|
||||
def test_restarts(neon_env_builder: NeonEnvBuilder):
|
||||
def test_restarts(zenith_env_builder: ZenithEnvBuilder):
|
||||
fault_probability = 0.01
|
||||
n_inserts = 1000
|
||||
n_acceptors = 3
|
||||
|
||||
neon_env_builder.num_safekeepers = n_acceptors
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = n_acceptors
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_restarts')
|
||||
env.zenith_cli.create_branch('test_safekeepers_restarts')
|
||||
pg = env.postgres.create_start('test_safekeepers_restarts')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -209,11 +209,11 @@ def delayed_safekeeper_start(wa):
|
||||
|
||||
|
||||
# When majority of acceptors is offline, commits are expected to be frozen
|
||||
def test_unavailability(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 2
|
||||
env = neon_env_builder.init_start()
|
||||
def test_unavailability(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 2
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_unavailability')
|
||||
env.zenith_cli.create_branch('test_safekeepers_unavailability')
|
||||
pg = env.postgres.create_start('test_safekeepers_unavailability')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -279,12 +279,12 @@ def stop_value():
|
||||
|
||||
|
||||
# do inserts while concurrently getting up/down subsets of acceptors
|
||||
def test_race_conditions(neon_env_builder: NeonEnvBuilder, stop_value):
|
||||
def test_race_conditions(zenith_env_builder: ZenithEnvBuilder, stop_value):
|
||||
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_race_conditions')
|
||||
env.zenith_cli.create_branch('test_safekeepers_race_conditions')
|
||||
pg = env.postgres.create_start('test_safekeepers_race_conditions')
|
||||
|
||||
# we rely upon autocommit after each statement
|
||||
@@ -308,16 +308,16 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder, stop_value):
|
||||
|
||||
|
||||
# Test that safekeepers push their info to the broker and learn peer status from it
|
||||
def test_broker(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
env = neon_env_builder.init_start()
|
||||
def test_broker(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_broker", "main")
|
||||
env.zenith_cli.create_branch("test_broker", "main")
|
||||
pg = env.postgres.create_start('test_broker')
|
||||
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
|
||||
|
||||
# learn neon timeline from compute
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
@@ -349,13 +349,13 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
|
||||
# Test that old WAL consumed by peers and pageserver is removed from safekeepers.
|
||||
def test_wal_removal(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 2
|
||||
def test_wal_removal(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 2
|
||||
# to advance remote_consistent_lsn
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_wal_removal')
|
||||
env.zenith_cli.create_branch('test_safekeepers_wal_removal')
|
||||
pg = env.postgres.create_start('test_safekeepers_wal_removal')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -412,22 +412,22 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
|
||||
|
||||
|
||||
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
|
||||
def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
def test_wal_backup(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
if storage_type == 'local_fs':
|
||||
neon_env_builder.enable_local_fs_remote_storage()
|
||||
zenith_env_builder.enable_local_fs_remote_storage()
|
||||
elif storage_type == 'mock_s3':
|
||||
neon_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
|
||||
zenith_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
|
||||
else:
|
||||
raise RuntimeError(f'Unknown storage type: {storage_type}')
|
||||
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
|
||||
zenith_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_wal_backup')
|
||||
env.zenith_cli.create_branch('test_safekeepers_wal_backup')
|
||||
pg = env.postgres.create_start('test_safekeepers_wal_backup')
|
||||
|
||||
# learn neon timeline from compute
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
@@ -460,7 +460,7 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
|
||||
|
||||
|
||||
class ProposerPostgres(PgProtocol):
|
||||
"""Object for running postgres without NeonEnv"""
|
||||
"""Object for running postgres without ZenithEnv"""
|
||||
def __init__(self,
|
||||
pgdata_dir: str,
|
||||
pg_bin,
|
||||
@@ -542,14 +542,14 @@ class ProposerPostgres(PgProtocol):
|
||||
|
||||
|
||||
# insert wal in all safekeepers and run sync on proposer
|
||||
def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
|
||||
def test_sync_safekeepers(zenith_env_builder: ZenithEnvBuilder,
|
||||
pg_bin: PgBin,
|
||||
port_distributor: PortDistributor):
|
||||
|
||||
# We don't really need the full environment for this test, just the
|
||||
# safekeepers would be enough.
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
timeline_id = uuid.uuid4()
|
||||
tenant_id = uuid.uuid4()
|
||||
@@ -596,17 +596,17 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
|
||||
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
|
||||
|
||||
|
||||
def test_timeline_status(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_timeline_status(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_timeline_status')
|
||||
env.zenith_cli.create_branch('test_timeline_status')
|
||||
pg = env.postgres.create_start('test_timeline_status')
|
||||
|
||||
wa = env.safekeepers[0]
|
||||
wa_http_cli = wa.http_client()
|
||||
wa_http_cli.check_status()
|
||||
|
||||
# learn neon timeline from compute
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
@@ -642,7 +642,7 @@ class SafekeeperEnv:
|
||||
peer_port=self.port_distributor.get_port())
|
||||
self.pg_bin = pg_bin
|
||||
self.num_safekeepers = num_safekeepers
|
||||
self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper')
|
||||
self.bin_safekeeper = os.path.join(str(zenith_binpath), 'safekeeper')
|
||||
self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None
|
||||
self.postgres: Optional[ProposerPostgres] = None
|
||||
self.tenant_id: Optional[uuid.UUID] = None
|
||||
@@ -753,8 +753,8 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
|
||||
assert res == 5050
|
||||
|
||||
|
||||
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
|
||||
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
|
||||
def safekeepers_guc(env: ZenithEnv, sk_names: List[int]) -> str:
|
||||
return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names])
|
||||
|
||||
def execute_payload(pg: Postgres):
|
||||
@@ -781,9 +781,9 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
except Exception as e:
|
||||
log.info(f"Safekeeper {sk.id} status error: {e}")
|
||||
|
||||
neon_env_builder.num_safekeepers = 4
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch('test_replace_safekeeper')
|
||||
zenith_env_builder.num_safekeepers = 4
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch('test_replace_safekeeper')
|
||||
|
||||
log.info("Use only first 3 safekeepers")
|
||||
env.safekeepers[3].stop()
|
||||
@@ -792,7 +792,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
|
||||
pg.start()
|
||||
|
||||
# learn neon timeline from compute
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
@@ -844,7 +844,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
|
||||
# We have `wal_keep_size=0`, so postgres should trim WAL once it's broadcasted
|
||||
# to all safekeepers. This test checks that compute WAL can fit into small number
|
||||
# of WAL segments.
|
||||
def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
|
||||
def test_wal_deleted_after_broadcast(zenith_env_builder: ZenithEnvBuilder):
|
||||
# used to calculate delta in collect_stats
|
||||
last_lsn = .0
|
||||
|
||||
@@ -866,10 +866,10 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
|
||||
def generate_wal(cur):
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,300000), 'payload'")
|
||||
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_wal_deleted_after_broadcast')
|
||||
env.zenith_cli.create_branch('test_wal_deleted_after_broadcast')
|
||||
# Adjust checkpoint config to prevent keeping old WAL segments
|
||||
pg = env.postgres.create_start(
|
||||
'test_wal_deleted_after_broadcast',
|
||||
@@ -894,18 +894,18 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
|
||||
assert wal_size_after_checkpoint < 16 * 2.5
|
||||
|
||||
|
||||
def test_delete_force(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
env = neon_env_builder.init_start()
|
||||
def test_delete_force(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Create two tenants: one will be deleted, other should be preserved.
|
||||
tenant_id = env.initial_tenant.hex
|
||||
timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly
|
||||
timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly
|
||||
timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant
|
||||
timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant
|
||||
timeline_id_1 = env.zenith_cli.create_branch('br1').hex # Active, delete explicitly
|
||||
timeline_id_2 = env.zenith_cli.create_branch('br2').hex # Inactive, delete explicitly
|
||||
timeline_id_3 = env.zenith_cli.create_branch('br3').hex # Active, delete with the tenant
|
||||
timeline_id_4 = env.zenith_cli.create_branch('br4').hex # Inactive, delete with the tenant
|
||||
|
||||
tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant()
|
||||
tenant_id_other_uuid, timeline_id_other_uuid = env.zenith_cli.create_tenant()
|
||||
tenant_id_other = tenant_id_other_uuid.hex
|
||||
timeline_id_other = timeline_id_other_uuid.hex
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import asyncpg
|
||||
import random
|
||||
import time
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
|
||||
from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres, Safekeeper
|
||||
from fixtures.log_helper import getLogger
|
||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||
from typing import List
|
||||
@@ -136,7 +136,7 @@ async def wait_for_lsn(safekeeper: Safekeeper,
|
||||
# On each iteration 1 acceptor is stopped, and 2 others should allow
|
||||
# background workers execute transactions. In the end, state should remain
|
||||
# consistent.
|
||||
async def run_restarts_under_load(env: NeonEnv,
|
||||
async def run_restarts_under_load(env: ZenithEnv,
|
||||
pg: Postgres,
|
||||
acceptors: List[Safekeeper],
|
||||
n_workers=10,
|
||||
@@ -202,11 +202,11 @@ async def run_restarts_under_load(env: NeonEnv,
|
||||
|
||||
|
||||
# Restart acceptors one by one, while executing and validating bank transactions
|
||||
def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
def test_restarts_under_load(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_safekeepers_restarts_under_load')
|
||||
env.zenith_cli.create_branch('test_safekeepers_restarts_under_load')
|
||||
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
|
||||
pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
|
||||
config_lines=['max_replication_write_lag=1MB'])
|
||||
@@ -217,11 +217,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
|
||||
# Restart acceptors one by one and test that everything is working as expected
|
||||
# when checkpoints are triggered frequently by max_wal_size=32MB. Because we have
|
||||
# wal_keep_size=0, there will be aggressive WAL segments recycling.
|
||||
def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
def test_restarts_frequent_checkpoints(zenith_env_builder: ZenithEnvBuilder):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_restarts_frequent_checkpoints')
|
||||
env.zenith_cli.create_branch('test_restarts_frequent_checkpoints')
|
||||
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
|
||||
pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
|
||||
config_lines=[
|
||||
|
||||
@@ -1,26 +1,26 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from fixtures.neon_fixtures import (NeonEnvBuilder,
|
||||
VanillaPostgres,
|
||||
PortDistributor,
|
||||
PgBin,
|
||||
base_dir,
|
||||
vanilla_pg,
|
||||
pg_distrib_dir)
|
||||
from fixtures.zenith_fixtures import (ZenithEnvBuilder,
|
||||
VanillaPostgres,
|
||||
PortDistributor,
|
||||
PgBin,
|
||||
base_dir,
|
||||
vanilla_pg,
|
||||
pg_distrib_dir)
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
|
||||
def test_wal_restore(zenith_env_builder: ZenithEnvBuilder,
|
||||
pg_bin: PgBin,
|
||||
test_output_dir,
|
||||
port_distributor: PortDistributor):
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_wal_restore")
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch("test_wal_restore")
|
||||
pg = env.postgres.create_start('test_wal_restore')
|
||||
pg.safe_psql("create table t as select generate_series(1,300000)")
|
||||
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
|
||||
env.neon_cli.pageserver_stop()
|
||||
env.zenith_cli.pageserver_stop()
|
||||
port = port_distributor.get_port()
|
||||
data_dir = os.path.join(test_output_dir, 'pgsql.restored')
|
||||
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import uuid
|
||||
import requests
|
||||
|
||||
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
|
||||
from fixtures.zenith_fixtures import DEFAULT_BRANCH_NAME, ZenithEnv, ZenithEnvBuilder, ZenithPageserverHttpClient
|
||||
from typing import cast
|
||||
|
||||
|
||||
def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient,
|
||||
env: NeonEnv,
|
||||
def helper_compare_timeline_list(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
env: ZenithEnv,
|
||||
initial_tenant: uuid.UUID):
|
||||
"""
|
||||
Compare timelines list returned by CLI and directly via API.
|
||||
@@ -17,65 +17,65 @@ def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClien
|
||||
map(lambda t: cast(str, t['timeline_id']),
|
||||
pageserver_http_client.timeline_list(initial_tenant)))
|
||||
|
||||
timelines_cli = env.neon_cli.list_timelines()
|
||||
assert timelines_cli == env.neon_cli.list_timelines(initial_tenant)
|
||||
timelines_cli = env.zenith_cli.list_timelines()
|
||||
assert timelines_cli == env.zenith_cli.list_timelines(initial_tenant)
|
||||
|
||||
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
|
||||
assert timelines_api == cli_timeline_ids
|
||||
|
||||
|
||||
def test_cli_timeline_list(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_cli_timeline_list(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
|
||||
# Initial sanity check
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a branch for us
|
||||
main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main')
|
||||
main_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_main')
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Create a nested branch
|
||||
nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested',
|
||||
'test_cli_branch_list_main')
|
||||
nested_timeline_id = env.zenith_cli.create_branch('test_cli_branch_list_nested',
|
||||
'test_cli_branch_list_main')
|
||||
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
|
||||
|
||||
# Check that all new branches are visible via CLI
|
||||
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
|
||||
timelines_cli = [timeline_id for (_, timeline_id) in env.zenith_cli.list_timelines()]
|
||||
|
||||
assert main_timeline_id.hex in timelines_cli
|
||||
assert nested_timeline_id.hex in timelines_cli
|
||||
|
||||
|
||||
def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
|
||||
def helper_compare_tenant_list(pageserver_http_client: ZenithPageserverHttpClient, env: ZenithEnv):
|
||||
tenants = pageserver_http_client.tenant_list()
|
||||
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
|
||||
|
||||
res = env.neon_cli.list_tenants()
|
||||
res = env.zenith_cli.list_tenants()
|
||||
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert tenants_api == tenants_cli
|
||||
|
||||
|
||||
def test_cli_tenant_list(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
# Initial sanity check
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant1, _ = env.neon_cli.create_tenant()
|
||||
tenant1, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
# check tenant1 appeared
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
# Create new tenant
|
||||
tenant2, _ = env.neon_cli.create_tenant()
|
||||
tenant2, _ = env.zenith_cli.create_tenant()
|
||||
|
||||
# check tenant2 appeared
|
||||
helper_compare_tenant_list(pageserver_http_client, env)
|
||||
|
||||
res = env.neon_cli.list_tenants()
|
||||
res = env.zenith_cli.list_tenants()
|
||||
tenants = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
|
||||
|
||||
assert env.initial_tenant.hex in tenants
|
||||
@@ -83,18 +83,18 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
|
||||
assert tenant2.hex in tenants
|
||||
|
||||
|
||||
def test_cli_tenant_create(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
timelines = env.neon_cli.list_timelines(tenant_id)
|
||||
def test_cli_tenant_create(zenith_simple_env: ZenithEnv):
|
||||
env = zenith_simple_env
|
||||
tenant_id, _ = env.zenith_cli.create_tenant()
|
||||
timelines = env.zenith_cli.list_timelines(tenant_id)
|
||||
|
||||
# an initial timeline should be created upon tenant creation
|
||||
assert len(timelines) == 1
|
||||
assert timelines[0][0] == DEFAULT_BRANCH_NAME
|
||||
|
||||
|
||||
def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Connect to sk port on v4 loopback
|
||||
res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
|
||||
@@ -108,17 +108,17 @@ def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder):
|
||||
# assert res.ok
|
||||
|
||||
|
||||
def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
def test_cli_start_stop(zenith_env_builder: ZenithEnvBuilder):
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Stop default ps/sk
|
||||
env.neon_cli.pageserver_stop()
|
||||
env.neon_cli.safekeeper_stop()
|
||||
env.zenith_cli.pageserver_stop()
|
||||
env.zenith_cli.safekeeper_stop()
|
||||
|
||||
# Default start
|
||||
res = env.neon_cli.raw_cli(["start"])
|
||||
res = env.zenith_cli.raw_cli(["start"])
|
||||
res.check_returncode()
|
||||
|
||||
# Default stop
|
||||
res = env.neon_cli.raw_cli(["stop"])
|
||||
res = env.zenith_cli.raw_cli(["stop"])
|
||||
res.check_returncode()
|
||||
@@ -1,16 +1,16 @@
|
||||
import os
|
||||
import pytest
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
|
||||
from fixtures.zenith_fixtures import ZenithEnv, base_dir, pg_distrib_dir
|
||||
|
||||
|
||||
# The isolation tests run for a long time, especially in debug mode,
|
||||
# so use a larger-than-default timeout.
|
||||
@pytest.mark.timeout(1800)
|
||||
def test_isolation(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
|
||||
env = neon_simple_env
|
||||
def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_isolation", "empty")
|
||||
env.zenith_cli.create_branch("test_isolation", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
# isolation tests use prepared transactions, so enable them
|
||||
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
import os
|
||||
import pytest
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
|
||||
from fixtures.zenith_fixtures import ZenithEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
|
||||
|
||||
|
||||
# The pg_regress tests run for a long time, especially in debug mode,
|
||||
# so use a larger-than-default timeout.
|
||||
@pytest.mark.timeout(1800)
|
||||
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: str, pg_bin, capsys):
|
||||
env = neon_simple_env
|
||||
def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_pg_regress", "empty")
|
||||
env.zenith_cli.create_branch("test_pg_regress", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = env.postgres.create_start('test_pg_regress')
|
||||
pg.safe_psql('CREATE DATABASE regression')
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import os
|
||||
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.neon_fixtures import (NeonEnv,
|
||||
check_restored_datadir_content,
|
||||
base_dir,
|
||||
pg_distrib_dir)
|
||||
from fixtures.zenith_fixtures import (ZenithEnv,
|
||||
check_restored_datadir_content,
|
||||
base_dir,
|
||||
pg_distrib_dir)
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
|
||||
env = neon_simple_env
|
||||
def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys):
|
||||
env = zenith_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_neon_regress", "empty")
|
||||
env.zenith_cli.create_branch("test_zenith_regress", "empty")
|
||||
# Connect to postgres and create a database called "regression".
|
||||
pg = env.postgres.create_start('test_neon_regress')
|
||||
pg = env.postgres.create_start('test_zenith_regress')
|
||||
pg.safe_psql('CREATE DATABASE regression')
|
||||
|
||||
# Create some local directories for pg_regress to run in.
|
||||
@@ -22,9 +22,9 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys)
|
||||
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
|
||||
|
||||
# Compute all the file locations that pg_regress will need.
|
||||
# This test runs neon specific tests
|
||||
# This test runs zenith specific tests
|
||||
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
|
||||
src_path = os.path.join(base_dir, 'test_runner/neon_regress')
|
||||
src_path = os.path.join(base_dir, 'test_runner/zenith_regress')
|
||||
bindir = os.path.join(pg_distrib_dir, 'bin')
|
||||
schedule = os.path.join(src_path, 'parallel_schedule')
|
||||
pg_regress = os.path.join(build_path, 'pg_regress')
|
||||
@@ -1,5 +1,5 @@
|
||||
pytest_plugins = (
|
||||
"fixtures.neon_fixtures",
|
||||
"fixtures.zenith_fixtures",
|
||||
"fixtures.benchmark_fixture",
|
||||
"fixtures.compare_fixtures",
|
||||
"fixtures.slow",
|
||||
|
||||
@@ -25,9 +25,9 @@ To use, declare the 'zenbenchmark' fixture in the test function. Run the
|
||||
benchmark, and then record the result by calling zenbenchmark.record. For example:
|
||||
|
||||
import timeit
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
|
||||
def test_mybench(neon_simple_env: env, zenbenchmark):
|
||||
def test_mybench(zenith_simple_env: env, zenbenchmark):
|
||||
|
||||
# Initialize the test
|
||||
...
|
||||
@@ -142,7 +142,7 @@ class MetricReport(str, enum.Enum): # str is a hack to make it json serializabl
|
||||
LOWER_IS_BETTER = 'lower_is_better'
|
||||
|
||||
|
||||
class NeonBenchmarker:
|
||||
class ZenithBenchmarker:
|
||||
"""
|
||||
An object for recording benchmark results. This is created for each test
|
||||
function by the zenbenchmark fixture
|
||||
@@ -163,7 +163,7 @@ class NeonBenchmarker:
|
||||
Record a benchmark result.
|
||||
"""
|
||||
# just to namespace the value
|
||||
name = f"neon_benchmarker_{metric_name}"
|
||||
name = f"zenith_benchmarker_{metric_name}"
|
||||
self.property_recorder(
|
||||
name,
|
||||
{
|
||||
@@ -289,12 +289,12 @@ class NeonBenchmarker:
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def zenbenchmark(record_property) -> Iterator[NeonBenchmarker]:
|
||||
def zenbenchmark(record_property) -> Iterator[ZenithBenchmarker]:
|
||||
"""
|
||||
This is a python decorator for benchmark fixtures. It contains functions for
|
||||
recording measurements, and prints them out at the end.
|
||||
"""
|
||||
benchmarker = NeonBenchmarker(record_property)
|
||||
benchmarker = ZenithBenchmarker(record_property)
|
||||
yield benchmarker
|
||||
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ import pytest
|
||||
from contextlib import contextmanager
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, ZenithEnv
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
|
||||
# Type-related stuff
|
||||
from typing import Iterator
|
||||
@@ -12,7 +12,7 @@ from typing import Iterator
|
||||
class PgCompare(ABC):
|
||||
"""Common interface of all postgres implementations, useful for benchmarks.
|
||||
|
||||
This class is a helper class for the neon_with_baseline fixture. See its documentation
|
||||
This class is a helper class for the zenith_with_baseline fixture. See its documentation
|
||||
for more details.
|
||||
"""
|
||||
@property
|
||||
@@ -26,7 +26,7 @@ class PgCompare(ABC):
|
||||
pass
|
||||
|
||||
@property
|
||||
def zenbenchmark(self) -> NeonBenchmarker:
|
||||
def zenbenchmark(self) -> ZenithBenchmarker:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@@ -52,19 +52,19 @@ class PgCompare(ABC):
|
||||
pass
|
||||
|
||||
|
||||
class NeonCompare(PgCompare):
|
||||
"""PgCompare interface for the neon stack."""
|
||||
class ZenithCompare(PgCompare):
|
||||
"""PgCompare interface for the zenith stack."""
|
||||
def __init__(self,
|
||||
zenbenchmark: NeonBenchmarker,
|
||||
neon_simple_env: NeonEnv,
|
||||
zenbenchmark: ZenithBenchmarker,
|
||||
zenith_simple_env: ZenithEnv,
|
||||
pg_bin: PgBin,
|
||||
branch_name):
|
||||
self.env = neon_simple_env
|
||||
self.env = zenith_simple_env
|
||||
self._zenbenchmark = zenbenchmark
|
||||
self._pg_bin = pg_bin
|
||||
|
||||
# We only use one branch and one timeline
|
||||
self.env.neon_cli.create_branch(branch_name, 'empty')
|
||||
self.env.zenith_cli.create_branch(branch_name, 'empty')
|
||||
self._pg = self.env.postgres.create_start(branch_name)
|
||||
self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0]
|
||||
|
||||
@@ -221,9 +221,9 @@ class RemoteCompare(PgCompare):
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare:
|
||||
def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
|
||||
branch_name = request.node.name
|
||||
return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name)
|
||||
return ZenithCompare(zenbenchmark, zenith_simple_env, pg_bin, branch_name)
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
@@ -236,13 +236,13 @@ def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare:
|
||||
return RemoteCompare(zenbenchmark, remote_pg)
|
||||
|
||||
|
||||
@pytest.fixture(params=["vanilla_compare", "neon_compare"], ids=["vanilla", "neon"])
|
||||
def neon_with_baseline(request) -> PgCompare:
|
||||
"""Parameterized fixture that helps compare neon against vanilla postgres.
|
||||
@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
|
||||
def zenith_with_baseline(request) -> PgCompare:
|
||||
"""Parameterized fixture that helps compare zenith against vanilla postgres.
|
||||
|
||||
A test that uses this fixture turns into a parameterized test that runs against:
|
||||
1. A vanilla postgres instance
|
||||
2. A simple neon env (see neon_simple_env)
|
||||
2. A simple zenith env (see zenith_simple_env)
|
||||
3. Possibly other postgres protocol implementations.
|
||||
|
||||
The main goal of this fixture is to make it easier for people to read and write
|
||||
@@ -254,7 +254,7 @@ def neon_with_baseline(request) -> PgCompare:
|
||||
of that.
|
||||
|
||||
If a test requires some one-off special implementation-specific logic, use of
|
||||
isinstance(neon_with_baseline, NeonCompare) is encouraged. Though if that
|
||||
isinstance(zenith_with_baseline, ZenithCompare) is encouraged. Though if that
|
||||
implementation-specific logic is widely useful across multiple tests, it might
|
||||
make sense to add methods to the PgCompare class.
|
||||
"""
|
||||
|
||||
@@ -81,7 +81,7 @@ def pytest_addoption(parser):
|
||||
|
||||
# These are set in pytest_configure()
|
||||
base_dir = ""
|
||||
neon_binpath = ""
|
||||
zenith_binpath = ""
|
||||
pg_distrib_dir = ""
|
||||
top_output_dir = ""
|
||||
|
||||
@@ -100,7 +100,7 @@ def check_interferring_processes(config):
|
||||
# result of the test.
|
||||
# NOTE this shows as an internal pytest error, there might be a better way
|
||||
raise Exception(
|
||||
'Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
|
||||
'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
|
||||
)
|
||||
|
||||
|
||||
@@ -146,25 +146,25 @@ def pytest_configure(config):
|
||||
raise Exception('postgres not found at "{}"'.format(pg_distrib_dir))
|
||||
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# we are in remote env and do not have neon binaries locally
|
||||
# we are in remote env and do not have zenith binaries locally
|
||||
# this is the case for benchmarks run on self-hosted runner
|
||||
return
|
||||
# Find the neon binaries.
|
||||
global neon_binpath
|
||||
env_neon_bin = os.environ.get('ZENITH_BIN')
|
||||
if env_neon_bin:
|
||||
neon_binpath = env_neon_bin
|
||||
# Find the zenith binaries.
|
||||
global zenith_binpath
|
||||
env_zenith_bin = os.environ.get('ZENITH_BIN')
|
||||
if env_zenith_bin:
|
||||
zenith_binpath = env_zenith_bin
|
||||
else:
|
||||
neon_binpath = os.path.join(base_dir, 'target/debug')
|
||||
log.info(f'neon_binpath is {neon_binpath}')
|
||||
if not os.path.exists(os.path.join(neon_binpath, 'pageserver')):
|
||||
raise Exception('neon binaries not found at "{}"'.format(neon_binpath))
|
||||
zenith_binpath = os.path.join(base_dir, 'target/debug')
|
||||
log.info(f'zenith_binpath is {zenith_binpath}')
|
||||
if not os.path.exists(os.path.join(zenith_binpath, 'pageserver')):
|
||||
raise Exception('zenith binaries not found at "{}"'.format(zenith_binpath))
|
||||
|
||||
|
||||
def profiling_supported():
|
||||
"""Return True if the pageserver was compiled with the 'profiling' feature
|
||||
"""
|
||||
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
|
||||
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
|
||||
res = subprocess.run([bin_pageserver, '--version'],
|
||||
check=True,
|
||||
universal_newlines=True,
|
||||
@@ -223,7 +223,7 @@ def can_bind(host: str, port: int) -> bool:
|
||||
# TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
|
||||
# moment. If that changes, we should start using SO_REUSEADDR here
|
||||
# too, to allow reusing ports more quickly.
|
||||
# See https://github.com/neondatabase/neon/issues/801
|
||||
# See https://github.com/zenithdb/zenith/issues/801
|
||||
#sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
|
||||
try:
|
||||
@@ -479,30 +479,27 @@ class RemoteStorageUsers(Flag):
|
||||
SAFEKEEPER = auto()
|
||||
|
||||
|
||||
class NeonEnvBuilder:
|
||||
class ZenithEnvBuilder:
|
||||
"""
|
||||
Builder object to create a Neon runtime environment
|
||||
Builder object to create a Zenith runtime environment
|
||||
|
||||
You should use the `neon_env_builder` or `neon_simple_env` pytest
|
||||
fixture to create the NeonEnv object. That way, the repository is
|
||||
You should use the `zenith_env_builder` or `zenith_simple_env` pytest
|
||||
fixture to create the ZenithEnv object. That way, the repository is
|
||||
created in the right directory, based on the test name, and it's properly
|
||||
cleaned up after the test has finished.
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
repo_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
broker: Etcd,
|
||||
mock_s3_server: MockS3Server,
|
||||
remote_storage: Optional[RemoteStorage] = None,
|
||||
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
num_safekeepers: int = 1,
|
||||
# Use non-standard SK ids to check for various parsing bugs
|
||||
safekeepers_id_start: int = 0,
|
||||
pageserver_auth_enabled: bool = False,
|
||||
rust_log_override: Optional[str] = None,
|
||||
default_branch_name=DEFAULT_BRANCH_NAME):
|
||||
def __init__(self,
|
||||
repo_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
broker: Etcd,
|
||||
mock_s3_server: MockS3Server,
|
||||
remote_storage: Optional[RemoteStorage] = None,
|
||||
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
num_safekeepers: int = 1,
|
||||
pageserver_auth_enabled: bool = False,
|
||||
rust_log_override: Optional[str] = None,
|
||||
default_branch_name=DEFAULT_BRANCH_NAME):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
self.port_distributor = port_distributor
|
||||
@@ -512,21 +509,20 @@ class NeonEnvBuilder:
|
||||
self.mock_s3_server = mock_s3_server
|
||||
self.pageserver_config_override = pageserver_config_override
|
||||
self.num_safekeepers = num_safekeepers
|
||||
self.safekeepers_id_start = safekeepers_id_start
|
||||
self.pageserver_auth_enabled = pageserver_auth_enabled
|
||||
self.default_branch_name = default_branch_name
|
||||
self.env: Optional[NeonEnv] = None
|
||||
self.env: Optional[ZenithEnv] = None
|
||||
|
||||
def init(self) -> NeonEnv:
|
||||
def init(self) -> ZenithEnv:
|
||||
# Cannot create more than one environment from one builder
|
||||
assert self.env is None, "environment already initialized"
|
||||
self.env = NeonEnv(self)
|
||||
self.env = ZenithEnv(self)
|
||||
return self.env
|
||||
|
||||
def start(self):
|
||||
self.env.start()
|
||||
|
||||
def init_start(self) -> NeonEnv:
|
||||
def init_start(self) -> ZenithEnv:
|
||||
env = self.init()
|
||||
self.start()
|
||||
return env
|
||||
@@ -575,12 +571,12 @@ class NeonEnvBuilder:
|
||||
self.env.pageserver.stop(immediate=True)
|
||||
|
||||
|
||||
class NeonEnv:
|
||||
class ZenithEnv:
|
||||
"""
|
||||
An object representing the Neon runtime environment. It consists of
|
||||
An object representing the Zenith runtime environment. It consists of
|
||||
the page server, 0-N safekeepers, and the compute nodes.
|
||||
|
||||
NeonEnv contains functions for stopping/starting nodes in the
|
||||
ZenithEnv contains functions for stopping/starting nodes in the
|
||||
environment, checking their status, creating tenants, connecting to the
|
||||
nodes, creating and destroying compute nodes, etc. The page server and
|
||||
the safekeepers are considered fixed in the environment, you cannot
|
||||
@@ -588,7 +584,7 @@ class NeonEnv:
|
||||
likely change in the future, as we start supporting multiple page
|
||||
servers and adding/removing safekeepers on the fly).
|
||||
|
||||
Some notable functions and fields in NeonEnv:
|
||||
Some notable functions and fields in ZenithEnv:
|
||||
|
||||
postgres - A factory object for creating postgres compute nodes.
|
||||
|
||||
@@ -602,24 +598,24 @@ class NeonEnv:
|
||||
|
||||
initial_tenant - tenant ID of the initial tenant created in the repository
|
||||
|
||||
neon_cli - can be used to run the 'neon' CLI tool
|
||||
zenith_cli - can be used to run the 'zenith' CLI tool
|
||||
|
||||
create_tenant() - initializes a new tenant in the page server, returns
|
||||
the tenant id
|
||||
"""
|
||||
def __init__(self, config: NeonEnvBuilder):
|
||||
def __init__(self, config: ZenithEnvBuilder):
|
||||
self.repo_dir = config.repo_dir
|
||||
self.rust_log_override = config.rust_log_override
|
||||
self.port_distributor = config.port_distributor
|
||||
self.s3_mock_server = config.mock_s3_server
|
||||
self.neon_cli = NeonCli(env=self)
|
||||
self.zenith_cli = ZenithCli(env=self)
|
||||
self.postgres = PostgresFactory(self)
|
||||
self.safekeepers: List[Safekeeper] = []
|
||||
self.broker = config.broker
|
||||
self.remote_storage = config.remote_storage
|
||||
self.remote_storage_users = config.remote_storage_users
|
||||
|
||||
# generate initial tenant ID here instead of letting 'neon init' generate it,
|
||||
# generate initial tenant ID here instead of letting 'zenith init' generate it,
|
||||
# so that we don't need to dig it out of the config file afterwards.
|
||||
self.initial_tenant = uuid.uuid4()
|
||||
|
||||
@@ -649,10 +645,10 @@ class NeonEnv:
|
||||
auth_type = '{pageserver_auth_type}'
|
||||
""")
|
||||
|
||||
# Create a corresponding NeonPageserver object
|
||||
self.pageserver = NeonPageserver(self,
|
||||
port=pageserver_port,
|
||||
config_override=config.pageserver_config_override)
|
||||
# Create a corresponding ZenithPageserver object
|
||||
self.pageserver = ZenithPageserver(self,
|
||||
port=pageserver_port,
|
||||
config_override=config.pageserver_config_override)
|
||||
|
||||
# Create config and a Safekeeper object for each safekeeper
|
||||
for i in range(1, config.num_safekeepers + 1):
|
||||
@@ -660,7 +656,7 @@ class NeonEnv:
|
||||
pg=self.port_distributor.get_port(),
|
||||
http=self.port_distributor.get_port(),
|
||||
)
|
||||
id = config.safekeepers_id_start + i # assign ids sequentially
|
||||
id = i # assign ids sequentially
|
||||
toml += textwrap.dedent(f"""
|
||||
[[safekeepers]]
|
||||
id = {id}
|
||||
@@ -676,7 +672,7 @@ class NeonEnv:
|
||||
self.safekeepers.append(safekeeper)
|
||||
|
||||
log.info(f"Config: {toml}")
|
||||
self.neon_cli.init(toml)
|
||||
self.zenith_cli.init(toml)
|
||||
|
||||
def start(self):
|
||||
# Start up broker, pageserver and all safekeepers
|
||||
@@ -701,10 +697,10 @@ class NeonEnv:
|
||||
def _shared_simple_env(request: Any,
|
||||
port_distributor: PortDistributor,
|
||||
mock_s3_server: MockS3Server,
|
||||
default_broker: Etcd) -> Iterator[NeonEnv]:
|
||||
default_broker: Etcd) -> Iterator[ZenithEnv]:
|
||||
"""
|
||||
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
|
||||
is set, this is shared by all tests using `neon_simple_env`.
|
||||
# Internal fixture backing the `zenith_simple_env` fixture. If TEST_SHARED_FIXTURES
|
||||
is set, this is shared by all tests using `zenith_simple_env`.
|
||||
"""
|
||||
|
||||
if os.environ.get('TEST_SHARED_FIXTURES') is None:
|
||||
@@ -715,23 +711,23 @@ def _shared_simple_env(request: Any,
|
||||
repo_dir = os.path.join(str(top_output_dir), "shared_repo")
|
||||
shutil.rmtree(repo_dir, ignore_errors=True)
|
||||
|
||||
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
|
||||
mock_s3_server) as builder:
|
||||
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
|
||||
mock_s3_server) as builder:
|
||||
env = builder.init_start()
|
||||
|
||||
# For convenience in tests, create a branch from the freshly-initialized cluster.
|
||||
env.neon_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
|
||||
env.zenith_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME)
|
||||
|
||||
yield env
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
|
||||
def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
|
||||
"""
|
||||
Simple Neon environment, with no authentication and no safekeepers.
|
||||
Simple Zenith environment, with no authentication and no safekeepers.
|
||||
|
||||
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
|
||||
environment for all tests that use 'neon_simple_env', keeping the
|
||||
environment for all tests that use 'zenith_simple_env', keeping the
|
||||
page server and safekeepers running. Any compute nodes are stopped after
|
||||
each the test, however.
|
||||
"""
|
||||
@@ -741,17 +737,17 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def neon_env_builder(test_output_dir,
|
||||
port_distributor: PortDistributor,
|
||||
mock_s3_server: MockS3Server,
|
||||
default_broker: Etcd) -> Iterator[NeonEnvBuilder]:
|
||||
def zenith_env_builder(test_output_dir,
|
||||
port_distributor: PortDistributor,
|
||||
mock_s3_server: MockS3Server,
|
||||
default_broker: Etcd) -> Iterator[ZenithEnvBuilder]:
|
||||
"""
|
||||
Fixture to create a Neon environment for test.
|
||||
Fixture to create a Zenith environment for test.
|
||||
|
||||
To use, define 'neon_env_builder' fixture in your test to get access to the
|
||||
To use, define 'zenith_env_builder' fixture in your test to get access to the
|
||||
builder object. Set properties on it to describe the environment.
|
||||
Finally, initialize and start up the environment by calling
|
||||
neon_env_builder.init_start().
|
||||
zenith_env_builder.init_start().
|
||||
|
||||
After the initialization, you can launch compute nodes by calling
|
||||
the functions in the 'env.postgres' factory object, stop/start the
|
||||
@@ -762,16 +758,16 @@ def neon_env_builder(test_output_dir,
|
||||
repo_dir = os.path.join(test_output_dir, "repo")
|
||||
|
||||
# Return the builder to the caller
|
||||
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
|
||||
mock_s3_server) as builder:
|
||||
with ZenithEnvBuilder(Path(repo_dir), port_distributor, default_broker,
|
||||
mock_s3_server) as builder:
|
||||
yield builder
|
||||
|
||||
|
||||
class NeonPageserverApiException(Exception):
|
||||
class ZenithPageserverApiException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class NeonPageserverHttpClient(requests.Session):
|
||||
class ZenithPageserverHttpClient(requests.Session):
|
||||
def __init__(self, port: int, auth_token: Optional[str] = None):
|
||||
super().__init__()
|
||||
self.port = port
|
||||
@@ -788,7 +784,7 @@ class NeonPageserverHttpClient(requests.Session):
|
||||
msg = res.json()['msg']
|
||||
except:
|
||||
msg = ''
|
||||
raise NeonPageserverApiException(msg) from e
|
||||
raise ZenithPageserverApiException(msg) from e
|
||||
|
||||
def check_status(self):
|
||||
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
|
||||
@@ -895,12 +891,12 @@ TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline
|
||||
re.MULTILINE)
|
||||
|
||||
|
||||
class NeonCli:
|
||||
class ZenithCli:
|
||||
"""
|
||||
A typed wrapper around the `neon` CLI tool.
|
||||
A typed wrapper around the `zenith` CLI tool.
|
||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||
"""
|
||||
def __init__(self, env: NeonEnv):
|
||||
def __init__(self, env: ZenithEnv):
|
||||
self.env = env
|
||||
pass
|
||||
|
||||
@@ -986,7 +982,7 @@ class NeonCli:
|
||||
created_timeline_id = matches.group('timeline_id')
|
||||
|
||||
if created_timeline_id is None:
|
||||
raise Exception('could not find timeline id after `neon timeline create` invocation')
|
||||
raise Exception('could not find timeline id after `zenith timeline create` invocation')
|
||||
else:
|
||||
return uuid.UUID(created_timeline_id)
|
||||
|
||||
@@ -1018,13 +1014,13 @@ class NeonCli:
|
||||
created_timeline_id = matches.group('timeline_id')
|
||||
|
||||
if created_timeline_id is None:
|
||||
raise Exception('could not find timeline id after `neon timeline create` invocation')
|
||||
raise Exception('could not find timeline id after `zenith timeline create` invocation')
|
||||
else:
|
||||
return uuid.UUID(created_timeline_id)
|
||||
|
||||
def list_timelines(self, tenant_id: Optional[uuid.UUID] = None) -> List[Tuple[str, str]]:
|
||||
"""
|
||||
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
|
||||
Returns a list of (branch_name, timeline_id) tuples out of parsed `zenith timeline list` CLI output.
|
||||
"""
|
||||
|
||||
# (L) main [b49f7954224a0ad25cc0013ea107b54b]
|
||||
@@ -1057,7 +1053,7 @@ class NeonCli:
|
||||
return res
|
||||
|
||||
def pageserver_enabled_features(self) -> Any:
|
||||
bin_pageserver = os.path.join(str(neon_binpath), 'pageserver')
|
||||
bin_pageserver = os.path.join(str(zenith_binpath), 'pageserver')
|
||||
args = [bin_pageserver, '--enabled-features']
|
||||
log.info('Running command "{}"'.format(' '.join(args)))
|
||||
|
||||
@@ -1097,7 +1093,7 @@ class NeonCli:
|
||||
immediate=False) -> 'subprocess.CompletedProcess[str]':
|
||||
args = ['safekeeper', 'stop']
|
||||
if id is not None:
|
||||
args.append(str(id))
|
||||
args.extend(str(id))
|
||||
if immediate:
|
||||
args.extend(['-m', 'immediate'])
|
||||
return self.raw_cli(args)
|
||||
@@ -1177,22 +1173,22 @@ class NeonCli:
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
|
||||
"""
|
||||
Run "neon" with the specified arguments.
|
||||
Run "zenith" with the specified arguments.
|
||||
|
||||
Arguments must be in list form, e.g. ['pg', 'create']
|
||||
|
||||
Return both stdout and stderr, which can be accessed as
|
||||
|
||||
>>> result = env.neon_cli.raw_cli(...)
|
||||
>>> result = env.zenith_cli.raw_cli(...)
|
||||
>>> assert result.stderr == ""
|
||||
>>> log.info(result.stdout)
|
||||
"""
|
||||
|
||||
assert type(arguments) == list
|
||||
|
||||
bin_neon = os.path.join(str(neon_binpath), 'neon_local')
|
||||
bin_zenith = os.path.join(str(zenith_binpath), 'neon_local')
|
||||
|
||||
args = [bin_neon] + arguments
|
||||
args = [bin_zenith] + arguments
|
||||
log.info('Running command "{}"'.format(' '.join(args)))
|
||||
log.info(f'Running in "{self.env.repo_dir}"')
|
||||
|
||||
@@ -1235,20 +1231,20 @@ class NeonCli:
|
||||
return res
|
||||
|
||||
|
||||
class NeonPageserver(PgProtocol):
|
||||
class ZenithPageserver(PgProtocol):
|
||||
"""
|
||||
An object representing a running pageserver.
|
||||
|
||||
Initializes the repository via `neon init`.
|
||||
Initializes the repository via `zenith init`.
|
||||
"""
|
||||
def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None):
|
||||
def __init__(self, env: ZenithEnv, port: PageserverPort, config_override: Optional[str] = None):
|
||||
super().__init__(host='localhost', port=port.pg, user='cloud_admin')
|
||||
self.env = env
|
||||
self.running = False
|
||||
self.service_port = port
|
||||
self.config_override = config_override
|
||||
|
||||
def start(self, overrides=()) -> 'NeonPageserver':
|
||||
def start(self, overrides=()) -> 'ZenithPageserver':
|
||||
"""
|
||||
Start the page server.
|
||||
`overrides` allows to add some config to this pageserver start.
|
||||
@@ -1256,17 +1252,17 @@ class NeonPageserver(PgProtocol):
|
||||
"""
|
||||
assert self.running == False
|
||||
|
||||
self.env.neon_cli.pageserver_start(overrides=overrides)
|
||||
self.env.zenith_cli.pageserver_start(overrides=overrides)
|
||||
self.running = True
|
||||
return self
|
||||
|
||||
def stop(self, immediate=False) -> 'NeonPageserver':
|
||||
def stop(self, immediate=False) -> 'ZenithPageserver':
|
||||
"""
|
||||
Stop the page server.
|
||||
Returns self.
|
||||
"""
|
||||
if self.running:
|
||||
self.env.neon_cli.pageserver_stop(immediate)
|
||||
self.env.zenith_cli.pageserver_stop(immediate)
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
@@ -1276,8 +1272,8 @@ class NeonPageserver(PgProtocol):
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
self.stop(True)
|
||||
|
||||
def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
|
||||
return NeonPageserverHttpClient(
|
||||
def http_client(self, auth_token: Optional[str] = None) -> ZenithPageserverHttpClient:
|
||||
return ZenithPageserverHttpClient(
|
||||
port=self.service_port.http,
|
||||
auth_token=auth_token,
|
||||
)
|
||||
@@ -1457,7 +1453,7 @@ def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
|
||||
yield remote_pg
|
||||
|
||||
|
||||
class NeonProxy(PgProtocol):
|
||||
class ZenithProxy(PgProtocol):
|
||||
def __init__(self, port: int):
|
||||
super().__init__(host="127.0.0.1",
|
||||
user="proxy_user",
|
||||
@@ -1473,7 +1469,7 @@ class NeonProxy(PgProtocol):
|
||||
assert self._popen is None
|
||||
|
||||
# Start proxy
|
||||
bin_proxy = os.path.join(str(neon_binpath), 'proxy')
|
||||
bin_proxy = os.path.join(str(zenith_binpath), 'proxy')
|
||||
args = [bin_proxy]
|
||||
args.extend(["--http", f"{self.host}:{self.http_port}"])
|
||||
args.extend(["--proxy", f"{self.host}:{self.port}"])
|
||||
@@ -1497,20 +1493,20 @@ class NeonProxy(PgProtocol):
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def static_proxy(vanilla_pg) -> Iterator[NeonProxy]:
|
||||
"""Neon proxy that routes directly to vanilla postgres."""
|
||||
def static_proxy(vanilla_pg) -> Iterator[ZenithProxy]:
|
||||
"""Zenith proxy that routes directly to vanilla postgres."""
|
||||
vanilla_pg.start()
|
||||
vanilla_pg.safe_psql("create user proxy_auth with password 'pytest1' superuser")
|
||||
vanilla_pg.safe_psql("create user proxy_user with password 'pytest2'")
|
||||
|
||||
with NeonProxy(4432) as proxy:
|
||||
with ZenithProxy(4432) as proxy:
|
||||
proxy.start_static()
|
||||
yield proxy
|
||||
|
||||
|
||||
class Postgres(PgProtocol):
|
||||
""" An object representing a running postgres daemon. """
|
||||
def __init__(self, env: NeonEnv, tenant_id: uuid.UUID, port: int):
|
||||
def __init__(self, env: ZenithEnv, tenant_id: uuid.UUID, port: int):
|
||||
super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres')
|
||||
self.env = env
|
||||
self.running = False
|
||||
@@ -1536,11 +1532,11 @@ class Postgres(PgProtocol):
|
||||
config_lines = []
|
||||
|
||||
self.node_name = node_name or f'{branch_name}_pg_node'
|
||||
self.env.neon_cli.pg_create(branch_name,
|
||||
node_name=self.node_name,
|
||||
tenant_id=self.tenant_id,
|
||||
lsn=lsn,
|
||||
port=self.port)
|
||||
self.env.zenith_cli.pg_create(branch_name,
|
||||
node_name=self.node_name,
|
||||
tenant_id=self.tenant_id,
|
||||
lsn=lsn,
|
||||
port=self.port)
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name
|
||||
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
|
||||
|
||||
@@ -1564,9 +1560,9 @@ class Postgres(PgProtocol):
|
||||
|
||||
log.info(f"Starting postgres node {self.node_name}")
|
||||
|
||||
run_result = self.env.neon_cli.pg_start(self.node_name,
|
||||
tenant_id=self.tenant_id,
|
||||
port=self.port)
|
||||
run_result = self.env.zenith_cli.pg_start(self.node_name,
|
||||
tenant_id=self.tenant_id,
|
||||
port=self.port)
|
||||
self.running = True
|
||||
|
||||
log.info(f"stdout: {run_result.stdout}")
|
||||
@@ -1634,7 +1630,7 @@ class Postgres(PgProtocol):
|
||||
|
||||
if self.running:
|
||||
assert self.node_name is not None
|
||||
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id)
|
||||
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id)
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
@@ -1646,7 +1642,7 @@ class Postgres(PgProtocol):
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id, True)
|
||||
self.env.zenith_cli.pg_stop(self.node_name, self.tenant_id, True)
|
||||
self.node_name = None
|
||||
self.running = False
|
||||
|
||||
@@ -1683,7 +1679,7 @@ class Postgres(PgProtocol):
|
||||
|
||||
class PostgresFactory:
|
||||
""" An object representing multiple running postgres daemons. """
|
||||
def __init__(self, env: NeonEnv):
|
||||
def __init__(self, env: ZenithEnv):
|
||||
self.env = env
|
||||
self.num_instances = 0
|
||||
self.instances: List[Postgres] = []
|
||||
@@ -1754,7 +1750,7 @@ class SafekeeperPort:
|
||||
@dataclass
|
||||
class Safekeeper:
|
||||
""" An object representing a running safekeeper daemon. """
|
||||
env: NeonEnv
|
||||
env: ZenithEnv
|
||||
port: SafekeeperPort
|
||||
id: int
|
||||
auth_token: Optional[str] = None
|
||||
@@ -1762,7 +1758,7 @@ class Safekeeper:
|
||||
|
||||
def start(self) -> 'Safekeeper':
|
||||
assert self.running == False
|
||||
self.env.neon_cli.safekeeper_start(self.id)
|
||||
self.env.zenith_cli.safekeeper_start(self.id)
|
||||
self.running = True
|
||||
# wait for wal acceptor start by checking its status
|
||||
started_at = time.time()
|
||||
@@ -1782,7 +1778,7 @@ class Safekeeper:
|
||||
|
||||
def stop(self, immediate=False) -> 'Safekeeper':
|
||||
log.info('Stopping safekeeper {}'.format(self.id))
|
||||
self.env.neon_cli.safekeeper_stop(self.id, immediate)
|
||||
self.env.zenith_cli.safekeeper_stop(self.id, immediate)
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
@@ -1970,7 +1966,7 @@ def get_test_output_dir(request: Any) -> str:
|
||||
|
||||
# This is autouse, so the test output directory always gets created, even
|
||||
# if a test doesn't put anything there. It also solves a problem with the
|
||||
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
|
||||
# zenith_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
|
||||
# creates the repo in the test output directory. But it cannot depend on
|
||||
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
|
||||
# it has 'session' scope and cannot access fixtures with 'function'
|
||||
@@ -2048,7 +2044,7 @@ def list_files_to_compare(pgdata_dir: str):
|
||||
|
||||
|
||||
# pg is the existing and running compute node, that we want to compare with a basebackup
|
||||
def check_restored_datadir_content(test_output_dir: str, env: NeonEnv, pg: Postgres):
|
||||
def check_restored_datadir_content(test_output_dir: str, env: ZenithEnv, pg: Postgres):
|
||||
|
||||
# Get the timeline ID. We need it for the 'basebackup' command
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -2138,7 +2134,7 @@ def wait_until(number_of_iterations: int, interval: int, func):
|
||||
raise Exception("timed out while waiting for %s" % func) from last_exception
|
||||
|
||||
|
||||
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
|
||||
def assert_local(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
tenant: uuid.UUID,
|
||||
timeline: uuid.UUID):
|
||||
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
|
||||
@@ -2146,7 +2142,7 @@ def assert_local(pageserver_http_client: NeonPageserverHttpClient,
|
||||
return timeline_detail
|
||||
|
||||
|
||||
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||
def remote_consistent_lsn(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
tenant: uuid.UUID,
|
||||
timeline: uuid.UUID) -> int:
|
||||
detail = pageserver_http_client.timeline_detail(tenant, timeline)
|
||||
@@ -2162,7 +2158,7 @@ def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||
return lsn_from_hex(lsn_str)
|
||||
|
||||
|
||||
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
|
||||
def wait_for_upload(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
tenant: uuid.UUID,
|
||||
timeline: uuid.UUID,
|
||||
lsn: int):
|
||||
@@ -2178,7 +2174,7 @@ def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
|
||||
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
|
||||
|
||||
|
||||
def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||
def last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
tenant: uuid.UUID,
|
||||
timeline: uuid.UUID) -> int:
|
||||
detail = pageserver_http_client.timeline_detail(tenant, timeline)
|
||||
@@ -2188,7 +2184,7 @@ def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||
return lsn_from_hex(lsn_str)
|
||||
|
||||
|
||||
def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||
def wait_for_last_record_lsn(pageserver_http_client: ZenithPageserverHttpClient,
|
||||
tenant: uuid.UUID,
|
||||
timeline: uuid.UUID,
|
||||
lsn: int):
|
||||
@@ -1,8 +1,8 @@
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
|
||||
|
||||
#
|
||||
@@ -15,8 +15,8 @@ from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
# 3. Disk space used
|
||||
# 4. Peak memory usage
|
||||
#
|
||||
def test_bulk_insert(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
def test_bulk_insert(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -2,7 +2,7 @@ import timeit
|
||||
from fixtures.benchmark_fixture import MetricReport
|
||||
import pytest
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
|
||||
# Run bulk tenant creation test.
|
||||
#
|
||||
@@ -14,21 +14,21 @@ from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
@pytest.mark.parametrize('tenants_count', [1, 5, 10])
|
||||
def test_bulk_tenant_create(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
zenith_env_builder: ZenithEnvBuilder,
|
||||
tenants_count: int,
|
||||
zenbenchmark,
|
||||
):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
time_slices = []
|
||||
|
||||
for i in range(tenants_count):
|
||||
start = timeit.default_timer()
|
||||
|
||||
tenant, _ = env.neon_cli.create_tenant()
|
||||
env.neon_cli.create_timeline(f'test_bulk_tenant_create_{tenants_count}_{i}',
|
||||
tenant_id=tenant)
|
||||
tenant, _ = env.zenith_cli.create_tenant()
|
||||
env.zenith_cli.create_timeline(f'test_bulk_tenant_create_{tenants_count}_{i}',
|
||||
tenant_id=tenant)
|
||||
|
||||
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
|
||||
#if use_safekeepers == 'with_sa':
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
from io import BufferedReader, RawIOBase
|
||||
from itertools import repeat
|
||||
|
||||
@@ -41,8 +41,8 @@ def copy_test_data(rows: int):
|
||||
#
|
||||
# COPY performance tests.
|
||||
#
|
||||
def test_copy(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
def test_copy(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the pageserver 'checkpoint' command
|
||||
with closing(env.pg.connect()) as conn:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.benchmark_fixture import MetricReport
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ from fixtures.log_helper import log
|
||||
# As of this writing, we're duplicate those giant WAL records for each page,
|
||||
# which makes the delta layer about 32x larger than it needs to be.
|
||||
#
|
||||
def test_gist_buffering_build(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
def test_gist_buffering_build(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -8,7 +8,7 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
|
||||
"env",
|
||||
[
|
||||
# The test is too slow to run in CI, but fast enough to run with remote tests
|
||||
pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("zenith_compare"), id="zenith", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
|
||||
])
|
||||
|
||||
@@ -8,7 +8,7 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
|
||||
"env",
|
||||
[
|
||||
# The test is too slow to run in CI, but fast enough to run with remote tests
|
||||
pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("zenith_compare"), id="zenith", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
|
||||
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
|
||||
])
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from io import BytesIO
|
||||
import asyncio
|
||||
import asyncpg
|
||||
from fixtures.neon_fixtures import NeonEnv, Postgres, PgProtocol
|
||||
from fixtures.zenith_fixtures import ZenithEnv, Postgres, PgProtocol
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
|
||||
|
||||
async def repeat_bytes(buf, repetitions: int):
|
||||
@@ -36,9 +36,9 @@ async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int):
|
||||
|
||||
|
||||
# Load 5 different tables in parallel with COPY TO
|
||||
def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_parallel=5):
|
||||
def test_parallel_copy_different_tables(zenith_with_baseline: PgCompare, n_parallel=5):
|
||||
|
||||
env = neon_with_baseline
|
||||
env = zenith_with_baseline
|
||||
conn = env.pg.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
@@ -65,8 +65,8 @@ async def parallel_load_same_table(pg: PgProtocol, n_parallel: int):
|
||||
|
||||
|
||||
# Load data into one table with COPY TO from 5 parallel connections
|
||||
def test_parallel_copy_same_table(neon_with_baseline: PgCompare, n_parallel=5):
|
||||
env = neon_with_baseline
|
||||
def test_parallel_copy_same_table(zenith_with_baseline: PgCompare, n_parallel=5):
|
||||
env = zenith_with_baseline
|
||||
conn = env.pg.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import PgBin, VanillaPostgres, NeonEnv, profiling_supported
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv, profiling_supported
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult, MetricReport, NeonBenchmarker
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult, MetricReport, ZenithBenchmarker
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from pathlib import Path
|
||||
@@ -99,11 +99,11 @@ def get_scales_matrix():
|
||||
return list(map(int, scales.split(",")))
|
||||
|
||||
|
||||
# Run the pgbench tests against vanilla Postgres and neon
|
||||
# Run the pgbench tests against vanilla Postgres and zenith
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(neon_with_baseline, scale, duration)
|
||||
def test_pgbench(zenith_with_baseline: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(zenith_with_baseline, scale, duration)
|
||||
|
||||
|
||||
# Run the pgbench tests, and generate a flamegraph from it
|
||||
@@ -114,18 +114,18 @@ def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int):
|
||||
# can see how much overhead the profiling adds.
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
def test_pgbench_flamegraph(zenbenchmark, pg_bin, neon_env_builder, scale: int, duration: int):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
neon_env_builder.pageserver_config_override = '''
|
||||
def test_pgbench_flamegraph(zenbenchmark, pg_bin, zenith_env_builder, scale: int, duration: int):
|
||||
zenith_env_builder.num_safekeepers = 1
|
||||
zenith_env_builder.pageserver_config_override = '''
|
||||
profiling="page_requests"
|
||||
'''
|
||||
if not profiling_supported():
|
||||
pytest.skip("pageserver was built without 'profiling' feature")
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("empty", "main")
|
||||
env = zenith_env_builder.init_start()
|
||||
env.zenith_cli.create_branch("empty", "main")
|
||||
|
||||
run_test_pgbench(NeonCompare(zenbenchmark, env, pg_bin, "pgbench"), scale, duration)
|
||||
run_test_pgbench(ZenithCompare(zenbenchmark, env, pg_bin, "pgbench"), scale, duration)
|
||||
|
||||
|
||||
# Run the pgbench tests against an existing Postgres cluster
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.benchmark_fixture import MetricReport
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
from fixtures.log_helper import log
|
||||
|
||||
import psycopg2.extras
|
||||
@@ -16,14 +16,14 @@ import time
|
||||
# A naive pageserver implementation would create a full image layer for each
|
||||
# dirty segment, leading to write_amplification = segment_size / page_size,
|
||||
# when compared to vanilla postgres. With segment_size = 10MB, that's 1250.
|
||||
def test_random_writes(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
def test_random_writes(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
|
||||
# Number of rows in the test database. 1M rows runs quickly, but implies
|
||||
# a small effective_checkpoint_distance, which makes the test less realistic.
|
||||
# Using a 300 TB database would imply a 250 MB effective_checkpoint_distance,
|
||||
# but it will take a very long time to run. From what I've seen so far,
|
||||
# increasing n_rows doesn't have impact on the (neon_runtime / vanilla_runtime)
|
||||
# increasing n_rows doesn't have impact on the (zenith_runtime / vanilla_runtime)
|
||||
# performance ratio.
|
||||
n_rows = 1 * 1000 * 1000 # around 36 MB table
|
||||
|
||||
@@ -65,7 +65,7 @@ def test_random_writes(neon_with_baseline: PgCompare):
|
||||
env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)
|
||||
|
||||
# Decide how much to write, based on knowledge of pageserver implementation.
|
||||
# Avoiding segment collisions maximizes (neon_runtime / vanilla_runtime).
|
||||
# Avoiding segment collisions maximizes (zenith_runtime / vanilla_runtime).
|
||||
segment_size = 10 * 1024 * 1024
|
||||
n_segments = table_size // segment_size
|
||||
n_writes = load_factor * n_segments // 3
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
#
|
||||
from contextlib import closing
|
||||
from dataclasses import dataclass
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
from fixtures.compare_fixtures import PgCompare
|
||||
import pytest
|
||||
|
||||
@@ -20,8 +20,8 @@ import pytest
|
||||
pytest.param(10000000, 1, 0),
|
||||
pytest.param(10000000, 1, 4)
|
||||
])
|
||||
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
|
||||
env = neon_with_baseline
|
||||
def test_seqscans(zenith_with_baseline: PgCompare, rows: int, iters: int, workers: int):
|
||||
env = zenith_with_baseline
|
||||
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import pytest
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.benchmark_fixture import NeonBenchmarker
|
||||
from fixtures.zenith_fixtures import ZenithEnvBuilder
|
||||
from fixtures.benchmark_fixture import ZenithBenchmarker
|
||||
|
||||
|
||||
# This test sometimes runs for longer than the global 5 minute timeout.
|
||||
@pytest.mark.timeout(600)
|
||||
def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
def test_startup(zenith_env_builder: ZenithEnvBuilder, zenbenchmark: ZenithBenchmarker):
|
||||
zenith_env_builder.num_safekeepers = 3
|
||||
env = zenith_env_builder.init_start()
|
||||
|
||||
# Start
|
||||
env.neon_cli.create_branch('test_startup')
|
||||
env.zenith_cli.create_branch('test_startup')
|
||||
with zenbenchmark.record_duration("startup_time"):
|
||||
pg = env.postgres.create_start('test_startup')
|
||||
pg.safe_psql("select 1;")
|
||||
|
||||
@@ -13,13 +13,13 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.benchmark_fixture import MetricReport
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_write_amplification(neon_with_baseline: PgCompare):
|
||||
env = neon_with_baseline
|
||||
def test_write_amplification(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
import time
|
||||
from pathlib import Path
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
# TODO this function just generates timeline.zip. Make a function that checks
|
||||
# whether the timeline can be loaded, and separate this "generate timeline.zip"
|
||||
# functionality elsewhere.
|
||||
def test_snapshot_pageserver(neon_env_builder: NeonEnvBuilder, test_output_dir):
|
||||
# 81920 is the minimum compaction_target size. I choose a larger one to get some images.
|
||||
neon_env_builder.pageserver_config_override = """
|
||||
tenant_config={checkpoint_distance = 3, compaction_target_size = 81920, compaction_period = "1 s"}"""
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch("test_snapshot_pageserver", "main")
|
||||
pg = env.postgres.create_start("test_snapshot_pageserver")
|
||||
|
||||
tenant = env.initial_tenant.hex
|
||||
timeline = pg.safe_psql("SHOW neon.timeline_id")[0][0]
|
||||
|
||||
pg.safe_psql("CREATE TABLE foo(i int);")
|
||||
for i in range(100):
|
||||
pg.safe_psql(f"INSERT INTO foo VALUES ({i});")
|
||||
# Sleep so compaction kicks in (every second)
|
||||
# TODO trigger manual compaction instead?
|
||||
time.sleep(0.1)
|
||||
|
||||
# make some hot pages so we get image layers
|
||||
for k in range(6):
|
||||
pg.safe_psql(f"UPDATE foo SET i = i + 1")
|
||||
# Sleep so compaction kicks in (every second)
|
||||
time.sleep(0.3)
|
||||
|
||||
# One more checkpoint
|
||||
psconn = env.pageserver.safe_psql(f"checkpoint {tenant} {timeline}")
|
||||
|
||||
# Zip the timeline
|
||||
# TODO check it contains L0, L1, at least one image
|
||||
path = Path(test_output_dir) / "repo" / "tenants" / tenant / "timelines" / timeline
|
||||
shutil.make_archive("timeline", "zip", path)
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
import os
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
"""
|
||||
Use this test to see what happens when tests fail.
|
||||
@@ -18,10 +18,10 @@ run_broken = pytest.mark.skipif(os.environ.get('RUN_BROKEN') is None,
|
||||
|
||||
|
||||
@run_broken
|
||||
def test_broken(neon_simple_env: NeonEnv, pg_bin):
|
||||
env = neon_simple_env
|
||||
def test_broken(zenith_simple_env: ZenithEnv, pg_bin):
|
||||
env = zenith_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_broken", "empty")
|
||||
env.zenith_cli.create_branch("test_broken", "empty")
|
||||
env.postgres.create_start("test_broken")
|
||||
log.info('postgres is running')
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
To add a new SQL test
|
||||
|
||||
- add sql script to run to neon_regress/sql/testname.sql
|
||||
- add expected output to neon_regress/expected/testname.out
|
||||
- add sql script to run to zenith_regress/sql/testname.sql
|
||||
- add expected output to zenith_regress/expected/testname.out
|
||||
- add testname to parallel_schedule
|
||||
|
||||
That's it.
|
||||
@@ -4,7 +4,7 @@
|
||||
# number of connections needed to run the tests.
|
||||
# ----------
|
||||
|
||||
test: neon-cid
|
||||
test: neon-rel-truncate
|
||||
test: neon-clog
|
||||
test: neon-vacuum-full
|
||||
test: zenith-cid
|
||||
test: zenith-rel-truncate
|
||||
test: zenith-clog
|
||||
test: zenith-vacuum-full
|
||||
Reference in New Issue
Block a user