Compare commits


1 Commit

Author SHA1 Message Date
John Spray
2fe454238d pageserver: warn on unexpected refs to Tenant after shutdown
This should help to validate that our shutdown logic is working
the way we expect.
2023-08-03 15:44:14 +01:00
41 changed files with 580 additions and 730 deletions

View File

@@ -794,7 +794,7 @@ jobs:
run:
shell: sh -eu {0}
env:
VM_BUILDER_VERSION: v0.15.0-alpha1
VM_BUILDER_VERSION: v0.13.1
steps:
- name: Checkout

View File

@@ -29,13 +29,13 @@ See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more informati
```bash
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \
libcurl4-openssl-dev openssl python-poetry
libcurl4-openssl-dev
```
* On Fedora, these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
protobuf-devel libcurl-devel openssl poetry
protobuf-devel libcurl-devel
```
* On Arch based systems, these packages are needed:
```bash
@@ -235,13 +235,6 @@ CARGO_BUILD_FLAGS="--features=testing" make
./scripts/pytest
```
By default, this runs both debug and release modes, and all supported postgres versions. When
testing locally, it is convenient to run just one set of permutations, like this:
```sh
DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
```
## Documentation
[docs](/docs) Contains a top-level overview of all available markdown documentation.

View File

@@ -107,25 +107,23 @@ async fn get_holes(path: &Path, max_holes: usize) -> Result<Vec<Hole>> {
// min-heap (reserve space for one more element added before eviction)
let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);
let mut prev_key: Option<Key> = None;
tree_reader
.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, _value| {
let curr = Key::from_slice(&key[..KEY_SIZE]);
if let Some(prev) = prev_key {
if curr.to_i128() - prev.to_i128() >= MIN_HOLE_LENGTH {
heap.push(Hole(prev..curr));
if heap.len() > max_holes {
heap.pop(); // remove smallest hole
}
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, _value| {
let curr = Key::from_slice(&key[..KEY_SIZE]);
if let Some(prev) = prev_key {
if curr.to_i128() - prev.to_i128() >= MIN_HOLE_LENGTH {
heap.push(Hole(prev..curr));
if heap.len() > max_holes {
heap.pop(); // remove smallest hole
}
}
prev_key = Some(curr.next());
true
},
)
.await?;
}
prev_key = Some(curr.next());
true
},
)?;
let mut holes = heap.into_vec();
holes.sort_by_key(|hole| hole.0.start);
Ok(holes)
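The hunk above keeps only the largest gaps by capping a heap: every candidate hole is pushed, and once the heap exceeds `max_holes` the smallest element is popped (so `Hole`'s ordering presumably ranks holes by length). A minimal standalone sketch of that bounded-heap idea, using plain integers in place of `Hole` and `std::cmp::Reverse` to get min-heap behavior out of Rust's max-heap:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Keep the `k` largest values from a stream, evicting the smallest
/// retained value whenever the heap grows past `k`.
fn k_largest(values: impl IntoIterator<Item = u64>, k: usize) -> Vec<u64> {
    // Reserve one extra slot, mirroring the `max_holes + 1` capacity above:
    // an element is pushed before the eviction check runs.
    let mut heap: BinaryHeap<Reverse<u64>> = BinaryHeap::with_capacity(k + 1);
    for v in values {
        heap.push(Reverse(v));
        if heap.len() > k {
            heap.pop(); // drop the smallest of the k+1 values
        }
    }
    heap.into_iter().map(|Reverse(v)| v).collect()
}

fn main() {
    let mut top = k_largest([5, 1, 9, 3, 7, 2], 3);
    top.sort_unstable();
    assert_eq!(top, vec![5, 7, 9]);
}
```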

View File

@@ -59,17 +59,15 @@ async fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
);
// TODO(chi): dedup w/ `delta_layer.rs` by exposing the API.
let mut all = vec![];
tree_reader
.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value_offset| {
let curr = Key::from_slice(&key[..KEY_SIZE]);
all.push((curr, BlobRef(value_offset)));
true
},
)
.await?;
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value_offset| {
let curr = Key::from_slice(&key[..KEY_SIZE]);
all.push((curr, BlobRef(value_offset)));
true
},
)?;
let cursor = BlockCursor::new(&file);
for (k, v) in all {
let value = cursor.read_blob(v.pos())?;
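`read_delta_file` works in two phases: the visitor only records `(key, BlobRef)` pairs, and the blobs are read afterwards through a `BlockCursor`. One plausible reason for the split is to keep blob IO (and the cursor's borrow of the file) out of the traversal callback. A toy sketch of that collect-then-resolve shape, with stand-in types rather than the pageserver API:

```rust
/// Phase 1: walk an index and record value references only.
/// Phase 2: resolve each reference with a reader afterwards.
fn collect_then_read(index: &[(u32, u64)], read_blob: impl Fn(u64) -> Vec<u8>) {
    let mut all: Vec<(u32, u64)> = Vec::new();
    for &(key, offset) in index {
        all.push((key, offset)); // cheap bookkeeping inside the walk
    }
    for (key, offset) in all {
        let value = read_blob(offset); // IO happens after the walk
        println!("key {key}: {} bytes", value.len());
    }
}

fn main() {
    let index = [(1u32, 0u64), (2, 8)];
    collect_then_read(&index, |offset| vec![0u8; 8 + offset as usize]);
}
```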

View File

@@ -47,50 +47,24 @@ pub use crate::metrics::preinitialize_metrics;
#[tracing::instrument]
pub async fn shutdown_pageserver(exit_code: i32) {
use std::time::Duration;
// Shut down the libpq endpoint task. This prevents new connections from
// being accepted.
timed(
task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None),
"shutdown LibpqEndpointListener",
Duration::from_secs(1),
)
.await;
task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None).await;
// Shut down any page service tasks.
timed(
task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None),
"shutdown PageRequestHandlers",
Duration::from_secs(1),
)
.await;
task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None).await;
// Shut down all the tenants. This flushes everything to disk and kills
// the checkpoint and GC tasks.
timed(
tenant::mgr::shutdown_all_tenants(),
"shutdown all tenants",
Duration::from_secs(5),
)
.await;
tenant::mgr::shutdown_all_tenants().await;
// Shut down the HTTP endpoint last, so that you can still check the server's
// status while it's shutting down.
// FIXME: We should probably stop accepting commands like attach/detach earlier.
timed(
task_mgr::shutdown_tasks(Some(TaskKind::HttpEndpointListener), None, None),
"shutdown http",
Duration::from_secs(1),
)
.await;
task_mgr::shutdown_tasks(Some(TaskKind::HttpEndpointListener), None, None).await;
// There should be nothing left, but let's be sure
timed(
task_mgr::shutdown_tasks(None, None, None),
"shutdown leftovers",
Duration::from_secs(1),
)
.await;
task_mgr::shutdown_tasks(None, None, None).await;
info!("Shut down successfully completed");
std::process::exit(exit_code);
}
@@ -198,45 +172,6 @@ pub struct InitializationOrder {
pub background_jobs_can_start: utils::completion::Barrier,
}
/// Time the future with a warning when it exceeds a threshold.
async fn timed<Fut: std::future::Future>(
fut: Fut,
name: &str,
warn_at: std::time::Duration,
) -> <Fut as std::future::Future>::Output {
let started = std::time::Instant::now();
let mut fut = std::pin::pin!(fut);
match tokio::time::timeout(warn_at, &mut fut).await {
Ok(ret) => {
tracing::info!(
task = name,
elapsed_ms = started.elapsed().as_millis(),
"completed"
);
ret
}
Err(_) => {
tracing::info!(
task = name,
elapsed_ms = started.elapsed().as_millis(),
"still waiting, taking longer than expected..."
);
let ret = fut.await;
tracing::warn!(
task = name,
elapsed_ms = started.elapsed().as_millis(),
"completed, took longer than expected"
);
ret
}
}
}
#[cfg(test)]
mod backoff_defaults_tests {
use super::*;
@@ -267,36 +202,3 @@ mod backoff_defaults_tests {
);
}
}
#[cfg(test)]
mod timed_tests {
use super::timed;
use std::time::Duration;
#[tokio::test]
async fn timed_completes_when_inner_future_completes() {
// A future that completes on time should have its result returned
let r1 = timed(
async move {
tokio::time::sleep(Duration::from_millis(10)).await;
123
},
"test 1",
Duration::from_millis(50),
)
.await;
assert_eq!(r1, 123);
// A future that completes too slowly should also have its result returned
let r1 = timed(
async move {
tokio::time::sleep(Duration::from_millis(50)).await;
456
},
"test 1",
Duration::from_millis(10),
)
.await;
assert_eq!(r1, 456);
}
}
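The `timed` helper above races a future against `tokio::time::timeout` and, when the deadline fires first, keeps awaiting the same future. Pinning with `std::pin::pin!` is what makes that possible: `timeout` can then poll the future through `&mut fut` instead of consuming it. A minimal standalone sketch of the pattern:

```rust
use std::time::Duration;

#[tokio::main]
async fn main() {
    let fut = async {
        tokio::time::sleep(Duration::from_millis(50)).await;
        42
    };
    // Pin the future so `timeout` can poll it via `&mut fut`, leaving it
    // resumable if the deadline fires before the future completes.
    let mut fut = std::pin::pin!(fut);
    let v = match tokio::time::timeout(Duration::from_millis(10), &mut fut).await {
        Ok(v) => v,
        Err(_elapsed) => {
            eprintln!("still waiting, taking longer than expected...");
            fut.await // resume the same future to completion
        }
    };
    assert_eq!(v, 42);
}
```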

View File

@@ -230,15 +230,14 @@ where
///
/// Read the value for given key. Returns the value, or None if it doesn't exist.
///
pub async fn get(&self, search_key: &[u8; L]) -> Result<Option<u64>> {
pub fn get(&self, search_key: &[u8; L]) -> Result<Option<u64>> {
let mut result: Option<u64> = None;
self.visit(search_key, VisitDirection::Forwards, |key, value| {
if key == search_key {
result = Some(value);
}
false
})
.await?;
})?;
Ok(result)
}
@@ -247,7 +246,7 @@ where
/// will be called for every key >= 'search_key' (or <= 'search_key', if scanning
/// backwards)
///
pub async fn visit<V>(
pub fn visit<V>(
&self,
search_key: &[u8; L],
dir: VisitDirection,
@@ -270,9 +269,23 @@ where
V: FnMut(&[u8], u64) -> bool,
{
// Locate the node.
let node_buf = self.reader.read_blk(self.start_blk + node_blknum)?;
let blk = self.reader.read_blk(self.start_blk + node_blknum)?;
let node = OnDiskNode::deparse(node_buf.as_ref())?;
// Search all entries on this node
self.search_node(blk.as_ref(), search_key, dir, visitor)
}
fn search_node<V>(
&self,
node_buf: &[u8],
search_key: &[u8; L],
dir: VisitDirection,
visitor: &mut V,
) -> Result<bool>
where
V: FnMut(&[u8], u64) -> bool,
{
let node = OnDiskNode::deparse(node_buf)?;
let prefix_len = node.prefix_len as usize;
let suffix_len = node.suffix_len as usize;
@@ -769,12 +782,12 @@ mod tests {
// Test the `get` function on all the keys.
for (key, val) in all_data.iter() {
assert_eq!(reader.get(key).await?, Some(*val));
assert_eq!(reader.get(key)?, Some(*val));
}
// And on some keys that don't exist
assert_eq!(reader.get(b"aaaaaa").await?, None);
assert_eq!(reader.get(b"zzzzzz").await?, None);
assert_eq!(reader.get(b"xaaabx").await?, None);
assert_eq!(reader.get(b"aaaaaa")?, None);
assert_eq!(reader.get(b"zzzzzz")?, None);
assert_eq!(reader.get(b"xaaabx")?, None);
// Test search with `visit` function
let search_key = b"xabaaa";
@@ -785,12 +798,10 @@ mod tests {
.collect();
let mut data = Vec::new();
reader
.visit(search_key, VisitDirection::Forwards, |key, value| {
data.push((key.to_vec(), value));
true
})
.await?;
reader.visit(search_key, VisitDirection::Forwards, |key, value| {
data.push((key.to_vec(), value));
true
})?;
assert_eq!(data, expected);
// Test a backwards scan
@@ -801,20 +812,16 @@ mod tests {
.collect();
expected.reverse();
let mut data = Vec::new();
reader
.visit(search_key, VisitDirection::Backwards, |key, value| {
data.push((key.to_vec(), value));
true
})
.await?;
reader.visit(search_key, VisitDirection::Backwards, |key, value| {
data.push((key.to_vec(), value));
true
})?;
assert_eq!(data, expected);
// Backward scan where nothing matches
reader
.visit(b"aaaaaa", VisitDirection::Backwards, |key, value| {
panic!("found unexpected key {}: {}", hex::encode(key), value);
})
.await?;
reader.visit(b"aaaaaa", VisitDirection::Backwards, |key, value| {
panic!("found unexpected key {}: {}", hex::encode(key), value);
})?;
// Full scan
let expected: Vec<(Vec<u8>, u64)> = all_data
@@ -822,12 +829,10 @@ mod tests {
.map(|(key, value)| (key.to_vec(), *value))
.collect();
let mut data = Vec::new();
reader
.visit(&[0u8; 6], VisitDirection::Forwards, |key, value| {
data.push((key.to_vec(), value));
true
})
.await?;
reader.visit(&[0u8; 6], VisitDirection::Forwards, |key, value| {
data.push((key.to_vec(), value));
true
})?;
assert_eq!(data, expected);
Ok(())
@@ -875,15 +880,13 @@ mod tests {
for search_key_int in 0..(NUM_KEYS * 2 + 10) {
let search_key = u64::to_be_bytes(search_key_int);
assert_eq!(
reader.get(&search_key).await?,
reader.get(&search_key)?,
all_data.get(&search_key_int).cloned()
);
// Test a forward scan starting with this key
result.lock().unwrap().clear();
reader
.visit(&search_key, VisitDirection::Forwards, take_ten)
.await?;
reader.visit(&search_key, VisitDirection::Forwards, take_ten)?;
let expected = all_data
.range(search_key_int..)
.take(10)
@@ -893,9 +896,7 @@ mod tests {
// And a backwards scan
result.lock().unwrap().clear();
reader
.visit(&search_key, VisitDirection::Backwards, take_ten)
.await?;
reader.visit(&search_key, VisitDirection::Backwards, take_ten)?;
let expected = all_data
.range(..=search_key_int)
.rev()
@@ -909,9 +910,7 @@ mod tests {
let search_key = u64::to_be_bytes(0);
limit.store(usize::MAX, Ordering::Relaxed);
result.lock().unwrap().clear();
reader
.visit(&search_key, VisitDirection::Forwards, take_ten)
.await?;
reader.visit(&search_key, VisitDirection::Forwards, take_ten)?;
let expected = all_data
.iter()
.map(|(&key, &val)| (key, val))
@@ -922,9 +921,7 @@ mod tests {
let search_key = u64::to_be_bytes(u64::MAX);
limit.store(usize::MAX, Ordering::Relaxed);
result.lock().unwrap().clear();
reader
.visit(&search_key, VisitDirection::Backwards, take_ten)
.await?;
reader.visit(&search_key, VisitDirection::Backwards, take_ten)?;
let expected = all_data
.iter()
.rev()
@@ -935,8 +932,8 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn random_data() -> Result<()> {
#[test]
fn random_data() -> Result<()> {
// Generate random keys with exponential distribution, to
// exercise the prefix compression
const NUM_KEYS: usize = 100000;
@@ -963,23 +960,19 @@ mod tests {
// Test get() operation on all the keys
for (&key, &val) in all_data.iter() {
let search_key = u128::to_be_bytes(key);
assert_eq!(reader.get(&search_key).await?, Some(val));
assert_eq!(reader.get(&search_key)?, Some(val));
}
// Test get() operations on random keys, most of which will not exist
for _ in 0..100000 {
let key_int = rand::thread_rng().gen::<u128>();
let search_key = u128::to_be_bytes(key_int);
assert!(reader.get(&search_key).await? == all_data.get(&key_int).cloned());
assert!(reader.get(&search_key)? == all_data.get(&key_int).cloned());
}
// Test boundary cases
assert!(
reader.get(&u128::to_be_bytes(u128::MIN)).await? == all_data.get(&u128::MIN).cloned()
);
assert!(
reader.get(&u128::to_be_bytes(u128::MAX)).await? == all_data.get(&u128::MAX).cloned()
);
assert!(reader.get(&u128::to_be_bytes(u128::MIN))? == all_data.get(&u128::MIN).cloned());
assert!(reader.get(&u128::to_be_bytes(u128::MAX))? == all_data.get(&u128::MAX).cloned());
Ok(())
}
@@ -1021,17 +1014,15 @@ mod tests {
// Test get() operation on all the keys
for (key, val) in disk_btree_test_data::TEST_DATA {
assert_eq!(reader.get(&key).await?, Some(val));
assert_eq!(reader.get(&key)?, Some(val));
}
// Test full scan
let mut count = 0;
reader
.visit(&[0u8; 26], VisitDirection::Forwards, |_key, _value| {
count += 1;
true
})
.await?;
reader.visit(&[0u8; 26], VisitDirection::Forwards, |_key, _value| {
count += 1;
true
})?;
assert_eq!(count, disk_btree_test_data::TEST_DATA.len());
reader.dump().await?;
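The `get`/`visit` relationship in this file is worth spelling out: `get` is just a forward scan that starts at `search_key`, checks whether the first visited key is an exact match, and returns `false` to stop immediately. A toy stand-in for that contract, using a sorted slice instead of an on-disk B-tree (the visitor returns `true` to continue, `false` to stop):

```rust
/// Scan entries with key >= `search_key`, stopping when the visitor
/// returns false. Stand-in for DiskBtreeReader::visit in this sketch.
fn visit_forwards(
    sorted: &[([u8; 6], u64)],
    search_key: &[u8; 6],
    mut visitor: impl FnMut(&[u8; 6], u64) -> bool,
) {
    for &(key, value) in sorted {
        if &key < search_key {
            continue;
        }
        if !visitor(&key, value) {
            break;
        }
    }
}

/// Point lookup built on the range scan: the first visited key is the
/// smallest key >= search_key, so it alone decides the result.
fn get_via_visit(sorted: &[([u8; 6], u64)], search_key: &[u8; 6]) -> Option<u64> {
    let mut result = None;
    visit_forwards(sorted, search_key, |key, value| {
        if key == search_key {
            result = Some(value);
        }
        false // stop after one entry either way
    });
    result
}

fn main() {
    let data = [(*b"aaaaaa", 1), (*b"xabaaa", 2)];
    assert_eq!(get_via_visit(&data, b"xabaaa"), Some(2));
    assert_eq!(get_via_visit(&data, b"bbbbbb"), None);
}
```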

View File

@@ -266,77 +266,81 @@ async fn shutdown_all_tenants0(tenants: &tokio::sync::RwLock<TenantsMap>) {
}
};
let started_at = std::time::Instant::now();
let mut join_set = JoinSet::new();
for (tenant_id, tenant) in tenants_to_shut_down {
join_set.spawn(
async move {
let freeze_and_flush = true;
// ordering shouldn't matter for this, either we store true right away or never
let ordering = std::sync::atomic::Ordering::Relaxed;
let joined_other = std::sync::atomic::AtomicBool::new(false);
let res = {
let (_guard, shutdown_progress) = completion::channel();
tenant.shutdown(shutdown_progress, freeze_and_flush).await
let tenant_weak = Arc::downgrade(&tenant);
let mut shutdown = std::pin::pin!(async {
let freeze_and_flush = true;
let res = {
let (_guard, shutdown_progress) = completion::channel();
// Always safe to upgrade this weak ptr, because the outer code block
// holds an Arc to it past the point it awaits this async block.
tenant_weak.upgrade().unwrap().shutdown(shutdown_progress, freeze_and_flush).await
};
if let Err(other_progress) = res {
// join the other shutdown in progress
joined_other.store(true, ordering);
other_progress.wait().await;
}
});
// in practice we might not have a lot of time to go, since systemd is going to
// SIGKILL us at 10s, but we can try. Deleting a tenant might take a while, so put out
// a warning.
let warning = std::time::Duration::from_secs(5);
let mut warning = std::pin::pin!(tokio::time::sleep(warning));
tokio::select! {
_ = &mut shutdown => {},
_ = &mut warning => {
let joined_other = joined_other.load(ordering);
warn!(%joined_other, "waiting for the shutdown to complete");
shutdown.await;
}
};
if let Err(other_progress) = res {
// join the other shutdown in progress
other_progress.wait().await;
// Shutdown should have stopped all the background tasks that referenced
// the tenant: when we drop our ref it should drop the Tenant. If that isn't
// the case it's a bug.
if Arc::into_inner(tenant).is_none() {
warn!("Tenant still has references after shutdown");
}
// we cannot afford per tenant logging here, because if s3 is degraded, we are
// going to log too many lines
debug!("tenant successfully stopped");
}
.instrument(info_span!("shutdown", %tenant_id)),
);
}
let total = join_set.len();
let mut panicked = 0;
let mut buffering = true;
const BUFFER_FOR: std::time::Duration = std::time::Duration::from_millis(500);
let mut buffered = std::pin::pin!(tokio::time::sleep(BUFFER_FOR));
while !join_set.is_empty() {
tokio::select! {
Some(joined) = join_set.join_next() => {
match joined {
Ok(()) => {}
Err(join_error) if join_error.is_cancelled() => {
unreachable!("we are not cancelling any of the futures");
}
Err(join_error) if join_error.is_panic() => {
// cannot really do anything, as this panic is likely a bug
panicked += 1;
}
Err(join_error) => {
warn!("unknown kind of JoinError: {join_error}");
}
}
if !buffering {
// buffer so that every 500ms since the first update (or starting) we'll log
// how far away we are; this is because we will get SIGKILL'd at 10s, and we
// are not able to log *then*.
buffering = true;
buffered.as_mut().reset(tokio::time::Instant::now() + BUFFER_FOR);
}
},
_ = &mut buffered, if buffering => {
buffering = false;
info!(remaining = join_set.len(), total, elapsed_ms = started_at.elapsed().as_millis(), "waiting for tenants to shutdown");
while let Some(res) = join_set.join_next().await {
match res {
Ok(()) => {}
Err(join_error) if join_error.is_cancelled() => {
unreachable!("we are not cancelling any of the futures");
}
Err(join_error) if join_error.is_panic() => {
// cannot really do anything, as this panic is likely a bug
panicked += 1;
}
Err(join_error) => {
warn!("unknown kind of JoinError: {join_error}");
}
}
}
if panicked > 0 {
warn!(
panicked,
total, "observed panicks while shutting down tenants"
);
warn!(panicked, "observed panicks while shutting down tenants");
}
// caller will log how long we took
}
pub async fn create_tenant(
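The core of the new check sits in the spawn body above: each shutdown task is meant to hold the only remaining strong `Arc<Tenant>` (the async block itself goes through a `Weak`), so once shutdown returns, `Arc::into_inner` should succeed. Getting `None` back means some background task still holds a reference, which is exactly the leak the commit message says it wants to surface. A minimal sketch of that last-reference test, with a placeholder `Tenant`:

```rust
use std::sync::Arc;

struct Tenant; // placeholder for the real type

fn check_last_ref(tenant: Arc<Tenant>) {
    // `Arc::into_inner` returns Some only when this is the last strong
    // reference; None means another holder survived shutdown.
    if Arc::into_inner(tenant).is_none() {
        eprintln!("Tenant still has references after shutdown");
    }
}

fn main() {
    // Clean case: the Arc we pass in is the only strong reference.
    check_last_ref(Arc::new(Tenant));

    // Leaky case: a simulated background task kept a clone alive.
    let tenant = Arc::new(Tenant);
    let leaked = Arc::clone(&tenant);
    check_last_ref(tenant); // prints the warning
    drop(leaked);
}
```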

View File

@@ -281,24 +281,22 @@ impl Layer for DeltaLayer {
Ok(desc)
};
tree_reader
.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|delta_key, val| {
let blob_ref = BlobRef(val);
let key = DeltaKey::extract_key_from_buf(delta_key);
let lsn = DeltaKey::extract_lsn_from_buf(delta_key);
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|delta_key, val| {
let blob_ref = BlobRef(val);
let key = DeltaKey::extract_key_from_buf(delta_key);
let lsn = DeltaKey::extract_lsn_from_buf(delta_key);
let desc = match dump_blob(blob_ref) {
Ok(desc) => desc,
Err(err) => format!("ERROR: {}", err),
};
println!(" key {} at {}: {}", key, lsn, desc);
true
},
)
.await?;
let desc = match dump_blob(blob_ref) {
Ok(desc) => desc,
Err(err) => format!("ERROR: {}", err),
};
println!(" key {} at {}: {}", key, lsn, desc);
true
},
)?;
Ok(())
}
@@ -330,21 +328,19 @@ impl Layer for DeltaLayer {
let mut offsets: Vec<(Lsn, u64)> = Vec::new();
tree_reader
.visit(&search_key.0, VisitDirection::Backwards, |key, value| {
let blob_ref = BlobRef(value);
if key[..KEY_SIZE] != search_key.0[..KEY_SIZE] {
return false;
}
let entry_lsn = DeltaKey::extract_lsn_from_buf(key);
if entry_lsn < lsn_range.start {
return false;
}
offsets.push((entry_lsn, blob_ref.pos()));
tree_reader.visit(&search_key.0, VisitDirection::Backwards, |key, value| {
let blob_ref = BlobRef(value);
if key[..KEY_SIZE] != search_key.0[..KEY_SIZE] {
return false;
}
let entry_lsn = DeltaKey::extract_lsn_from_buf(key);
if entry_lsn < lsn_range.start {
return false;
}
offsets.push((entry_lsn, blob_ref.pos()));
!blob_ref.will_init()
})
.await?;
!blob_ref.will_init()
})?;
// Ok, 'offsets' now contains the offsets of all the entries we need to read
let cursor = file.block_cursor();
@@ -618,23 +614,19 @@ impl DeltaLayer {
/// Obtains all keys and value references stored in the layer
///
/// The value can be obtained via the [`ValueRef::load`] function.
pub async fn load_val_refs(&self, ctx: &RequestContext) -> Result<Vec<(Key, Lsn, ValueRef)>> {
pub fn load_val_refs(&self, ctx: &RequestContext) -> Result<Vec<(Key, Lsn, ValueRef)>> {
let inner = self
.load(LayerAccessKind::KeyIter, ctx)
.context("load delta layer")?;
DeltaLayerInner::load_val_refs(inner)
.await
.context("Layer index is corrupted")
DeltaLayerInner::load_val_refs(inner).context("Layer index is corrupted")
}
/// Loads all keys stored in the layer. Returns key, lsn and value size.
pub async fn load_keys(&self, ctx: &RequestContext) -> Result<Vec<(Key, Lsn, u64)>> {
pub fn load_keys(&self, ctx: &RequestContext) -> Result<Vec<(Key, Lsn, u64)>> {
let inner = self
.load(LayerAccessKind::KeyIter, ctx)
.context("load delta layer keys")?;
DeltaLayerInner::load_keys(inner)
.await
.context("Layer index is corrupted")
inner.load_keys().context("Layer index is corrupted")
}
}
@@ -907,7 +899,7 @@ impl Drop for DeltaLayerWriter {
}
impl DeltaLayerInner {
async fn load_val_refs(this: &Arc<DeltaLayerInner>) -> Result<Vec<(Key, Lsn, ValueRef)>> {
fn load_val_refs(this: &Arc<DeltaLayerInner>) -> Result<Vec<(Key, Lsn, ValueRef)>> {
let file = &this.file;
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
this.index_start_blk,
@@ -916,25 +908,23 @@ impl DeltaLayerInner {
);
let mut all_offsets = Vec::<(Key, Lsn, ValueRef)>::new();
tree_reader
.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value| {
let delta_key = DeltaKey::from_slice(key);
let val_ref = ValueRef {
blob_ref: BlobRef(value),
reader: BlockCursor::new(Adapter(this.clone())),
};
all_offsets.push((delta_key.key(), delta_key.lsn(), val_ref));
true
},
)
.await?;
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value| {
let delta_key = DeltaKey::from_slice(key);
let val_ref = ValueRef {
blob_ref: BlobRef(value),
reader: BlockCursor::new(Adapter(this.clone())),
};
all_offsets.push((delta_key.key(), delta_key.lsn(), val_ref));
true
},
)?;
Ok(all_offsets)
}
async fn load_keys(&self) -> Result<Vec<(Key, Lsn, u64)>> {
fn load_keys(&self) -> Result<Vec<(Key, Lsn, u64)>> {
let file = &self.file;
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
self.index_start_blk,
@@ -943,28 +933,26 @@ impl DeltaLayerInner {
);
let mut all_keys: Vec<(Key, Lsn, u64)> = Vec::new();
tree_reader
.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value| {
let delta_key = DeltaKey::from_slice(key);
let pos = BlobRef(value).pos();
if let Some(last) = all_keys.last_mut() {
if last.0 == delta_key.key() {
return true;
} else {
// subtract offset of new key BLOB and first blob of this key
// to get total size of values associated with this key
let first_pos = last.2;
last.2 = pos - first_pos;
}
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value| {
let delta_key = DeltaKey::from_slice(key);
let pos = BlobRef(value).pos();
if let Some(last) = all_keys.last_mut() {
if last.0 == delta_key.key() {
return true;
} else {
// subtract offset of new key BLOB and first blob of this key
// to get total size of values associated with this key
let first_pos = last.2;
last.2 = pos - first_pos;
}
all_keys.push((delta_key.key(), delta_key.lsn(), pos));
true
},
)
.await?;
}
all_keys.push((delta_key.key(), delta_key.lsn(), pos));
true
},
)?;
if let Some(last) = all_keys.last_mut() {
// Last key occupies all space till end of layer
last.2 = std::fs::metadata(&file.file.path)?.len() - last.2;
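The bookkeeping in `load_keys` above is easy to misread: values are written back-to-back, so the space used by all values of one key is the distance from that key's first blob offset to the next key's first blob offset, with the last key running to the end of the file. A toy version of that arithmetic, with plain strings and offsets standing in for `DeltaKey`/`BlobRef`:

```rust
/// Given each key's first blob offset (in file order) and the file
/// length, compute the total bytes occupied by each key's values.
fn sizes_from_offsets(first_offsets: &[(&str, u64)], file_len: u64) -> Vec<(String, u64)> {
    let mut out = Vec::new();
    for (i, &(key, off)) in first_offsets.iter().enumerate() {
        // Next key's first offset bounds this key; the last key runs
        // to the end of the file.
        let end = first_offsets
            .get(i + 1)
            .map(|&(_, next)| next)
            .unwrap_or(file_len);
        out.push((key.to_string(), end - off));
    }
    out
}

fn main() {
    let offsets = [("a", 0), ("b", 100), ("c", 160)];
    assert_eq!(
        sizes_from_offsets(&offsets, 200),
        vec![
            ("a".to_string(), 100),
            ("b".to_string(), 60),
            ("c".to_string(), 40)
        ]
    );
}
```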

View File

@@ -175,12 +175,10 @@ impl Layer for ImageLayer {
tree_reader.dump().await?;
tree_reader
.visit(&[0u8; KEY_SIZE], VisitDirection::Forwards, |key, value| {
println!("key: {} offset {}", hex::encode(key), value);
true
})
.await?;
tree_reader.visit(&[0u8; KEY_SIZE], VisitDirection::Forwards, |key, value| {
println!("key: {} offset {}", hex::encode(key), value);
true
})?;
Ok(())
}
@@ -204,7 +202,7 @@ impl Layer for ImageLayer {
let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
key.write_to_byte_slice(&mut keybuf);
if let Some(offset) = tree_reader.get(&keybuf).await? {
if let Some(offset) = tree_reader.get(&keybuf)? {
let blob = file.block_cursor().read_blob(offset).with_context(|| {
format!(
"failed to read value from data file {} at offset {}",

View File

@@ -73,13 +73,17 @@ pub fn start_background_loops(
///
async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
let wait_duration = Duration::from_secs(2);
info!("starting");
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
async {
let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download);
let mut first = true;
loop {
trace!("waking up");
tokio::select! {
_ = cancel.cancelled() => {
info!("received cancellation request");
return;
},
tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
@@ -99,6 +103,11 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
}
}
if cancel.is_cancelled() {
info!("received cancellation request");
break;
}
let started_at = Instant::now();
let sleep_duration = if period == Duration::ZERO {
@@ -122,12 +131,15 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
.await
.is_ok()
{
info!("received cancellation request during idling");
break;
}
}
}
.await;
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
trace!("compaction loop stopped.");
}
///
@@ -135,6 +147,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
///
async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
let wait_duration = Duration::from_secs(2);
info!("starting");
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
async {
// GC might require downloading, to find the cutoff LSN that corresponds to the
@@ -143,8 +156,11 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
let mut first = true;
loop {
trace!("waking up");
tokio::select! {
_ = cancel.cancelled() => {
info!("received cancellation request");
return;
},
tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
@@ -189,12 +205,14 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
.await
.is_ok()
{
info!("received cancellation request during idling");
break;
}
}
}
.await;
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
trace!("GC loop stopped.");
}
async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
@@ -219,6 +237,7 @@ async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
}
}
Err(_sender_dropped_error) => {
info!("Tenant dropped the state updates sender, quitting waiting for tenant and the task loop");
return ControlFlow::Break(());
}
}
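Both loops above share one shape: each iteration `tokio::select!`s the cancellation token against the actual work (and checks it again around the idle sleep), so the task reacts to shutdown promptly instead of at the next scheduled wakeup. A minimal sketch of that loop, assuming the same `tokio-util` `CancellationToken` used above:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

async fn background_loop(cancel: CancellationToken) {
    loop {
        tokio::select! {
            _ = cancel.cancelled() => {
                println!("received cancellation request");
                return;
            }
            _ = tokio::time::sleep(Duration::from_secs(2)) => {
                // one round of periodic work goes here
                println!("waking up");
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let task = tokio::spawn(background_loop(cancel.clone()));
    cancel.cancel(); // request shutdown; the loop exits on its next poll
    task.await.unwrap();
}
```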

View File

@@ -19,7 +19,6 @@ use pageserver_api::models::{
use remote_storage::GenericRemoteStorage;
use serde_with::serde_as;
use storage_broker::BrokerClientChannel;
use tokio::runtime::Handle;
use tokio::sync::{oneshot, watch, TryAcquireError};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -3519,41 +3518,16 @@ impl Timeline {
// min-heap (reserve space for one more element added before eviction)
let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);
let mut prev: Option<Key> = None;
let mut all_value_refs = Vec::new();
for l in deltas_to_compact.iter() {
// TODO: replace this with an await once we fully go async
all_value_refs.extend(
Handle::current().block_on(
l.clone()
.downcast_delta_layer()
.expect("delta layer")
.load_val_refs(ctx),
)?,
);
}
// The current stdlib sorting implementation is designed in a way where it is
// particularly fast when the slice is made up of sorted sub-ranges.
all_value_refs.sort_by_key(|(key, _lsn, _value_ref)| *key);
let mut all_keys = Vec::new();
for l in deltas_to_compact.iter() {
// TODO: replace this with an await once we fully go async
all_keys.extend(
Handle::current().block_on(
l.clone()
.downcast_delta_layer()
.expect("delta layer")
.load_keys(ctx),
)?,
);
}
// The current stdlib sorting implementation is designed in a way where it is
// particularly fast when the slice is made up of sorted sub-ranges.
all_keys.sort_by_key(|(key, _lsn, _size)| *key);
for (next_key, _next_lsn, _size) in all_keys.iter() {
let next_key = *next_key;
for (next_key, _next_lsn, _size) in itertools::process_results(
deltas_to_compact.iter().map(|l| -> Result<_> {
Ok(l.clone()
.downcast_delta_layer()
.expect("delta layer")
.load_keys(ctx)?
.into_iter())
}),
|iter_iter| iter_iter.kmerge_by(|a, b| a.0 < b.0),
)? {
if let Some(prev_key) = prev {
// just first fast filter
if next_key.to_i128() - prev_key.to_i128() >= min_hole_range {
@@ -3586,10 +3560,40 @@ impl Timeline {
// This iterator walks through all key-value pairs from all the layers
// we're compacting, in key, LSN order.
let all_values_iter = all_value_refs.into_iter();
let all_values_iter = itertools::process_results(
deltas_to_compact.iter().map(|l| -> Result<_> {
Ok(l.clone()
.downcast_delta_layer()
.expect("delta layer")
.load_val_refs(ctx)?
.into_iter())
}),
|iter_iter| {
iter_iter.kmerge_by(|a, b| {
let (a_key, a_lsn, _) = a;
let (b_key, b_lsn, _) = b;
(a_key, a_lsn) < (b_key, b_lsn)
})
},
)?;
// This iterator walks through all keys and is needed to calculate size used by each key
let mut all_keys_iter = all_keys.into_iter();
let mut all_keys_iter = itertools::process_results(
deltas_to_compact.iter().map(|l| -> Result<_> {
Ok(l.clone()
.downcast_delta_layer()
.expect("delta layer")
.load_keys(ctx)?
.into_iter())
}),
|iter_iter| {
iter_iter.kmerge_by(|a, b| {
let (a_key, a_lsn, _) = a;
let (b_key, b_lsn, _) = b;
(a_key, a_lsn) < (b_key, b_lsn)
})
},
)?;
stats.prepare_iterators_micros = stats.read_lock_drop_micros.till_now();
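The replacement above swaps "load everything, then sort" for a lazy k-way merge: each delta layer yields tuples already sorted by `(key, lsn)`, and `itertools`' `kmerge_by` merges the per-layer iterators into one globally ordered stream, while `process_results` lets any per-layer `Result` short-circuit the whole merge. A small sketch of the merge itself, with integer pairs standing in for `(Key, Lsn)`:

```rust
use itertools::Itertools;

fn main() {
    // Two "layers", each already sorted by (key, lsn).
    let layer_a = vec![(1, 10), (3, 5), (3, 9)];
    let layer_b = vec![(2, 1), (3, 7)];

    // kmerge_by lazily picks the smallest head among all iterators, so
    // no single big Vec has to be materialized and re-sorted.
    let merged: Vec<_> = [layer_a, layer_b]
        .into_iter()
        .map(|layer| layer.into_iter())
        .kmerge_by(|a, b| (a.0, a.1) < (b.0, b.1))
        .collect();

    assert_eq!(merged, vec![(1, 10), (2, 1), (3, 5), (3, 7), (3, 9)]);
}
```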

View File

@@ -78,6 +78,9 @@ impl Timeline {
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
scopeguard::defer! {
info!("eviction task finishing");
}
use crate::tenant::tasks::random_init_delay;
{
let policy = self.get_eviction_policy();
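`scopeguard::defer!` registers a block that runs when the enclosing scope exits on any path, including panics, which is why it is a reliable place for the "task finishing" log above. A minimal sketch, assuming the same `scopeguard` crate:

```rust
fn main() {
    scopeguard::defer! {
        // Runs on every exit path from main, including unwinding panics.
        println!("eviction task finishing");
    }
    println!("doing the actual work");
} // "eviction task finishing" prints here
```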

View File

@@ -79,10 +79,6 @@ struct Args {
/// Listen http endpoint for management and metrics in the form host:port.
#[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)]
listen_http: String,
/// Advertised endpoint for receiving/sending WAL in the form host:port. If not
/// specified, listen_pg is used to advertise instead.
#[arg(long, default_value = None)]
advertise_pg: Option<String>,
/// Availability zone of the safekeeper.
#[arg(long)]
availability_zone: Option<String>,
@@ -189,7 +185,6 @@ async fn main() -> anyhow::Result<()> {
listen_pg_addr: args.listen_pg,
listen_pg_addr_tenant_only: args.listen_pg_tenant_only,
listen_http_addr: args.listen_http,
advertise_pg_addr: args.advertise_pg,
availability_zone: args.availability_zone,
no_sync: args.no_sync,
broker_endpoint: args.broker_endpoint,

View File

@@ -55,7 +55,6 @@ pub struct SafeKeeperConf {
pub listen_pg_addr: String,
pub listen_pg_addr_tenant_only: Option<String>,
pub listen_http_addr: String,
pub advertise_pg_addr: Option<String>,
pub availability_zone: Option<String>,
pub no_sync: bool,
pub broker_endpoint: Uri,
@@ -89,7 +88,6 @@ impl SafeKeeperConf {
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_pg_addr_tenant_only: None,
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
advertise_pg_addr: None,
availability_zone: None,
remote_storage: None,
my_id: NodeId(0),

View File

@@ -237,10 +237,7 @@ impl SharedState {
commit_lsn: self.sk.inmem.commit_lsn.0,
remote_consistent_lsn: remote_consistent_lsn.0,
peer_horizon_lsn: self.sk.inmem.peer_horizon_lsn.0,
safekeeper_connstr: conf
.advertise_pg_addr
.to_owned()
.unwrap_or(conf.listen_pg_addr.clone()),
safekeeper_connstr: conf.listen_pg_addr.clone(),
backup_lsn: self.sk.inmem.backup_lsn.0,
local_start_lsn: self.sk.state.local_start_lsn.0,
availability_zone: conf.availability_zone.clone(),

View File

@@ -1,60 +0,0 @@
import subprocess
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional
from fixtures.log_helper import log
@dataclass
class NeonBroker:
"""An object managing storage_broker instance"""
logfile: Path
port: int
neon_binpath: Path
handle: Optional[subprocess.Popen[Any]] = None # handle of running daemon
def listen_addr(self):
return f"127.0.0.1:{self.port}"
def client_url(self):
return f"http://{self.listen_addr()}"
def check_status(self):
return True # TODO
def try_start(self):
if self.handle is not None:
log.debug(f"storage_broker is already running on port {self.port}")
return
listen_addr = self.listen_addr()
log.info(f'starting storage_broker to listen incoming connections at "{listen_addr}"')
with open(self.logfile, "wb") as logfile:
args = [
str(self.neon_binpath / "storage_broker"),
f"--listen-addr={listen_addr}",
]
self.handle = subprocess.Popen(args, stdout=logfile, stderr=logfile)
# wait for start
started_at = time.time()
while True:
try:
self.check_status()
except Exception as e:
elapsed = time.time() - started_at
if elapsed > 5:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for storage_broker start: {e}"
) from e
time.sleep(0.5)
else:
break # success
def stop(self):
if self.handle is not None:
self.handle.terminate()
self.handle.wait()

View File

@@ -2,11 +2,13 @@ from __future__ import annotations
import abc
import asyncio
import enum
import filecmp
import json
import os
import re
import shutil
import socket
import subprocess
import tempfile
import textwrap
@@ -15,11 +17,12 @@ import uuid
from contextlib import closing, contextmanager
from dataclasses import dataclass, field
from datetime import datetime
from enum import Flag, auto
from functools import cached_property
from itertools import chain, product
from pathlib import Path
from types import TracebackType
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, cast
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union, cast
from urllib.parse import urlparse
import asyncpg
@@ -39,21 +42,10 @@ from psycopg2.extensions import cursor as PgCursor
from psycopg2.extensions import make_dsn, parse_dsn
from typing_extensions import Literal
from fixtures.broker import NeonBroker
from fixtures.log_helper import log
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import (
LocalFsStorage,
MockS3Server,
RemoteStorage,
RemoteStorageKind,
RemoteStorageUsers,
S3Storage,
remote_storage_to_toml_inline_table,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import (
ATTACHMENT_NAME_REGEX,
@@ -228,6 +220,77 @@ def get_dir_size(path: str) -> int:
return totalbytes
def can_bind(host: str, port: int) -> bool:
"""
Check whether a host:port is available to bind for listening
Inspired by the can_bind() perl function used in Postgres tests, in
vendor/postgres-v14/src/test/perl/PostgresNode.pm
"""
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
# TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
# moment. If that changes, we should start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/neondatabase/neon/issues/801
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((host, port))
sock.listen()
return True
except socket.error:
log.info(f"Port {port} is in use, skipping")
return False
finally:
sock.close()
class PortDistributor:
def __init__(self, base_port: int, port_number: int):
self.iterator = iter(range(base_port, base_port + port_number))
self.port_map: Dict[int, int] = {}
def get_port(self) -> int:
for port in self.iterator:
if can_bind("localhost", port):
return port
raise RuntimeError(
"port range configured for test is exhausted, consider enlarging the range"
)
def replace_with_new_port(self, value: Union[int, str]) -> Union[int, str]:
"""
Returns a new port for a port number in a string (like "localhost:1234") or int.
Replacements are memorised, so a substitution for the same port is always the same.
"""
# TODO: replace with structural pattern matching for Python >= 3.10
if isinstance(value, int):
return self._replace_port_int(value)
if isinstance(value, str):
return self._replace_port_str(value)
raise TypeError(f"unsupported type {type(value)} of {value=}")
def _replace_port_int(self, value: int) -> int:
known_port = self.port_map.get(value)
if known_port is None:
known_port = self.port_map[value] = self.get_port()
return known_port
def _replace_port_str(self, value: str) -> str:
# Use regex to find port in a string
# urllib.parse.urlparse produces inconvenient results for cases without scheme like "localhost:5432"
# See https://bugs.python.org/issue27657
ports = re.findall(r":(\d+)(?:/|$)", value)
assert len(ports) == 1, f"can't find port in {value}"
port_int = int(ports[0])
return value.replace(f":{port_int}", f":{self._replace_port_int(port_int)}")
@pytest.fixture(scope="session")
def port_distributor(worker_base_port: int) -> PortDistributor:
return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM)
@@ -401,6 +464,140 @@ class AuthKeys:
return self.generate_token(scope="tenant", tenant_id=str(tenant_id))
class MockS3Server:
"""
Starts a mock S3 server for testing on the given port; errors if the server fails to start or exits prematurely.
Relies on `poetry` and the `moto` server being installed, since that is how the tests are run.
Also provides methods to derive the connection properties from, and a method to kill the underlying server.
"""
def __init__(
self,
port: int,
):
self.port = port
# XXX: do not use `shell=True` or add `exec ` to the command here otherwise.
# We use `self.subprocess.kill()` to shut down the server, which would not "just" work in Linux
# if a process is started from the shell process.
self.subprocess = subprocess.Popen(["poetry", "run", "moto_server", "s3", f"-p{port}"])
error = None
try:
return_code = self.subprocess.poll()
if return_code is not None:
error = f"expected mock s3 server to run but it exited with code {return_code}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
except Exception as e:
error = f"expected mock s3 server to start but it failed with exception: {e}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
if error is not None:
log.error(error)
self.kill()
raise RuntimeError("failed to start s3 mock server")
def endpoint(self) -> str:
return f"http://127.0.0.1:{self.port}"
def region(self) -> str:
return "us-east-1"
def access_key(self) -> str:
return "test"
def secret_key(self) -> str:
return "test"
def kill(self):
self.subprocess.kill()
@enum.unique
class RemoteStorageKind(str, enum.Enum):
LOCAL_FS = "local_fs"
MOCK_S3 = "mock_s3"
REAL_S3 = "real_s3"
# Pass to tests that are generic to remote storage
# to ensure the tests pass with or without the remote storage
NOOP = "noop"
def available_remote_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
def available_s3_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
@dataclass
class LocalFsStorage:
root: Path
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
endpoint: Optional[str] = None
prefix_in_bucket: Optional[str] = ""
def access_env_vars(self) -> Dict[str, str]:
return {
"AWS_ACCESS_KEY_ID": self.access_key,
"AWS_SECRET_ACCESS_KEY": self.secret_key,
}
def to_string(self) -> str:
return json.dumps(
{
"bucket": self.bucket_name,
"region": self.bucket_region,
"endpoint": self.endpoint,
"prefix": self.prefix_in_bucket,
}
)
RemoteStorage = Union[LocalFsStorage, S3Storage]
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
if isinstance(remote_storage, LocalFsStorage):
remote_storage_config = f"local_path='{remote_storage.root}'"
elif isinstance(remote_storage, S3Storage):
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
bucket_region='{remote_storage.bucket_region}'"
if remote_storage.prefix_in_bucket is not None:
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
if remote_storage.endpoint is not None:
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
else:
raise Exception("invalid remote storage type")
return f"{{{remote_storage_config}}}"
class RemoteStorageUsers(Flag):
PAGESERVER = auto()
SAFEKEEPER = auto()
class NeonEnvBuilder:
"""
Builder object to create a Neon runtime environment
@@ -1398,11 +1595,7 @@ class NeonCli(AbstractNeonCli):
if endpoint_id is not None:
args.append(endpoint_id)
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
res = self.raw_cli(args, extra_env_vars=s3_env_vars)
res = self.raw_cli(args)
res.check_returncode()
return res
@@ -1529,7 +1722,6 @@ class NeonPageserver(PgProtocol):
".*Compaction failed, retrying in .*: queue is in state Stopped.*",
# Pageserver timeline deletion should be polled until it gets 404, so ignore it globally
".*Error processing HTTP request: NotFound: Timeline .* was not found",
".*took more than expected to complete.*",
]
def start(
@@ -2703,6 +2895,59 @@ class SafekeeperHttpClient(requests.Session):
return metrics
@dataclass
class NeonBroker:
"""An object managing storage_broker instance"""
logfile: Path
port: int
neon_binpath: Path
handle: Optional[subprocess.Popen[Any]] = None # handle of running daemon
def listen_addr(self):
return f"127.0.0.1:{self.port}"
def client_url(self):
return f"http://{self.listen_addr()}"
def check_status(self):
return True # TODO
def try_start(self):
if self.handle is not None:
log.debug(f"storage_broker is already running on port {self.port}")
return
listen_addr = self.listen_addr()
log.info(f'starting storage_broker to listen incoming connections at "{listen_addr}"')
with open(self.logfile, "wb") as logfile:
args = [
str(self.neon_binpath / "storage_broker"),
f"--listen-addr={listen_addr}",
]
self.handle = subprocess.Popen(args, stdout=logfile, stderr=logfile)
# wait for start
started_at = time.time()
while True:
try:
self.check_status()
except Exception as e:
elapsed = time.time() - started_at
if elapsed > 5:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for storage_broker start: {e}"
) from e
time.sleep(0.5)
else:
break # success
def stop(self):
if self.handle is not None:
self.handle.terminate()
self.handle.wait()
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
"""Compute the working directory for an individual test."""
test_name = request.node.name
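`can_bind` probes availability by actually binding and listening on the port, and `PortDistributor` walks a fixed range, memoizing replacements so the same old port always maps to the same new one. A rough Rust equivalent of the probe, under the same caveat about `SO_REUSEADDR` noted in the fixture's comment:

```rust
use std::net::TcpListener;

/// Try to bind (and listen) on host:port; success means the port was
/// free at that moment. Like the Python fixture, this does not set
/// SO_REUSEADDR, so recently closed ports may still report as busy.
fn can_bind(host: &str, port: u16) -> bool {
    TcpListener::bind((host, port)).is_ok()
}

fn main() {
    for port in 15000..15010 {
        if can_bind("127.0.0.1", port) {
            println!("first free port in range: {port}");
            break;
        }
    }
}
```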

View File

@@ -1,77 +0,0 @@
import re
import socket
from contextlib import closing
from typing import Dict, Union
from fixtures.log_helper import log
def can_bind(host: str, port: int) -> bool:
"""
Check whether a host:port is available to bind for listening
Inspired by the can_bind() perl function used in Postgres tests, in
vendor/postgres-v14/src/test/perl/PostgresNode.pm
"""
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
# TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
# moment. If that changes, we should start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/neondatabase/neon/issues/801
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((host, port))
sock.listen()
return True
except socket.error:
log.info(f"Port {port} is in use, skipping")
return False
finally:
sock.close()
class PortDistributor:
def __init__(self, base_port: int, port_number: int):
self.iterator = iter(range(base_port, base_port + port_number))
self.port_map: Dict[int, int] = {}
def get_port(self) -> int:
for port in self.iterator:
if can_bind("localhost", port):
return port
raise RuntimeError(
"port range configured for test is exhausted, consider enlarging the range"
)
def replace_with_new_port(self, value: Union[int, str]) -> Union[int, str]:
"""
Returns a new port for a port number in a string (like "localhost:1234") or int.
Replacements are memorised, so a substitution for the same port is always the same.
"""
# TODO: replace with structural pattern matching for Python >= 3.10
if isinstance(value, int):
return self._replace_port_int(value)
if isinstance(value, str):
return self._replace_port_str(value)
raise TypeError(f"unsupported type {type(value)} of {value=}")
def _replace_port_int(self, value: int) -> int:
known_port = self.port_map.get(value)
if known_port is None:
known_port = self.port_map[value] = self.get_port()
return known_port
def _replace_port_str(self, value: str) -> str:
# Use regex to find port in a string
# urllib.parse.urlparse produces inconvenient results for cases without scheme like "localhost:5432"
# See https://bugs.python.org/issue27657
ports = re.findall(r":(\d+)(?:/|$)", value)
assert len(ports) == 1, f"can't find port in {value}"
port_int = int(ports[0])
return value.replace(f":{port_int}", f":{self._replace_port_int(port_int)}")

View File

@@ -1,143 +0,0 @@
import enum
import json
import os
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Union
from fixtures.log_helper import log
class MockS3Server:
"""
Starts a mock S3 server for testing on the given port; errors if the server fails to start or exits prematurely.
Relies on `poetry` and the `moto` server being installed, since that is how the tests are run.
Also provides methods to derive the connection properties from, and a method to kill the underlying server.
"""
def __init__(
self,
port: int,
):
self.port = port
# XXX: do not use `shell=True` or add `exec ` to the command here otherwise.
# We use `self.subprocess.kill()` to shut down the server, which would not "just" work in Linux
# if a process is started from the shell process.
self.subprocess = subprocess.Popen(["poetry", "run", "moto_server", "s3", f"-p{port}"])
error = None
try:
return_code = self.subprocess.poll()
if return_code is not None:
error = f"expected mock s3 server to run but it exited with code {return_code}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
except Exception as e:
error = f"expected mock s3 server to start but it failed with exception: {e}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
if error is not None:
log.error(error)
self.kill()
raise RuntimeError("failed to start s3 mock server")
def endpoint(self) -> str:
return f"http://127.0.0.1:{self.port}"
def region(self) -> str:
return "us-east-1"
def access_key(self) -> str:
return "test"
def secret_key(self) -> str:
return "test"
def kill(self):
self.subprocess.kill()
@enum.unique
class RemoteStorageKind(str, enum.Enum):
LOCAL_FS = "local_fs"
MOCK_S3 = "mock_s3"
REAL_S3 = "real_s3"
# Pass to tests that are generic to remote storage
# to ensure the tests pass with or without the remote storage
NOOP = "noop"
def available_remote_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
def available_s3_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
@dataclass
class LocalFsStorage:
root: Path
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
endpoint: Optional[str] = None
prefix_in_bucket: Optional[str] = ""
def access_env_vars(self) -> Dict[str, str]:
return {
"AWS_ACCESS_KEY_ID": self.access_key,
"AWS_SECRET_ACCESS_KEY": self.secret_key,
}
def to_string(self) -> str:
return json.dumps(
{
"bucket": self.bucket_name,
"region": self.bucket_region,
"endpoint": self.endpoint,
"prefix": self.prefix_in_bucket,
}
)
RemoteStorage = Union[LocalFsStorage, S3Storage]
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
if isinstance(remote_storage, LocalFsStorage):
remote_storage_config = f"local_path='{remote_storage.root}'"
elif isinstance(remote_storage, S3Storage):
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
bucket_region='{remote_storage.bucket_region}'"
if remote_storage.prefix_in_bucket is not None:
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
if remote_storage.endpoint is not None:
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
else:
raise Exception("invalid remote storage type")
return f"{{{remote_storage_config}}}"
class RemoteStorageUsers(enum.Flag):
PAGESERVER = enum.auto()
SAFEKEEPER = enum.auto()

View File

@@ -3,11 +3,12 @@ from typing import Generator, Optional
import pytest
from fixtures.neon_fixtures import (
LocalFsStorage,
NeonEnv,
NeonEnvBuilder,
RemoteStorageKind,
)
from fixtures.pageserver.http import PageserverApiException, TenantConfig
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.types import TenantId
from fixtures.utils import wait_until

View File

@@ -13,6 +13,7 @@ from fixtures.neon_fixtures import (
NeonCli,
NeonEnvBuilder,
PgBin,
PortDistributor,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
@@ -21,7 +22,6 @@ from fixtures.pageserver.utils import (
wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.types import Lsn
from pytest import FixtureRequest
@@ -337,7 +337,6 @@ def check_neon_works(
config.pg_version = pg_version
config.initial_tenant = snapshot_config["default_tenant_id"]
config.pg_distrib_dir = pg_distrib_dir
config.remote_storage = None
# Use the "target" binaries to launch the storage nodes
config_target = config

View File

@@ -7,14 +7,15 @@ import pytest
import toml
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LocalFsStorage,
NeonEnv,
NeonEnvBuilder,
PgBin,
RemoteStorageKind,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import wait_for_upload_queue_empty
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until

View File

@@ -8,9 +8,10 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
RemoteStorageKind,
available_s3_storages,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import RemoteStorageKind, available_s3_storages
# Cleaning up downloaded files is important for local tests

View File

@@ -5,9 +5,9 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
PortDistributor,
VanillaPostgres,
)
from fixtures.port_distributor import PortDistributor
from fixtures.types import Lsn, TimelineId
from fixtures.utils import query_scalar, subprocess_capture

View File

@@ -8,9 +8,9 @@ from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
RemoteStorageKind,
wait_for_last_flush_lsn,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from fixtures.utils import query_scalar

View File

@@ -4,10 +4,10 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
RemoteStorageKind,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar

View File

@@ -13,11 +13,11 @@ from fixtures.neon_fixtures import (
PSQL,
NeonEnvBuilder,
NeonProxy,
PortDistributor,
RemoteStorageKind,
VanillaPostgres,
wait_for_last_flush_lsn,
)
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from fixtures.utils import query_scalar
from pytest_httpserver import HTTPServer

View File

@@ -1,5 +1,4 @@
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.port_distributor import PortDistributor
from fixtures.neon_fixtures import NeonEnvBuilder, PortDistributor
# Test that neon cli is able to start and stop all processes with the user defaults.

View File

@@ -10,6 +10,8 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
RemoteStorageKind,
available_remote_storages,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
@@ -21,7 +23,6 @@ from fixtures.pageserver.utils import (
wait_for_upload_queue_empty,
wait_until_tenant_state,
)
from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
from fixtures.types import Lsn
from fixtures.utils import query_scalar, wait_until

View File

@@ -12,7 +12,10 @@ from typing import Dict, List, Optional, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LocalFsStorage,
NeonEnvBuilder,
RemoteStorageKind,
available_remote_storages,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
@@ -23,11 +26,6 @@ from fixtures.pageserver.utils import (
wait_until_tenant_active,
wait_until_tenant_state,
)
from fixtures.remote_storage import (
LocalFsStorage,
RemoteStorageKind,
available_remote_storages,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import print_gc_result, query_scalar, wait_until
from requests import ReadTimeout

View File

@@ -6,8 +6,7 @@ from typing import Optional, Type
import backoff
from fixtures.log_helper import log
from fixtures.neon_fixtures import PgProtocol, VanillaPostgres
from fixtures.port_distributor import PortDistributor
from fixtures.neon_fixtures import PgProtocol, PortDistributor, VanillaPostgres
def generate_tls_cert(cn, certout, keyout):

View File

@@ -4,10 +4,11 @@ from contextlib import closing
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LocalFsStorage,
NeonEnvBuilder,
RemoteStorageKind,
)
from fixtures.pageserver.utils import assert_tenant_state, wait_for_upload
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.types import Lsn
from fixtures.utils import wait_until

View File

@@ -11,6 +11,8 @@ from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
RemoteStorageKind,
available_remote_storages,
)
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
from fixtures.pageserver.utils import (
@@ -18,7 +20,6 @@ from fixtures.pageserver.utils import (
wait_for_upload,
wait_until_tenant_state,
)
from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, wait_until
from prometheus_client.samples import Sample

View File

@@ -7,12 +7,15 @@ from pathlib import Path
from typing import Any, Dict, Optional, Tuple
import pytest
from fixtures.broker import NeonBroker
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonBroker,
NeonEnv,
NeonEnvBuilder,
PortDistributor,
RemoteStorageKind,
available_remote_storages,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
@@ -21,8 +24,6 @@ from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import (
query_scalar,

View File

@@ -18,9 +18,10 @@ from fixtures.metrics import (
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
RemoteStorageKind,
available_remote_storages,
)
from fixtures.pageserver.utils import timeline_delete_wait_completed
from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until
from prometheus_client.samples import Sample

View File

@@ -16,8 +16,11 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
LocalFsStorage,
NeonEnv,
NeonEnvBuilder,
RemoteStorageKind,
available_remote_storages,
last_flush_lsn_upload,
)
from fixtures.pageserver.utils import (
@@ -25,11 +28,6 @@ from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.remote_storage import (
LocalFsStorage,
RemoteStorageKind,
available_remote_storages,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, wait_until

View File

@@ -6,10 +6,10 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
RemoteStorageKind,
last_flush_lsn_upload,
)
from fixtures.pageserver.http import LayerMapInfo
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TimelineId
from pytest_httpserver import HTTPServer

View File

@@ -13,6 +13,9 @@ from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
RemoteStorageKind,
S3Storage,
available_remote_storages,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
@@ -25,11 +28,6 @@ from fixtures.pageserver.utils import (
wait_until_tenant_active,
wait_until_timeline_state,
)
from fixtures.remote_storage import (
RemoteStorageKind,
S3Storage,
available_remote_storages,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, wait_until

View File

@@ -16,6 +16,8 @@ from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
PortDistributor,
RemoteStorageKind,
VanillaPostgres,
wait_for_last_flush_lsn,
)
@@ -27,8 +29,6 @@ from fixtures.pageserver.utils import (
wait_until_tenant_active,
)
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from fixtures.utils import get_timeline_dir_size, wait_until

View File

@@ -15,18 +15,22 @@ from typing import Any, List, Optional
import psycopg2
import pytest
from fixtures.broker import NeonBroker
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonBroker,
NeonEnv,
NeonEnvBuilder,
NeonPageserver,
PgBin,
PgProtocol,
PortDistributor,
RemoteStorageKind,
RemoteStorageUsers,
Safekeeper,
SafekeeperHttpClient,
SafekeeperPort,
available_remote_storages,
)
from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
@@ -34,12 +38,6 @@ from fixtures.pageserver.utils import (
wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import (
RemoteStorageKind,
RemoteStorageUsers,
available_remote_storages,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import get_dir_size, query_scalar, start_in_background

View File

@@ -5,9 +5,9 @@ import pytest
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
PortDistributor,
VanillaPostgres,
)
from fixtures.port_distributor import PortDistributor
from fixtures.types import TenantId, TimelineId