mirror of
https://github.com/neondatabase/neon.git
Compare commits: release-co ... gc_feedbac (62 commits)
Commits in this comparison (SHA1):
9ea17556b3, a5db639a2f, 64aa8e5c6b, 6a1f2c8b71, a10ba532dd, f783596825, 4b2b175db8, 3902868e68,
1ffca3eadb, b499ade206, 0cd01e33c3, 95dd5c71bf, c2731e17a9, a7cf926aeb, ffdf7df2ea, 74ab232afb,
2cf02b381c, 6d687c198b, 755166e275, 7d4ebf8485, 9b9b125d13, 4d76c2916e, 199771371c, 43187715d6,
1e63fc99db, 7b58f82f7b, 2af45505b8, 3c5b99b4b9, 8b05a87f75, 3bc4a7c1e2, 3d0a51567f, 7e6dbc32d1,
f12f6b0275, 0fbd85f64b, af75d59b4c, 4618739cb3, f838a11514, f0fe03ea80, be22be7b24, 451479305e,
5e690307fb, 3e6288d7d8, 2d015a1464, 0785d92577, fcb9bac847, 7a3d6531b8, 3275305a30, 0deca452bf,
98de2a6d93, 88257b91d7, e8066631a6, e069c409ef, 7f81d57d52, 787c4a8bbb, 6ec9922184, 9b241f29cd,
9b418a71ac, 1bb8ca0806, a1c8e74fb9, f9999c84d9, c01c31d045, 4da24ba34f
@@ -5,7 +5,7 @@ use std::ops::Range;
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct KeySpace {
     /// Contiguous ranges of keys that belong to the key space. In key order,
     /// and with no overlap.
@@ -61,6 +61,58 @@ impl KeySpace {
 
         KeyPartitioning { parts }
     }
+
+    ///
+    /// Calculate logical size of delta layers: total size of all blocks covered by its key range
+    ///
+    pub fn get_logical_size(&self, range: &Range<Key>) -> u64 {
+        let mut start_key = range.start;
+        let n_ranges = self.ranges.len();
+        let start_index = match self.ranges.binary_search_by_key(&start_key, |r| r.start) {
+            Ok(index) => index, // keyspace range starts with start_key
+            Err(index) => {
+                if index != 0 && self.ranges[index - 1].end > start_key {
+                    index - 1 // previous keyspace range overlaps with specified
+                } else if index == n_ranges {
+                    return 0; // no intersection with specified range
+                } else {
+                    start_key = self.ranges[index].start;
+                    index
+                }
+            }
+        };
+        let mut size = 0u64;
+        for i in start_index..n_ranges {
+            if self.ranges[i].start >= range.end {
+                break;
+            }
+            let end_key = if self.ranges[i].end < range.end {
+                self.ranges[i].end
+            } else {
+                range.end
+            };
+            let n_blocks = key_range_size(&(start_key..end_key));
+            if n_blocks != u32::MAX {
+                size += n_blocks as u64 * BLCKSZ as u64;
+            }
+            if i + 1 < n_ranges {
+                start_key = self.ranges[i + 1].start;
+            }
+        }
+        size
+    }
+
+    ///
+    /// Check if key space contains overlapping range
+    ///
+    pub fn overlaps(&self, range: &Range<Key>) -> bool {
+        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
+            Ok(0) => false,
+            Err(0) => false,
+            Ok(index) => self.ranges[index - 1].end > range.start,
+            Err(index) => self.ranges[index - 1].end > range.start,
+        }
+    }
 }
 
 ///
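
To make the intersection arithmetic in `get_logical_size` and the binary-search check in `overlaps` concrete, here is a small self-contained sketch over plain `u64` keys. `SimpleKeySpace` and `BLOCK_SIZE` are made-up stand-ins: the real code works on `Key` values and sizes blocks via `key_range_size` and `BLCKSZ`, so this is illustrative only.

```rust
use std::ops::Range;

const BLOCK_SIZE: u64 = 8192; // assumption: one 8 KB block per key unit

struct SimpleKeySpace {
    // Sorted, non-overlapping ranges, like KeySpace::ranges.
    ranges: Vec<Range<u64>>,
}

impl SimpleKeySpace {
    // Total size of all blocks of `self` that fall inside `range`.
    fn get_logical_size(&self, range: &Range<u64>) -> u64 {
        self.ranges
            .iter()
            .map(|r| {
                let start = r.start.max(range.start);
                let end = r.end.min(range.end);
                if start < end { (end - start) * BLOCK_SIZE } else { 0 }
            })
            .sum()
    }

    // Same binary-search idea as KeySpace::overlaps: find the first range that
    // starts at or after `range.end`, then check whether the range just before
    // it extends past `range.start`.
    fn overlaps(&self, range: &Range<u64>) -> bool {
        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
            Ok(0) | Err(0) => false,
            Ok(index) | Err(index) => self.ranges[index - 1].end > range.start,
        }
    }
}

fn main() {
    let ks = SimpleKeySpace { ranges: vec![0..10, 20..30] };
    // 5..25 intersects 5..10 and 20..25: 10 keys -> 10 blocks -> 81920 bytes.
    assert_eq!(ks.get_logical_size(&(5..25)), 10 * BLOCK_SIZE);
    assert!(ks.overlaps(&(5..25)));
    assert!(!ks.overlaps(&(10..20))); // exactly the gap between the two ranges
    println!("ok");
}
```

The simplification assumes every key maps to one block, which is not true of the real key encoding; the point is only the range-intersection and neighbour-lookup logic.
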
@@ -129,3 +181,226 @@ impl KeySpaceAccum {
         }
     }
 }
+
+///
+/// A helper object, to collect a set of keys and key ranges into a KeySpace
+/// object. Key ranges may be inserted in any order and can overlap.
+///
+#[derive(Clone, Debug, Default)]
+pub struct KeySpaceRandomAccum {
+    ranges: Vec<Range<Key>>,
+}
+
+impl KeySpaceRandomAccum {
+    pub fn new() -> Self {
+        Self { ranges: Vec::new() }
+    }
+
+    pub fn add_key(&mut self, key: Key) {
+        self.add_range(singleton_range(key))
+    }
+
+    pub fn add_range(&mut self, range: Range<Key>) {
+        self.ranges.push(range);
+    }
+
+    pub fn to_keyspace(mut self) -> KeySpace {
+        let mut ranges = Vec::new();
+        if !self.ranges.is_empty() {
+            self.ranges.sort_by_key(|r| r.start);
+            let mut start = self.ranges.first().unwrap().start;
+            let mut end = self.ranges.first().unwrap().end;
+            for r in self.ranges {
+                assert!(r.start >= start);
+                if r.start > end {
+                    ranges.push(start..end);
+                    start = r.start;
+                    end = r.end;
+                } else if r.end > end {
+                    end = r.end;
+                }
+            }
+            ranges.push(start..end);
+        }
+        KeySpace { ranges }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fmt::Write;
+
+    // Helper function to create a key range.
+    //
+    // Make the tests below less verbose.
+    fn kr(irange: Range<i128>) -> Range<Key> {
+        Key::from_i128(irange.start)..Key::from_i128(irange.end)
+    }
+
+    #[allow(dead_code)]
+    fn dump_keyspace(ks: &KeySpace) {
+        for r in ks.ranges.iter() {
+            println!(" {}..{}", r.start.to_i128(), r.end.to_i128());
+        }
+    }
+
+    fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {
+        if actual.ranges != expected {
+            let mut msg = String::new();
+
+            writeln!(msg, "expected:").unwrap();
+            for r in &expected {
+                writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            writeln!(msg, "got:").unwrap();
+            for r in &actual.ranges {
+                writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            panic!("{}", msg);
+        }
+    }
+
+    #[test]
+    fn keyspace_add_range() {
+        // two separate ranges
+        //
+        // #####
+        //           #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
+
+        // two separate ranges, added in reverse order
+        //
+        //           #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(0..10));
+
+        // add range that is adjacent to the end of an existing range
+        //
+        // #####
+        //      ##########
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(10..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is adjacent to the start of an existing range
+        //
+        //      ##########
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the end of an existing range
+        //
+        // #####
+        //   #############
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(5..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the start of an existing range
+        //
+        //   #############
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(5..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is fully covered by an existing range
+        //
+        // ###############
+        //      #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..30));
+        ks.add_range(kr(10..20));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that extends an existing range from both ends
+        //
+        //      #####
+        // ###############
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(0..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add a range that overlaps with two existing ranges, joining them
+        //
+        // #####     #####
+        //   ###########
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(5..25));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+    }
+
+    #[test]
+    fn keyspace_overlaps() {
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(30..40));
+        let ks = ks.to_keyspace();
+
+        //      #####     #####
+        // xxx
+        assert!(!ks.overlaps(&kr(0..5)));
+
+        //      #####     #####
+        //   xxx
+        assert!(!ks.overlaps(&kr(5..9)));
+
+        //      #####     #####
+        //   xxx
+        assert!(!ks.overlaps(&kr(5..10)));
+
+        //      #####     #####
+        //   xxxx
+        assert!(ks.overlaps(&kr(5..11)));
+
+        //      #####     #####
+        //      xxx
+        assert!(ks.overlaps(&kr(10..15)));
+
+        //      #####     #####
+        //        xxx
+        assert!(ks.overlaps(&kr(15..20)));
+
+        //      #####     #####
+        //        xxxxxx
+        assert!(ks.overlaps(&kr(15..25)));
+
+        //      #####     #####
+        //            xxx
+        assert!(!ks.overlaps(&kr(22..28)));
+
+        //      #####     #####
+        //             xxx
+        assert!(!ks.overlaps(&kr(25..30)));
+
+        //      #####     #####
+        //                  x
+        assert!(ks.overlaps(&kr(35..35)));
+
+        //      #####     #####
+        //                     xxx
+        assert!(!ks.overlaps(&kr(40..45)));
+
+        //      #####     #####
+        //                       xxx
+        assert!(!ks.overlaps(&kr(45..50)));
+
+        //      #####     #####
+        // xxxxxxxxxxxxxxx
+        assert!(ks.overlaps(&kr(0..30))); // XXXXX This fails currently!
+    }
+}
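
`to_keyspace` above is a standard sort-and-coalesce pass over intervals: sort by start, then sweep once, merging any range that overlaps or touches the one being accumulated. A minimal standalone sketch of the same pass over plain `u64` ranges (the `coalesce_ranges` helper is hypothetical, not part of the pageserver API):

```rust
use std::ops::Range;

// Sort by start, then merge overlapping or touching ranges, mirroring
// KeySpaceRandomAccum::to_keyspace but over plain u64 keys.
fn coalesce_ranges(mut input: Vec<Range<u64>>) -> Vec<Range<u64>> {
    let mut out = Vec::new();
    if input.is_empty() {
        return out;
    }
    input.sort_by_key(|r| r.start);
    let mut start = input[0].start;
    let mut end = input[0].end;
    for r in input {
        if r.start > end {
            // Gap before this range: emit the accumulated range, start a new one.
            out.push(start..end);
            start = r.start;
            end = r.end;
        } else if r.end > end {
            // Overlapping or adjacent: extend the accumulated range.
            end = r.end;
        }
    }
    out.push(start..end);
    out
}

fn main() {
    // 5..25 bridges 0..10 and 20..30, so everything collapses to one range.
    assert_eq!(coalesce_ranges(vec![20..30, 0..10, 5..25]), vec![0..30]);
    // Disjoint inputs stay separate, but come out sorted.
    assert_eq!(coalesce_ranges(vec![20..30, 0..10]), vec![0..10, 20..30]);
    println!("ok");
}
```

As in the diff above, adjacent ranges (where `r.start == end`) are merged too, which is why `0..10` plus `10..30` collapses to `0..30` in the tests.
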
@@ -2195,7 +2195,7 @@ impl Tenant {
                 // made.
                 break;
             }
-            let result = timeline.gc().await?;
+            let result = timeline.gc(ctx).await?;
             totals += result;
         }
@@ -3754,7 +3754,7 @@ mod tests {
                 .await?;
             tline.freeze_and_flush().await?;
             tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
         }

        Ok(())
@@ -3826,7 +3826,7 @@ mod tests {
                 .await?;
             tline.freeze_and_flush().await?;
             tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
         }

        Ok(())
@@ -3910,7 +3910,7 @@ mod tests {
                 .await?;
             tline.freeze_and_flush().await?;
             tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
         }

        Ok(())
@@ -22,8 +22,7 @@ use tracing::*;
 use utils::id::TenantTimelineId;
 
 use std::cmp::{max, min, Ordering};
-use std::collections::BinaryHeap;
-use std::collections::HashMap;
+use std::collections::{BinaryHeap, HashMap};
 use std::fs;
 use std::ops::{Deref, Range};
 use std::path::{Path, PathBuf};
@@ -48,7 +47,7 @@ use crate::tenant::{
 };
 
 use crate::config::PageServerConf;
-use crate::keyspace::{KeyPartitioning, KeySpace};
+use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum};
 use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
@@ -123,6 +122,17 @@ pub struct Timeline {
 
     pub(super) layers: RwLock<LayerMap<dyn PersistentLayer>>,
 
+    /// Set of key ranges which should be covered by image layers to
+    /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored.
+    /// It is used by the compaction task when it checks whether a new image layer should be created.
+    /// A newly created image layer doesn't help to remove the delta layer until the
+    /// newly created image layer falls off the PITR horizon. So on the next GC cycle,
+    /// gc_timeline may still want the new image layer to be created. To avoid redundant
+    /// image layer creation we should check whether an image layer already exists but is beyond the PITR horizon.
+    /// This is why we need to remember the GC cutoff LSN.
+    ///
+    wanted_image_layers: Mutex<Option<(Lsn, KeySpace)>>,
+
     last_freeze_at: AtomicLsn,
     // Atomic would be more appropriate here.
     last_freeze_ts: RwLock<Instant>,
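
The doc comment above describes a one-slot handoff: GC publishes the key ranges it still wants covered by image layers, together with the cutoff LSN it used, and the compaction task later consumes and clears the slot. A rough sketch of that handshake with hypothetical stand-in types (`Lsn` as `u64`, the wanted set as plain ranges); the real field holds `Option<(Lsn, KeySpace)>` and compaction clears it by writing `None` after the layers are created:

```rust
use std::ops::Range;
use std::sync::Mutex;

// Stand-ins for the real Lsn and KeySpace types.
type Lsn = u64;
type WantedRanges = Vec<Range<u64>>;

struct TimelineSketch {
    // One-slot mailbox, like Timeline::wanted_image_layers:
    // GC fills it, compaction drains it.
    wanted_image_layers: Mutex<Option<(Lsn, WantedRanges)>>,
}

impl TimelineSketch {
    // Called at the end of a GC pass: remember which key ranges still need
    // image layers, together with the GC cutoff used to make that decision.
    fn publish_wanted(&self, cutoff: Lsn, ranges: WantedRanges) {
        self.wanted_image_layers.lock().unwrap().replace((cutoff, ranges));
    }

    // Called by compaction: take the request (if any) and clear the slot so a
    // stale request is not acted on twice. If GC publishes again while
    // compaction is still running, the next GC cycle simply re-publishes it.
    fn take_wanted(&self) -> Option<(Lsn, WantedRanges)> {
        self.wanted_image_layers.lock().unwrap().take()
    }
}

fn main() {
    let tl = TimelineSketch { wanted_image_layers: Mutex::new(None) };
    tl.publish_wanted(0x1000, vec![0..100, 500..800]);
    let (cutoff, ranges) = tl.take_wanted().expect("GC published a request");
    assert_eq!(cutoff, 0x1000);
    assert_eq!(ranges, vec![0..100, 500..800]);
    assert!(tl.take_wanted().is_none()); // slot is cleared after consumption
}
```

In the actual diff the consumer reads the slot under the lock inside `time_for_new_image_layer` and resets it to `None` at the end of `create_image_layers`, rather than using a single `take`, but the ownership rule is the same: at most one outstanding request, always re-creatable by the next GC run.
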
@@ -1354,6 +1364,7 @@ impl Timeline {
             tenant_id,
             pg_version,
             layers: RwLock::new(LayerMap::default()),
+            wanted_image_layers: Mutex::new(None),
 
             walredo_mgr,
             walreceiver,
@@ -2904,6 +2915,30 @@ impl Timeline {
         let layers = self.layers.read().unwrap();
 
         let mut max_deltas = 0;
+        {
+            let wanted_image_layers = self.wanted_image_layers.lock().unwrap();
+            if let Some((cutoff_lsn, wanted)) = &*wanted_image_layers {
+                let img_range =
+                    partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
+                if wanted.overlaps(&img_range) {
+                    //
+                    // gc_timeline only pays attention to image layers that are older than the GC cutoff,
+                    // but create_image_layers creates image layers at last-record-lsn.
+                    // So it's possible that gc_timeline wants a new image layer to be created for a key range,
+                    // but the range is already covered by image layers at more recent LSNs. Before we
+                    // create a new image layer, check if the range is already covered at more recent LSNs.
+                    if !layers
+                        .image_layer_exists(&img_range, &(Lsn::min(lsn, *cutoff_lsn)..lsn + 1))?
+                    {
+                        debug!(
+                            "Force generation of layer {}-{} wanted by GC, cutoff={}, lsn={})",
+                            img_range.start, img_range.end, cutoff_lsn, lsn
+                        );
+                        return Ok(true);
+                    }
+                }
+            }
+        }
 
         for part_range in &partition.ranges {
             let image_coverage = layers.image_coverage(part_range, lsn)?;
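
To make the LSN window in the `image_layer_exists` check above concrete: the search range runs from the older of the compaction LSN and the recorded GC cutoff up to and including the compaction LSN. A small sketch with `Lsn` modeled as a plain `u64` and made-up values:

```rust
// Model Lsn as a plain u64. cutoff_lsn came from the GC pass that requested
// the image layer; lsn is the LSN at which compaction would write a new one.
fn lsn_window(cutoff_lsn: u64, lsn: u64) -> std::ops::Range<u64> {
    // Search from the older of the two up to and including `lsn`: if an image
    // layer already covers the key range anywhere in this window, GC can use
    // it, so there is no need to force another one.
    cutoff_lsn.min(lsn)..lsn + 1
}

fn main() {
    // GC asked for coverage when its cutoff was 0x3000; compaction now runs at 0x5000.
    assert_eq!(lsn_window(0x3000, 0x5000), 0x3000..0x5001);
    // If compaction somehow runs at an older LSN than the recorded cutoff,
    // the window still starts at the older point, so nothing is missed.
    assert_eq!(lsn_window(0x3000, 0x2000), 0x2000..0x2001);
}
```
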
@@ -3023,6 +3058,12 @@ impl Timeline {
                 image_layers.push(image_layer);
             }
         }
+        // All layers that the GC wanted us to create have now been created.
+        //
+        // It's possible that another GC cycle happened while we were compacting, and added
+        // something new to wanted_image_layers, and we now clear that before processing it.
+        // That's OK, because the next GC iteration will put it back in.
+        *self.wanted_image_layers.lock().unwrap() = None;
 
         // Sync the new layer to disk before adding it to the layer map, to make sure
         // we don't garbage collect something based on the new layer, before it has
@@ -3626,7 +3667,7 @@ impl Timeline {
     /// within a layer file. We can only remove the whole file if it's fully
     /// obsolete.
     ///
-    pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
+    pub(super) async fn gc(&self, ctx: &RequestContext) -> anyhow::Result<GcResult> {
         let timer = self.metrics.garbage_collect_histo.start_timer();
 
         fail_point!("before-timeline-gc");
@@ -3656,6 +3697,7 @@ impl Timeline {
                 pitr_cutoff,
                 retain_lsns,
                 new_gc_cutoff,
+                ctx,
             )
             .instrument(
                 info_span!("gc_timeline", timeline = %self.timeline_id, cutoff = %new_gc_cutoff),
@@ -3675,6 +3717,7 @@ impl Timeline {
         pitr_cutoff: Lsn,
         retain_lsns: Vec<Lsn>,
         new_gc_cutoff: Lsn,
+        ctx: &RequestContext,
     ) -> anyhow::Result<GcResult> {
         let now = SystemTime::now();
         let mut result: GcResult = GcResult::default();
@@ -3720,6 +3763,16 @@ impl Timeline {
         }
 
         let mut layers_to_remove = Vec::new();
+        let mut wanted_image_layers = KeySpaceRandomAccum::default();
+        // Do not collect keyspace for unit tests
+        let gc_keyspace = if ctx.task_kind() == TaskKind::GarbageCollector {
+            Some(
+                self.collect_keyspace(self.get_last_record_lsn(), ctx)
+                    .await?,
+            )
+        } else {
+            None
+        };
 
         // Scan all layers in the timeline (remote or on-disk).
         //
@@ -3803,6 +3856,21 @@ impl Timeline {
                     "keeping {} because it is the latest layer",
                     l.filename().file_name()
                 );
+                // Collect delta key ranges that need image layers to allow garbage
+                // collecting the layers.
+                // It is not so obvious whether we need to propagate information only about
+                // delta layers. Image layers can form "stairs" preventing old images from being deleted.
+                // But image layers are in any case less sparse than delta layers. Also we need some
+                // protection from replacing recent image layers with new ones after each GC iteration.
+                if l.is_incremental() && !LayerMap::is_l0(&*l) {
+                    if let Some(keyspace) = &gc_keyspace {
+                        let layer_logical_size = keyspace.get_logical_size(&l.get_key_range());
+                        let layer_age = new_gc_cutoff.0 - l.get_lsn_range().start.0;
+                        if layer_logical_size <= layer_age {
+                            wanted_image_layers.add_range(l.get_key_range());
+                        }
+                    }
+                }
                 result.layers_not_updated += 1;
                 continue 'outer;
             }
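
The `layer_logical_size <= layer_age` test reads naturally once you remember that an LSN is a byte position in the WAL, so `new_gc_cutoff.0 - l.get_lsn_range().start.0` is roughly how many bytes of WAL have accumulated since the delta layer began. The rule then says: ask for an image layer only once the WAL behind the layer is at least as large as the data that would have to be materialized. A hedged numeric sketch with made-up sizes (`wants_image_layer` is hypothetical; the real check also requires the layer to be incremental and not L0):

```rust
// Decide whether GC should ask compaction for an image layer over a delta
// layer's key range. Sizes in bytes; LSNs modeled as u64 WAL byte positions.
fn wants_image_layer(layer_logical_size: u64, layer_start_lsn: u64, gc_cutoff_lsn: u64) -> bool {
    // How much WAL has been written since this delta layer started.
    let layer_age = gc_cutoff_lsn - layer_start_lsn;
    // Materializing the images costs about `layer_logical_size` bytes; only do
    // it once at least that much WAL has accumulated on top of the layer.
    layer_logical_size <= layer_age
}

fn main() {
    // A delta layer covering ~1 MiB of live data that is already 16 MiB of WAL
    // behind the cutoff: worth covering with an image layer.
    assert!(wants_image_layer(1 << 20, 0x100_0000, 0x200_0000));
    // The same layer only 512 KiB behind the cutoff: not yet worth it.
    assert!(!wants_image_layer(1 << 20, 0x100_0000, 0x108_0000));
}
```
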
@@ -3815,6 +3883,10 @@ impl Timeline {
             );
             layers_to_remove.push(Arc::clone(&l));
         }
+        self.wanted_image_layers
+            .lock()
+            .unwrap()
+            .replace((new_gc_cutoff, wanted_image_layers.to_keyspace()));
 
         let mut updates = layers.batch_update();
         if !layers_to_remove.is_empty() {
test_runner/performance/test_gc_feedback.py (new file, 76 lines)
@@ -0,0 +1,76 @@
+import pytest
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+@pytest.mark.timeout(10000)
+def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
+    """
+    Test that GC is able to collect all old layers even if they are forming
+    "stairs" and there are not three delta layers since the last image layer.
+
+    Information about the image layers needed to collect old layers should
+    be propagated by GC to the compaction task, which should take it into account
+    when deciding which new image layers need to be created.
+    """
+    env = neon_env_builder.init_start()
+    client = env.pageserver.http_client()
+
+    tenant_id, _ = env.neon_cli.create_tenant(
+        conf={
+            # disable default GC and compaction
+            "gc_period": "1000 m",
+            "compaction_period": "0 s",
+            "gc_horizon": f"{1024 ** 2}",
+            "checkpoint_distance": f"{1024 ** 2}",
+            "compaction_target_size": f"{1024 ** 2}",
+            # set PITR interval to be small, so we can do GC
+            "pitr_interval": "10 s",
+            # "compaction_threshold": "3",
+            # "image_creation_threshold": "2",
+        }
+    )
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
+    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
+    n_steps = 10
+    n_update_iters = 100
+    step_size = 10000
+    with endpoint.cursor() as cur:
+        cur.execute("SET statement_timeout='1000s'")
+        cur.execute(
+            "CREATE TABLE t(step bigint, count bigint default 0, payload text default repeat(' ', 100)) with (fillfactor=50)"
+        )
+        cur.execute("CREATE INDEX ON t(step)")
+        # In each step, we insert 'step_size' new rows, and update the newly inserted rows
+        # 'n_update_iters' times. This creates a lot of churn and generates lots of WAL at the end of the table,
+        # without modifying the earlier parts of the table.
+        for step in range(n_steps):
+            cur.execute(f"INSERT INTO t (step) SELECT {step} FROM generate_series(1, {step_size})")
+            for i in range(n_update_iters):
+                cur.execute(f"UPDATE t set count=count+1 where step = {step}")
+                cur.execute("vacuum t")
+
+            # cur.execute("select pg_table_size('t')")
+            # logical_size = cur.fetchone()[0]
+            logical_size = client.timeline_detail(tenant_id, timeline_id)["current_logical_size"]
+            log.info(f"Logical storage size {logical_size}")
+
+            client.timeline_checkpoint(tenant_id, timeline_id)
+
+            # Do compaction and GC
+            client.timeline_gc(tenant_id, timeline_id, 0)
+            client.timeline_compact(tenant_id, timeline_id)
+            # One more iteration to check that no excessive image layers are generated
+            client.timeline_gc(tenant_id, timeline_id, 0)
+            client.timeline_compact(tenant_id, timeline_id)
+
+    physical_size = client.timeline_detail(tenant_id, timeline_id)["current_physical_size"]
+    log.info(f"Physical storage size {physical_size}")
+
+    MB = 1024 * 1024
+    zenbenchmark.record("logical_size", logical_size // MB, "Mb", MetricReport.LOWER_IS_BETTER)
+    zenbenchmark.record("physical_size", physical_size // MB, "Mb", MetricReport.LOWER_IS_BETTER)
+    zenbenchmark.record(
+        "physical/logical ratio", physical_size / logical_size, "", MetricReport.LOWER_IS_BETTER
+    )