Refactor keyspace code

Have separate classes for the KeySpace, a partitioning of the KeySpace
(KeyPartitioning), and a builder object used to construct the KeySpace.
Previously, KeyPartitioning did all those things, and it was a bit
confusing.
This commit is contained in:
Heikki Linnakangas
2022-03-11 16:24:13 +02:00
parent d5b8380dae
commit ee40297758
3 changed files with 103 additions and 60 deletions

View File

@@ -1,30 +1,101 @@
use crate::repository::{key_range_size, singleton_range, Key};
use postgres_ffi::pg_constants;
use std::ops::Range;
use crate::repository::{key_range_size, singleton_range, Key};
use postgres_ffi::pg_constants;
// Target file size, when creating iage and delta layers
// Target file size, when creating image and delta layers
pub const TARGET_FILE_SIZE_BYTES: u64 = 128 * 1024 * 1024; // 128 MB
///
/// Represents a set of Keys, in a compact form.
///
#[derive(Debug, Clone)]
pub struct KeyPartitioning {
accum: Option<Range<Key>>,
pub struct KeySpace {
// Contiguous ranges of keys that belong to the key space. In key order, and
// with no overlap.
ranges: Vec<Range<Key>>,
}
impl KeySpace {
    ///
    /// Partition the key space into chunks of roughly 'target_size' bytes each.
    ///
    /// Sizes are computed in units of 8 KB blocks ('target_size' is converted
    /// to a block count below), and contiguous key ranges are greedily packed
    /// into partitions in key order.
    ///
    pub fn partition(&self, target_size: u64) -> KeyPartitioning {
        // Assume that each value is 8k in size.
        let target_nblocks = (target_size / pg_constants::BLCKSZ as u64) as usize;

        let mut partitions = Vec::new();
        let mut current_part = Vec::new();
        let mut current_part_size: usize = 0;
        for range in &self.ranges {
            // If appending the next contiguous range in the keyspace to the current
            // partition would cause it to be too large, start a new partition.
            let this_size = key_range_size(range) as usize;
            if current_part_size + this_size > target_nblocks && !current_part.is_empty() {
                partitions.push(current_part);
                current_part = Vec::new();
                current_part_size = 0;
            }

            // If the next range is larger than 'target_size', split it into
            // 'target_size' chunks. Each full chunk becomes a single-range
            // partition of its own.
            let mut remain_size = this_size;
            let mut start = range.start;
            while remain_size > target_nblocks {
                let next = start.add(target_nblocks as u32);
                partitions.push(vec![start..next]);
                start = next;
                remain_size -= target_nblocks
            }
            // The leftover piece (always non-empty, since the loop condition is
            // strict) is carried into the current partition.
            current_part.push(start..range.end);
            current_part_size += remain_size;
        }

        // add last partition that wasn't full yet.
        if !current_part.is_empty() {
            partitions.push(current_part);
        }

        KeyPartitioning { partitions }
    }
}
///
/// A partitioning of the key space: a list of partitions, each of which is a
/// list of key ranges.
///
/// The only partitioning scheme currently implemented splits the key space
/// into partitions of roughly equal physical size (see KeySpace::partition),
/// but this data structure can describe any partitioning.
///
#[derive(Clone, Debug, Default)]
pub struct KeyPartitioning {
    pub partitions: Vec<Vec<Range<Key>>>,
}

impl KeyPartitioning {
    /// Create an empty partitioning.
    pub fn new() -> Self {
        Self::default()
    }
}
///
/// A helper object, to collect a set of keys and key ranges into a KeySpace
/// object. This takes care of merging adjacent keys and key ranges into
/// contiguous ranges.
///
#[derive(Clone, Debug, Default)]
pub struct KeySpaceAccum {
    // Range currently being built; adjacent keys are merged into it before
    // it is flushed to 'ranges'.
    accum: Option<Range<Key>>,
    // Completed ranges — presumably kept in key order with no overlap, to
    // satisfy KeySpace's invariant; confirm against add_key/to_keyspace.
    ranges: Vec<Range<Key>>,
}
impl KeySpaceAccum {
/// Create an empty accumulator: no pending range, no completed ranges.
pub fn new() -> Self {
    Self {
        accum: None,
        ranges: Vec::new(),
        // NOTE: the original body also initialized a 'partitions' field,
        // but KeySpaceAccum has no such field (only 'accum' and 'ranges');
        // that initializer was a leftover from the old KeyPartitioning::new
        // and would not compile, so it is removed here.
    }
}
@@ -47,44 +118,12 @@ impl KeyPartitioning {
}
}
pub fn repartition(&mut self, target_size: u64) {
let target_nblocks = (target_size / pg_constants::BLCKSZ as u64) as usize;
pub fn to_keyspace(mut self) -> KeySpace {
if let Some(accum) = self.accum.take() {
self.ranges.push(accum);
}
self.partitions = Vec::new();
let mut current_part = Vec::new();
let mut current_part_size: usize = 0;
for range in &self.ranges {
let this_size = key_range_size(range) as usize;
if current_part_size + this_size > target_nblocks && !current_part.is_empty() {
self.partitions.push(current_part);
current_part = Vec::new();
current_part_size = 0;
}
let mut remain_size = this_size;
let mut start = range.start;
while remain_size > target_nblocks {
let next = start.add(target_nblocks as u32);
self.partitions.push(vec![start..next]);
start = next;
remain_size -= target_nblocks
}
current_part.push(start..range.end);
current_part_size += remain_size;
}
if !current_part.is_empty() {
self.partitions.push(current_part);
KeySpace {
ranges: self.ranges,
}
}
}
impl Default for KeyPartitioning {
fn default() -> Self {
Self::new()
}
}

View File

@@ -1910,6 +1910,7 @@ fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::keyspace::KeySpaceAccum;
use crate::repository::repo_harness::*;
use rand::thread_rng;
use rand::Rng;
@@ -2009,7 +2010,7 @@ mod tests {
let mut lsn = Lsn(0x10);
let mut parts = KeyPartitioning::new();
let mut keyspace = KeySpaceAccum::new();
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
let mut blknum = 0;
@@ -2025,14 +2026,17 @@ mod tests {
writer.advance_last_record_lsn(lsn);
drop(writer);
parts.add_key(test_key);
keyspace.add_key(test_key);
lsn = Lsn(lsn.0 + 0x10);
blknum += 1;
}
let cutoff = tline.get_last_record_lsn();
parts.repartition(TEST_FILE_SIZE as u64);
let parts = keyspace
.clone()
.to_keyspace()
.partition(TEST_FILE_SIZE as u64);
tline.hint_partitioning(parts.clone(), lsn)?;
tline.update_gc_info(Vec::new(), cutoff);
@@ -2053,7 +2057,7 @@ mod tests {
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
let mut parts = KeyPartitioning::new();
let mut keyspace = KeySpaceAccum::new();
// Track when each page was last modified. Used to assert that
// a read sees the latest page version.
@@ -2074,10 +2078,10 @@ mod tests {
updated[blknum] = lsn;
drop(writer);
parts.add_key(test_key);
keyspace.add_key(test_key);
}
parts.repartition(TEST_FILE_SIZE as u64);
let parts = keyspace.to_keyspace().partition(TEST_FILE_SIZE as u64);
tline.hint_partitioning(parts, lsn)?;
for _ in 0..50 {
@@ -2127,7 +2131,7 @@ mod tests {
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
let mut parts = KeyPartitioning::new();
let mut keyspace = KeySpaceAccum::new();
// Track when each page was last modified. Used to assert that
// a read sees the latest page version.
@@ -2148,10 +2152,10 @@ mod tests {
updated[blknum] = lsn;
drop(writer);
parts.add_key(test_key);
keyspace.add_key(test_key);
}
parts.repartition(TEST_FILE_SIZE as u64);
let parts = keyspace.to_keyspace().partition(TEST_FILE_SIZE as u64);
tline.hint_partitioning(parts, lsn)?;
let mut tline_id = TIMELINE_ID;

View File

@@ -7,7 +7,7 @@
//! Clarify that)
//!
use crate::keyspace::{KeyPartitioning, TARGET_FILE_SIZE_BYTES};
use crate::keyspace::{KeySpace, KeySpaceAccum, TARGET_FILE_SIZE_BYTES};
use crate::relish::*;
use crate::repository::*;
use crate::repository::{Repository, Timeline};
@@ -336,9 +336,9 @@ impl<R: Repository> DatadirTimeline<R> {
Ok(total_size * pg_constants::BLCKSZ as usize)
}
fn collect_keyspace(&self, lsn: Lsn) -> Result<KeyPartitioning> {
fn collect_keyspace(&self, lsn: Lsn) -> Result<KeySpace> {
// Iterate through key ranges, greedily packing them into partitions
let mut result = KeyPartitioning::new();
let mut result = KeySpaceAccum::new();
// Add dbdir
result.add_key(DBDIR_KEY);
@@ -404,7 +404,7 @@ impl<R: Repository> DatadirTimeline<R> {
result.add_key(CONTROLFILE_KEY);
result.add_key(CHECKPOINT_KEY);
Ok(result)
Ok(result.to_keyspace())
}
}
@@ -801,8 +801,8 @@ impl<'a, R: Repository> DatadirTimelineWriter<'a, R> {
if last_partitioning == Lsn(0)
|| self.lsn.0 - last_partitioning.0 > TARGET_FILE_SIZE_BYTES / 8
{
let mut partitioning = self.tline.collect_keyspace(self.lsn)?;
partitioning.repartition(TARGET_FILE_SIZE_BYTES);
let keyspace = self.tline.collect_keyspace(self.lsn)?;
let partitioning = keyspace.partition(TARGET_FILE_SIZE_BYTES);
self.tline.tline.hint_partitioning(partitioning, self.lsn)?;
self.tline.last_partitioning.store(self.lsn);
}