Use R-Tree for layer map

This commit is contained in:
Konstantin Knizhnik
2022-03-28 16:15:28 +03:00
parent 07342f7519
commit cd0fdada82
4 changed files with 136 additions and 73 deletions

View File

@@ -44,6 +44,7 @@ nix = "0.23"
once_cell = "1.8.0"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
rstar = "0.9.2"
rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
async-compression = {version = "0.3", features = ["zstd", "tokio"]}

View File

@@ -1928,7 +1928,7 @@ impl LayeredTimeline {
l.filename().display(),
l.is_incremental(),
);
layers_to_remove.push(Arc::clone(l));
layers_to_remove.push(Arc::clone(&l));
}
// Actually delete the layers from disk and remove them from the map.

View File

@@ -16,6 +16,7 @@ use crate::layered_repository::InMemoryLayer;
use crate::repository::Key;
use anyhow::Result;
use lazy_static::lazy_static;
use rstar::{RTree, RTreeObject, AABB};
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
@@ -51,14 +52,42 @@ pub struct LayerMap {
pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,
/// All the historic layers are kept here
historic_layers: RTree<LayerEnvelope>,
/// TODO: This is a placeholder implementation of a data structure
/// to hold information about all the layer files on disk and in
/// S3. Currently, it's just a vector and all operations perform a
/// linear scan over it. That obviously becomes slow as the
/// number of layers grows. I'm imagining that an R-tree or some
/// other 2D data structure would be the long-term solution here.
historic_layers: Vec<Arc<dyn Layer>>,
/// L0 layers has key range: (Key::MIN..Key::MAX) and locating them using R-Tree search is very inefficient
l0_layers: Vec<Arc<dyn Layer>>,
}
struct LayerEnvelope {
layer: Arc<dyn Layer>,
}
impl PartialEq for LayerEnvelope {
fn eq(&self, other: &Self) -> bool {
// FIXME: ptr_eq might fail to return true for 'dyn'
// references. Clippy complains about this. In practice it
// seems to work, the assertion below would be triggered
// otherwise but this ought to be fixed.
#[allow(clippy::vtable_address_comparisons)]
Arc::ptr_eq(&self.layer, &other.layer)
}
fn ne(&self, other: &Self) -> bool {
!self.eq(other)
}
}
impl Eq for LayerEnvelope {}
impl RTreeObject for LayerEnvelope {
type Envelope = AABB<[i128; 2]>;
fn envelope(&self) -> Self::Envelope {
let key_range = self.layer.get_key_range();
let lsn_range = self.layer.get_lsn_range();
AABB::from_corners(
[key_range.start.to_i128(), lsn_range.start.0 as i128],
[key_range.end.to_i128(), lsn_range.end.0 as i128 - 1], // end is exlusive
)
}
}
/// Return value of LayerMap::search
@@ -84,19 +113,21 @@ impl LayerMap {
// Find the latest image layer that covers the given key
let mut latest_img: Option<Arc<dyn Layer>> = None;
let mut latest_img_lsn: Option<Lsn> = None;
for l in self.historic_layers.iter() {
let envelope = AABB::from_corners(
[key.to_i128(), 0i128],
[key.to_i128(), end_lsn.0 as i128 - 1],
);
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
if l.is_incremental() {
continue;
}
if !l.get_key_range().contains(&key) {
continue;
}
assert!(l.get_key_range().contains(&key));
let img_lsn = l.get_lsn_range().start;
if img_lsn >= end_lsn {
// too new
continue;
}
assert!(img_lsn < end_lsn);
if Lsn(img_lsn.0 + 1) == end_lsn {
// found exact match
return Ok(Some(SearchResult {
@@ -112,19 +143,16 @@ impl LayerMap {
// Search the delta layers
let mut latest_delta: Option<Arc<dyn Layer>> = None;
for l in self.historic_layers.iter() {
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
if !l.is_incremental() {
continue;
}
if !l.get_key_range().contains(&key) {
continue;
}
if l.get_lsn_range().start >= end_lsn {
// too new
continue;
}
assert!(l.get_key_range().contains(&key));
assert!(l.get_lsn_range().start < end_lsn);
if l.get_lsn_range().end >= end_lsn {
// this layer contains the requested point in the key/lsn space.
// No need to search any further
@@ -182,7 +210,10 @@ impl LayerMap {
/// Insert an on-disk layer
///
pub fn insert_historic(&mut self, layer: Arc<dyn Layer>) {
self.historic_layers.push(layer);
if layer.get_key_range() == (Key::MIN..Key::MAX) {
self.l0_layers.push(layer.clone());
}
self.historic_layers.insert(LayerEnvelope { layer });
NUM_ONDISK_LAYERS.inc();
}
@@ -193,17 +224,21 @@ impl LayerMap {
///
#[allow(dead_code)]
pub fn remove_historic(&mut self, layer: Arc<dyn Layer>) {
let len_before = self.historic_layers.len();
if layer.get_key_range() == (Key::MIN..Key::MAX) {
let len_before = self.l0_layers.len();
// FIXME: ptr_eq might fail to return true for 'dyn'
// references. Clippy complains about this. In practice it
// seems to work, the assertion below would be triggered
// otherwise but this ought to be fixed.
#[allow(clippy::vtable_address_comparisons)]
self.historic_layers
.retain(|other| !Arc::ptr_eq(other, &layer));
assert_eq!(self.historic_layers.len(), len_before - 1);
// FIXME: ptr_eq might fail to return true for 'dyn'
// references. Clippy complains about this. In practice it
// seems to work, the assertion below would be triggered
// otherwise but this ought to be fixed.
#[allow(clippy::vtable_address_comparisons)]
self.l0_layers.retain(|other| !Arc::ptr_eq(other, &layer));
assert_eq!(self.l0_layers.len(), len_before - 1);
}
assert!(self
.historic_layers
.remove(&LayerEnvelope { layer })
.is_some());
NUM_ONDISK_LAYERS.dec();
}
@@ -224,13 +259,20 @@ impl LayerMap {
loop {
let mut made_progress = false;
for l in self.historic_layers.iter() {
let envelope = AABB::from_corners(
[range_remain.start.to_i128(), lsn.0 as i128],
[range_remain.end.to_i128(), disk_consistent_lsn.0 as i128],
);
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
if l.is_incremental() {
continue;
}
let img_lsn = l.get_lsn_range().start;
if !l.is_incremental()
&& l.get_key_range().contains(&range_remain.start)
if l.get_key_range().contains(&range_remain.start)
&& img_lsn > lsn
&& img_lsn < disk_consistent_lsn
{
@@ -266,8 +308,8 @@ impl LayerMap {
}
*/
pub fn iter_historic_layers(&self) -> std::slice::Iter<Arc<dyn Layer>> {
self.historic_layers.iter()
pub fn iter_historic_layers(&self) -> Box<dyn Iterator<Item = Arc<dyn Layer>> + '_> {
Box::new(self.historic_layers.iter().map(|e| e.layer.clone()))
}
/// Find the last image layer that covers 'key', ignoring any image layers
@@ -275,19 +317,19 @@ impl LayerMap {
fn find_latest_image(&self, key: Key, lsn: Lsn) -> Option<Arc<dyn Layer>> {
let mut candidate_lsn = Lsn(0);
let mut candidate = None;
for l in self.historic_layers.iter() {
let envelope = AABB::from_corners([key.to_i128(), 0], [key.to_i128(), lsn.0 as i128]);
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
if l.is_incremental() {
continue;
}
if !l.get_key_range().contains(&key) {
continue;
}
assert!(l.get_key_range().contains(&key));
let this_lsn = l.get_lsn_range().start;
if this_lsn > lsn {
continue;
}
assert!(this_lsn <= lsn);
if this_lsn < candidate_lsn {
// our previous candidate was better
continue;
@@ -315,10 +357,16 @@ impl LayerMap {
let mut points: Vec<Key>;
points = vec![key_range.start];
for l in self.historic_layers.iter() {
if l.get_lsn_range().start > lsn {
continue;
}
let envelope = AABB::from_corners(
[key_range.start.to_i128(), 0],
[key_range.end.to_i128(), lsn.0 as i128],
);
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
assert!(l.get_lsn_range().start <= lsn);
let range = l.get_key_range();
if key_range.contains(&range.start) {
points.push(l.get_key_range().start);
@@ -351,16 +399,27 @@ impl LayerMap {
/// given key and LSN range.
pub fn count_deltas(&self, key_range: &Range<Key>, lsn_range: &Range<Lsn>) -> Result<usize> {
let mut result = 0;
for l in self.historic_layers.iter() {
let envelope = AABB::from_corners(
[key_range.start.to_i128(), lsn_range.start.0 as i128],
[key_range.end.to_i128(), lsn_range.end.0 as i128 - 1],
);
for e in self
.historic_layers
.locate_in_envelope_intersecting(&envelope)
{
let l = &e.layer;
if !l.is_incremental() {
continue;
}
if !range_overlaps(&l.get_lsn_range(), lsn_range) {
continue;
}
if !range_overlaps(&l.get_key_range(), key_range) {
continue;
info!(
"l.lsn_range={:?}, lsn_range={:?}",
l.get_lsn_range(),
lsn_range
);
}
assert!(range_overlaps(&l.get_lsn_range(), lsn_range));
assert!(range_overlaps(&l.get_key_range(), key_range));
// We ignore level0 delta layers. Unless the whole keyspace fits
// into one partition
@@ -377,25 +436,15 @@ impl LayerMap {
/// Return all L0 delta layers
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<dyn Layer>>> {
let mut deltas = Vec::new();
for l in self.historic_layers.iter() {
if !l.is_incremental() {
continue;
}
if l.get_key_range() != (Key::MIN..Key::MAX) {
continue;
}
deltas.push(Arc::clone(l));
}
Ok(deltas)
Ok(self.l0_layers.clone())
}
/// debugging function to print out the contents of the layer map
#[allow(unused)]
pub fn dump(&self) -> Result<()> {
println!("Begin dump LayerMap");
for layer in self.historic_layers.iter() {
layer.dump()?;
for e in self.historic_layers.iter() {
e.layer.dump()?;
}
println!("End dump LayerMap");
Ok(())

View File

@@ -29,6 +29,19 @@ pub struct Key {
}
impl Key {
/// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
/// As far as Zenith is not supporting tablespace (because of lack of access to local file system),
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
pub fn to_i128(&self) -> i128 {
assert!(self.field2 < 0xFFFFF || self.field2 == !0u32);
(((self.field1 & 0x7f) as i128) << 120)
| ((self.field5 as i128) << 112)
| (((self.field2 & 0xFFFF) as i128) << 96)
| ((self.field3 as i128) << 64)
| ((self.field4 as i128) << 32)
| self.field6 as i128
}
pub fn next(&self) -> Key {
self.add(1)
}