mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 08:52:56 +00:00
Use R-Tree for layer map
This commit is contained in:
@@ -44,6 +44,7 @@ nix = "0.23"
|
||||
once_cell = "1.8.0"
|
||||
crossbeam-utils = "0.8.5"
|
||||
fail = "0.5.0"
|
||||
rstar = "0.9.2"
|
||||
|
||||
rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
|
||||
async-compression = {version = "0.3", features = ["zstd", "tokio"]}
|
||||
|
||||
@@ -1928,7 +1928,7 @@ impl LayeredTimeline {
|
||||
l.filename().display(),
|
||||
l.is_incremental(),
|
||||
);
|
||||
layers_to_remove.push(Arc::clone(l));
|
||||
layers_to_remove.push(Arc::clone(&l));
|
||||
}
|
||||
|
||||
// Actually delete the layers from disk and remove them from the map.
|
||||
|
||||
@@ -16,6 +16,7 @@ use crate::layered_repository::InMemoryLayer;
|
||||
use crate::repository::Key;
|
||||
use anyhow::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use rstar::{RTree, RTreeObject, AABB};
|
||||
use std::collections::VecDeque;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
@@ -51,14 +52,42 @@ pub struct LayerMap {
|
||||
pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,
|
||||
|
||||
/// All the historic layers are kept here
|
||||
historic_layers: RTree<LayerEnvelope>,
|
||||
|
||||
/// TODO: This is a placeholder implementation of a data structure
|
||||
/// to hold information about all the layer files on disk and in
|
||||
/// S3. Currently, it's just a vector and all operations perform a
|
||||
/// linear scan over it. That obviously becomes slow as the
|
||||
/// number of layers grows. I'm imagining that an R-tree or some
|
||||
/// other 2D data structure would be the long-term solution here.
|
||||
historic_layers: Vec<Arc<dyn Layer>>,
|
||||
/// L0 layers has key range: (Key::MIN..Key::MAX) and locating them using R-Tree search is very inefficient
|
||||
l0_layers: Vec<Arc<dyn Layer>>,
|
||||
}
|
||||
|
||||
struct LayerEnvelope {
|
||||
layer: Arc<dyn Layer>,
|
||||
}
|
||||
|
||||
impl PartialEq for LayerEnvelope {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
Arc::ptr_eq(&self.layer, &other.layer)
|
||||
}
|
||||
fn ne(&self, other: &Self) -> bool {
|
||||
!self.eq(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for LayerEnvelope {}
|
||||
|
||||
impl RTreeObject for LayerEnvelope {
|
||||
type Envelope = AABB<[i128; 2]>;
|
||||
fn envelope(&self) -> Self::Envelope {
|
||||
let key_range = self.layer.get_key_range();
|
||||
let lsn_range = self.layer.get_lsn_range();
|
||||
AABB::from_corners(
|
||||
[key_range.start.to_i128(), lsn_range.start.0 as i128],
|
||||
[key_range.end.to_i128(), lsn_range.end.0 as i128 - 1], // end is exlusive
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return value of LayerMap::search
|
||||
@@ -84,19 +113,21 @@ impl LayerMap {
|
||||
// Find the latest image layer that covers the given key
|
||||
let mut latest_img: Option<Arc<dyn Layer>> = None;
|
||||
let mut latest_img_lsn: Option<Lsn> = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[key.to_i128(), 0i128],
|
||||
[key.to_i128(), end_lsn.0 as i128 - 1],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
let img_lsn = l.get_lsn_range().start;
|
||||
|
||||
if img_lsn >= end_lsn {
|
||||
// too new
|
||||
continue;
|
||||
}
|
||||
assert!(img_lsn < end_lsn);
|
||||
if Lsn(img_lsn.0 + 1) == end_lsn {
|
||||
// found exact match
|
||||
return Ok(Some(SearchResult {
|
||||
@@ -112,19 +143,16 @@ impl LayerMap {
|
||||
|
||||
// Search the delta layers
|
||||
let mut latest_delta: Option<Arc<dyn Layer>> = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if l.get_lsn_range().start >= end_lsn {
|
||||
// too new
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
assert!(l.get_lsn_range().start < end_lsn);
|
||||
if l.get_lsn_range().end >= end_lsn {
|
||||
// this layer contains the requested point in the key/lsn space.
|
||||
// No need to search any further
|
||||
@@ -182,7 +210,10 @@ impl LayerMap {
|
||||
/// Insert an on-disk layer
|
||||
///
|
||||
pub fn insert_historic(&mut self, layer: Arc<dyn Layer>) {
|
||||
self.historic_layers.push(layer);
|
||||
if layer.get_key_range() == (Key::MIN..Key::MAX) {
|
||||
self.l0_layers.push(layer.clone());
|
||||
}
|
||||
self.historic_layers.insert(LayerEnvelope { layer });
|
||||
NUM_ONDISK_LAYERS.inc();
|
||||
}
|
||||
|
||||
@@ -193,17 +224,21 @@ impl LayerMap {
|
||||
///
|
||||
#[allow(dead_code)]
|
||||
pub fn remove_historic(&mut self, layer: Arc<dyn Layer>) {
|
||||
let len_before = self.historic_layers.len();
|
||||
if layer.get_key_range() == (Key::MIN..Key::MAX) {
|
||||
let len_before = self.l0_layers.len();
|
||||
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
self.historic_layers
|
||||
.retain(|other| !Arc::ptr_eq(other, &layer));
|
||||
|
||||
assert_eq!(self.historic_layers.len(), len_before - 1);
|
||||
// FIXME: ptr_eq might fail to return true for 'dyn'
|
||||
// references. Clippy complains about this. In practice it
|
||||
// seems to work, the assertion below would be triggered
|
||||
// otherwise but this ought to be fixed.
|
||||
#[allow(clippy::vtable_address_comparisons)]
|
||||
self.l0_layers.retain(|other| !Arc::ptr_eq(other, &layer));
|
||||
assert_eq!(self.l0_layers.len(), len_before - 1);
|
||||
}
|
||||
assert!(self
|
||||
.historic_layers
|
||||
.remove(&LayerEnvelope { layer })
|
||||
.is_some());
|
||||
NUM_ONDISK_LAYERS.dec();
|
||||
}
|
||||
|
||||
@@ -224,13 +259,20 @@ impl LayerMap {
|
||||
|
||||
loop {
|
||||
let mut made_progress = false;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[range_remain.start.to_i128(), lsn.0 as i128],
|
||||
[range_remain.end.to_i128(), disk_consistent_lsn.0 as i128],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
let img_lsn = l.get_lsn_range().start;
|
||||
if !l.is_incremental()
|
||||
&& l.get_key_range().contains(&range_remain.start)
|
||||
if l.get_key_range().contains(&range_remain.start)
|
||||
&& img_lsn > lsn
|
||||
&& img_lsn < disk_consistent_lsn
|
||||
{
|
||||
@@ -266,8 +308,8 @@ impl LayerMap {
|
||||
}
|
||||
*/
|
||||
|
||||
pub fn iter_historic_layers(&self) -> std::slice::Iter<Arc<dyn Layer>> {
|
||||
self.historic_layers.iter()
|
||||
pub fn iter_historic_layers(&self) -> Box<dyn Iterator<Item = Arc<dyn Layer>> + '_> {
|
||||
Box::new(self.historic_layers.iter().map(|e| e.layer.clone()))
|
||||
}
|
||||
|
||||
/// Find the last image layer that covers 'key', ignoring any image layers
|
||||
@@ -275,19 +317,19 @@ impl LayerMap {
|
||||
fn find_latest_image(&self, key: Key, lsn: Lsn) -> Option<Arc<dyn Layer>> {
|
||||
let mut candidate_lsn = Lsn(0);
|
||||
let mut candidate = None;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners([key.to_i128(), 0], [key.to_i128(), lsn.0 as i128]);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !l.get_key_range().contains(&key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(l.get_key_range().contains(&key));
|
||||
let this_lsn = l.get_lsn_range().start;
|
||||
if this_lsn > lsn {
|
||||
continue;
|
||||
}
|
||||
assert!(this_lsn <= lsn);
|
||||
if this_lsn < candidate_lsn {
|
||||
// our previous candidate was better
|
||||
continue;
|
||||
@@ -315,10 +357,16 @@ impl LayerMap {
|
||||
let mut points: Vec<Key>;
|
||||
|
||||
points = vec![key_range.start];
|
||||
for l in self.historic_layers.iter() {
|
||||
if l.get_lsn_range().start > lsn {
|
||||
continue;
|
||||
}
|
||||
let envelope = AABB::from_corners(
|
||||
[key_range.start.to_i128(), 0],
|
||||
[key_range.end.to_i128(), lsn.0 as i128],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
assert!(l.get_lsn_range().start <= lsn);
|
||||
let range = l.get_key_range();
|
||||
if key_range.contains(&range.start) {
|
||||
points.push(l.get_key_range().start);
|
||||
@@ -351,16 +399,27 @@ impl LayerMap {
|
||||
/// given key and LSN range.
|
||||
pub fn count_deltas(&self, key_range: &Range<Key>, lsn_range: &Range<Lsn>) -> Result<usize> {
|
||||
let mut result = 0;
|
||||
for l in self.historic_layers.iter() {
|
||||
let envelope = AABB::from_corners(
|
||||
[key_range.start.to_i128(), lsn_range.start.0 as i128],
|
||||
[key_range.end.to_i128(), lsn_range.end.0 as i128 - 1],
|
||||
);
|
||||
for e in self
|
||||
.historic_layers
|
||||
.locate_in_envelope_intersecting(&envelope)
|
||||
{
|
||||
let l = &e.layer;
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if !range_overlaps(&l.get_lsn_range(), lsn_range) {
|
||||
continue;
|
||||
}
|
||||
if !range_overlaps(&l.get_key_range(), key_range) {
|
||||
continue;
|
||||
info!(
|
||||
"l.lsn_range={:?}, lsn_range={:?}",
|
||||
l.get_lsn_range(),
|
||||
lsn_range
|
||||
);
|
||||
}
|
||||
assert!(range_overlaps(&l.get_lsn_range(), lsn_range));
|
||||
assert!(range_overlaps(&l.get_key_range(), key_range));
|
||||
|
||||
// We ignore level0 delta layers. Unless the whole keyspace fits
|
||||
// into one partition
|
||||
@@ -377,25 +436,15 @@ impl LayerMap {
|
||||
|
||||
/// Return all L0 delta layers
|
||||
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<dyn Layer>>> {
|
||||
let mut deltas = Vec::new();
|
||||
for l in self.historic_layers.iter() {
|
||||
if !l.is_incremental() {
|
||||
continue;
|
||||
}
|
||||
if l.get_key_range() != (Key::MIN..Key::MAX) {
|
||||
continue;
|
||||
}
|
||||
deltas.push(Arc::clone(l));
|
||||
}
|
||||
Ok(deltas)
|
||||
Ok(self.l0_layers.clone())
|
||||
}
|
||||
|
||||
/// debugging function to print out the contents of the layer map
|
||||
#[allow(unused)]
|
||||
pub fn dump(&self) -> Result<()> {
|
||||
println!("Begin dump LayerMap");
|
||||
for layer in self.historic_layers.iter() {
|
||||
layer.dump()?;
|
||||
for e in self.historic_layers.iter() {
|
||||
e.layer.dump()?;
|
||||
}
|
||||
println!("End dump LayerMap");
|
||||
Ok(())
|
||||
|
||||
@@ -29,6 +29,19 @@ pub struct Key {
|
||||
}
|
||||
|
||||
impl Key {
|
||||
/// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
|
||||
/// As far as Zenith is not supporting tablespace (because of lack of access to local file system),
|
||||
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
||||
pub fn to_i128(&self) -> i128 {
|
||||
assert!(self.field2 < 0xFFFFF || self.field2 == !0u32);
|
||||
(((self.field1 & 0x7f) as i128) << 120)
|
||||
| ((self.field5 as i128) << 112)
|
||||
| (((self.field2 & 0xFFFF) as i128) << 96)
|
||||
| ((self.field3 as i128) << 64)
|
||||
| ((self.field4 as i128) << 32)
|
||||
| self.field6 as i128
|
||||
}
|
||||
|
||||
pub fn next(&self) -> Key {
|
||||
self.add(1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user