mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-22 07:30:37 +00:00
Bunch of fixes, smarter iterator, metrics exporter
This commit is contained in:
@@ -182,7 +182,7 @@ fn next_recurse<'e, V: Value>(
|
||||
assert!(path.len() < min_key.len());
|
||||
|
||||
use std::cmp::Ordering;
|
||||
let mut key_byte = match path.as_slice().cmp(&min_key[0..path.len()]) {
|
||||
let mut min_key_byte = match path.as_slice().cmp(&min_key[0..path.len()]) {
|
||||
Ordering::Less => {
|
||||
rnode.read_unlock_or_restart()?;
|
||||
return Ok(None);
|
||||
@@ -191,17 +191,11 @@ fn next_recurse<'e, V: Value>(
|
||||
Ordering::Greater => 0,
|
||||
};
|
||||
loop {
|
||||
// TODO: This iterates through all possible byte values. That's pretty unoptimal.
|
||||
// Implement a function to scan the node for next key value efficiently.
|
||||
match rnode.find_child_or_value_or_restart(key_byte)? {
|
||||
match rnode.find_next_child_or_value_or_restart(min_key_byte)? {
|
||||
None => {
|
||||
if key_byte == u8::MAX {
|
||||
return Ok(None);
|
||||
}
|
||||
key_byte += 1;
|
||||
continue;
|
||||
}
|
||||
Some(ChildOrValue::Child(child_ref)) => {
|
||||
return Ok(None);
|
||||
},
|
||||
Some((key_byte, ChildOrValue::Child(child_ref))) => {
|
||||
let path_len = path.len();
|
||||
path.push(key_byte);
|
||||
let result = next_recurse(min_key, path, child_ref, epoch_pin)?;
|
||||
@@ -212,9 +206,9 @@ fn next_recurse<'e, V: Value>(
|
||||
return Ok(None);
|
||||
}
|
||||
path.truncate(path_len);
|
||||
key_byte += 1;
|
||||
}
|
||||
Some(ChildOrValue::Value(vptr)) => {
|
||||
min_key_byte = key_byte + 1;
|
||||
},
|
||||
Some((key_byte, ChildOrValue::Value(vptr))) => {
|
||||
path.push(key_byte);
|
||||
assert_eq!(path.len(), min_key.len());
|
||||
// safety: It's OK to return a ref of the pointer because we checked the version
|
||||
@@ -222,7 +216,7 @@ fn next_recurse<'e, V: Value>(
|
||||
// as long as the epoch is pinned.
|
||||
let v = unsafe { vptr.as_ref().unwrap() };
|
||||
return Ok(Some(v))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,8 +100,6 @@ pub struct NodeInternal16<V> {
|
||||
child_ptrs: [NodePtr<V>; 16],
|
||||
}
|
||||
|
||||
const INVALID_CHILD_INDEX: u8 = u8::MAX;
|
||||
|
||||
#[repr(C)]
|
||||
pub struct NodeInternal48<V> {
|
||||
tag: NodeTag,
|
||||
@@ -114,6 +112,7 @@ pub struct NodeInternal48<V> {
|
||||
child_indexes: [u8; 256],
|
||||
child_ptrs: [NodePtr<V>; 48],
|
||||
}
|
||||
const INVALID_CHILD_INDEX: u8 = u8::MAX;
|
||||
|
||||
#[repr(C)]
|
||||
pub struct NodeInternal256<V> {
|
||||
@@ -339,6 +338,35 @@ impl<V: Value> NodePtr<V> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_next_child_or_value(&self, key_byte: u8) -> Option<(u8, ChildOrValuePtr<V>)> {
|
||||
match self.variant() {
|
||||
NodeVariant::Internal4(n) => n
|
||||
.find_next_child(key_byte)
|
||||
.map(|(k, c)| (k, ChildOrValuePtr::Child(c))),
|
||||
NodeVariant::Internal16(n) => n
|
||||
.find_next_child(key_byte)
|
||||
.map(|(k, c)| (k, ChildOrValuePtr::Child(c))),
|
||||
NodeVariant::Internal48(n) => n
|
||||
.find_next_child(key_byte)
|
||||
.map(|(k, c)| (k, ChildOrValuePtr::Child(c))),
|
||||
NodeVariant::Internal256(n) => n
|
||||
.find_next_child(key_byte)
|
||||
.map(|(k, c)| (k, ChildOrValuePtr::Child(c))),
|
||||
NodeVariant::Leaf4(n) => n
|
||||
.find_next_leaf_value(key_byte)
|
||||
.map(|(k, v)| (k, ChildOrValuePtr::Value(v))),
|
||||
NodeVariant::Leaf16(n) => n
|
||||
.find_next_leaf_value(key_byte)
|
||||
.map(|(k, v)| (k, ChildOrValuePtr::Value(v))),
|
||||
NodeVariant::Leaf48(n) => n
|
||||
.find_next_leaf_value(key_byte)
|
||||
.map(|(k, v)| (k, ChildOrValuePtr::Value(v))),
|
||||
NodeVariant::Leaf256(n) => n
|
||||
.find_next_leaf_value(key_byte)
|
||||
.map(|(k, v)| (k, ChildOrValuePtr::Value(v))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
|
||||
match self.variant_mut() {
|
||||
NodeVariantMut::Internal4(n) => n.truncate_prefix(new_prefix_len),
|
||||
@@ -512,6 +540,27 @@ impl<V: Value> NodeInternal4<V> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the child whose key byte is the smallest one that is `>= min_key`,
/// together with that key byte, or `None` if no stored key qualifies.
///
/// Only the first `num_children` slots are examined; every one of them is
/// scanned and the minimum qualifying key is picked.
fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr<V>)> {
    let in_use = self.num_children as usize;
    self.child_keys[..in_use]
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, key)| key >= min_key)
        // `min_by_key` keeps the first of several equal minima.
        .min_by_key(|&(_, key)| key)
        .map(|(slot, key)| (key, self.child_ptrs[slot]))
}
|
||||
|
||||
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
|
||||
for i in 0..self.num_children as usize {
|
||||
if self.child_keys[i] == key_byte {
|
||||
@@ -584,6 +633,27 @@ impl<V: Value> NodeInternal16<V> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the child whose key byte is the smallest one that is `>= min_key`,
/// together with that key byte, or `None` if no stored key qualifies.
///
/// Only the first `num_children` slots are examined; every one of them is
/// scanned and the minimum qualifying key is picked.
fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr<V>)> {
    let in_use = self.num_children as usize;
    self.child_keys[..in_use]
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, key)| key >= min_key)
        // `min_by_key` keeps the first of several equal minima.
        .min_by_key(|&(_, key)| key)
        .map(|(slot, key)| (key, self.child_ptrs[slot]))
}
|
||||
|
||||
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
|
||||
for i in 0..self.num_children as usize {
|
||||
if self.child_keys[i] == key_byte {
|
||||
@@ -657,6 +727,16 @@ impl<V: Value> NodeInternal48<V> {
|
||||
}
|
||||
}
|
||||
|
||||
fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr<V>)> {
|
||||
for key in min_key..=u8::MAX {
|
||||
let idx = self.child_indexes[key as usize];
|
||||
if idx != INVALID_CHILD_INDEX {
|
||||
return Some((key, self.child_ptrs[idx as usize]));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
|
||||
let idx = self.child_indexes[key_byte as usize];
|
||||
if idx != INVALID_CHILD_INDEX {
|
||||
@@ -729,6 +809,15 @@ impl<V: Value> NodeInternal256<V> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first non-null child at or after `min_key`, in ascending key
/// order, together with its key byte.
///
/// `child_ptrs` is indexed directly by key byte. Returns `None` when every
/// pointer in `min_key..=u8::MAX` is null.
fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr<V>)> {
    (min_key..=u8::MAX)
        .map(|key| (key, self.child_ptrs[usize::from(key)]))
        .find(|(_, child)| !child.is_null())
}
|
||||
|
||||
fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
|
||||
let idx = key_byte as usize;
|
||||
if !self.child_ptrs[idx].is_null() {
|
||||
@@ -774,6 +863,28 @@ impl<V: Value> NodeLeaf4<V> {
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns a reference to the value whose key byte is the smallest one that
/// is `>= min_key`, together with that key byte, or `None` if no stored key
/// qualifies.
///
/// Only the first `num_values` slots are examined. Panics if the selected
/// slot holds no value.
fn find_next_leaf_value<'a: 'b, 'b>(&'a self, min_key: u8) -> Option<(u8, &'b V)> {
    let in_use = self.num_values as usize;
    self.child_keys[..in_use]
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, key)| key >= min_key)
        // `min_by_key` keeps the first of several equal minima.
        .min_by_key(|&(_, key)| key)
        .map(|(slot, key)| (key, self.child_values[slot].as_ref().unwrap()))
}
|
||||
|
||||
fn is_full(&self) -> bool {
|
||||
self.num_values == 4
|
||||
}
|
||||
@@ -853,6 +964,28 @@ impl<V: Value> NodeLeaf16<V> {
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns a reference to the value whose key byte is the smallest one that
/// is `>= min_key`, together with that key byte, or `None` if no stored key
/// qualifies.
///
/// Only the first `num_values` slots are examined. Panics if the selected
/// slot holds no value.
fn find_next_leaf_value<'a: 'b, 'b>(&'a self, min_key: u8) -> Option<(u8, &'b V)> {
    let in_use = self.num_values as usize;
    self.child_keys[..in_use]
        .iter()
        .copied()
        .enumerate()
        .filter(|&(_, key)| key >= min_key)
        // `min_by_key` keeps the first of several equal minima.
        .min_by_key(|&(_, key)| key)
        .map(|(slot, key)| (key, self.child_values[slot].as_ref().unwrap()))
}
|
||||
|
||||
fn is_full(&self) -> bool {
|
||||
self.num_values == 16
|
||||
}
|
||||
@@ -932,6 +1065,17 @@ impl<V: Value> NodeLeaf48<V> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn find_next_leaf_value<'a: 'b, 'b>(&'a self, min_key: u8) -> Option<(u8, &'b V)> {
|
||||
for key in min_key..=u8::MAX {
|
||||
let idx = self.child_indexes[key as usize];
|
||||
if idx != INVALID_CHILD_INDEX {
|
||||
return Some((key, &self.child_values[idx as usize].as_ref().unwrap()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn is_full(&self) -> bool {
|
||||
self.num_values == 48
|
||||
}
|
||||
@@ -1017,6 +1161,16 @@ impl<V: Value> NodeLeaf256<V> {
|
||||
let idx = key as usize;
|
||||
self.child_values[idx].as_ref()
|
||||
}
|
||||
|
||||
/// Returns a reference to the first stored value at or after `min_key`, in
/// ascending key order, together with its key byte.
///
/// `child_values` is indexed directly by key byte. Returns `None` when every
/// slot in `min_key..=u8::MAX` is empty.
fn find_next_leaf_value<'a: 'b, 'b>(&'a self, min_key: u8) -> Option<(u8, &'b V)> {
    (min_key..=u8::MAX).find_map(|key| {
        self.child_values[usize::from(key)]
            .as_ref()
            .map(|value| (key, value))
    })
}
|
||||
|
||||
fn is_full(&self) -> bool {
|
||||
self.num_values == 256
|
||||
}
|
||||
|
||||
@@ -94,6 +94,23 @@ impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
|
||||
}))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn find_next_child_or_value_or_restart(
|
||||
&self,
|
||||
min_key_byte: u8,
|
||||
) -> Result<Option<(u8, ChildOrValue<'e, V>)>, ConcurrentUpdateError> {
|
||||
let child_or_value = self.ptr.find_next_child_or_value(min_key_byte);
|
||||
self.ptr.lockword().check_or_restart(self.version)?;
|
||||
|
||||
match child_or_value {
|
||||
None => Ok(None),
|
||||
Some((k, ChildOrValuePtr::Value(vptr)) )=> Ok(Some((k, ChildOrValue::Value(vptr)))),
|
||||
Some((k, ChildOrValuePtr::Child(child_ptr))) => Ok(Some((k, ChildOrValue::Child(NodeRef {
|
||||
ptr: child_ptr,
|
||||
phantom: self.phantom,
|
||||
})))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn upgrade_to_write_lock_or_restart(
|
||||
self,
|
||||
|
||||
@@ -116,6 +116,8 @@ impl<'t> BlockAllocator<'t> {
|
||||
return INVALID_BLOCK;
|
||||
}
|
||||
|
||||
// TODO: this is currently unused. The slab allocator never releases blocks
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn release_block(&self, block_ptr: *mut u8) {
|
||||
let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize };
|
||||
self.release_block_internal(blockno as u64);
|
||||
|
||||
@@ -324,9 +324,8 @@ where
|
||||
}
|
||||
|
||||
impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> {
|
||||
pub fn get(&self, key: &K) -> Option<V> {
|
||||
let vref = algorithm::search(key, self.tree.root, &self.epoch_pin);
|
||||
vref.cloned()
|
||||
pub fn get(&'e self, key: &K) -> Option<&'e V> {
|
||||
algorithm::search(key, self.tree.root, &self.epoch_pin)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,9 +346,8 @@ where
|
||||
impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'t, K, V, A> {
|
||||
|
||||
/// Get a value
|
||||
pub fn get(&mut self, key: &K) -> Option<V> {
|
||||
let v = algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin);
|
||||
v.cloned()
|
||||
pub fn get(&'t mut self, key: &K) -> Option<&'t V> {
|
||||
algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin)
|
||||
}
|
||||
|
||||
/// Insert a value
|
||||
@@ -377,13 +375,11 @@ impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'t, K, V, A> {
|
||||
where
|
||||
F: FnOnce(Option<&V>) -> Option<V>,
|
||||
{
|
||||
let result = algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self);
|
||||
algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self);
|
||||
|
||||
if self.created_garbage {
|
||||
let n = self.collect_garbage();
|
||||
eprintln!("collected {n} obsolete nodes");
|
||||
let _ = self.collect_garbage();
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn remember_obsolete_node(&mut self, ptr: NodePtr<V>) {
|
||||
@@ -415,7 +411,7 @@ pub struct TreeIterator<K>
|
||||
where K: Key + for<'a> From<&'a [u8]>,
|
||||
{
|
||||
done: bool,
|
||||
next_key: Vec<u8>,
|
||||
pub next_key: Vec<u8>,
|
||||
max_key: Option<Vec<u8>>,
|
||||
|
||||
phantom_key: PhantomData<K>,
|
||||
@@ -436,12 +432,16 @@ impl<K> TreeIterator<K>
|
||||
}
|
||||
|
||||
pub fn new(range: &std::ops::Range<K>) -> TreeIterator<K> {
|
||||
TreeIterator {
|
||||
let result = TreeIterator {
|
||||
done: false,
|
||||
next_key: Vec::from(range.start.as_bytes()),
|
||||
max_key: Some(Vec::from(range.end.as_bytes())),
|
||||
phantom_key: PhantomData,
|
||||
}
|
||||
};
|
||||
assert_eq!(result.next_key.len(), K::KEY_LEN);
|
||||
assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
||||
@@ -451,27 +451,48 @@ impl<K> TreeIterator<K>
|
||||
if self.done {
|
||||
return None;
|
||||
}
|
||||
if let Some((k , v)) = algorithm::iter_next(&mut self.next_key, read_guard.tree.root, &read_guard.epoch_pin) {
|
||||
assert_eq!(k.len(), self.next_key.len());
|
||||
|
||||
// Check if we reached the end of the range
|
||||
if let Some(max_key) = &self.max_key {
|
||||
assert_eq!(k.len(), max_key.len());
|
||||
if k.as_slice() >= max_key.as_slice() {
|
||||
self.done = true;
|
||||
return None;
|
||||
let mut wrapped_around = false;
|
||||
loop {
|
||||
assert_eq!(self.next_key.len(), K::KEY_LEN);
|
||||
if let Some((k , v)) = algorithm::iter_next(&mut self.next_key, read_guard.tree.root, &read_guard.epoch_pin) {
|
||||
assert_eq!(k.len(), K::KEY_LEN);
|
||||
assert_eq!(self.next_key.len(), K::KEY_LEN);
|
||||
|
||||
// Check if we reached the end of the range
|
||||
if let Some(max_key) = &self.max_key {
|
||||
if k.as_slice() >= max_key.as_slice() {
|
||||
self.done = true;
|
||||
break None;
|
||||
}
|
||||
}
|
||||
|
||||
// increment the key
|
||||
self.next_key = k.clone();
|
||||
increment_key(self.next_key.as_mut_slice());
|
||||
let k = k.as_slice().into();
|
||||
|
||||
break Some((k, v))
|
||||
} else {
|
||||
if self.max_key.is_some() {
|
||||
self.done = true;
|
||||
} else {
|
||||
// Start from beginning
|
||||
if !wrapped_around {
|
||||
for i in 0..K::KEY_LEN {
|
||||
self.next_key[i] = 0;
|
||||
}
|
||||
wrapped_around = true;
|
||||
continue;
|
||||
} else {
|
||||
// The tree is completely empty
|
||||
// FIXME: perhaps we should remember the starting point instead.
|
||||
// Currently this will scan some ranges twice.
|
||||
break None;
|
||||
}
|
||||
}
|
||||
break None
|
||||
}
|
||||
|
||||
// increment the key
|
||||
self.next_key = k.clone();
|
||||
increment_key(self.next_key.as_mut_slice());
|
||||
let k = k.as_slice().into();
|
||||
|
||||
Some((k, v))
|
||||
} else {
|
||||
self.done = true;
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
|
||||
for (idx, k) in keys.iter().enumerate() {
|
||||
let r = tree_writer.start_read();
|
||||
let value = r.get(&(*k).into());
|
||||
assert_eq!(value, Some(idx));
|
||||
assert_eq!(value, Some(idx).as_ref());
|
||||
}
|
||||
|
||||
eprintln!("stats: {:?}", tree_writer.start_write().get_statistics());
|
||||
|
||||
Reference in New Issue
Block a user