mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 13:02:55 +00:00
feat(pageserver): add metadata key range and aux key encoding (#7401)
Extracted from https://github.com/neondatabase/neon/pull/7375. We assume everything >= 0x80 are metadata keys. AUX file keys are part of the metadata keys, and we use `0x90` as the prefix for AUX file keys. The AUX file encoding is described in the code comment. We use xxhash128 as the hash algorithm. It seems to be portable according to the introduction, > xxHash is an Extremely fast Hash algorithm, processing at RAM speed limits. Code is highly portable, and produces hashes identical across all platforms (little / big endian). ...though whether the Rust version follows the same convention is unknown and might need manual review of the library. Anyways, we can always change the hash algorithm before rolling it out in staging/end-user, and I made a quick decision to use xxhash here because it generates 128b hash + portable. We can save the discussion of which hash algorithm to use later. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, BE};
|
||||
use bytes::BufMut;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::{Oid, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::RangeInclusive;
|
||||
use std::{fmt, ops::Range};
|
||||
|
||||
use crate::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
@@ -21,9 +23,81 @@ pub struct Key {
|
||||
pub field6: u32,
|
||||
}
|
||||
|
||||
/// The storage key size.
|
||||
pub const KEY_SIZE: usize = 18;
|
||||
|
||||
/// The metadata key size. 2B fewer than the storage key size because field2 is not fully utilized.
|
||||
/// See [`Key::to_i128`] for more information on the encoding.
|
||||
pub const METADATA_KEY_SIZE: usize = 16;
|
||||
|
||||
/// The key prefix start range for the metadata keys. All keys with the first byte >= 0x80 is a metadata key.
|
||||
pub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x80;
|
||||
|
||||
/// The (reserved) key prefix of relation sizes.
|
||||
pub const RELATION_SIZE_PREFIX: u8 = 0x81;
|
||||
|
||||
/// The key prefix of AUX file keys.
|
||||
pub const AUX_KEY_PREFIX: u8 = 0x82;
|
||||
|
||||
/// Check if the key falls in the range of metadata keys.
|
||||
pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
|
||||
key[0] >= METADATA_KEY_BEGIN_PREFIX
|
||||
}
|
||||
|
||||
impl Key {
|
||||
/// Check if the key falls in the range of metadata keys.
|
||||
pub const fn is_metadata_key(&self) -> bool {
|
||||
self.field1 >= METADATA_KEY_BEGIN_PREFIX
|
||||
}
|
||||
|
||||
/// Encode a metadata key to a storage key.
|
||||
pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
|
||||
assert!(is_metadata_key_slice(key), "key not in metadata key range");
|
||||
Key {
|
||||
field1: key[0],
|
||||
field2: u16::from_be_bytes(key[1..3].try_into().unwrap()) as u32,
|
||||
field3: u32::from_be_bytes(key[3..7].try_into().unwrap()),
|
||||
field4: u32::from_be_bytes(key[7..11].try_into().unwrap()),
|
||||
field5: key[11],
|
||||
field6: u32::from_be_bytes(key[12..16].try_into().unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode a metadata key to a storage key.
|
||||
pub fn from_metadata_key(key: &[u8]) -> Self {
|
||||
Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
|
||||
}
|
||||
|
||||
/// Extract a metadata key to a writer. The result should always be 16 bytes.
|
||||
pub fn extract_metadata_key_to_writer(&self, mut writer: impl BufMut) {
|
||||
writer.put_u8(self.field1);
|
||||
assert!(self.field2 <= 0xFFFF);
|
||||
writer.put_u16(self.field2 as u16);
|
||||
writer.put_u32(self.field3);
|
||||
writer.put_u32(self.field4);
|
||||
writer.put_u8(self.field5);
|
||||
writer.put_u32(self.field6);
|
||||
}
|
||||
|
||||
/// Get the range of metadata keys.
|
||||
pub fn metadata_key_range() -> RangeInclusive<Self> {
|
||||
Key {
|
||||
field1: METADATA_KEY_BEGIN_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}..=Key {
|
||||
field1: u8::MAX,
|
||||
field2: u16::MAX as u32,
|
||||
field3: u32::MAX,
|
||||
field4: u32::MAX,
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX,
|
||||
}
|
||||
}
|
||||
|
||||
/// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
|
||||
/// As long as Neon does not support tablespace (because of lack of access to local file system),
|
||||
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
||||
@@ -81,6 +155,8 @@ impl Key {
|
||||
key
|
||||
}
|
||||
|
||||
/// Convert a 18B slice to a key. This function should not be used for metadata keys because field2 is handled differently.
|
||||
/// Use [`Key::from_metadata_key`] instead.
|
||||
pub fn from_slice(b: &[u8]) -> Self {
|
||||
Key {
|
||||
field1: b[0],
|
||||
@@ -92,6 +168,8 @@ impl Key {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a key to a 18B slice. This function should not be used for metadata keys because field2 is handled differently.
|
||||
/// Use [`Key::extract_metadata_key_to_writer`] instead.
|
||||
pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
|
||||
buf[0] = self.field1;
|
||||
BE::write_u32(&mut buf[1..5], self.field2);
|
||||
@@ -558,11 +636,14 @@ impl std::str::FromStr for Key {
|
||||
mod tests {
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::key::is_metadata_key_slice;
|
||||
use crate::key::Key;
|
||||
|
||||
use rand::Rng;
|
||||
use rand::SeedableRng;
|
||||
|
||||
use super::AUX_KEY_PREFIX;
|
||||
|
||||
#[test]
|
||||
fn display_fromstr_bijection() {
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
@@ -578,4 +659,16 @@ mod tests {
|
||||
|
||||
assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_metadata_keys() {
|
||||
let mut metadata_key = vec![AUX_KEY_PREFIX];
|
||||
metadata_key.extend_from_slice(&[0xFF; 15]);
|
||||
let encoded_key = Key::from_metadata_key(&metadata_key);
|
||||
let mut output_key = Vec::new();
|
||||
encoded_key.extract_metadata_key_to_writer(&mut output_key);
|
||||
assert_eq!(metadata_key, output_key);
|
||||
assert!(encoded_key.is_metadata_key());
|
||||
assert!(is_metadata_key_slice(&metadata_key));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user