diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index f0cd713c38..328dea5dec 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -706,7 +706,7 @@ pub fn repl_origin_key_range() -> Range { /// Non inherited range for vectored get. pub const NON_INHERITED_RANGE: Range = AUX_FILES_KEY..AUX_FILES_KEY.next(); /// Sparse keyspace range for vectored get. Missing key error will be ignored for this range. -pub const NON_INHERITED_SPARSE_RANGE: Range = Key::metadata_key_range(); +pub const SPARSE_RANGE: Range = Key::metadata_key_range(); impl Key { // AUX_FILES currently stores only data for logical replication (slots etc), and @@ -714,7 +714,42 @@ impl Key { // switch (and generally it likely should be optional), so ignore these. #[inline(always)] pub fn is_inherited_key(self) -> bool { - !NON_INHERITED_RANGE.contains(&self) && !NON_INHERITED_SPARSE_RANGE.contains(&self) + if self.is_sparse() { + self.is_inherited_sparse_key() + } else { + !NON_INHERITED_RANGE.contains(&self) + } + } + + #[inline(always)] + pub fn is_sparse(self) -> bool { + self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX + } + + /// Check if the key belongs to the inherited keyspace. 
+ fn is_inherited_sparse_key(self) -> bool { + debug_assert!(self.is_sparse()); + self.field1 == RELATION_SIZE_PREFIX + } + + pub fn sparse_non_inherited_keyspace() -> Range { + // The two keys are adjacent; if we have non-adjacent keys in the future, we should return a keyspace + debug_assert_eq!(AUX_KEY_PREFIX + 1, REPL_ORIGIN_KEY_PREFIX); + Key { + field1: AUX_KEY_PREFIX, + field2: 0, + field3: 0, + field4: 0, + field5: 0, + field6: 0, + }..Key { + field1: REPL_ORIGIN_KEY_PREFIX + 1, + field2: 0, + field3: 0, + field4: 0, + field5: 0, + field6: 0, + } } #[inline(always)] diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 2928c435cb..070593b104 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5682,7 +5682,7 @@ mod tests { use bytes::{Bytes, BytesMut}; use hex_literal::hex; use itertools::Itertools; - use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE}; + use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings}; use pageserver_api::value::Value; @@ -7741,7 +7741,18 @@ mod tests { let base_key = Key::from_hex("620000000033333333444444445500000000").unwrap(); let base_key_child = Key::from_hex("620000000033333333444444445500000001").unwrap(); let base_key_nonexist = Key::from_hex("620000000033333333444444445500000002").unwrap(); + let base_key_overwrite = Key::from_hex("620000000033333333444444445500000003").unwrap(); + + let base_inherited_key = Key::from_hex("610000000033333333444444445500000000").unwrap(); + let base_inherited_key_child = + Key::from_hex("610000000033333333444444445500000001").unwrap(); + let base_inherited_key_nonexist = + Key::from_hex("610000000033333333444444445500000002").unwrap(); + let base_inherited_key_overwrite = + Key::from_hex("610000000033333333444444445500000003").unwrap(); + assert_eq!(base_key.field1, 
AUX_KEY_PREFIX); // in case someone accidentally changed the prefix... + assert_eq!(base_inherited_key.field1, RELATION_SIZE_PREFIX); let tline = tenant .create_test_timeline_with_layers( @@ -7750,7 +7761,18 @@ mod tests { DEFAULT_PG_VERSION, &ctx, Vec::new(), // delta layers - vec![(Lsn(0x20), vec![(base_key, test_img("metadata key 1"))])], // image layers + vec![( + Lsn(0x20), + vec![ + (base_inherited_key, test_img("metadata inherited key 1")), + ( + base_inherited_key_overwrite, + test_img("metadata key overwrite 1a"), + ), + (base_key, test_img("metadata key 1")), + (base_key_overwrite, test_img("metadata key overwrite 1b")), + ], + )], // image layers Lsn(0x20), // it's fine to not advance LSN to 0x30 while using 0x30 to get below because `get_vectored_impl` does not wait for LSN ) .await?; @@ -7764,7 +7786,18 @@ mod tests { Vec::new(), // delta layers vec![( Lsn(0x30), - vec![(base_key_child, test_img("metadata key 2"))], + vec![ + ( + base_inherited_key_child, + test_img("metadata inherited key 2"), + ), + ( + base_inherited_key_overwrite, + test_img("metadata key overwrite 2a"), + ), + (base_key_child, test_img("metadata key 2")), + (base_key_overwrite, test_img("metadata key overwrite 2b")), + ], )], // image layers Lsn(0x30), ) @@ -7786,6 +7819,26 @@ mod tests { get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx).await?, None ); + assert_eq!( + get_vectored_impl_wrapper(&tline, base_key_overwrite, lsn, &ctx).await?, + Some(test_img("metadata key overwrite 1b")) + ); + assert_eq!( + get_vectored_impl_wrapper(&tline, base_inherited_key, lsn, &ctx).await?, + Some(test_img("metadata inherited key 1")) + ); + assert_eq!( + get_vectored_impl_wrapper(&tline, base_inherited_key_child, lsn, &ctx).await?, + None + ); + assert_eq!( + get_vectored_impl_wrapper(&tline, base_inherited_key_nonexist, lsn, &ctx).await?, + None + ); + assert_eq!( + get_vectored_impl_wrapper(&tline, base_inherited_key_overwrite, lsn, &ctx).await?, + Some(test_img("metadata key 
overwrite 1a")) + ); // test vectored get on child timeline assert_eq!( @@ -7800,6 +7853,82 @@ mod tests { get_vectored_impl_wrapper(&child, base_key_nonexist, lsn, &ctx).await?, None ); + assert_eq!( + get_vectored_impl_wrapper(&child, base_inherited_key, lsn, &ctx).await?, + Some(test_img("metadata inherited key 1")) + ); + assert_eq!( + get_vectored_impl_wrapper(&child, base_inherited_key_child, lsn, &ctx).await?, + Some(test_img("metadata inherited key 2")) + ); + assert_eq!( + get_vectored_impl_wrapper(&child, base_inherited_key_nonexist, lsn, &ctx).await?, + None + ); + assert_eq!( + get_vectored_impl_wrapper(&child, base_key_overwrite, lsn, &ctx).await?, + Some(test_img("metadata key overwrite 2b")) + ); + assert_eq!( + get_vectored_impl_wrapper(&child, base_inherited_key_overwrite, lsn, &ctx).await?, + Some(test_img("metadata key overwrite 2a")) + ); + + // test vectored scan on parent timeline + let mut reconstruct_state = ValuesReconstructState::new(); + let res = tline + .get_vectored_impl( + KeySpace::single(Key::metadata_key_range()), + lsn, + &mut reconstruct_state, + &ctx, + ) + .await?; + + assert_eq!( + res.into_iter() + .map(|(k, v)| (k, v.unwrap())) + .collect::>(), + vec![ + (base_inherited_key, test_img("metadata inherited key 1")), + ( + base_inherited_key_overwrite, + test_img("metadata key overwrite 1a") + ), + (base_key, test_img("metadata key 1")), + (base_key_overwrite, test_img("metadata key overwrite 1b")), + ] + ); + + // test vectored scan on child timeline + let mut reconstruct_state = ValuesReconstructState::new(); + let res = child + .get_vectored_impl( + KeySpace::single(Key::metadata_key_range()), + lsn, + &mut reconstruct_state, + &ctx, + ) + .await?; + + assert_eq!( + res.into_iter() + .map(|(k, v)| (k, v.unwrap())) + .collect::>(), + vec![ + (base_inherited_key, test_img("metadata inherited key 1")), + ( + base_inherited_key_child, + test_img("metadata inherited key 2") + ), + ( + base_inherited_key_overwrite, + 
test_img("metadata key overwrite 2a") + ), + (base_key_child, test_img("metadata key 2")), + (base_key_overwrite, test_img("metadata key overwrite 2b")), + ] + ); Ok(()) } diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index b8206fca5a..3913637ca0 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -12,7 +12,7 @@ pub mod merge_iterator; use crate::context::{AccessStatsBehavior, RequestContext}; use bytes::Bytes; -use pageserver_api::key::{Key, NON_INHERITED_SPARSE_RANGE}; +use pageserver_api::key::Key; use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; use pageserver_api::record::NeonWalRecord; use pageserver_api::value::Value; @@ -209,7 +209,7 @@ impl ValuesReconstructState { .keys .entry(*key) .or_insert(Ok(VectoredValueReconstructState::default())); - let is_sparse_key = NON_INHERITED_SPARSE_RANGE.contains(key); + let is_sparse_key = key.is_sparse(); if let Ok(state) = state { let key_done = match state.situation { ValueReconstructSituation::Complete => { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index c1b71262e0..f7227efeba 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -27,7 +27,7 @@ use pageserver_api::{ config::tenant_conf_defaults::DEFAULT_COMPACTION_THRESHOLD, key::{ KEY_SIZE, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE, - NON_INHERITED_SPARSE_RANGE, + SPARSE_RANGE, }, keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}, models::{ @@ -3221,7 +3221,7 @@ impl Timeline { // We don't return a blanket [`GetVectoredError::MissingKey`] to avoid // stalling compaction. 
keyspace.remove_overlapping_with(&KeySpace { - ranges: vec![NON_INHERITED_RANGE, NON_INHERITED_SPARSE_RANGE], + ranges: vec![NON_INHERITED_RANGE, Key::sparse_non_inherited_keyspace()], }); // Keyspace is fully retrieved @@ -3242,7 +3242,11 @@ impl Timeline { // keys from `keyspace`, we expect there to be no overlap between it and the image covered key // space. If that's not the case, we had at least one key encounter a gap in the image layer // and stop the search as a result of that. - let removed = keyspace.remove_overlapping_with(&image_covered_keyspace); + let mut removed = keyspace.remove_overlapping_with(&image_covered_keyspace); + // Do not fire missing key error for sparse keys. + removed.remove_overlapping_with(&KeySpace { + ranges: vec![SPARSE_RANGE], + }); if !removed.is_empty() { break Some(removed); } @@ -3257,6 +3261,21 @@ impl Timeline { timeline = &*timeline_owned; }; + // Remove sparse keys from the keyspace so that it doesn't fire errors. + let missing_keyspace = if let Some(missing_keyspace) = missing_keyspace { + let mut missing_keyspace = missing_keyspace; + missing_keyspace.remove_overlapping_with(&KeySpace { + ranges: vec![SPARSE_RANGE], + }); + if missing_keyspace.is_empty() { + None + } else { + Some(missing_keyspace) + } + } else { + None + }; + if let Some(missing_keyspace) = missing_keyspace { return Err(GetVectoredError::MissingKey(MissingKeyError { key: missing_keyspace.start().unwrap(), /* better if we can store the full keyspace */