diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs
index 77da58d63e..77d744e4da 100644
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -108,14 +108,43 @@ impl Key {
         }
     }
 
+    /// This function checks more extensively what keys we can take on the write path.
+    /// A key whose `field1` is 0 is rejected unless its `field2` is the global or
+    /// default tablespace OID, or 0. Keys that are not i128 representable are rejected too.
+    #[allow(dead_code)]
+    pub fn is_valid_key_on_write_path_strong(&self) -> bool {
+        use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
+        if !self.is_i128_representable() {
+            return false;
+        }
+        if self.field1 == 0
+            && !(self.field2 == GLOBALTABLESPACE_OID
+                || self.field2 == DEFAULTTABLESPACE_OID
+                || self.field2 == 0)
+        {
+            return false; // User defined tablespaces are not supported
+        }
+        true
+    }
+
+    /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
+    /// checks if the key is i128 representable. Note that some keys can be successfully
+    /// ingested into the pageserver, but will cause errors on generating basebackup.
+    pub fn is_valid_key_on_write_path(&self) -> bool {
+        self.is_i128_representable()
+    }
+
+    /// Returns true if `field2` is a value accepted by [`Key::to_i128`]: at most
+    /// `0xFFFF`, or exactly `0xFFFFFFFF` or `0x22222222`.
+    pub fn is_i128_representable(&self) -> bool {
+        self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
+    }
+
     /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
     /// As long as Neon does not support tablespace (because of lack of access to local file system),
     /// we can assume that only some predefined namespace OIDs are used which can fit in u16
     pub fn to_i128(&self) -> i128 {
-        assert!(
-            self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222,
-            "invalid key: {self}",
-        );
+        assert!(self.is_i128_representable(), "invalid key: {self}");
         (((self.field1 & 0x7F) as i128) << 120)
             | (((self.field2 & 0xFFFF) as i128) << 104)
             | ((self.field3 as i128) << 72)
diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs
index a183a3968d..e0f978eaa2 100644
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -90,6 +90,7 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
     for (k, v) in all {
         let value = cursor.read_blob(v.pos(), ctx).await?;
         println!("key:{} value_len:{}", k, value.len());
+        assert!(k.is_i128_representable(), "invalid key: {k}");
     }
     // TODO(chi): special handling for last key?
     Ok(())
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index b7110d69b6..edcbac970b 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -12,7 +12,7 @@ use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
 use crate::walrecord::NeonWalRecord;
 use crate::{aux_file, repository::*};
-use anyhow::{ensure, Context};
+use anyhow::{bail, ensure, Context};
 use bytes::{Buf, Bytes, BytesMut};
 use enum_map::Enum;
 use pageserver_api::key::{
@@ -1791,6 +1791,11 @@ impl<'a> DatadirModification<'a> {
         // Flush relation and SLRU data blocks, keep metadata.
         let mut retained_pending_updates = HashMap::<_, Vec<_>>::new();
         for (key, values) in self.pending_updates.drain() {
+            if !key.is_valid_key_on_write_path() {
+                bail!(
+                    "the request contains data not supported by pageserver at DatadirModification: {}", key
+                );
+            }
             let mut write_batch = Vec::new();
             for (lsn, value_ser_size, value) in values {
                 if key.is_rel_block_key() || key.is_slru_block_key() {
@@ -1843,10 +1848,13 @@ impl<'a> DatadirModification<'a> {
                 .drain()
                 .flat_map(|(key, values)| {
                     values.into_iter().map(move |(lsn, val_ser_size, value)| {
-                        (key.to_compact(), lsn, val_ser_size, value)
+                        if !key.is_valid_key_on_write_path() {
+                            bail!("the request contains data not supported by pageserver at DatadirModification: {}", key);
+                        }
+                        Ok((key.to_compact(), lsn, val_ser_size, value))
                     })
                 })
-                .collect::<Vec<_>>();
+                .collect::<anyhow::Result<Vec<_>>>()?;
 
             writer.put_batch(batch, ctx).await?;
         }
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 63d59e06a5..35e0825bac 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -5746,6 +5746,12 @@ impl<'a> TimelineWriter<'a> {
         ctx: &RequestContext,
     ) -> anyhow::Result<()> {
         use utils::bin_ser::BeSer;
+        if !key.is_valid_key_on_write_path() {
+            bail!(
+                "the request contains data not supported by pageserver at TimelineWriter::put: {}",
+                key
+            );
+        }
         let val_ser_size = value.serialized_size().unwrap() as usize;
         self.put_batch(
             vec![(key.to_compact(), lsn, val_ser_size, value.clone())],