From 750d65fa77c6b93002dfaea5219bf4c820808616 Mon Sep 17 00:00:00 2001
From: Vlad Lazar
Date: Fri, 25 Apr 2025 16:38:08 +0200
Subject: [PATCH] [WIP] Add logs for pg-regress

---
 pageserver/src/page_service.rs      |  27 ++++--
 pageserver/src/pgdatadir_mapping.rs | 124 ++++++++++++++++++++++++++--
 2 files changed, 141 insertions(+), 10 deletions(-)

diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index d1a210a786..59b0d802cd 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -21,7 +21,7 @@ use pageserver_api::config::{
     PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
     PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
 };
-use pageserver_api::key::rel_block_to_key;
+use pageserver_api::key::{Key, rel_block_to_key};
 use pageserver_api::models::{
     self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
     PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
@@ -29,7 +29,7 @@ use pageserver_api::models::{
     PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
     PagestreamProtocolVersion, PagestreamRequest, TenantState,
 };
-use pageserver_api::reltag::SlruKind;
+use pageserver_api::reltag::{RelTag, SlruKind};
 use pageserver_api::shard::TenantShardId;
 use postgres_backend::{
     AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
@@ -1035,10 +1035,10 @@ impl PageServerHandler {
         // avoid a somewhat costly Span::record() by constructing the entire span in one go.
         macro_rules! mkspan {
             (before shard routing) => {{
-                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
+                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since)
             }};
             ($shard_id:expr) => {{
-                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
+                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since, shard_id = %$shard_id)
             }};
         }
 
@@ -1140,9 +1140,10 @@ impl PageServerHandler {
             .await?;
 
         // We're holding the Handle
+        let last_record_lsn = shard.get_last_record_lsn();
         let effective_request_lsn = match Self::effective_request_lsn(
             &shard,
-            shard.get_last_record_lsn(),
+            last_record_lsn,
             req.hdr.request_lsn,
             req.hdr.not_modified_since,
             &shard.get_applied_gc_cutoff_lsn(),
@@ -1153,6 +1154,22 @@ impl PageServerHandler {
             }
         };
 
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+        if req.rel == trouble_rel {
+            tracing::info!(
+                request_lsn=%req.hdr.request_lsn,
+                not_modified_since_lsn=%req.hdr.not_modified_since,
+                %last_record_lsn,
+                "effective_request_lsn for {} is {}", req.rel, effective_request_lsn
+            );
+        }
+
         BatchedFeMessage::GetPage {
             span,
             shard: shard.downgrade(),
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index 81e548a095..1fb98a55bd 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -185,6 +185,7 @@ impl Timeline {
             pending_directory_entries: Vec::new(),
             pending_metadata_bytes: 0,
             lsn,
+            extra_log: false,
         }
     }
 
@@ -265,6 +266,14 @@ impl Timeline {
         let mut req_keyspaces: HashMap =
             HashMap::with_capacity(pages.len());
 
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
         for (response_slot_idx, (tag, blknum, lsn, ctx)) in pages.enumerate() {
             if tag.relnode == 0 {
                 result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
@@ -275,6 +284,14 @@ impl Timeline {
                 continue;
             }
 
+            if *tag == trouble_rel {
+                tracing::info!(
+                    "Getting rel size for {} at LSN {}",
+                    rel_block_to_key(*tag, *blknum),
+                    lsn
+                );
+            }
+
             let nblocks = match self
                 .get_rel_size(*tag, Version::Lsn(lsn), &ctx)
                 .maybe_perf_instrument(&ctx, |crnt_perf_span| {
@@ -1402,6 +1419,8 @@ pub struct DatadirModification<'a> {
 
     /// An **approximation** of how many metadata bytes will be written to the EphemeralFile.
     pending_metadata_bytes: usize,
+
+    extra_log: bool,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -1620,6 +1639,32 @@ impl DatadirModification<'_> {
     ) -> Result<(), WalIngestError> {
         let mut gaps_at_lsns = Vec::default();
 
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
+        for meta in batch.metadata.iter().filter_map(|m| match m {
+            ValueMeta::Serialized(serialized_value_meta) => Some(serialized_value_meta),
+            ValueMeta::Observed(_) => None,
+        }) {
+            let key = Key::from_compact(meta.key);
+            let rel = RelTag {
+                spcnode: key.field2,
+                dbnode: key.field3,
+                relnode: key.field4,
+                forknum: key.field5,
+            };
+
+            if rel == trouble_rel {
+                tracing::info!("Put for {key} at LSN {}", meta.lsn);
+                self.extra_log = true;
+            }
+        }
+
         for meta in batch.metadata.iter() {
             let key = Key::from_compact(meta.key());
             let (rel, blkno) = key
@@ -1950,6 +1995,19 @@ impl DatadirModification<'_> {
                 "invalid relnode"
             )))?;
         }
+
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
+        if rel == trouble_rel {
+            self.extra_log = true;
+        }
+
         // It's possible that this is the first rel for this db in this
         // tablespace. Create the reldir entry for it if so.
         let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
@@ -1969,6 +2027,10 @@ impl DatadirModification<'_> {
             true
         };
 
+        if rel == trouble_rel {
+            tracing::info!(%dbdir_exists, "Maybe created db dir for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+        }
+
         let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
         let mut rel_dir = if !dbdir_exists {
             // Create the RelDirectory
@@ -2014,6 +2076,10 @@ impl DatadirModification<'_> {
             }
             self.pending_directory_entries
                 .push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));
+
+            if rel == trouble_rel {
+                tracing::info!(%dbdir_exists, "Created v2 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+            }
         } else {
             // Add the new relation to the rel directory entry, and write it back
             if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
@@ -2029,6 +2095,10 @@ impl DatadirModification<'_> {
                 rel_dir_key,
                 Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
             );
+
+            if rel == trouble_rel {
+                tracing::info!(%dbdir_exists, "Created v1 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+            }
         }
 
         // Put size
@@ -2463,11 +2533,19 @@ impl DatadirModification<'_> {
         };
 
         if let Some(batch) = maybe_batch {
-            tracing::debug!(
-                "Flushing batch with max_lsn={}. Last record LSN is {}",
-                batch.max_lsn,
-                self.tline.get_last_record_lsn()
-            );
+            if self.extra_log {
+                tracing::info!(
+                    "Flushing batch with max_lsn={}. Last record LSN is {}",
+                    batch.max_lsn,
+                    self.tline.get_last_record_lsn()
+                );
+            } else {
+                tracing::debug!(
+                    "Flushing batch with max_lsn={}. Last record LSN is {}",
+                    batch.max_lsn,
+                    self.tline.get_last_record_lsn()
+                );
+            }
 
             // This bails out on first error without modifying pending_updates.
             // That's Ok, cf this function's doc comment.
@@ -2501,6 +2579,14 @@ impl DatadirModification<'_> {
 
         self.pending_metadata_bytes = 0;
 
+        if self.extra_log {
+            tracing::info!(
+                "Flushed batch. Last record LSN is {}",
+                self.tline.get_last_record_lsn()
+            );
+            self.extra_log = false;
+        }
+
         Ok(())
     }
 
@@ -2591,6 +2677,26 @@ impl DatadirModification<'_> {
             .pending_data_batch
             .get_or_insert_with(SerializedValueBatch::default);
         batch.put(key, val, self.lsn);
+
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+        let key = Key::from_compact(key);
+        let rel = RelTag {
+            spcnode: key.field2,
+            dbnode: key.field3,
+            relnode: key.field4,
+            forknum: key.field5,
+        };
+
+        if rel == trouble_rel {
+            tracing::info!("Put for {key} at LSN {}", self.lsn);
+            self.extra_log = true;
+        }
     }
 
     fn put_metadata(&mut self, key: CompactKey, val: Value) {
@@ -2617,6 +2723,14 @@ impl DatadirModification<'_> {
         if key == CHECKPOINT_KEY.to_compact() {
             tracing::debug!("Checkpoint key added to pending with size {val_serialized_size}");
         }
+
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF")
+            .unwrap()
+            .to_compact();
+        if key == trouble_key {
+            tracing::info!("Put for {trouble_key} at LSN {}", self.lsn);
+            self.extra_log = true;
+        }
     }
 
     fn delete(&mut self, key_range: Range<Key>) {