Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-25 22:30:38 +00:00

Compare commits: release-84 ... vlad/debug (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 750d65fa77 | |
| | 2b0248cd76 | |
```diff
@@ -21,7 +21,7 @@ use pageserver_api::config::{
     PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
     PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
 };
-use pageserver_api::key::rel_block_to_key;
+use pageserver_api::key::{Key, rel_block_to_key};
 use pageserver_api::models::{
     self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
     PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
```
```diff
@@ -29,7 +29,7 @@ use pageserver_api::models::{
     PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
     PagestreamProtocolVersion, PagestreamRequest, TenantState,
 };
-use pageserver_api::reltag::SlruKind;
+use pageserver_api::reltag::{RelTag, SlruKind};
 use pageserver_api::shard::TenantShardId;
 use postgres_backend::{
     AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
```
```diff
@@ -1035,10 +1035,10 @@ impl PageServerHandler {
         // avoid a somewhat costly Span::record() by constructing the entire span in one go.
         macro_rules! mkspan {
             (before shard routing) => {{
-                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
+                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since)
             }};
             ($shard_id:expr) => {{
-                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
+                tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since, shard_id = %$shard_id)
             }};
         }
```
```diff
@@ -1140,9 +1140,10 @@ impl PageServerHandler {
             .await?;

         // We're holding the Handle
+        let last_record_lsn = shard.get_last_record_lsn();
         let effective_request_lsn = match Self::effective_request_lsn(
             &shard,
-            shard.get_last_record_lsn(),
+            last_record_lsn,
             req.hdr.request_lsn,
             req.hdr.not_modified_since,
             &shard.get_applied_gc_cutoff_lsn(),
```
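This hoist lets the same `last_record_lsn` value be reused by the new logging below; calling the getter twice could observe two different values on a busy timeline. For readers unfamiliar with the two LSNs carried by a GetPage request, here is a minimal sketch of the selection logic, assuming `Lsn::MAX` means "read latest" (simplified; the real `effective_request_lsn` also validates against the GC cutoff):

```rust
/// Minimal sketch, not the pageserver's actual implementation.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Lsn(u64);

impl Lsn {
    const MAX: Lsn = Lsn(u64::MAX);
}

fn effective_request_lsn(request_lsn: Lsn, not_modified_since: Lsn, last_record_lsn: Lsn) -> Lsn {
    if request_lsn == Lsn::MAX {
        // "Latest" read: serve at the last ingested record. not_modified_since
        // promises the page did not change after that point, so it is the
        // earliest LSN at which the answer is still valid.
        std::cmp::max(last_record_lsn, not_modified_since)
    } else {
        // Point-in-time read (e.g. from a static endpoint).
        request_lsn
    }
}
```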
```diff
@@ -1153,6 +1154,22 @@ impl PageServerHandler {
             }
         };

+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+        if req.rel == trouble_rel {
+            tracing::info!(
+                request_lsn=%req.hdr.request_lsn,
+                not_modified_since_lsn=%req.hdr.not_modified_since,
+                %last_record_lsn,
+                "effective_request_lsn for {} is {}", key, effective_request_lsn
+            );
+        }
+
         BatchedFeMessage::GetPage {
             span,
             shard: shard.downgrade(),
```
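The same `Key` to `RelTag` projection recurs throughout this patch: the debug target is identified by one hardcoded key, and each instrumented site rebuilds the owning relation from the key's fields. A hypothetical helper (not part of the patch) capturing the repeated pattern, assuming the `pageserver_api` types used above:

```rust
use pageserver_api::key::Key;
use pageserver_api::reltag::RelTag;

/// Hypothetical helper, not in the patch: fields 2..=5 of a rel-block key
/// encode the tablespace, database, relation, and fork of the owning relation.
fn rel_tag_of(key: &Key) -> RelTag {
    RelTag {
        spcnode: key.field2,
        dbnode: key.field3,
        relnode: key.field4,
        forknum: key.field5,
    }
}
```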
```diff
@@ -185,6 +185,7 @@ impl Timeline {
             pending_directory_entries: Vec::new(),
             pending_metadata_bytes: 0,
             lsn,
+            extra_log: false,
         }
     }

```
```diff
@@ -265,6 +266,14 @@ impl Timeline {
         let mut req_keyspaces: HashMap<Lsn, KeySpaceRandomAccum> =
             HashMap::with_capacity(pages.len());

+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
         for (response_slot_idx, (tag, blknum, lsn, ctx)) in pages.enumerate() {
             if tag.relnode == 0 {
                 result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
```
```diff
@@ -275,6 +284,14 @@ impl Timeline {
                 continue;
             }

+            if *tag == trouble_rel {
+                tracing::info!(
+                    "Getting rel size for {} at LSN {}",
+                    rel_block_to_key(*tag, *blknum),
+                    lsn
+                );
+            }
+
             let nblocks = match self
                 .get_rel_size(*tag, Version::Lsn(lsn), &ctx)
                 .maybe_perf_instrument(&ctx, |crnt_perf_span| {
```
```diff
@@ -1084,17 +1101,8 @@ impl Timeline {
         let mut result = HashMap::new();
         for (k, v) in kv {
             let v = v?;
-            if v.is_empty() {
-                // This is a tombstone -- we can skip it.
-                // Originally, the replorigin code uses `Lsn::INVALID` to represent a tombstone. However, as it part of
-                // the sparse keyspace and the sparse keyspace uses an empty image to universally represent a tombstone,
-                // we also need to consider that. Such tombstones might be written on the detach ancestor code path to
-                // avoid the value going into the child branch. (See [`crate::tenant::timeline::detach_ancestor::generate_tombstone_image_layer`] for more details.)
-                continue;
-            }
             let origin_id = k.field6 as RepOriginId;
-            let origin_lsn = Lsn::des(&v)
-                .with_context(|| format!("decode replorigin value for {}: {v:?}", origin_id))?;
-            result.insert(origin_id, origin_lsn);
+            let origin_lsn = Lsn::des(&v).unwrap();
+            if origin_lsn != Lsn::INVALID {
+                result.insert(origin_id, origin_lsn);
+            }
         }
```
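Note that this hunk reverts tombstone handling: with the empty-image check gone, `Lsn::des(&v).unwrap()` will panic if it ever meets the empty value that the detach-ancestor path writes (per the removed comment), so this reads as a temporary debug revert rather than a production change. A sketch of a decode that handles both tombstone encodings, under the assumptions that an LSN serializes to exactly 8 bytes and little-endian byte order:

```rust
use bytes::Bytes;

/// Sketch only. Assumes an Lsn serializes to exactly 8 little-endian bytes,
/// and that 0 (`Lsn::INVALID`) and an empty image are the two tombstone
/// encodings described by the removed comment.
fn decode_origin_lsn(v: &Bytes) -> Option<u64> {
    if v.is_empty() {
        return None; // sparse-keyspace tombstone (detach-ancestor path)
    }
    let raw: [u8; 8] = v.as_ref().try_into().ok()?;
    let lsn = u64::from_le_bytes(raw); // byte order is an assumption here
    (lsn != 0).then_some(lsn) // Lsn::INVALID tombstone
}
```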
```diff
@@ -1411,6 +1419,8 @@ pub struct DatadirModification<'a> {

     /// An **approximation** of how many metadata bytes will be written to the EphemeralFile.
     pending_metadata_bytes: usize,
+
+    extra_log: bool,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
```
```diff
@@ -1629,6 +1639,32 @@ impl DatadirModification<'_> {
     ) -> Result<(), WalIngestError> {
         let mut gaps_at_lsns = Vec::default();

+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
+        for meta in batch.metadata.iter().filter_map(|m| match m {
+            ValueMeta::Serialized(serialized_value_meta) => Some(serialized_value_meta),
+            ValueMeta::Observed(_) => None,
+        }) {
+            let key = Key::from_compact(meta.key);
+            let rel = RelTag {
+                spcnode: key.field2,
+                dbnode: key.field3,
+                relnode: key.field4,
+                forknum: key.field5,
+            };
+
+            if rel == trouble_rel {
+                tracing::info!("Put for {key} at LSN {}", meta.lsn);
+                self.extra_log = true;
+            }
+        }
+
         for meta in batch.metadata.iter() {
             let key = Key::from_compact(meta.key());
             let (rel, blkno) = key
```
```diff
@@ -1959,6 +1995,19 @@ impl DatadirModification<'_> {
                 "invalid relnode"
             )))?;
         }

+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+
+        if rel == trouble_rel {
+            self.extra_log = true;
+        }
+
         // It's possible that this is the first rel for this db in this
         // tablespace. Create the reldir entry for it if so.
         let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
```
```diff
@@ -1978,6 +2027,10 @@ impl DatadirModification<'_> {
             true
         };

+        if rel == trouble_rel {
+            tracing::info!(%dbdir_exists, "Maybe created db dir for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+        }
+
         let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
         let mut rel_dir = if !dbdir_exists {
             // Create the RelDirectory
```
```diff
@@ -2023,6 +2076,10 @@ impl DatadirModification<'_> {
             }
             self.pending_directory_entries
                 .push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));
+
+            if rel == trouble_rel {
+                tracing::info!(%dbdir_exists, "Created v2 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+            }
         } else {
             // Add the new relation to the rel directory entry, and write it back
             if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
```
```diff
@@ -2038,6 +2095,10 @@ impl DatadirModification<'_> {
                 rel_dir_key,
                 Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
             );
+
+            if rel == trouble_rel {
+                tracing::info!(%dbdir_exists, "Created v1 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
+            }
         }

         // Put size
```
```diff
@@ -2472,11 +2533,19 @@ impl DatadirModification<'_> {
         };

         if let Some(batch) = maybe_batch {
-            tracing::debug!(
-                "Flushing batch with max_lsn={}. Last record LSN is {}",
-                batch.max_lsn,
-                self.tline.get_last_record_lsn()
-            );
+            if self.extra_log {
+                tracing::info!(
+                    "Flushing batch with max_lsn={}. Last record LSN is {}",
+                    batch.max_lsn,
+                    self.tline.get_last_record_lsn()
+                );
+            } else {
+                tracing::debug!(
+                    "Flushing batch with max_lsn={}. Last record LSN is {}",
+                    batch.max_lsn,
+                    self.tline.get_last_record_lsn()
+                );
+            }

             // This bails out on first error without modifying pending_updates.
             // That's Ok, cf this function's doc comment.
```
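The duplicated macro body is hard to avoid: `tracing`'s `debug!`/`info!` macros fix the level at compile time, so choosing a level at runtime means branching like this. The patch threads an `extra_log` latch through `DatadirModification`: set by any put touching the trouble relation, consumed at flush time. A minimal sketch of the pattern (the struct is a stand-in for `DatadirModification`; names follow the diff):

```rust
/// Minimal sketch of the patch's `extra_log` latch, not the real type.
struct Modification {
    extra_log: bool,
}

impl Modification {
    fn put(&mut self, touches_trouble_rel: bool) {
        if touches_trouble_rel {
            self.extra_log = true; // latch: stays set until the next flush
        }
    }

    fn flush(&mut self) {
        if self.extra_log {
            println!("info: flushing batch touching the trouble relation");
            self.extra_log = false; // reset so the next batch logs quietly again
        } else {
            println!("debug: flushing batch");
        }
    }
}
```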
```diff
@@ -2510,6 +2579,14 @@ impl DatadirModification<'_> {

         self.pending_metadata_bytes = 0;

+        if self.extra_log {
+            tracing::info!(
+                "Flushed batch. Last record LSN is {}",
+                self.tline.get_last_record_lsn()
+            );
+            self.extra_log = false;
+        }
+
         Ok(())
     }

```
```diff
@@ -2587,11 +2664,6 @@ impl DatadirModification<'_> {
         }
     }

-    #[cfg(test)]
-    pub fn put_for_unit_test(&mut self, key: Key, val: Value) {
-        self.put(key, val);
-    }
-
     fn put(&mut self, key: Key, val: Value) {
         if Self::is_data_key(&key) {
             self.put_data(key.to_compact(), val)
```
```diff
@@ -2605,6 +2677,26 @@ impl DatadirModification<'_> {
             .pending_data_batch
             .get_or_insert_with(SerializedValueBatch::default);
         batch.put(key, val, self.lsn);
+
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
+        let trouble_rel = RelTag {
+            spcnode: trouble_key.field2,
+            dbnode: trouble_key.field3,
+            relnode: trouble_key.field4,
+            forknum: trouble_key.field5,
+        };
+        let key = Key::from_compact(key);
+        let rel = RelTag {
+            spcnode: key.field2,
+            dbnode: key.field3,
+            relnode: key.field4,
+            forknum: key.field5,
+        };
+
+        if rel == trouble_rel {
+            tracing::info!("Put for {key} at LSN {}", self.lsn);
+            self.extra_log = true;
+        }
     }

     fn put_metadata(&mut self, key: CompactKey, val: Value) {
```
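`put_data` sits on the WAL-ingest hot path, and this instrumentation re-parses the hex constant on every call. For anything longer-lived than a throwaway debug branch, hoisting the parse into a process-wide constant would keep the per-put overhead to one comparison. A sketch of the hypothetical hoist (not in the patch), assuming the same `pageserver_api` types:

```rust
use std::sync::LazyLock;

use pageserver_api::key::Key;

/// Hypothetical hoist, not in the patch: parse the debug key once per
/// process instead of once per put().
static TROUBLE_KEY: LazyLock<Key> =
    LazyLock::new(|| Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap());
```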
```diff
@@ -2631,6 +2723,14 @@ impl DatadirModification<'_> {
         if key == CHECKPOINT_KEY.to_compact() {
             tracing::debug!("Checkpoint key added to pending with size {val_serialized_size}");
         }
+
+        let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF")
+            .unwrap()
+            .to_compact();
+        if key == trouble_key {
+            tracing::info!("Put for {trouble_key} at LSN {}", self.lsn);
+            self.extra_log = true;
+        }
     }

     fn delete(&mut self, key_range: Range<Key>) {

```
```diff
@@ -5931,9 +5931,7 @@ mod tests {
     use itertools::Itertools;
     #[cfg(feature = "testing")]
    use models::CompactLsnRange;
-    use pageserver_api::key::{
-        AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX, repl_origin_key,
-    };
+    use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
     use pageserver_api::keyspace::KeySpace;
     #[cfg(feature = "testing")]
     use pageserver_api::keyspace::KeySpaceRandomAccum;
```
```diff
@@ -8169,54 +8167,6 @@ mod tests {
         assert_eq!(files.get("pg_logical/mappings/test2"), None);
     }

-    #[tokio::test]
-    async fn test_repl_origin_tombstones() {
-        let harness = TenantHarness::create("test_repl_origin_tombstones")
-            .await
-            .unwrap();
-
-        let (tenant, ctx) = harness.load().await;
-        let io_concurrency = IoConcurrency::spawn_for_test();
-
-        let mut lsn = Lsn(0x08);
-
-        let tline: Arc<Timeline> = tenant
-            .create_test_timeline(TIMELINE_ID, lsn, DEFAULT_PG_VERSION, &ctx)
-            .await
-            .unwrap();
-
-        let repl_lsn = Lsn(0x10);
-        {
-            lsn += 8;
-            let mut modification = tline.begin_modification(lsn);
-            modification.put_for_unit_test(repl_origin_key(2), Value::Image(Bytes::new()));
-            modification.set_replorigin(1, repl_lsn).await.unwrap();
-            modification.commit(&ctx).await.unwrap();
-        }
-
-        // we can read everything from the storage
-        let repl_origins = tline
-            .get_replorigins(lsn, &ctx, io_concurrency.clone())
-            .await
-            .unwrap();
-        assert_eq!(repl_origins.len(), 1);
-        assert_eq!(repl_origins[&1], lsn);
-
-        {
-            lsn += 8;
-            let mut modification = tline.begin_modification(lsn);
-            modification.put_for_unit_test(
-                repl_origin_key(3),
-                Value::Image(Bytes::copy_from_slice(b"cannot_decode_this")),
-            );
-            modification.commit(&ctx).await.unwrap();
-        }
-        let result = tline
-            .get_replorigins(lsn, &ctx, io_concurrency.clone())
-            .await;
-        assert!(result.is_err());
-    }
-
     #[tokio::test]
     async fn test_metadata_image_creation() -> anyhow::Result<()> {
         let harness = TenantHarness::create("test_metadata_image_creation").await?;
```
```diff
@@ -178,7 +178,7 @@ impl Attempt {
     }
 }

-pub(crate) async fn generate_tombstone_image_layer(
+async fn generate_tombstone_image_layer(
     detached: &Arc<Timeline>,
     ancestor: &Arc<Timeline>,
     ancestor_lsn: Lsn,
```
```diff
@@ -8,7 +8,7 @@ use crate::error::{ErrorKind, ReportableError, UserFacingError};
 use crate::proxy::retry::CouldRetry;

 /// A go-to error message which doesn't leak any detail.
-pub(crate) const REQUEST_FAILED: &str = "Console request failed";
+pub(crate) const REQUEST_FAILED: &str = "Control plane request failed";

 /// Common console API error.
 #[derive(Debug, Error)]
```
```diff
@@ -323,6 +323,7 @@ class NeonProject:
         if self.restart_pgbench_on_console_errors and (
             "ERROR: Couldn't connect to compute node" in err
             or "ERROR: Console request failed" in err
+            or "ERROR: Control plane request failed" in err
         ):
             log.info("Restarting benchmark for %s", target)
             self.benchmarks.pop(target)
```
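The benchmark harness keeps matching the old "Console request failed" string alongside the new one, which makes sense while pre- and post-rename proxy builds coexist during rollout. The resulting restart predicate, expressed as a sketch in Rust for consistency with the other examples here (the string constants mirror the Python check above):

```rust
/// Sketch of the harness's restart predicate after this change.
fn should_restart_pgbench(err: &str) -> bool {
    err.contains("ERROR: Couldn't connect to compute node")
        || err.contains("ERROR: Console request failed") // pre-rename proxies
        || err.contains("ERROR: Control plane request failed") // post-rename proxies
}
```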