pageserver: drop unwanted keys during compaction after split

This commit is contained in:
John Spray
2023-12-27 15:57:17 +00:00
parent 753d97bd77
commit 6c79e12630
3 changed files with 46 additions and 5 deletions

View File

@@ -124,6 +124,9 @@ impl KeySpaceAccum {
if range.start == accum.end {
accum.end = range.end;
} else {
// TODO: to efficiently support small sharding stripe sizes, we should avoid starting
// a new range here if the skipped region was all keys that don't belong on this shard.
// (https://github.com/neondatabase/neon/issues/6247)
assert!(range.start > accum.end);
self.ranges.push(accum.clone());
*accum = range;

View File

@@ -422,6 +422,21 @@ impl ShardIdentity {
}
}
/// Tell whether `key` may be thrown away when it is encountered in this
/// shard's data store (for instance while compacting after a shard split).
///
/// Returns `false` for every shard-0 key, and otherwise returns `true`
/// exactly when the key is not local to this shard.
pub fn is_key_disposable(&self, key: &Key) -> bool {
    // Shard-0 keys are never disposable, even when we are not shard 0:
    // WAL ingestion currently writes some shard-0 content on all shards
    // (although it is only read on shard 0), and a later WAL ingest to
    // those keys would hit an error if we had dropped them.
    //
    // The shard-0 test comes first so that `is_key_local` is only consulted
    // for keys that are not shard-0 content.
    !key_is_shard0(key) && !self.is_key_local(key)
}
pub fn shard_slug(&self) -> String {
if self.count > ShardCount(0) {
format!("-{:02x}{:02x}", self.number.0, self.count.0)

View File

@@ -496,6 +496,11 @@ impl Timeline {
return Err(PageReconstructError::Other(anyhow::anyhow!("Invalid LSN")));
}
// This check is debug-only because of the cost of hashing, and because it's a double-check: we
// already checked the key against the shard_identity when looking up the Timeline from
// page_service.
debug_assert!(!self.shard_identity.is_key_disposable(&key));
// XXX: structured stats collection for layer eviction here.
trace!(
"get page request for {}@{} from task kind {:?}",
@@ -2224,13 +2229,13 @@ impl Timeline {
return Err(layer_traversal_error(
if cfg!(test) {
format!(
"could not find data for key {} at LSN {}, for request at LSN {}\n{}",
key, cont_lsn, request_lsn, std::backtrace::Backtrace::force_capture(),
"could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}\n{}",
key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn, std::backtrace::Backtrace::force_capture(),
)
} else {
format!(
"could not find data for key {} at LSN {}, for request at LSN {}",
key, cont_lsn, request_lsn
"could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}",
key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn
)
},
traversal_path,
@@ -3054,6 +3059,15 @@ impl Timeline {
for range in &partition.ranges {
let mut key = range.start;
while key < range.end {
if self.shard_identity.is_key_disposable(&key) {
debug!(
"Dropping key {} during compaction (it belongs on shard {:?})",
key,
self.shard_identity.get_shard_number(&key)
);
key = key.next();
continue;
}
let img = match self.get(key, lsn, ctx).await {
Ok(img) => img,
Err(err) => {
@@ -3080,6 +3094,7 @@ impl Timeline {
}
}
};
image_layer_writer.put_image(key, &img).await?;
key = key.next();
}
@@ -3650,7 +3665,15 @@ impl Timeline {
)))
});
writer.as_mut().unwrap().put_value(key, lsn, value).await?;
if !self.shard_identity.is_key_disposable(&key) {
writer.as_mut().unwrap().put_value(key, lsn, value).await?;
} else {
debug!(
"Dropping key {} during compaction (it belongs on shard {:?})",
key,
self.shard_identity.get_shard_number(&key)
);
}
if !new_layers.is_empty() {
fail_point!("after-timeline-compacted-first-L1");