feat(pageserver): drop disposable keys during gc-compaction (#9765)
Closes https://github.com/neondatabase/neon/issues/9552, closes https://github.com/neondatabase/neon/issues/8920, part of https://github.com/neondatabase/neon/issues/9114.

## Summary of changes

* Drop keys not belonging to this shard during gc-compaction, to avoid constructing history that might have been truncated during shard compaction.
* Run gc-compaction at the end of the shard compaction test.

Signed-off-by: Alex Chi Z <chi@neon.tech>
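For context, sharded tenants stripe the key space across shards, so a shard's layer files can contain keys that another shard owns (for example after a shard split). A minimal sketch of the ownership check, assuming a hypothetical round-robin striping rule; `ShardIdentity` and `is_key_disposable` mirror the names in the Rust diff below, but this is not the actual pageserver implementation:

```python
# Hedged sketch: the striping rule below is an illustrative stand-in for the
# real hash-based key-to-shard mapping in the pageserver.
from dataclasses import dataclass


@dataclass(frozen=True)
class ShardIdentity:
    shard_number: int
    shard_count: int
    stripe_size: int

    def is_key_disposable(self, key: int) -> bool:
        """A key is disposable for this shard if another shard owns it."""
        if self.shard_count <= 1:
            return False  # unsharded tenants own every key
        stripe = key // self.stripe_size
        owner = stripe % self.shard_count  # toy stand-in for the real hash
        return owner != self.shard_number


if __name__ == "__main__":
    shard = ShardIdentity(shard_number=0, shard_count=4, stripe_size=32768)
    for key in (0, 40_000, 2_000_000):
        print(key, "disposable:", shard.is_key_disposable(key))
```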
@@ -2021,6 +2021,14 @@ impl Timeline {
             if cancel.is_cancelled() {
                 return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
             }
+            if self.shard_identity.is_key_disposable(&key) {
+                // If this shard does not need to store this key, simply skip it.
+                //
+                // This is not handled in the filter iterator because shard is determined by hash.
+                // Therefore, it does not give us any performance benefit to do things like skip
+                // a whole layer file as handling key spaces (ranges).
+                continue;
+            }
             if !job_desc.compaction_key_range.contains(&key) {
                 if !desc.is_delta {
                     continue;
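The code comment in the hunk above is the key design point: because shard ownership is decided per key by hashing its stripe, any contiguous key range spans all shards, so there is no whole layer or key range that could be skipped wholesale. A small self-contained illustration, using the same hypothetical striping rule as the sketch earlier:

```python
# Toy demonstration: consecutive stripes land on different shards, so no
# contiguous multi-stripe key range belongs entirely to one shard. This is
# why the disposability check runs per key rather than in a range-based
# filter iterator. The striping rule is a stand-in for the real hash.
STRIPE_SIZE = 32768
SHARD_COUNT = 4


def owner_of(key: int) -> int:
    return (key // STRIPE_SIZE) % SHARD_COUNT


owners = {owner_of(k) for k in range(0, 4 * STRIPE_SIZE, STRIPE_SIZE)}
print(owners)  # {0, 1, 2, 3}: a range of four stripes touches every shard
```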
@@ -122,10 +122,19 @@ LARGE_STRIPES = 32768
 
 
 @pytest.mark.parametrize(
-    "shard_count,stripe_size", [(None, None), (4, TINY_STRIPES), (4, LARGE_STRIPES)]
+    "shard_count,stripe_size,gc_compaction",
+    [
+        (None, None, False),
+        (4, TINY_STRIPES, False),
+        (4, LARGE_STRIPES, False),
+        (4, LARGE_STRIPES, True),
+    ],
 )
 def test_sharding_compaction(
-    neon_env_builder: NeonEnvBuilder, stripe_size: int, shard_count: Optional[int]
+    neon_env_builder: NeonEnvBuilder,
+    stripe_size: int,
+    shard_count: Optional[int],
+    gc_compaction: bool,
 ):
     """
     Use small stripes, small layers, and small compaction thresholds to exercise how compaction
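The added tuple extends the parameter matrix with a single gc-compaction case rather than doubling every combination, presumably to keep test runtime in check. As a standalone illustration of how pytest expands this decorator into four test cases (hypothetical test body, not the neon suite; `LARGE_STRIPES` matches the constant visible in the diff, while `TINY_STRIPES` here is an arbitrary small value):

```python
# Run with `pytest -v` to see the four generated cases.
import pytest

TINY_STRIPES = 16
LARGE_STRIPES = 32768


@pytest.mark.parametrize(
    "shard_count,stripe_size,gc_compaction",
    [
        (None, None, False),
        (4, TINY_STRIPES, False),
        (4, LARGE_STRIPES, False),
        (4, LARGE_STRIPES, True),
    ],
)
def test_example(shard_count, stripe_size, gc_compaction):
    # Unsharded runs pass no stripe size; sharded runs always set one.
    assert (shard_count is None) == (stripe_size is None)
```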
@@ -217,6 +226,17 @@ def test_sharding_compaction(
     # Assert that everything is still readable
     workload.validate()
 
+    if gc_compaction:
+        # trigger gc compaction to get more coverage for that, piggyback on the existing workload
+        for shard in env.storage_controller.locate(tenant_id):
+            pageserver = env.get_pageserver(shard["node_id"])
+            tenant_shard_id = shard["shard_id"]
+            pageserver.http_client().timeline_compact(
+                tenant_shard_id,
+                timeline_id,
+                enhanced_gc_bottom_most_compaction=True,
+            )
+
 
 class CompactionAlgorithm(str, enum.Enum):
     LEGACY = "legacy"
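Putting the pieces together, here is a hedged sketch of the gc-compaction inner loop after this change, written as Python pseudocode. `merge_iter`, `job_desc`, and the per-record `is_delta` flag are simplified stand-ins for the Rust code in the first hunk, where layer kind is tracked on the layer descriptor rather than per record:

```python
import threading


def gc_compaction_keys(merge_iter, shard_identity, job_desc, cancel: threading.Event):
    """Yield only the (key, lsn, value) records this shard should retain."""
    for key, lsn, value, is_delta in merge_iter:
        if cancel.is_set():
            raise RuntimeError("cancelled")
        if shard_identity.is_key_disposable(key):
            # Skip keys this shard does not own instead of reconstructing
            # history that may already have been truncated on this shard.
            continue
        if key not in job_desc.compaction_key_range and not is_delta:
            # Image records outside the compaction key range are dropped.
            continue
        yield key, lsn, value
```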