fix(pageserver): allow incomplete history in btm-gc-compaction (#8500)

This pull request (should) fix the failure of test_gc_feedback. See the
explanation in the newly-added test case.

Part of https://github.com/neondatabase/neon/issues/8002

Allow incomplete history for the compaction algorithm.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-07-25 12:56:37 -04:00
committed by GitHub
parent 3977e0a7a3
commit bea0468f1f
2 changed files with 125 additions and 9 deletions

View File

@@ -7309,7 +7309,9 @@ mod tests {
(
key,
Lsn(0x80),
Value::WalRecord(NeonWalRecord::wal_append(";0x80")),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
key,
@@ -7371,7 +7373,9 @@ mod tests {
),
(
Lsn(0x80),
Value::WalRecord(NeonWalRecord::wal_append(";0x80")),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
Lsn(0x90),
@@ -7380,7 +7384,118 @@ mod tests {
]),
};
assert_eq!(res, expected_res);
// TODO: more tests with mixed image + delta, adding with k-merge test cases; e2e compaction test
// We expect GC-compaction to run with the original GC. This would create a situation that
// the original GC algorithm removes some delta layers b/c there are full image coverage,
// therefore causing some keys to have an incomplete history below the lowest retain LSN.
// For example, we have
// ```plain
// init delta @ 0x10, image @ 0x20, delta @ 0x30 (gc_horizon), image @ 0x40.
// ```
// Now the GC horizon moves up, and we have
// ```plain
// init delta @ 0x10, image @ 0x20, delta @ 0x30, image @ 0x40 (gc_horizon)
// ```
// The original GC algorithm kicks in, and removes delta @ 0x10, image @ 0x20.
// We will end up with
// ```plain
// delta @ 0x30, image @ 0x40 (gc_horizon)
// ```
// Now we run the GC-compaction, and this key does not have a full history.
// We should be able to handle this partial history and drop everything before the
// gc_horizon image.
let history = vec![
(
key,
Lsn(0x20),
Value::WalRecord(NeonWalRecord::wal_append(";0x20")),
),
(
key,
Lsn(0x30),
Value::WalRecord(NeonWalRecord::wal_append(";0x30")),
),
(
key,
Lsn(0x40),
Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")),
),
(
key,
Lsn(0x50),
Value::WalRecord(NeonWalRecord::wal_append(";0x50")),
),
(
key,
Lsn(0x60),
Value::WalRecord(NeonWalRecord::wal_append(";0x60")),
),
(
key,
Lsn(0x70),
Value::WalRecord(NeonWalRecord::wal_append(";0x70")),
),
(
key,
Lsn(0x80),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
key,
Lsn(0x90),
Value::WalRecord(NeonWalRecord::wal_append(";0x90")),
),
];
let res = tline
.generate_key_retention(key, &history, Lsn(0x60), &[Lsn(0x40), Lsn(0x50)], 3)
.await
.unwrap();
let expected_res = KeyHistoryRetention {
below_horizon: vec![
(
Lsn(0x40),
KeyLogAtLsn(vec![(
Lsn(0x40),
Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")),
)]),
),
(
Lsn(0x50),
KeyLogAtLsn(vec![(
Lsn(0x50),
Value::WalRecord(NeonWalRecord::wal_append(";0x50")),
)]),
),
(
Lsn(0x60),
KeyLogAtLsn(vec![(
Lsn(0x60),
Value::WalRecord(NeonWalRecord::wal_append(";0x60")),
)]),
),
],
above_horizon: KeyLogAtLsn(vec![
(
Lsn(0x70),
Value::WalRecord(NeonWalRecord::wal_append(";0x70")),
),
(
Lsn(0x80),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
Lsn(0x90),
Value::WalRecord(NeonWalRecord::wal_append(";0x90")),
),
]),
};
assert_eq!(res, expected_res);
Ok(())
}

View File

@@ -1122,9 +1122,10 @@ impl Timeline {
);
}
}
if let Value::WalRecord(rec) = &history[0].2 {
assert!(rec.will_init(), "no base image");
}
// There was an assertion for no base image that checks if the first
// record in the history is `will_init` before, but it was removed.
// This is explained in the test cases for generate_key_retention.
// Search "incomplete history" for more information.
for lsn in retain_lsn_below_horizon {
assert!(lsn < &horizon, "retain lsn must be below horizon")
}
@@ -1200,9 +1201,6 @@ impl Timeline {
false
};
replay_history.extend(split_for_lsn.iter().map(|x| (*x).clone()));
if let Some((_, _, val)) = replay_history.first() {
assert!(val.will_init(), "invalid history, no base image");
}
// Only retain the items after the last image record
for idx in (0..replay_history.len()).rev() {
if replay_history[idx].2.will_init() {
@@ -1210,6 +1208,9 @@ impl Timeline {
break;
}
}
if let Some((_, _, val)) = replay_history.first() {
assert!(val.will_init(), "invalid history, no base image");
}
if generate_image && records_since_last_image > 0 {
records_since_last_image = 0;
let history = std::mem::take(&mut replay_history);