Compare commits

...

6 Commits

Author SHA1 Message Date
Thang Pham
5f6f5f517a revert fix for #707 2022-07-05 09:53:01 -04:00
Thang Pham
93e136a03c update PITR 2022-07-05 09:51:18 -04:00
Thang Pham
0205e29185 update test_branch_and_gc test 2022-07-04 19:41:09 -04:00
Dhammika Pathirana
605ec2b4aa Fix add ps tenant config
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-07-04 15:26:43 -04:00
Dhammika Pathirana
ff0ad4213c Add test config with ps compaction_threshold
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-07-04 15:25:01 -04:00
Dhammika Pathirana
0664100755 Add a test for gc dropping active layers (#707)
Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>
2022-07-04 15:25:01 -04:00
2 changed files with 106 additions and 4 deletions

View File

@@ -2190,7 +2190,7 @@ impl LayeredTimeline {
// Calculate pitr cutoff point.
// If we cannot determine a cutoff LSN, be conservative and don't GC anything.
let mut pitr_cutoff_lsn: Lsn = *self.get_latest_gc_cutoff_lsn();
let mut pitr_cutoff_lsn = *self.get_latest_gc_cutoff_lsn();
if let Ok(timeline) =
tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)
@@ -2328,9 +2328,10 @@ impl LayeredTimeline {
// If GC horizon is at 2500, we can remove layers A and B, but
// we cannot remove C, even though it's older than 2500, because
// the delta layer 2000-3000 depends on it.
if !layers
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))?
{
if !layers.image_layer_exists(
&l.get_key_range(),
&(l.get_lsn_range().end..disk_consistent_lsn + 1),
)? {
debug!(
"keeping {} because it is the latest layer",
l.filename().display()

View File

@@ -0,0 +1,101 @@
import time
from asyncpg.connection import os
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import lsn_from_hex
# Test the GC implementation when running with branching
# This test reproduces the issue https://github.com/neondatabase/neon/issues/707.
#
# Consider two LSNs `lsn1` and `lsn2` with some delta files as follows:
# ...
# ... -> has an image layer xx_p with p < lsn1
# ...
# lsn1
# ...
# ... -> has an image layer xx_q with lsn1 < q < lsn2
# ...
# lsn2
#
# Consider running a GC iteration such that the GC horizon is between p and lsn1
# ...
# ... -> has an image layer xx_p with p < lsn1
# ...
# ||| -------> a delta layer D's start
# ... -> gc horizon h such that p < h < lsn1
# lsn1
# ||| -------> a delta layer D's end
# ...
# ... -> has an image layer xx_q with lsn1 < q < lsn2
# ...
# lsn2
#
# As described in the issue #707, the image layer xx_p will be deleted as
# there exists a newer image layer xx_q. However, removing xx_p will corrupt
# any delta layers that depend on xx_p that are not deleted by GC.
# For example, the delta layer D is corrupted in the above example.
#
# Because the delta layer D covering lsn1 is corrupted, creating a branch
# starting from lsn1 should return an error as follows:
# could not find data for key ... at LSN ..., for request at LSN ...
def test_branch_and_gc(neon_simple_env: NeonEnv):
    """Run a manual GC on the main timeline, then branch from an earlier LSN.

    Two batches of rows are inserted on `test_main`, recording the WAL insert
    LSN after each batch (`lsn1`, `lsn2`). A `do_gc` request is then issued
    with a horizon derived from the distance between the two LSNs, after which
    a branch is created at `lsn1`. The branch must still be writable and must
    see the first batch plus its own inserts (200000 rows in total).

    This is the scenario from https://github.com/neondatabase/neon/issues/707.
    """

    def _current_insert_lsn(cur):
        # Ask Postgres for the current WAL insert position and return the
        # first column of the single result row.
        cur.execute('SELECT pg_current_wal_insert_lsn()')
        return cur.fetchone()[0]

    env = neon_simple_env
    main_branch = 'test_main'

    tenant_conf = {
        # disable background GC
        'gc_period': '10 m',
        'gc_horizon': str(10 * 1024**3),

        # small checkpoint distance to create more delta layer files
        'checkpoint_distance': str(1024**2),

        # large target size so that an image layer can cover the whole key space
        'compaction_target_size': str(1024**3),

        # tweak the defaults so that image layers and L1 layers are created quickly
        'compaction_period': '1 s',
        'compaction_threshold': '2',
        'image_creation_threshold': '1',

        # keep the PITR interval small so that GC is able to run
        'pitr_interval': '1 s',
    }
    tenant, _ = env.neon_cli.create_tenant(conf=tenant_conf)

    timeline_main = env.neon_cli.create_timeline(main_branch, tenant_id=tenant)
    pg_main = env.postgres.create_start(main_branch, tenant_id=tenant)
    main_cur = pg_main.connect().cursor()

    main_cur.execute(
        "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
    )

    # First batch of rows; remember where the WAL ends afterwards.
    main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
    lsn1 = _current_insert_lsn(main_cur)
    log.info(f'LSN1: {lsn1}')

    # Second batch of rows; remember the new WAL end as well.
    main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
    lsn2 = _current_insert_lsn(main_cur)
    log.info(f'LSN2: {lsn2}')

    # Choose the GC horizon such that it doesn't cover lsn1, so that a new
    # branch can still be created starting from lsn1.
    horizon = lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024
    gc_command = f'''do_gc {tenant.hex} {timeline_main.hex} {horizon}'''
    env.pageserver.safe_psql(gc_command)

    env.neon_cli.create_branch('test_branch',
                               main_branch,
                               tenant_id=tenant,
                               ancestor_start_lsn=lsn1)
    pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant)
    branch_cur = pg_branch.connect().cursor()

    # The branch starts with the first batch only; add one more batch on top
    # and check the total row count.
    branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
    branch_cur.execute('SELECT count(*) FROM foo')
    row = branch_cur.fetchone()
    assert row == (200000, )