mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 21:12:55 +00:00
This introduces new timeline field latest_gc_cutoff. It is updated before each gc iteration. New check is added to branch_timelines to prevent branch creation with start point less than latest_gc_cutoff. Also this adds a check to get_page_at_lsn which asserts that lsn at which the page is requested was not garbage collected. This check currently is triggered for readonly nodes which are pinned to specific lsn and because they are not tracked in pageserver garbage collection can remove data that still might be referenced. This is a bug and will be fixed separately.
134 lines
6.5 KiB
Python
134 lines
6.5 KiB
Python
from contextlib import closing
|
|
import psycopg2.extras
|
|
import time
|
|
from fixtures.utils import print_gc_result
|
|
from fixtures.zenith_fixtures import ZenithEnv
|
|
from fixtures.log_helper import log
|
|
|
|
pytest_plugins = ("fixtures.zenith_fixtures")
|
|
|
|
|
|
#
|
|
# Test Garbage Collection of old layer files
|
|
#
|
|
# This test is pretty tightly coupled with the current implementation of layered
|
|
# storage, in layered_repository.rs.
|
|
#
|
|
def test_layerfiles_gc(zenith_simple_env: ZenithEnv):
|
|
env = zenith_simple_env
|
|
env.zenith_cli(["branch", "test_layerfiles_gc", "empty"])
|
|
pg = env.postgres.create_start('test_layerfiles_gc')
|
|
|
|
with closing(pg.connect()) as conn:
|
|
with conn.cursor() as cur:
|
|
with closing(env.pageserver.connect()) as psconn:
|
|
with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
|
|
|
|
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
|
cur.execute("SHOW zenith.zenith_timeline")
|
|
timeline = cur.fetchone()[0]
|
|
|
|
# Create a test table
|
|
cur.execute("CREATE TABLE foo(x integer)")
|
|
cur.execute("INSERT INTO foo VALUES (1)")
|
|
|
|
cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass")
|
|
row = cur.fetchone()
|
|
log.info(f"relfilenode is {row[0]}")
|
|
|
|
# Run GC, to clear out any garbage left behind in the catalogs by
|
|
# the CREATE TABLE command. We want to have a clean slate with no garbage
|
|
# before running the actual tests below, otherwise the counts won't match
|
|
# what we expect.
|
|
#
|
|
# Also run vacuum first to make it less likely that autovacuum or pruning
|
|
# kicks in and confuses our numbers.
|
|
cur.execute("VACUUM")
|
|
|
|
# delete the row, to update the Visibility Map. We don't want the VM
|
|
# update to confuse our numbers either.
|
|
cur.execute("DELETE FROM foo")
|
|
|
|
log.info("Running GC before test")
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
# remember the number of files
|
|
layer_relfiles_remain = (row['layer_relfiles_total'] -
|
|
row['layer_relfiles_removed'])
|
|
assert layer_relfiles_remain > 0
|
|
|
|
# Insert a row and run GC. Checkpoint should freeze the layer
|
|
# so that there is only the most recent image layer left for the rel,
|
|
# removing the old image and delta layer.
|
|
log.info("Inserting one row and running GC")
|
|
cur.execute("INSERT INTO foo VALUES (1)")
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
|
assert row['layer_relfiles_removed'] == 2
|
|
assert row['layer_relfiles_dropped'] == 0
|
|
|
|
# Insert two more rows and run GC.
|
|
# This should create new image and delta layer file with the new contents, and
|
|
# then remove the old one image and the just-created delta layer.
|
|
log.info("Inserting two more rows and running GC")
|
|
cur.execute("INSERT INTO foo VALUES (2)")
|
|
cur.execute("INSERT INTO foo VALUES (3)")
|
|
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
|
assert row['layer_relfiles_removed'] == 2
|
|
assert row['layer_relfiles_dropped'] == 0
|
|
|
|
# Do it again. Should again create two new layer files and remove old ones.
|
|
log.info("Inserting two more rows and running GC")
|
|
cur.execute("INSERT INTO foo VALUES (2)")
|
|
cur.execute("INSERT INTO foo VALUES (3)")
|
|
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
|
assert row['layer_relfiles_removed'] == 2
|
|
assert row['layer_relfiles_dropped'] == 0
|
|
|
|
# Run GC again, with no changes in the database. Should not remove anything.
|
|
log.info("Run GC again, with nothing to do")
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
assert row['layer_relfiles_total'] == layer_relfiles_remain
|
|
assert row['layer_relfiles_removed'] == 0
|
|
assert row['layer_relfiles_dropped'] == 0
|
|
|
|
#
|
|
# Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage
|
|
#
|
|
log.info("Drop table and run GC again")
|
|
cur.execute("DROP TABLE foo")
|
|
|
|
pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
|
|
row = pscur.fetchone()
|
|
print_gc_result(row)
|
|
|
|
# We still cannot remove the latest layers
|
|
# because they serve as tombstones for earlier layers.
|
|
assert row['layer_relfiles_dropped'] == 0
|
|
# Each relation fork is counted separately, hence 3.
|
|
assert row['layer_relfiles_needed_as_tombstone'] == 3
|
|
|
|
# The catalog updates also create new layer files of the catalogs, which
|
|
# are counted as 'removed'
|
|
assert row['layer_relfiles_removed'] > 0
|
|
|
|
# TODO Change the test to check actual CG of dropped layers.
|
|
# Each relation fork is counted separately, hence 3.
|
|
#assert row['layer_relfiles_dropped'] == 3
|
|
|
|
# TODO: perhaps we should count catalog and user relations separately,
|
|
# to make this kind of testing more robust
|