mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 17:02:56 +00:00
Previously, the /v1/tenant/:tenant_id/timeline/:timeline_id/do_gc API call performed a flush and compaction on the timeline before GC. Change it not to do that, and change all the tests that used that API to perform compaction explicitly. The compaction happens at a slightly different point now. Previously, the code performed the `refresh_gc_info_internal` step first, and only then did compaction on all the timelines. I don't think that was what was originally intended here. Presumably the idea with compaction was to make some old layer files available for GC. But if we're going to flush the current in-memory layer to disk, surely you would want to include the newly-written layer in the compaction too. I guess this didn't make any difference to the tests in practice, but in any case, the tests now perform the flush and compaction before any of the GC steps. Some of the tests might not need the compaction at all, but I didn't try hard to determine which ones might need it. I left it out from a few tests that intentionally tested calling do_gc with an invalid tenant or timeline ID, though.
76 lines
2.7 KiB
Python
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
|
from fixtures.types import TimelineId
|
|
from fixtures.utils import print_gc_result, query_scalar
|
|
|
|
|
|
#
|
|
# Check pitr_interval GC behavior.
|
|
# Insert some data, run GC and create a branch in the past.
|
|
#
|
|
def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
    """
    Check pitr_interval GC behavior.

    Insert some data, flush/compact and run GC, then create a branch at a
    point in the past. The branch point must still be available, because
    the PITR setting forces GC to retain the history.
    """
    # Set pitr interval such that we need to keep the data
    neon_env_builder.pageserver_config_override = (
        "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
    )

    env = neon_env_builder.init_start()
    pgmain = env.postgres.create_start("main")
    log.info("postgres is running on 'main' branch")

    main_conn = pgmain.connect()
    cur = main_conn.cursor()
    timeline = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))

    # Create table
    cur.execute("CREATE TABLE foo (t text)")

    # Insert rows one at a time so plenty of WAL is generated, remembering
    # the LSN after the first 100 rows to branch from later.
    for row_no in range(10000):
        cur.execute(
            """
            INSERT INTO foo
                SELECT 'long string to consume some space';
            """
        )

        if row_no == 99:
            # keep some early lsn to test branch creation after GC
            cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()")
            row = cur.fetchone()
            assert row is not None
            lsn_a, xid_a = row
            log.info(f"LSN after 100 rows: {lsn_a} xid {xid_a}")

    cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()")
    row = cur.fetchone()
    assert row is not None
    debug_lsn, debug_xid = row
    log.info(f"LSN after 10000 rows: {debug_lsn} xid {debug_xid}")

    # run GC
    with env.pageserver.http_client() as pageserver_http:
        # Flush the in-memory layer and compact first, so there are old
        # layer files for GC to consider.
        pageserver_http.timeline_checkpoint(env.initial_tenant, timeline)
        pageserver_http.timeline_compact(env.initial_tenant, timeline)
        # perform aggressive GC. Data still should be kept because of the PITR setting.
        gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0)
        print_gc_result(gc_result)

    # Branch at the point where only 100 rows were inserted
    # It must have been preserved by PITR setting
    env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)

    pg_hundred = env.postgres.create_start("test_pitr_gc_hundred")

    # On the 'hundred' branch, we should see only 100 rows
    hundred_cur = pg_hundred.connect().cursor()
    hundred_cur.execute("SELECT count(*) FROM foo")
    assert hundred_cur.fetchone() == (100,)

    # All the rows are visible on the main branch
    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (10000,)
|