Handle update of VM in XLOG_HEAP_LOCK/XLOG_HEAP2_LOCK_UPDATED WAL records (#4896)

## Problem

The visibility map (VM) should be updated if the XLH_LOCK_ALL_FROZEN_CLEARED
flag is set in XLOG_HEAP_LOCK or XLOG_HEAP2_LOCK_UPDATED WAL records.

## Summary of changes

Add handling of these records in walingest.rs.

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? If so, did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
This commit is contained in:
Konstantin Knizhnik
2023-09-15 17:47:29 +03:00
committed by GitHub
parent 9e6b5b686c
commit 66fa176cc8
4 changed files with 244 additions and 13 deletions

View File

@@ -111,3 +111,103 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
assert cur_new.fetchall() == []
cur_new.execute("SELECT id FROM vmtest_cold_update2 WHERE id = 1")
assert cur_new.fetchall() == []
#
# Test that the ALL_FROZEN VM bit is cleared correctly at a HEAP_LOCK
# record.
#
def test_vm_bit_clear_on_heap_lock(neon_simple_env: NeonEnv):
    """Check that a row lock still works after its XID's clog was truncated.

    The test freezes a table, locks one row, restarts the endpoint, and then
    consumes XIDs until ``datfrozenxid`` has moved past the locking
    transaction.  Locking the same row again must then succeed; as the note
    near the end of the test explains, an all-frozen VM bit that was not
    cleared on replay would show up as
    "could not access status of transaction".
    """
    env = neon_simple_env

    env.neon_cli.create_branch("test_vm_bit_clear_on_heap_lock", "empty")
    endpoint = env.endpoints.create_start(
        "test_vm_bit_clear_on_heap_lock",
        config_lines=[
            "log_autovacuum_min_duration = 0",
            # Perform anti-wraparound vacuuming aggressively
            "autovacuum_naptime='1 s'",
            "autovacuum_freeze_max_age = 1000000",
        ],
    )

    pg_conn = endpoint.connect()
    cur = pg_conn.cursor()

    # Install extension containing function needed for test
    cur.execute("CREATE EXTENSION neon_test_utils")

    cur.execute("SELECT pg_switch_wal()")

    # Create a test table and freeze it to set the all-frozen VM bit on all pages.
    cur.execute("CREATE TABLE vmtest_lock (id integer PRIMARY KEY)")
    cur.execute("INSERT INTO vmtest_lock SELECT g FROM generate_series(1, 50000) g")
    cur.execute("VACUUM FREEZE vmtest_lock")

    # Lock a row. This clears the all-frozen VM bit for that page.
    cur.execute("SELECT * FROM vmtest_lock WHERE id = 40000 FOR UPDATE")

    # Remember the XID. We will use it later to verify that we have consumed a lot of
    # XIDs after this.
    #
    # The driver may hand XIDs back as text, so convert to int right away:
    # every later comparison against this value must be numeric, not
    # lexicographic.
    cur.execute("select pg_current_xact_id()")
    locking_xid = int(cur.fetchall()[0][0])

    # Stop and restart postgres, to clear the buffer cache.
    #
    # NOTE: clear_buffer_cache() will not do, because it evicts the dirty pages
    # in a "clean" way. Our neon extension will write a full-page image of the VM
    # page, and we want to avoid that.
    endpoint.stop()
    endpoint.start()
    pg_conn = endpoint.connect()
    cur = pg_conn.cursor()

    cur.execute("select xmin, xmax, * from vmtest_lock where id = 40000 ")
    tup = cur.fetchall()
    xmax_before = tup[0][1]

    # Consume a lot of XIDs, so that anti-wraparound autovacuum kicks
    # in and the clog gets truncated. We set autovacuum_freeze_max_age to a very
    # low value, so it doesn't take all that many XIDs for autovacuum to kick in.
    for i in range(1000):
        cur.execute(
            """
        CREATE TEMP TABLE othertable (i int) ON COMMIT DROP;
        do $$
        begin
          for i in 1..100000 loop
            -- Use a begin-exception block to generate a new subtransaction on each iteration
            begin
              insert into othertable values (i);
            exception when others then
              raise 'not expected %', sqlerrm;
            end;
          end loop;
        end;
        $$;
        """
        )

        # The lock's xmax must stay on the tuple while XIDs are being consumed.
        cur.execute("select xmin, xmax, * from vmtest_lock where id = 40000 ")
        tup = cur.fetchall()
        log.info(f"tuple = {tup}")
        xmax = tup[0][1]
        assert xmax == xmax_before

        # Only poll datfrozenxid every 50th iteration; stop once it has
        # advanced past the locking transaction.
        if i % 50 == 0:
            cur.execute("select datfrozenxid from pg_database where datname='postgres'")
            datfrozenxid = int(cur.fetchall()[0][0])
            if datfrozenxid > locking_xid:
                break

    cur.execute("select pg_current_xact_id()")
    curr_xid = int(cur.fetchall()[0][0])
    assert curr_xid - locking_xid >= 100000

    # Now, if the VM all-frozen bit was not correctly cleared on
    # replay, we will try to fetch the status of the XID that was
    # already truncated away.
    #
    # ERROR: could not access status of transaction 1027
    cur.execute("select xmin, xmax, * from vmtest_lock where id = 40000 for update")
    tup = cur.fetchall()
    log.info(f"tuple = {tup}")