Fix multixact members WAL redo.

The logic to compute the page number was broken, and as a result, only
the first page of multixact members was updated correctly. All the
rest were left as zeros. Improve test_multixact.py to generate more
multixacts, to cover this case.

Also fix the check that the restored PG data directory matches the
original one. Previously, the test compared the 'pg_new' cluster,
which is a bit silly because the test had restored the 'pg_new' cluster
only a few lines earlier: if multixact WAL redo was somehow broken,
the comparison would just compare two broken data directories and
report success. Change it to compare the original datadir, the one
where the multixacts were originally created, with a restored image of
the same.
Author: Heikki Linnakangas
Date: 2021-12-21 17:50:06 +02:00
parent f56db3da68
commit bcf80eaa95
2 changed files with 18 additions and 8 deletions
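
To illustrate the broken page-number computation, here is a minimal, self-contained sketch. It is not the pageserver's actual code: the constant value (1636 members per 8 KB page, PostgreSQL's default) and the free-standing helper functions are assumptions made for the example. Deriving the page from the loop index `i` keeps every member on page 0 unless a single WAL record spans more than a page, while deriving it from the absolute member offset `xlrec.moff + i` lands on the page that actually holds the member.

// Assumed value: PostgreSQL's MULTIXACT_MEMBERS_PER_PAGE with 8 KB blocks
// (409 member groups per page * 4 members per group).
const MULTIXACT_MEMBERS_PER_PAGE: u32 = 1636;

// Buggy mapping: page derived from the member's index within one WAL record.
fn page_from_index(i: u32) -> u32 {
    i / MULTIXACT_MEMBERS_PER_PAGE
}

// Fixed mapping: page derived from the absolute member offset.
fn page_from_offset(moff: u32, i: u32) -> u32 {
    (moff + i) / MULTIXACT_MEMBERS_PER_PAGE
}

fn main() {
    // A record whose members start well past the first page: moff = 5000,
    // 10 members. They belong on page 3, but the old computation always
    // said page 0, so every members page except the first was left as zeros.
    let (moff, nmembers) = (5000u32, 10u32);
    for i in 0..nmembers {
        assert_eq!(page_from_index(i), 0);
        assert_eq!(page_from_offset(moff, i), 3);
    }
    println!("fixed mapping targets page {}", page_from_offset(moff, 0));
}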


@@ -403,12 +403,13 @@ impl PostgresRedoManager {
             {
                 if slru == SlruKind::MultiXactMembers {
                     for i in 0..xlrec.nmembers {
-                        let pageno = i / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;
+                        let offset = xlrec.moff + i;
+                        let pageno =
+                            offset / pg_constants::MULTIXACT_MEMBERS_PER_PAGE as u32;
                         let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                         let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                         if segno == rec_segno && rpageno == blknum {
                             // update only target block
-                            let offset = xlrec.moff + i;
                             let memberoff = mx_offset_to_member_offset(offset);
                             let flagsoff = mx_offset_to_flags_offset(offset);
                             let bshift = mx_offset_to_flags_bitshift(offset);


@@ -28,16 +28,25 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
     cur.execute('SELECT next_multixact_id FROM pg_control_checkpoint()')
     next_multixact_id_old = cur.fetchone()[0]
-    # Lock entries in parallel connections to set multixact
-    nclients = 3
+    # Lock entries using parallel connections in a round-robin fashion.
+    nclients = 20
     connections = []
     for i in range(nclients):
+        # Do not turn on autocommit. We want to hold the key-share locks.
         conn = pg.connect(autocommit=False)
         conn.cursor().execute('select * from t1 for key share')
         connections.append(conn)
-    # We should have a multixact now. We can close the connections.
+    # On each iteration, we commit the previous transaction on a connection,
+    # and issue another select. Each SELECT generates a new multixact that
+    # includes the new XID, and the XIDs of all the other parallel transactions.
+    # This generates enough traffic on both multixact offsets and members SLRUs
+    # to cross page boundaries.
+    for i in range(5000):
+        conn = connections[i % nclients]
+        conn.commit()
+        conn.cursor().execute('select * from t1 for key share')
+    # We have multixacts now. We can close the connections.
     for c in connections:
         c.close()
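
A rough sanity check on the new test parameters (a sketch, not part of the test; the members-per-page and pages-per-segment values are the assumed PostgreSQL defaults used above): with 20 clients and 5000 round-robin iterations, each SELECT ... FOR KEY SHARE creates a multixact with up to 20 members, so the members SLRU receives on the order of 100,000 entries spread over dozens of pages, comfortably crossing the page boundaries that the old 3-client test never reached.

// Assumed PostgreSQL defaults: 1636 multixact members per 8 KB SLRU page,
// 32 pages per SLRU segment.
const MULTIXACT_MEMBERS_PER_PAGE: u32 = 1636;
const SLRU_PAGES_PER_SEGMENT: u32 = 32;

fn main() {
    let nclients = 20u32;
    let iterations = 5000u32;
    // Each iteration commits one transaction and re-locks the rows, producing a
    // new multixact whose member list can include up to `nclients` XIDs.
    let member_entries = iterations * nclients; // upper bound, ~100,000
    let pages = member_entries / MULTIXACT_MEMBERS_PER_PAGE; // ~61 pages
    let segments = pages / SLRU_PAGES_PER_SEGMENT; // even crosses a segment boundary
    println!("~{member_entries} members -> ~{pages} pages, ~{segments} full segment(s)");
}
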
@@ -66,5 +75,5 @@ def test_multixact(zenith_simple_env: ZenithEnv, test_output_dir):
     # Check that we restored pg_controlfile correctly
     assert next_multixact_id_new == next_multixact_id
-    # Check that we restore the content of the datadir correctly
-    check_restored_datadir_content(test_output_dir, env, pg_new)
+    # Check that we can restore the content of the datadir correctly
+    check_restored_datadir_content(test_output_dir, env, pg)