diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 95affc08aa..0605849642 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -618,7 +618,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, result->effective_request_lsn = result->request_lsn; Assert(last_written_lsn <= result->request_lsn); - neon_log(DEBUG1, "neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X", + neon_log(DEBUG2, "neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X", LSN_FORMAT_ARGS(result->request_lsn), LSN_FORMAT_ARGS(result->not_modified_since)); } } @@ -642,7 +642,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, * must still in the buffer cache, so our request cannot concern * those. */ - neon_log(DEBUG1, "neon_get_request_lsns GetLastWrittenLSN lsn %X/%X", + neon_log(DEBUG2, "neon_get_request_lsns GetLastWrittenLSN lsn %X/%X", LSN_FORMAT_ARGS(last_written_lsn)); /* @@ -2033,7 +2033,7 @@ neon_start_unlogged_build(SMgrRelation reln) if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS) neon_log(ERROR, "unlogged relation build is already in progress"); - ereport(SmgrTrace, + ereport(DEBUG1, (errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u", RelFileInfoFmt(InfoFromSMgrRel(reln))))); @@ -2108,7 +2108,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln) { Assert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln))); - ereport(SmgrTrace, + ereport(DEBUG1, (errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u", RelFileInfoFmt((unlogged_build_rel_info))))); @@ -2146,7 +2146,7 @@ neon_end_unlogged_build(SMgrRelation reln) Assert(RelFileInfoEquals(unlogged_build_rel_info, rinfo)); - ereport(SmgrTrace, + ereport(DEBUG1, (errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u", RelFileInfoFmt(unlogged_build_rel_info)))); diff --git a/test_runner/performance/test_unlogged_build.py b/test_runner/performance/test_unlogged_build.py index 691af81465..fd9c43fecc 100644 --- a/test_runner/performance/test_unlogged_build.py +++ b/test_runner/performance/test_unlogged_build.py @@ -4,40 +4,80 @@ import threading from typing import TYPE_CHECKING import pytest +from fixtures.log_helper import log +from fixtures.utils import query_scalar if TYPE_CHECKING: from fixtures.neon_fixtures import NeonEnvBuilder -# Checks that the is no race between end of unlogged build and backends evicting pages of this index. -# We need to create quite large index (more than one gigabyte segment) to reproduce write error caused by this race condition -# (backend completed unlogged build removes local files while backend evicting page tries to write to the file). -# If index size is smaller than segment size, the problem is avoided by file descriptor cache which prevents file deletion. @pytest.mark.timeout(10000) def test_unlogged_build(neon_env_builder: NeonEnvBuilder): + """ + Check for race conditions between end of unlogged build and backends + evicting pages of this index. We used to have a race condition where a + backend completed unlogged build and removed local files, while another + backend is evicting a page belonging to the newly-built index and tries to + write to the file that's removed. + """ n_connections = 4 - shared_buffers = 128 + + # A small shared_buffers setting forces backends to evict more pages from + # the buffer cache during the build, which is needed to expose the bug with + # concurrent eviction. + shared_buffers = "128MB" + env = neon_env_builder.init_start() endpoint = env.endpoints.create_start( - "main", config_lines=[f"shared_buffers='{shared_buffers}MB'"] + "main", config_lines=[f"shared_buffers='{shared_buffers}'"] ) def unlogged_build(i: int): con = endpoint.connect() cur = con.cursor() cur.execute("set statement_timeout=0") - cur.execute(f"CREATE TABLE quad_box_tbl_{i} (id int, b box)") + cur.execute(f"CREATE TABLE ginarray_tbl_{i} (arr bigint[])") cur.execute( - f"INSERT INTO quad_box_tbl_{i} SELECT (x - 1) * 100 + y, box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5)) FROM generate_series(1, 100) x, generate_series(1, 1200 * 100) y" + f"INSERT INTO ginarray_tbl_{i} SELECT array_agg(x*1000000+y) FROM generate_series(1, 100) x, generate_series(1, 3000 * 100) y group by y" ) - cur.execute(f"CREATE INDEX quad_box_tbl_idx_{i} ON quad_box_tbl_{i} USING spgist(b)") - cur.execute( - f"EXPLAIN (COSTS OFF) SELECT rank() OVER (ORDER BY b <-> point '123,456') n, b <-> point '123,456' dist, id FROM quad_box_tbl_{i}" + + cur.execute("SET log_min_messages='debug1'") + cur.execute(f"CREATE INDEX ginarray_tbl_idx_{i} ON ginarray_tbl_{i} USING gin(arr)") + cur.execute("RESET log_min_messages") + + # Check that the index is larger than 1 GB, so that it doesn't fit into + # a single segment file. Otherwise, the race condition is avoided, + # because when the backend evicts and writes out the page, it already + # has the file descriptor opened and the write succeeds even if the + # underlying file is deleted concurrently. + assert ( + query_scalar(cur, f"select pg_relation_size('ginarray_tbl_idx_{i}')") + > 1024 * 1024 * 1024 ) + # Run a simple query that uses the index as a sanity check. (In the + # original bug, the CREATE INDEX step failed already. But since we + # already went through all the effort to build the index, might as well + # check that it also works.) + qry = f"SELECT sum(elem) FROM (SELECT unnest(arr) as elem FROM ginarray_tbl_{i} WHERE ARRAY[1*1000000+1234]::bigint[] <@ arr) x" + result = query_scalar(cur, qry) + assert result == 5050123400 + + # Verify that the query actually uses the index + cur.execute(f"EXPLAIN (COSTS OFF) {qry}") + rows = cur.fetchall() + plan = "\n".join(r[0] for r in rows) + log.debug(plan) + assert "Bitmap Index Scan on ginarray_tbl_idx" in plan + threads = [threading.Thread(target=unlogged_build, args=(i,)) for i in range(n_connections)] for thread in threads: thread.start() for thread in threads: thread.join() + + # Sanity check that the indexes were buitl with the "unlogged build" + # method. GIN always indexes use that currently, but if a different, more + # efficient, method is invented later, that might invalidate this test. + assert endpoint.log_contains("starting unlogged build of relation")