Improve the added test case

- Make it faster by using GIN instead of SP-GiST

- Make it more robust by checking some of the assumptions, like that
  the index is larger than 1 GB

- Improve comments
This commit is contained in:
Heikki Linnakangas
2025-07-28 14:40:07 +03:00
committed by Konstantin Knizhnik
parent 71980e3103
commit 43b0106abf
2 changed files with 56 additions and 16 deletions

View File

@@ -618,7 +618,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
result->effective_request_lsn = result->request_lsn;
Assert(last_written_lsn <= result->request_lsn);
neon_log(DEBUG1, "neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X",
neon_log(DEBUG2, "neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X",
LSN_FORMAT_ARGS(result->request_lsn), LSN_FORMAT_ARGS(result->not_modified_since));
}
}
@@ -642,7 +642,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
* must still in the buffer cache, so our request cannot concern
* those.
*/
neon_log(DEBUG1, "neon_get_request_lsns GetLastWrittenLSN lsn %X/%X",
neon_log(DEBUG2, "neon_get_request_lsns GetLastWrittenLSN lsn %X/%X",
LSN_FORMAT_ARGS(last_written_lsn));
/*
@@ -2033,7 +2033,7 @@ neon_start_unlogged_build(SMgrRelation reln)
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
neon_log(ERROR, "unlogged relation build is already in progress");
ereport(SmgrTrace,
ereport(DEBUG1,
(errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
@@ -2108,7 +2108,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
{
Assert(RelFileInfoEquals(unlogged_build_rel_info, InfoFromSMgrRel(reln)));
ereport(SmgrTrace,
ereport(DEBUG1,
(errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u",
RelFileInfoFmt((unlogged_build_rel_info)))));
@@ -2146,7 +2146,7 @@ neon_end_unlogged_build(SMgrRelation reln)
Assert(RelFileInfoEquals(unlogged_build_rel_info, rinfo));
ereport(SmgrTrace,
ereport(DEBUG1,
(errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u",
RelFileInfoFmt(unlogged_build_rel_info))));

View File

@@ -4,40 +4,80 @@ import threading
from typing import TYPE_CHECKING
import pytest
from fixtures.log_helper import log
from fixtures.utils import query_scalar
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder
# Checks that the is no race between end of unlogged build and backends evicting pages of this index.
# We need to create quite large index (more than one gigabyte segment) to reproduce write error caused by this race condition
# (backend completed unlogged build removes local files while backend evicting page tries to write to the file).
# If index size is smaller than segment size, the problem is avoided by file descriptor cache which prevents file deletion.
@pytest.mark.timeout(10000)
def test_unlogged_build(neon_env_builder: NeonEnvBuilder):
"""
Check for race conditions between end of unlogged build and backends
evicting pages of this index. We used to have a race condition where a
backend completed unlogged build and removed local files, while another
backend is evicting a page belonging to the newly-built index and tries to
write to the file that's removed.
"""
n_connections = 4
shared_buffers = 128
# A small shared_buffers setting forces backends to evict more pages from
# the buffer cache during the build, which is needed to expose the bug with
# concurrent eviction.
shared_buffers = "128MB"
env = neon_env_builder.init_start()
endpoint = env.endpoints.create_start(
"main", config_lines=[f"shared_buffers='{shared_buffers}MB'"]
"main", config_lines=[f"shared_buffers='{shared_buffers}'"]
)
def unlogged_build(i: int):
con = endpoint.connect()
cur = con.cursor()
cur.execute("set statement_timeout=0")
cur.execute(f"CREATE TABLE quad_box_tbl_{i} (id int, b box)")
cur.execute(f"CREATE TABLE ginarray_tbl_{i} (arr bigint[])")
cur.execute(
f"INSERT INTO quad_box_tbl_{i} SELECT (x - 1) * 100 + y, box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5)) FROM generate_series(1, 100) x, generate_series(1, 1200 * 100) y"
f"INSERT INTO ginarray_tbl_{i} SELECT array_agg(x*1000000+y) FROM generate_series(1, 100) x, generate_series(1, 3000 * 100) y group by y"
)
cur.execute(f"CREATE INDEX quad_box_tbl_idx_{i} ON quad_box_tbl_{i} USING spgist(b)")
cur.execute(
f"EXPLAIN (COSTS OFF) SELECT rank() OVER (ORDER BY b <-> point '123,456') n, b <-> point '123,456' dist, id FROM quad_box_tbl_{i}"
cur.execute("SET log_min_messages='debug1'")
cur.execute(f"CREATE INDEX ginarray_tbl_idx_{i} ON ginarray_tbl_{i} USING gin(arr)")
cur.execute("RESET log_min_messages")
# Check that the index is larger than 1 GB, so that it doesn't fit into
# a single segment file. Otherwise, the race condition is avoided,
# because when the backend evicts and writes out the page, it already
# has the file descriptor opened and the write succeeds even if the
# underlying file is deleted concurrently.
assert (
query_scalar(cur, f"select pg_relation_size('ginarray_tbl_idx_{i}')")
> 1024 * 1024 * 1024
)
# Run a simple query that uses the index as a sanity check. (In the
# original bug, the CREATE INDEX step failed already. But since we
# already went through all the effort to build the index, might as well
# check that it also works.)
qry = f"SELECT sum(elem) FROM (SELECT unnest(arr) as elem FROM ginarray_tbl_{i} WHERE ARRAY[1*1000000+1234]::bigint[] <@ arr) x"
result = query_scalar(cur, qry)
assert result == 5050123400
# Verify that the query actually uses the index
cur.execute(f"EXPLAIN (COSTS OFF) {qry}")
rows = cur.fetchall()
plan = "\n".join(r[0] for r in rows)
log.debug(plan)
assert "Bitmap Index Scan on ginarray_tbl_idx" in plan
threads = [threading.Thread(target=unlogged_build, args=(i,)) for i in range(n_connections)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
# Sanity check that the indexes were buitl with the "unlogged build"
# method. GIN always indexes use that currently, but if a different, more
# efficient, method is invented later, that might invalidate this test.
assert endpoint.log_contains("starting unlogged build of relation")