Add test_unlogged_build.py

This commit is contained in:
Kosntantin Knizhnik
2025-07-18 15:11:49 +03:00
committed by Konstantin Knizhnik
parent de33affb1f
commit 3c54a235dd
3 changed files with 60 additions and 5 deletions

View File

@@ -1622,7 +1622,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
/*
* There is no lock held between get_cached_relkind and set_cached_relkind.
* We assume that multiple backends can repeat this check and get the same result (there is assert in set_cached_relkind).
* And concurrent setting UNLOGGED_BUILD is not possible because only one relation can perform unlogged build.
* And concurrent setting UNLOGGED_BUILD is not possible because only one backend can perform unlogged build.
*/
set_cached_relkind(rinfo, relkind);
}

View File

@@ -9,10 +9,22 @@ if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder
# This test checks that there is no race between the end of an unlogged build and backends evicting pages of this index.
# We need to create quite a large index (more than one gigabyte segment) to reproduce the write error caused by this race condition
# (the backend that completed the unlogged build removes local files while a backend evicting a page tries to write to the file).
# If the index size is smaller than the segment size, the problem is avoided by the file descriptor cache, which prevents file deletion.
#
# This test demonstrates the effect of the relkind cache. Postgres doesn't store relation persistence in the shared buffer tag.
# It means that if a page is evicted from shared buffers and the relation is not cached in the relation cache, then persistence=0 (auto) is used.
# For vanilla Postgres it is not important, because in both cases we need to write changes to the file.
# In Neon, neon_write does nothing for permanent relations, while for an unlogged relation it should store data in a local file.
# Originally Neon used the `mdexists` call to check if a local file exists and so determine if it is an unlogged relation.
# mdexists is not so cheap: it closes and opens the file. The relkind cache allows us to eliminate these checks.
#
# This test tries to emulate the situation when most of the writes are with persistence=0.
# We create multiple connections to the database and in each one fill its own table. So each backend writes only its own table, and other tables'
# descriptors are not cached. At the same time all backends perform eviction from shared buffers. The probability that a backend evicts a page of its own
# relation is 1/N, where N is the number of relations = number of backends. The more relations, the smaller the probability.
# For a large enough number of relations, most of the writes are with unknown persistence.
#
# On Linux this test shows about a 2x speed improvement.
#
@pytest.mark.timeout(10000)
def test_unlogged(neon_env_builder: NeonEnvBuilder):
n_tables = 20

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
import threading
from typing import TYPE_CHECKING
import pytest
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder
# Checks that there is no race between the end of an unlogged build and backends evicting pages of this index.
# We need to create quite a large index (more than one gigabyte segment) to reproduce the write error caused by this race condition
# (the backend that completed the unlogged build removes local files while a backend evicting a page tries to write to the file).
# If the index size is smaller than the segment size, the problem is avoided by the file descriptor cache, which prevents file deletion.
@pytest.mark.timeout(10000)
def test_unlogged_build(neon_env_builder: NeonEnvBuilder):
    """Build several large SP-GiST indexes concurrently on a single endpoint.

    Each worker thread opens its own connection, fills a private table with
    100 * 120,000 rows of boxes, builds an SP-GiST index on it and then runs
    an EXPLAIN of a distance-ordered query over that table.

    Any exception raised by a worker (for example a failed SQL statement) is
    collected and the first one is re-raised in the main thread after all
    workers have finished, so that a failure in a worker fails the test.
    """
    n_connections = 4
    shared_buffers = 1024  # passed to Postgres with an "MB" suffix below

    env = neon_env_builder.init_start()
    endpoint = env.endpoints.create_start(
        "main", config_lines=[f"shared_buffers='{shared_buffers}MB'"]
    )

    # Exceptions raised inside a threading.Thread target are not propagated
    # to the thread that calls join(); collect them here so the test can
    # report them instead of silently passing.
    failures: list[BaseException] = []

    def unlogged_build(i: int) -> None:
        """Create, populate and index table number *i* on a fresh connection."""
        try:
            con = endpoint.connect()
            try:
                cur = con.cursor()
                cur.execute("set statement_timeout=0")
                cur.execute(f"CREATE TABLE quad_box_tbl_{i} (id int, b box)")
                cur.execute(
                    f"INSERT INTO quad_box_tbl_{i} SELECT (x - 1) * 100 + y, box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5)) FROM generate_series(1, 100) x, generate_series(1, 1200 * 100) y"
                )
                cur.execute(
                    f"CREATE INDEX quad_box_tbl_idx_{i} ON quad_box_tbl_{i} USING spgist(b)"
                )
                cur.execute(
                    f"EXPLAIN (COSTS OFF) SELECT rank() OVER (ORDER BY b <-> point '123,456') n, b <-> point '123,456' dist, id FROM quad_box_tbl_{i}"
                )
            finally:
                # Release the backend promptly instead of waiting for garbage collection.
                con.close()
        except Exception as exc:
            # Deliberately broad: this is the thread boundary, and the
            # exception is re-raised from the main thread below.
            failures.append(exc)

    threads = [
        threading.Thread(target=unlogged_build, args=(i,)) for i in range(n_connections)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    if failures:
        raise failures[0]