Mirror of https://github.com/neondatabase/neon.git
# Fix issues with prefetch ring buffer resize (#9847)

## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1732110190129479

We observe the following error in the logs:

```
[XX000] ERROR: [NEON_SMGR] [shard 3] Incorrect prefetch read: status=1 response=0x7fafef335138 my=128 receive=128
```

most likely caused by changing `neon.readahead_buffer_size`.

## Summary of changes

1. Copy shard state
2. Do not use prefetch_set_unused in readahead_buffer_resize
3. Change prefetch buffer overflow criteria

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Commit aad809b048 (parent fae8e7ba76), committed via GitHub.
```diff
@@ -439,6 +439,8 @@ readahead_buffer_resize(int newsize, void *extra)
 	newPState->ring_unused = newsize;
 	newPState->ring_receive = newsize;
 	newPState->ring_flush = newsize;
+	newPState->max_shard_no = MyPState->max_shard_no;
+	memcpy(newPState->shard_bitmap, MyPState->shard_bitmap, sizeof(MyPState->shard_bitmap));
 
 	/*
 	 * Copy over the prefetches.
```
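The two added lines carry the per-shard bookkeeping (`max_shard_no` and `shard_bitmap`) over to the freshly allocated state — the "Copy shard state" item from the summary. A minimal standalone sketch of the idea is below; the struct layout, `MAX_SHARDS`, and `resize_state` are hypothetical stand-ins modelled on the identifiers in the hunk, not the actual Neon definitions.

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MAX_SHARDS 128                        /* hypothetical limit */

/* Hypothetical, simplified prefetch state; field names mirror the diff. */
typedef struct PrefetchState
{
	uint64_t ring_last;
	uint64_t ring_unused;
	uint64_t ring_receive;
	uint64_t ring_flush;
	uint16_t max_shard_no;
	uint8_t  shard_bitmap[(MAX_SHARDS + 7) / 8];
} PrefetchState;

/*
 * Allocate a resized state and carry the shard bookkeeping over, analogous
 * to the two '+' lines in the hunk above.  Dropping the two copies would
 * leave the new state with an empty shard map that no longer matches the
 * requests still in flight.
 */
static PrefetchState *
resize_state(const PrefetchState *old, uint64_t newsize)
{
	PrefetchState *new_state = calloc(1, sizeof(PrefetchState));

	if (new_state == NULL)
		return NULL;

	new_state->ring_unused  = newsize;
	new_state->ring_receive = newsize;
	new_state->ring_flush   = newsize;

	new_state->max_shard_no = old->max_shard_no;
	memcpy(new_state->shard_bitmap, old->shard_bitmap,
	       sizeof(new_state->shard_bitmap));

	return new_state;
}

int
main(void)
{
	PrefetchState old = { .max_shard_no = 4 };
	old.shard_bitmap[0] = 0x0F;               /* shards 0-3 marked */

	PrefetchState *resized = resize_state(&old, 256);
	free(resized);
	return 0;
}
```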
```diff
@@ -495,7 +497,11 @@ readahead_buffer_resize(int newsize, void *extra)
 
 	for (; end >= MyPState->ring_last && end != UINT64_MAX; end -= 1)
 	{
-		prefetch_set_unused(end);
+		PrefetchRequest *slot = GetPrfSlot(end);
+		if (slot->status == PRFS_RECEIVED)
+		{
+			pfree(slot->response);
+		}
 	}
 
 	prfh_destroy(MyPState->prf_hash);
```
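Instead of routing the old ring's slots through `prefetch_set_unused()`, the resize path now only frees the response payloads of slots that were already received; the rest of the old bookkeeping is torn down right afterwards (`prfh_destroy`). A hedged sketch of that cleanup loop follows — `PrefetchSlot`, `PRFS_RECEIVED`, and the modulo indexing are stand-ins modelled on the identifiers in the hunk rather than taken from the Neon sources.

```c
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical slot status and layout, modelled on the identifiers above. */
typedef enum { PRFS_UNUSED, PRFS_REQUESTED, PRFS_RECEIVED } PrefetchStatus;

typedef struct
{
	PrefetchStatus status;
	void          *response;     /* response payload, owned by the slot */
} PrefetchSlot;

/*
 * Walk the tail of the old ring backwards, from 'end' down to 'ring_last',
 * and free only the payloads of slots whose responses already arrived.
 * Unlike a full "set unused" routine, nothing else is updated, since the
 * old state is about to be discarded anyway.  The 'end != UINT64_MAX'
 * test guards against wrap-around when ring_last is 0.
 */
static void
drop_received_responses(PrefetchSlot *ring, uint64_t ring_size,
                        uint64_t ring_last, uint64_t end)
{
	for (; end >= ring_last && end != UINT64_MAX; end -= 1)
	{
		/* stand-in for GetPrfSlot(end) */
		PrefetchSlot *slot = &ring[end % ring_size];

		if (slot->status == PRFS_RECEIVED)
		{
			free(slot->response);
			slot->response = NULL;
			slot->status = PRFS_UNUSED;
		}
	}
}

int
main(void)
{
	PrefetchSlot ring[8] = {0};

	ring[3].status = PRFS_RECEIVED;
	ring[3].response = malloc(16);

	drop_received_responses(ring, 8, 0, 5);
	return 0;
}
```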
```diff
@@ -944,6 +950,9 @@ Retry:
 	Assert(entry == NULL);
 	Assert(slot == NULL);
 
+	/* There should be no buffer overflow */
+	Assert(MyPState->ring_last + readahead_buffer_size >= MyPState->ring_unused);
+
 	/*
 	 * If the prefetch queue is full, we need to make room by clearing the
 	 * oldest slot. If the oldest slot holds a buffer that was already
```
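The new assertion spells out the ring invariant: the number of occupied slots, `ring_unused - ring_last`, must never exceed `readahead_buffer_size`. A tiny standalone check of that arithmetic, with illustrative numbers only (not taken from the Neon code):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical example values, chosen only to illustrate the invariant. */
	uint64_t ring_last             = 1000;  /* oldest slot still tracked  */
	uint64_t ring_unused           = 1128;  /* next slot to be handed out */
	uint64_t readahead_buffer_size = 128;

	/*
	 * Occupied slots = ring_unused - ring_last.  The assertion added in the
	 * hunk above states exactly that this count never exceeds the buffer
	 * size, i.e. ring_last + readahead_buffer_size >= ring_unused.
	 */
	assert(ring_last + readahead_buffer_size >= ring_unused);

	printf("occupied = %llu of %llu\n",
	       (unsigned long long) (ring_unused - ring_last),
	       (unsigned long long) readahead_buffer_size);
	return 0;
}
```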
```diff
@@ -958,7 +967,7 @@ Retry:
 	 * a prefetch request kind of goes against the principles of
 	 * prefetching)
 	 */
-	if (MyPState->ring_last + readahead_buffer_size - 1 == MyPState->ring_unused)
+	if (MyPState->ring_last + readahead_buffer_size == MyPState->ring_unused)
 	{
 		uint64 cleanup_index = MyPState->ring_last;
 
```
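With the invariant above in place, the natural "ring is full" condition is exact equality: cleanup is triggered once `readahead_buffer_size` slots are occupied, whereas the old `- 1` form fired one slot earlier. A small sketch comparing the two predicates; `ring_is_full_old` and `ring_is_full_new` are hypothetical helpers written only for illustration.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Old criterion: fires when size - 1 slots are occupied. */
static bool
ring_is_full_old(uint64_t ring_last, uint64_t ring_unused, uint64_t size)
{
	return ring_last + size - 1 == ring_unused;
}

/* New criterion (the '+' line above): fires when exactly 'size' slots are occupied. */
static bool
ring_is_full_new(uint64_t ring_last, uint64_t ring_unused, uint64_t size)
{
	return ring_last + size == ring_unused;
}

int
main(void)
{
	/* Illustrative numbers: ring_last = 1000, buffer of 128 slots. */
	uint64_t ring_last = 1000;
	uint64_t size      = 128;

	for (uint64_t occupied = 126; occupied <= 128; occupied++)
	{
		uint64_t ring_unused = ring_last + occupied;

		printf("occupied=%3llu  old=%d  new=%d\n",
		       (unsigned long long) occupied,
		       ring_is_full_old(ring_last, ring_unused, size),
		       ring_is_full_new(ring_last, ring_unused, size));
	}
	return 0;
}
```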
New file: test_runner/regress/test_prefetch_buffer_resize.py (40 lines)

```python
from __future__ import annotations

import random

import pytest
from fixtures.neon_fixtures import NeonEnvBuilder


@pytest.mark.parametrize("shard_count", [None, 4])
@pytest.mark.timeout(600)
def test_prefetch(neon_env_builder: NeonEnvBuilder, shard_count: int | None):
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    env = neon_env_builder.init_start(
        initial_tenant_shard_count=shard_count,
    )
    n_iter = 10
    n_rec = 100000

    endpoint = env.endpoints.create_start(
        "main",
        config_lines=[
            "shared_buffers=10MB",
        ],
    )

    cur = endpoint.connect().cursor()

    cur.execute("CREATE TABLE t(pk integer, filler text default repeat('?', 200))")
    cur.execute(f"insert into t (pk) values (generate_series(1,{n_rec}))")

    cur.execute("set statement_timeout=0")
    cur.execute("set effective_io_concurrency=20")
    cur.execute("set max_parallel_workers_per_gather=0")

    for _ in range(n_iter):
        buf_size = random.randrange(16, 32)
        cur.execute(f"set neon.readahead_buffer_size={buf_size}")
        limit = random.randrange(1, n_rec)
        cur.execute(f"select sum(pk) from (select pk from t limit {limit}) s")
```