Fix issues with prefetch ring buffer resize (#9847)

## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1732110190129479


We observe the following error in the logs 
```
[XX000] ERROR: [NEON_SMGR] [shard 3] Incorrect prefetch read: status=1 response=0x7fafef335138 my=128 receive=128
```
most likely caused by changing `neon.readahead_buffer_size`

## Summary of changes

1. Copy shard state
2. Do not use prefetch_set_unused in readahead_buffer_resize
3. Change prefetch buffer overflow criteria

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
This commit is contained in:
Konstantin Knizhnik
2024-12-01 17:47:28 +02:00
committed by GitHub
parent fae8e7ba76
commit aad809b048
2 changed files with 51 additions and 2 deletions

View File

@@ -439,6 +439,8 @@ readahead_buffer_resize(int newsize, void *extra)
newPState->ring_unused = newsize;
newPState->ring_receive = newsize;
newPState->ring_flush = newsize;
newPState->max_shard_no = MyPState->max_shard_no;
memcpy(newPState->shard_bitmap, MyPState->shard_bitmap, sizeof(MyPState->shard_bitmap));
/*
* Copy over the prefetches.
@@ -495,7 +497,11 @@ readahead_buffer_resize(int newsize, void *extra)
for (; end >= MyPState->ring_last && end != UINT64_MAX; end -= 1)
{
prefetch_set_unused(end);
PrefetchRequest *slot = GetPrfSlot(end);
if (slot->status == PRFS_RECEIVED)
{
pfree(slot->response);
}
}
prfh_destroy(MyPState->prf_hash);
@@ -944,6 +950,9 @@ Retry:
Assert(entry == NULL);
Assert(slot == NULL);
/* There should be no buffer overflow */
Assert(MyPState->ring_last + readahead_buffer_size >= MyPState->ring_unused);
/*
* If the prefetch queue is full, we need to make room by clearing the
* oldest slot. If the oldest slot holds a buffer that was already
@@ -958,7 +967,7 @@ Retry:
* a prefetch request kind of goes against the principles of
* prefetching)
*/
if (MyPState->ring_last + readahead_buffer_size - 1 == MyPState->ring_unused)
if (MyPState->ring_last + readahead_buffer_size == MyPState->ring_unused)
{
uint64 cleanup_index = MyPState->ring_last;

View File

@@ -0,0 +1,40 @@
from __future__ import annotations
import random
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
@pytest.mark.parametrize("shard_count", [None, 4])
@pytest.mark.timeout(600)
def test_prefetch(neon_env_builder: NeonEnvBuilder, shard_count: int | None):
if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
env = neon_env_builder.init_start(
initial_tenant_shard_count=shard_count,
)
n_iter = 10
n_rec = 100000
endpoint = env.endpoints.create_start(
"main",
config_lines=[
"shared_buffers=10MB",
],
)
cur = endpoint.connect().cursor()
cur.execute("CREATE TABLE t(pk integer, filler text default repeat('?', 200))")
cur.execute(f"insert into t (pk) values (generate_series(1,{n_rec}))")
cur.execute("set statement_timeout=0")
cur.execute("set effective_io_concurrency=20")
cur.execute("set max_parallel_workers_per_gather=0")
for _ in range(n_iter):
buf_size = random.randrange(16, 32)
cur.execute(f"set neon.readahead_buffer_size={buf_size}")
limit = random.randrange(1, n_rec)
cur.execute(f"select sum(pk) from (select pk from t limit {limit}) s")