mirror of https://github.com/neondatabase/neon.git
synced 2026-02-09 05:30:37 +00:00

Compare commits: 7 commits, knz/vpc_ca ... test-pull-
| Author | SHA1 | Date |
|---|---|---|
|  | bb45db3982 |  |
|  | 597125e124 |  |
|  | e71d20d392 |  |
|  | aa0554fd1e |  |
|  | b853f78136 |  |
|  | 6ad99826c1 |  |
|  | 311ee793b9 |  |
@@ -810,7 +810,7 @@ impl Endpoint {
     }

     let client = reqwest::Client::builder()
-        .timeout(Duration::from_secs(30))
+        .timeout(Duration::from_secs(120))
         .build()
         .unwrap();
     let response = client
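For illustration only: the hunk's single change raises the per-request HTTP timeout from 30 s to 120 s. A minimal Rust sketch of the same client construction, with the value pulled into a named constant (the constant name and helper function are illustrative, not from the diff):

```rust
use std::time::Duration;

// Illustrative constant; the diff keeps the value inline.
const HTTP_REQUEST_TIMEOUT: Duration = Duration::from_secs(120);

fn build_client() -> reqwest::Client {
    reqwest::Client::builder()
        .timeout(HTTP_REQUEST_TIMEOUT)
        // The surrounding code unwraps the build result as well.
        .build()
        .unwrap()
}
```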
@@ -4506,7 +4506,12 @@ impl Tenant {
     // - this timeline was created while we were finding cutoffs
     // - lsn for timestamp search fails for this timeline repeatedly
     if let Some(cutoffs) = gc_cutoffs.get(&timeline.timeline_id) {
-        target.cutoffs = cutoffs.clone();
+        let original_cutoffs = target.cutoffs.clone();
+        // GC cutoffs should never go back
+        target.cutoffs = GcCutoffs {
+            space: Lsn(cutoffs.space.0.max(original_cutoffs.space.0)),
+            time: Lsn(cutoffs.time.0.max(original_cutoffs.time.0)),
+        }
     }
 }
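The intent of the replacement lines is a monotonic clamp: a freshly computed GC cutoff is applied only where it does not move the existing cutoff backwards. A self-contained sketch of that rule, using simplified stand-ins rather than the pageserver's own `Lsn` and `GcCutoffs` types:

```rust
/// Simplified stand-ins for the pageserver's Lsn and GcCutoffs types.
#[derive(Clone, Copy, PartialEq, Debug)]
struct Lsn(u64);

#[derive(Clone, Copy, PartialEq, Debug)]
struct GcCutoffs {
    space: Lsn,
    time: Lsn,
}

/// Apply `new` on top of `current`, never letting either cutoff move backwards.
fn clamp_cutoffs(current: GcCutoffs, new: GcCutoffs) -> GcCutoffs {
    GcCutoffs {
        space: Lsn(new.space.0.max(current.space.0)),
        time: Lsn(new.time.0.max(current.time.0)),
    }
}

fn main() {
    let current = GcCutoffs { space: Lsn(100), time: Lsn(200) };
    let new = GcCutoffs { space: Lsn(150), time: Lsn(180) };
    // space advances to 150; time stays at 200 instead of regressing to 180.
    assert_eq!(
        clamp_cutoffs(current, new),
        GcCutoffs { space: Lsn(150), time: Lsn(200) }
    );
}
```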
@@ -22,6 +22,7 @@
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "portability/instr_time.h"
 #include "postmaster/interrupt.h"
 #include "storage/buf_internals.h"
 #include "storage/ipc.h"
@@ -118,6 +119,11 @@ typedef struct
     */
    PSConnectionState state;
    PGconn     *conn;
+
+   /* request / response counters for debugging */
+   uint64      nrequests_sent;
+   uint64      nresponses_received;
+
    /*---
     * WaitEventSet containing:
     * - WL_SOCKET_READABLE on 'conn'
@@ -628,6 +634,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
        }

        shard->state = PS_Connected;
+       shard->nrequests_sent = 0;
+       shard->nresponses_received = 0;
    }
    /* FALLTHROUGH */
    case PS_Connected:
@@ -656,6 +664,27 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer)
    int         ret;
    PageServer *shard = &page_servers[shard_no];
    PGconn     *pageserver_conn = shard->conn;
+   instr_time  now,
+               start_ts,
+               since_start,
+               last_log_ts,
+               since_last_log;
+   bool        logged = false;
+
+   /*
+    * As a debugging aid, if we don't get a response for a long time, print a
+    * log message.
+    *
+    * 10 s is a very generous threshold, normally we expect a response in a
+    * few milliseconds. We have metrics to track latencies in normal ranges,
+    * but in the cases that take exceptionally long, it's useful to log the
+    * exact timestamps.
+    */
+#define LOG_INTERVAL_US UINT64CONST(10 * 1000000)
+
+   INSTR_TIME_SET_CURRENT(now);
+   start_ts = last_log_ts = now;
+   INSTR_TIME_SET_ZERO(since_last_log);

 retry:
    ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ );
@@ -663,9 +692,12 @@ retry:
    if (ret == 0)
    {
        WaitEvent   event;
+       long        timeout;
+
+       timeout = Min(0, LOG_INTERVAL_US - INSTR_TIME_GET_MICROSEC(since_last_log));

        /* Sleep until there's something to do */
-       (void) WaitEventSetWait(shard->wes_read, -1L, &event, 1,
+       (void) WaitEventSetWait(shard->wes_read, timeout, &event, 1,
                                WAIT_EVENT_NEON_PS_READ);
        ResetLatch(MyLatch);

@@ -684,9 +716,40 @@ retry:
            }
        }

+       /*
+        * Print a message to the log if a long time has passed with no
+        * response.
+        */
+       INSTR_TIME_SET_CURRENT(now);
+       since_last_log = now;
+       INSTR_TIME_SUBTRACT(since_last_log, last_log_ts);
+       if (INSTR_TIME_GET_MICROSEC(since_last_log) >= LOG_INTERVAL_US)
+       {
+           since_start = now;
+           INSTR_TIME_SUBTRACT(since_start, start_ts);
+           neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses)",
+                          INSTR_TIME_GET_DOUBLE(since_start),
+                          shard->nrequests_sent, shard->nresponses_received);
+           last_log_ts = now;
+           logged = true;
+       }
+
        goto retry;
    }

+   /*
+    * If we logged earlier that the response is taking a long time, log
+    * another message when the response is finally received.
+    */
+   if (logged)
+   {
+       INSTR_TIME_SET_CURRENT(now);
+       since_start = now;
+       INSTR_TIME_SUBTRACT(since_start, start_ts);
+       neon_shard_log(shard_no, LOG, "received response from pageserver after %0.3f s",
+                      INSTR_TIME_GET_DOUBLE(since_start));
+   }
+
    return ret;
 }
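The new logging flow is: wait with a bounded timeout, log every 10 s while no response has arrived, and log once more when the response finally lands. The same cadence sketched in Rust, purely for illustration (the channel stands in for the pageserver connection; it is not how the extension is implemented):

```rust
use std::sync::mpsc::{Receiver, RecvTimeoutError};
use std::time::{Duration, Instant};

const LOG_INTERVAL: Duration = Duration::from_secs(10);

/// Wait for a response, logging every LOG_INTERVAL while nothing arrives,
/// and logging the total wait once a response is received.
fn wait_with_progress_logging<T>(rx: &Receiver<T>) -> Option<T> {
    let start = Instant::now();
    let mut logged = false;
    loop {
        match rx.recv_timeout(LOG_INTERVAL) {
            Ok(resp) => {
                if logged {
                    eprintln!("received response after {:.3} s", start.elapsed().as_secs_f64());
                }
                return Some(resp);
            }
            Err(RecvTimeoutError::Timeout) => {
                eprintln!("no response for {:.3} s, still waiting", start.elapsed().as_secs_f64());
                logged = true;
            }
            Err(RecvTimeoutError::Disconnected) => return None,
        }
    }
}
```

Unlike the C code, the sketch blocks on a channel rather than a WaitEventSet, but the "still waiting" / "finally received" log pair follows the same pattern.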
@@ -786,6 +849,7 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
     * PGRES_POLLING_WRITING state. It's kinda dirty to disconnect at this
     * point, but on the grand scheme of things it's only a small issue.
     */
+   shard->nrequests_sent++;
    if (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0)
    {
        char       *msg = pchomp(PQerrorMessage(pageserver_conn));
@@ -878,6 +942,7 @@ pageserver_receive(shardno_t shard_no)
        neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect: unexpected PQgetCopyData return value: %d", rc);
    }

+   shard->nresponses_received++;
    return (NeonResponse *) resp;
 }
@@ -423,7 +423,11 @@ readahead_buffer_resize(int newsize, void *extra)
     * ensuring we have received all but the last n requests (n = newsize).
     */
    if (MyPState->n_requests_inflight > newsize)
-       prefetch_wait_for(MyPState->ring_unused - newsize);
+   {
+       Assert(MyPState->ring_unused >= MyPState->n_requests_inflight - newsize);
+       prefetch_wait_for(MyPState->ring_unused - (MyPState->n_requests_inflight - newsize));
+       Assert(MyPState->n_requests_inflight <= newsize);
+   }

    /* construct the new PrefetchState, and copy over the memory contexts */
    newPState = MemoryContextAllocZero(TopMemoryContext, newprfs_size);
@@ -438,7 +442,6 @@ readahead_buffer_resize(int newsize, void *extra)
    newPState->ring_last = newsize;
    newPState->ring_unused = newsize;
    newPState->ring_receive = newsize;
-   newPState->ring_flush = newsize;
    newPState->max_shard_no = MyPState->max_shard_no;
    memcpy(newPState->shard_bitmap, MyPState->shard_bitmap, sizeof(MyPState->shard_bitmap));

@@ -489,6 +492,7 @@ readahead_buffer_resize(int newsize, void *extra)
        }
        newPState->n_unused -= 1;
    }
+   newPState->ring_flush = newPState->ring_receive;

    MyNeonCounters->getpage_prefetches_buffered =
        MyPState->n_responses_buffered;
@@ -498,6 +502,7 @@ readahead_buffer_resize(int newsize, void *extra)
    for (; end >= MyPState->ring_last && end != UINT64_MAX; end -= 1)
    {
        PrefetchRequest *slot = GetPrfSlot(end);
+       Assert(slot->status != PRFS_REQUESTED);
        if (slot->status == PRFS_RECEIVED)
        {
            pfree(slot->response);
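The first hunk changes the wait target used when the readahead buffer shrinks: instead of waiting for `ring_unused - newsize`, it waits for `ring_unused - (n_requests_inflight - newsize)`, and the new Asserts state that afterwards at most `newsize` requests remain in flight. A small sketch of just that arithmetic, with hypothetical stand-in fields mirroring the C names:

```rust
/// Hypothetical mirror of the two ring-buffer fields used in the hunk.
struct PrefetchState {
    ring_unused: u64,         // one past the last ring slot handed out
    n_requests_inflight: u64, // requests sent but not yet received
}

/// Ring position to wait for before shrinking the buffer to `newsize`,
/// following the expression introduced above; per the new Asserts, at most
/// `newsize` requests remain in flight after waiting for this position.
fn resize_wait_target(state: &PrefetchState, newsize: u64) -> Option<u64> {
    if state.n_requests_inflight > newsize {
        let excess = state.n_requests_inflight - newsize;
        debug_assert!(state.ring_unused >= excess);
        Some(state.ring_unused - excess)
    } else {
        None
    }
}

fn main() {
    let state = PrefetchState { ring_unused: 100, n_requests_inflight: 8 };
    // Shrinking to 5 buffered requests: wait for position 100 - (8 - 5) = 97.
    assert_eq!(resize_wait_target(&state, 5), Some(97));
}
```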
@@ -86,6 +86,8 @@ enum Command {
         /// For safekeeper node_kind only, json list of timelines and their lsn info
         #[arg(long, default_value = None)]
         timeline_lsns: Option<String>,
+        #[arg(long, default_value_t = false)]
+        verbose: bool,
     },
     TenantSnapshot {
         #[arg(long = "tenant-id")]
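The `--verbose` switch uses clap's derive syntax exactly as shown in the hunk. A minimal standalone sketch of the same pattern, trimmed down to a plain struct instead of the scrubber's subcommand enum:

```rust
use clap::Parser;

/// Illustrative CLI exposing the same kind of boolean flag added in the diff.
#[derive(Parser, Debug)]
struct ScanMetadataArgs {
    /// Dump every error and warning per timeline instead of only counts.
    #[arg(long, default_value_t = false)]
    verbose: bool,
}

fn main() {
    let args = ScanMetadataArgs::parse();
    println!("verbose = {}", args.verbose);
}
```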
@@ -166,6 +168,7 @@ async fn main() -> anyhow::Result<()> {
             dump_db_connstr,
             dump_db_table,
             timeline_lsns,
+            verbose,
         } => {
             if let NodeKind::Safekeeper = node_kind {
                 let db_or_list = match (timeline_lsns, dump_db_connstr) {
@@ -203,6 +206,7 @@ async fn main() -> anyhow::Result<()> {
                     tenant_ids,
                     json,
                     post_to_storcon,
+                    verbose,
                     cli.exit_code,
                 )
                 .await
@@ -313,6 +317,7 @@ pub async fn run_cron_job(
         Vec::new(),
         true,
         post_to_storcon,
+        false, // default to non-verbose mode
         exit_code,
     )
     .await?;
@@ -362,12 +367,13 @@ pub async fn scan_pageserver_metadata_cmd(
     tenant_shard_ids: Vec<TenantShardId>,
     json: bool,
     post_to_storcon: bool,
+    verbose: bool,
     exit_code: bool,
 ) -> anyhow::Result<()> {
     if controller_client.is_none() && post_to_storcon {
         return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run"));
     }
-    match scan_pageserver_metadata(bucket_config.clone(), tenant_shard_ids).await {
+    match scan_pageserver_metadata(bucket_config.clone(), tenant_shard_ids, verbose).await {
         Err(e) => {
             tracing::error!("Failed: {e}");
             Err(e)
@@ -21,8 +21,12 @@ pub struct MetadataSummary {
     tenant_count: usize,
     timeline_count: usize,
     timeline_shard_count: usize,
-    with_errors: HashSet<TenantShardTimelineId>,
-    with_warnings: HashSet<TenantShardTimelineId>,
+    /// Tenant-shard timeline (key) mapping to errors. The key has to be a string because it will be serialized to a JSON.
+    /// The key is generated using `TenantShardTimelineId::to_string()`.
+    with_errors: HashMap<String, Vec<String>>,
+    /// Tenant-shard timeline (key) mapping to warnings. The key has to be a string because it will be serialized to a JSON.
+    /// The key is generated using `TenantShardTimelineId::to_string()`.
+    with_warnings: HashMap<String, Vec<String>>,
     with_orphans: HashSet<TenantShardTimelineId>,
     indices_by_version: HashMap<usize, usize>,
@@ -52,7 +56,12 @@ impl MetadataSummary {
         }
     }

-    fn update_analysis(&mut self, id: &TenantShardTimelineId, analysis: &TimelineAnalysis) {
+    fn update_analysis(
+        &mut self,
+        id: &TenantShardTimelineId,
+        analysis: &TimelineAnalysis,
+        verbose: bool,
+    ) {
         if analysis.is_healthy() {
             self.healthy_tenant_shards.insert(id.tenant_shard_id);
         } else {
@@ -61,11 +70,17 @@ impl MetadataSummary {
         }

         if !analysis.errors.is_empty() {
-            self.with_errors.insert(*id);
+            let entry = self.with_errors.entry(id.to_string()).or_default();
+            if verbose {
+                entry.extend(analysis.errors.iter().cloned());
+            }
         }

         if !analysis.warnings.is_empty() {
-            self.with_warnings.insert(*id);
+            let entry = self.with_warnings.entry(id.to_string()).or_default();
+            if verbose {
+                entry.extend(analysis.warnings.iter().cloned());
+            }
         }
     }
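The summary now keys errors and warnings by the timeline id's string form so the map serializes cleanly to JSON, and it stores the message text only when verbose mode is on (creating the entry at all is what marks the timeline as having issues). A self-contained sketch of that `entry(...).or_default()` pattern, with an illustrative key and message:

```rust
use std::collections::HashMap;

/// Record issues for a timeline, keeping message bodies only in verbose mode.
fn record_issues(
    map: &mut HashMap<String, Vec<String>>,
    timeline_key: &str,
    issues: &[String],
    verbose: bool,
) {
    if issues.is_empty() {
        return;
    }
    let entry = map.entry(timeline_key.to_string()).or_default();
    if verbose {
        entry.extend(issues.iter().cloned());
    }
}

fn main() {
    let mut with_errors: HashMap<String, Vec<String>> = HashMap::new();
    let issues = vec!["example error message".to_string()];
    record_issues(&mut with_errors, "tenant-x/timeline-y", &issues, true);
    assert_eq!(with_errors["tenant-x/timeline-y"].len(), 1);
}
```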
@@ -120,6 +135,7 @@ Index versions: {version_summary}
 pub async fn scan_pageserver_metadata(
     bucket_config: BucketConfig,
     tenant_ids: Vec<TenantShardId>,
+    verbose: bool,
 ) -> anyhow::Result<MetadataSummary> {
     let (remote_client, target) = init_remote(bucket_config, NodeKind::Pageserver).await?;

@@ -164,6 +180,7 @@ pub async fn scan_pageserver_metadata(
         mut tenant_objects: TenantObjectListing,
         timelines: Vec<(TenantShardTimelineId, RemoteTimelineBlobData)>,
         highest_shard_count: ShardCount,
+        verbose: bool,
     ) {
         summary.tenant_count += 1;

@@ -203,7 +220,7 @@ pub async fn scan_pageserver_metadata(
                 Some(data),
             )
             .await;
-            summary.update_analysis(&ttid, &analysis);
+            summary.update_analysis(&ttid, &analysis, verbose);

             timeline_ids.insert(ttid.timeline_id);
         } else {
@@ -271,10 +288,6 @@ pub async fn scan_pageserver_metadata(
         summary.update_data(&data);

         match tenant_id {
-            None => {
-                tenant_id = Some(ttid.tenant_shard_id.tenant_id);
-                highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
-            }
             Some(prev_tenant_id) => {
                 if prev_tenant_id != ttid.tenant_shard_id.tenant_id {
                     // New tenant: analyze this tenant's timelines, clear accumulated tenant_timeline_results
@@ -287,6 +300,7 @@ pub async fn scan_pageserver_metadata(
                         tenant_objects,
                         timelines,
                         highest_shard_count,
+                        verbose,
                     )
                     .instrument(info_span!("analyze-tenant", tenant = %prev_tenant_id))
                     .await;
@@ -296,6 +310,10 @@ pub async fn scan_pageserver_metadata(
                     highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
                 }
             }
+            None => {
+                tenant_id = Some(ttid.tenant_shard_id.tenant_id);
+                highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
+            }
         }

         match &data.blob_data {
@@ -326,6 +344,7 @@ pub async fn scan_pageserver_metadata(
             tenant_objects,
             tenant_timeline_results,
             highest_shard_count,
+            verbose,
         )
         .instrument(info_span!("analyze-tenant", tenant = %tenant_id))
         .await;
@@ -4556,6 +4556,7 @@ class StorageScrubber:
     def __init__(self, env: NeonEnv, log_dir: Path):
         self.env = env
         self.log_dir = log_dir
+        self.allowed_errors: list[str] = []

     def scrubber_cli(
         self, args: list[str], timeout, extra_env: dict[str, str] | None = None
@@ -4633,19 +4634,70 @@ class StorageScrubber:
         if timeline_lsns is not None:
             args.append("--timeline-lsns")
             args.append(json.dumps(timeline_lsns))
+        if node_kind == NodeKind.PAGESERVER:
+            args.append("--verbose")
         stdout = self.scrubber_cli(args, timeout=30, extra_env=extra_env)

         try:
             summary = json.loads(stdout)
-            # summary does not contain "with_warnings" if node_kind is the safekeeper
-            no_warnings = "with_warnings" not in summary or not summary["with_warnings"]
-            healthy = not summary["with_errors"] and no_warnings
+            healthy = self._check_run_healthy(summary)
             return healthy, summary
         except:
             log.error("Failed to decode JSON output from `scan-metadata`. Dumping stdout:")
             log.error(stdout)
             raise

+    def _check_line_allowed(self, line: str) -> bool:
+        for a in self.allowed_errors:
+            try:
+                if re.match(a, line):
+                    return True
+            except re.error:
+                log.error(f"Invalid regex: '{a}'")
+                raise
+        return False
+
+    def _check_line_list_allowed(self, lines: list[str]) -> bool:
+        for line in lines:
+            if not self._check_line_allowed(line):
+                return False
+        return True
+
+    def _check_run_healthy(self, summary: dict[str, Any]) -> bool:
+        # summary does not contain "with_warnings" if node_kind is the safekeeper
+        healthy = True
+        with_warnings = summary.get("with_warnings", None)
+        if with_warnings is not None:
+            if isinstance(with_warnings, list):
+                if len(with_warnings) > 0:
+                    # safekeeper scan_metadata output is a list of tenants
+                    healthy = False
+            else:
+                for _, warnings in with_warnings.items():
+                    assert (
+                        len(warnings) > 0
+                    ), "with_warnings value should not be empty, running without verbose mode?"
+                    if not self._check_line_list_allowed(warnings):
+                        healthy = False
+                        break
+        if not healthy:
+            return healthy
+        with_errors = summary.get("with_errors", None)
+        if with_errors is not None:
+            if isinstance(with_errors, list):
+                if len(with_errors) > 0:
+                    # safekeeper scan_metadata output is a list of tenants
+                    healthy = False
+            else:
+                for _, errors in with_errors.items():
+                    assert (
+                        len(errors) > 0
+                    ), "with_errors value should not be empty, running without verbose mode?"
+                    if not self._check_line_list_allowed(errors):
+                        healthy = False
+                        break
+        return healthy
+
     def tenant_snapshot(self, tenant_id: TenantId, output_path: Path):
         stdout = self.scrubber_cli(
             ["tenant-snapshot", "--tenant-id", str(tenant_id), "--output-path", str(output_path)],
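The new `allowed_errors` list holds regular expressions that scrubber findings are matched against before a run is declared unhealthy. For illustration, the same allow-list check sketched in Rust with the `regex` crate (the finding text is made up; the pattern is the one used by the test further below):

```rust
use regex::Regex;

/// Return true if every reported line matches at least one allowed pattern.
/// Note: the Python fixture uses re.match (anchored at the start); with
/// patterns that begin with ".*" a plain search is equivalent.
fn all_lines_allowed(allowed: &[&str], lines: &[&str]) -> bool {
    let patterns: Vec<Regex> = allowed
        .iter()
        .map(|p| Regex::new(p).expect("invalid allow-list regex"))
        .collect();
    lines
        .iter()
        .all(|line| patterns.iter().any(|re| re.is_match(line)))
}

fn main() {
    let allowed = [".*not present in remote storage.*"];
    let findings = ["timeline 0000: layer not present in remote storage"];
    assert!(all_lines_allowed(&allowed, &findings));
}
```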
test_runner/regress/test_nbtree_pagesplit_cycleid.py (new file, 124 lines)
@@ -0,0 +1,124 @@
import threading
import time

from fixtures.neon_fixtures import NeonEnv

BTREE_NUM_CYCLEID_PAGES = """
WITH raw_pages AS (
    SELECT blkno, get_raw_page_at_lsn('t_uidx', 'main', blkno, NULL, NULL) page
    FROM generate_series(1, pg_relation_size('t_uidx'::regclass) / 8192) blkno
),
parsed_pages AS (
    /* cycle ID is the last 2 bytes of the btree page */
    SELECT blkno, SUBSTRING(page FROM 8191 FOR 2) as cycle_id
    FROM raw_pages
)
SELECT count(*),
       encode(cycle_id, 'hex')
FROM parsed_pages
WHERE encode(cycle_id, 'hex') != '0000'
GROUP BY encode(cycle_id, 'hex');
"""


def test_nbtree_pagesplit_cycleid(neon_simple_env: NeonEnv):
    env = neon_simple_env
    endpoint = env.endpoints.create_start("main")

    ses1 = endpoint.connect().cursor()
    ses1.execute("ALTER SYSTEM SET autovacuum = off;")
    ses1.execute("ALTER SYSTEM SET enable_seqscan = off;")
    ses1.execute("ALTER SYSTEM SET full_page_writes = off;")
    ses1.execute("SELECT pg_reload_conf();")
    ses1.execute("CREATE EXTENSION neon_test_utils;")
    # prepare a large index
    ses1.execute("CREATE TABLE t(id integer GENERATED ALWAYS AS IDENTITY, txt text);")
    ses1.execute("CREATE UNIQUE INDEX t_uidx ON t(id);")
    ses1.execute("INSERT INTO t (txt) SELECT i::text FROM generate_series(1, 2035) i;")

    ses1.execute("SELECT neon_xlogflush();")
    ses1.execute(BTREE_NUM_CYCLEID_PAGES)
    pages = ses1.fetchall()
    assert (
        len(pages) == 0
    ), f"0 back splits with cycle ID expected, real {len(pages)} first {pages[0]}"
    # Delete enough tuples to clear the first index page.
    # (There are up to 407 rows per 8KiB page; 406 for non-rightmost leaves.)
    ses1.execute("DELETE FROM t WHERE id <= 406;")
    # Make sure the page is cleaned up
    ses1.execute("VACUUM (FREEZE, INDEX_CLEANUP ON) t;")

    # Do another delete-then-indexcleanup cycle, to move the pages from
    # "dead" to "reusable"
    ses1.execute("DELETE FROM t WHERE id <= 446;")
    ses1.execute("VACUUM (FREEZE, INDEX_CLEANUP ON) t;")

    # Make sure the vacuum we're about to trigger in s3 has cleanup work to do
    ses1.execute("DELETE FROM t WHERE id <= 610;")

    # Flush WAL, for checking purposes
    ses1.execute("SELECT neon_xlogflush();")
    ses1.execute(BTREE_NUM_CYCLEID_PAGES)
    pages = ses1.fetchall()
    assert len(pages) == 0, f"No back splits with cycle ID expected, got batches of {pages} instead"

    ses2 = endpoint.connect().cursor()
    ses3 = endpoint.connect().cursor()

    # Session 2 pins a btree page, which prevents vacuum from processing that
    # page, thus allowing us to reliably split pages while a concurrent vacuum
    # is running.
    ses2.execute("BEGIN;")
    ses2.execute(
        "DECLARE foo NO SCROLL CURSOR FOR SELECT row_number() over () FROM t ORDER BY id ASC"
    )
    ses2.execute("FETCH FROM foo;")  # pins the leaf page with id 611
    wait_evt = threading.Event()

    # Session 3 runs the VACUUM command. Note that this will block, and
    # therefore must run on another thread.
    # We rely on this running quickly enough to hit the pinned page from
    # session 2 by the time we start other work again in session 1, but
    # technically there is a race where the thread (and/or PostgreSQL process)
    # don't get to that pinned page with vacuum until >2s after evt.set() was
    # called, and session 1 thus might already have split pages.
    def vacuum_freeze_t(ses3, evt: threading.Event):
        # Begin parallel vacuum that should hit the index
        evt.set()
        # This will hang until s2 fetches enough new data from its cursor.
        # This is technically a race with the time.sleep(2) below: if this
        # command doesn't hit the pinned page in time, the test may miss the
        # concurrent-split case it is trying to exercise.
        ses3.execute("VACUUM (FREEZE, INDEX_CLEANUP on, DISABLE_PAGE_SKIPPING on) t;")

    ses3t = threading.Thread(target=vacuum_freeze_t, args=(ses3, wait_evt))
    ses3t.start()
    wait_evt.wait()
    # Make extra sure we got the thread started and vacuum is stuck, by waiting
    # some time even after wait_evt got set. This isn't truly reliable (it is
    # possible that vacuum still hasn't reached the pinned page by then).
    time.sleep(2)

    # Insert 2 pages worth of new data.
    # This should reuse the one empty page, plus another page at the end of
    # the index relation; with split ordering
    # old_blk -> blkno=1 -> old_blk + 1.
    # As this is run while vacuum in session 3 is happening, these splits
    # should receive cycle IDs where applicable.
    ses1.execute("INSERT INTO t (txt) SELECT i::text FROM generate_series(1, 812) i;")
    # unpin the btree page, allowing s3's vacuum to complete
    ses2.execute("FETCH ALL FROM foo;")
    ses2.execute("ROLLBACK;")
    # flush WAL to make sure PS is up-to-date
    ses1.execute("SELECT neon_xlogflush();")
    # check that our expectations are correct
    ses1.execute(BTREE_NUM_CYCLEID_PAGES)
    pages = ses1.fetchall()
    assert (
        len(pages) == 1 and pages[0][0] == 3
    ), f"3 page splits with cycle ID expected; actual {pages}"

    # final cleanup
    ses3t.join()
    ses1.close()
    ses2.close()
    ses3.close()
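The query at the top of the test extracts the B-tree split cycle ID as the last two bytes of each raw 8 KiB page (`SUBSTRING(page FROM 8191 FOR 2)`), hex-encoded. The same extraction sketched in Rust for illustration (the page buffer in `main` is made up):

```rust
const PAGE_SIZE: usize = 8192;

/// Hex-encode the last two bytes of a raw 8 KiB page, mirroring
/// SUBSTRING(page FROM 8191 FOR 2) + encode(..., 'hex') in the test's SQL.
fn btree_cycle_id_hex(page: &[u8]) -> Option<String> {
    if page.len() != PAGE_SIZE {
        return None;
    }
    let tail = &page[PAGE_SIZE - 2..];
    Some(format!("{:02x}{:02x}", tail[0], tail[1]))
}

fn main() {
    // Made-up page image: all zeroes except a cycle ID of 0x0001 at the end.
    let mut page = vec![0u8; PAGE_SIZE];
    page[PAGE_SIZE - 1] = 0x01;
    assert_eq!(btree_cycle_id_hex(&page).as_deref(), Some("0001"));
}
```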
@@ -572,4 +572,10 @@ def test_scrubber_scan_pageserver_metadata(
     unhealthy = env.storage_controller.metadata_health_list_unhealthy()["unhealthy_tenant_shards"]
     assert len(unhealthy) == 1 and unhealthy[0] == str(tenant_shard_id)

-    neon_env_builder.disable_scrub_on_exit()
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert not healthy
+    env.storage_scrubber.allowed_errors.append(".*not present in remote storage.*")
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert healthy
+
+    neon_env_builder.disable_scrub_on_exit()  # We already ran scrubber, no need to do an extra run
Submodule vendor/postgres-v14 (vendored) updated: 373f9decad...13ff324150
Submodule vendor/postgres-v15 (vendored) updated: 972e325e62...8736b10c1d
Submodule vendor/postgres-v16 (vendored) updated: dff6615a8e...81428621f7
Submodule vendor/postgres-v17 (vendored) updated: a10d95be67...01fa3c4866
vendor/revisions.json (vendored, 8 lines changed)
@@ -1,18 +1,18 @@
 {
     "v17": [
         "17.2",
-        "a10d95be67265e0f10a422ba0457f5a7af01de71"
+        "01fa3c48664ca030cfb69bb4a350aa9df4691d88"
     ],
     "v16": [
         "16.6",
-        "dff6615a8e48a10bb17a03fa3c00635f1ace7a92"
+        "81428621f7c04aed03671cf80a928e0a36d92505"
     ],
     "v15": [
         "15.10",
-        "972e325e62b455957adbbdd8580e31275bb5b8c9"
+        "8736b10c1d93d11b9c0489872dd529c4c0f5338f"
     ],
     "v14": [
         "14.15",
-        "373f9decad933d2d46f321231032ae8b0da81acd"
+        "13ff324150fceaac72920e01742addc053db9462"
     ]
 }
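The pin file maps each PostgreSQL major version to its point release and the vendored commit SHA. A hedged sketch of reading such a file with serde_json (the path and error handling are illustrative only):

```rust
use std::collections::HashMap;
use std::fs;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Each entry is ["<point release>", "<vendored commit sha>"].
    let raw = fs::read_to_string("vendor/revisions.json")?;
    let revisions: HashMap<String, Vec<String>> = serde_json::from_str(&raw)?;
    for (major, entry) in &revisions {
        println!("{major}: release {} at commit {}", entry[0], entry[1]);
    }
    Ok(())
}
```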