test(pageserver): use reldirv2 by default in regress tests (#11081)

## Problem

For the pg_regress-style tests, we run both v1 and v2; for all other tests, we default to v2.

Part of https://github.com/neondatabase/neon/issues/9516

## Summary of changes

Use reldir v2 across test cases by default.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Alex Chi Z. authored on 2025-03-05 16:02:44 -05:00, committed by GitHub
parent 1fe23fe8d2 · commit 2de3629b88
5 changed files with 48 additions and 16 deletions


@@ -1964,14 +1964,12 @@ impl DatadirModification<'_> {
.context("deserialize db")?
};
// Add the new relation to the rel directory entry, and write it back
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
return Err(RelationError::AlreadyExists);
}
let v2_enabled = self.maybe_enable_rel_size_v2()?;
if v2_enabled {
if rel_dir.rels.contains(&(rel.relnode, rel.forknum)) {
return Err(RelationError::AlreadyExists);
}
let sparse_rel_dir_key =
rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum);
// check if the rel_dir_key exists in v2
@@ -2006,6 +2004,10 @@ impl DatadirModification<'_> {
self.pending_directory_entries
.push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));
} else {
// Add the new relation to the rel directory entry, and write it back
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
return Err(RelationError::AlreadyExists);
}
if !dbdir_exists {
self.pending_directory_entries
.push((DirectoryKind::Rel, MetricsUpdate::Set(0)))
@@ -2019,6 +2021,7 @@ impl DatadirModification<'_> {
)),
);
}
// Put size
let size_key = rel_size_to_key(rel);
let buf = nblocks.to_le_bytes();
@@ -2141,7 +2144,7 @@ impl DatadirModification<'_> {
// Remove entry from relation size cache
self.tline.remove_cached_rel_size(&rel_tag);
// Delete size entry, as well as all blocks
// Delete size entry, as well as all blocks; this is currently a no-op because we haven't implemented tombstones in storage.
self.delete(rel_key_range(rel_tag));
}
}
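In the v2 path above, a relation is recorded under its own sparse key, so a create no longer rewrites the whole serialized relation directory; the old directory blob is only consulted for duplicate detection (`contains`), and only the v1 else-branch still inserts into it and writes it back. A rough Python model of the two layouts, illustrative only and not the pageserver's actual key encoding:

```python
# v1: one key per (spcnode, dbnode); its value is the whole serialized set of
# relations, so every create/drop rewrites the full blob.
rel_dir_v1: dict[tuple[int, int], set[tuple[int, int]]] = {}

# v2: one small sparse key per relation; a create or drop touches a single key.
rel_dir_v2: dict[tuple[int, int, int, int], bool] = {}


def put_rel_creation_v1(spc: int, db: int, relnode: int, forknum: int) -> None:
    rels = rel_dir_v1.setdefault((spc, db), set())
    if (relnode, forknum) in rels:
        raise RuntimeError("AlreadyExists")
    rels.add((relnode, forknum))
    # ...and the entire set is re-serialized and written back under one key.


def put_rel_creation_v2(spc: int, db: int, relnode: int, forknum: int) -> None:
    key = (spc, db, relnode, forknum)
    # A duplicate may exist either as a sparse v2 key or inside the old v1 blob
    # (relations created before the tenant switched to v2).
    if rel_dir_v2.get(key) or (relnode, forknum) in rel_dir_v1.get((spc, db), set()):
        raise RuntimeError("AlreadyExists")
    rel_dir_v2[key] = True  # only this one key is written
```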


@@ -2451,6 +2451,7 @@ impl Tenant {
create_guard,
initdb_lsn,
None,
None,
)
.await
}
@@ -2782,6 +2783,7 @@ impl Tenant {
timeline_create_guard,
initdb_lsn,
None,
None,
)
.await
}
@@ -4869,6 +4871,7 @@ impl Tenant {
timeline_create_guard,
start_lsn + 1,
Some(Arc::clone(src_timeline)),
Some(src_timeline.get_rel_size_v2_status()),
)
.await?;
@@ -5142,6 +5145,7 @@ impl Tenant {
timeline_create_guard,
pgdata_lsn,
None,
None,
)
.await?;
@@ -5220,13 +5224,14 @@ impl Tenant {
create_guard: TimelineCreateGuard,
start_lsn: Lsn,
ancestor: Option<Arc<Timeline>>,
rel_size_v2_status: Option<RelSizeMigration>,
) -> anyhow::Result<UninitializedTimeline<'a>> {
let tenant_shard_id = self.tenant_shard_id;
let resources = self.build_timeline_resources(new_timeline_id);
resources
.remote_client
.init_upload_queue_for_empty_remote(new_metadata)?;
.init_upload_queue_for_empty_remote(new_metadata, rel_size_v2_status.clone())?;
let timeline_struct = self
.create_timeline_struct(
@@ -5238,7 +5243,7 @@ impl Tenant {
CreateTimelineCause::Load,
create_guard.idempotency.clone(),
None,
None,
rel_size_v2_status,
)
.context("Failed to create timeline data structure")?;


@@ -437,9 +437,13 @@ impl RemoteTimelineClient {
/// Initialize the upload queue for the case where the remote storage is empty,
/// i.e., it doesn't have an `IndexPart`.
///
/// `rel_size_v2_status` needs to be carried over during branching, and that's why
/// it's passed in here.
pub fn init_upload_queue_for_empty_remote(
&self,
local_metadata: &TimelineMetadata,
rel_size_v2_status: Option<RelSizeMigration>,
) -> anyhow::Result<()> {
// Set the maximum number of inprogress tasks to the remote storage concurrency. There's
// certainly no point in starting more upload tasks than this.
@@ -449,7 +453,9 @@ impl RemoteTimelineClient {
.as_ref()
.map_or(0, |r| r.concurrency_limit());
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?;
let initialized_queue =
upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?;
initialized_queue.dirty.rel_size_migration = rel_size_v2_status;
self.update_remote_physical_size_gauge(None);
info!("initialized upload queue as empty");
Ok(())
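Since the status is written into the `dirty` copy of the queue, the very first `index_part.json` uploaded for the new timeline already records it. Roughly, for a timeline branched from a migrated parent (the field name comes from the code above; the surrounding layout and the concrete value are simplified assumptions):

```python
# Simplified, assumed view of the first uploaded index for a branched timeline.
index_part = {
    # ...all other index fields omitted...
    "rel_size_migration": "migrated",  # inherited from the ancestor instead of starting unset
}
```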


@@ -1197,6 +1197,9 @@ class NeonEnv:
config.pageserver_default_tenant_config_compaction_algorithm
)
tenant_config = ps_cfg.setdefault("tenant_config", {})
tenant_config["rel_size_v2_enabled"] = True # Enable relsize_v2 by default in tests
if self.pageserver_remote_storage is not None:
ps_cfg["remote_storage"] = remote_storage_to_toml_dict(
self.pageserver_remote_storage
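With this default in place, every test tenant runs with the v2 relation directory unless a test opts out. A minimal sketch of a per-test override (the test name is made up; the per-tenant config takes precedence over the pageserver-wide default set above):

```python
from fixtures.neon_fixtures import NeonEnvBuilder


def test_something_that_needs_legacy_reldir(neon_env_builder: NeonEnvBuilder):
    # Override the new default for this tenant only: stay on the v1 layout.
    env = neon_env_builder.init_start(
        initial_tenant_conf={"rel_size_v2_enabled": "false"}
    )
    # ...the rest of the test exercises the tenant as usual.
    assert env.initial_tenant is not None
```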


@@ -5,7 +5,7 @@ from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import TYPE_CHECKING, cast
from typing import TYPE_CHECKING, Any, cast
import pytest
from fixtures.log_helper import log
@@ -118,10 +118,20 @@ def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: End
pageserver.http_client().timeline_gc(shard, env.initial_timeline, None)
def patch_tenant_conf(tenant_conf: dict[str, Any], reldir_type: str) -> dict[str, Any]:
tenant_conf = tenant_conf.copy()
if reldir_type == "v2":
tenant_conf["rel_size_v2_enabled"] = "true"
else:
tenant_conf["rel_size_v2_enabled"] = "false"
return tenant_conf
# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.timeout(3000) # Contains many sub-tests, is slow in debug builds
@pytest.mark.parametrize("shard_count", [None, 4])
@pytest.mark.parametrize("reldir_type", ["v1", "v2"])
def test_pg_regress(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
@@ -130,6 +140,7 @@ def test_pg_regress(
base_dir: Path,
pg_distrib_dir: Path,
shard_count: int | None,
reldir_type: str,
):
DBNAME = "regression"
@@ -142,7 +153,7 @@ def test_pg_regress(
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF,
initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type),
initial_tenant_shard_count=shard_count,
)
@@ -196,6 +207,7 @@ def test_pg_regress(
#
@pytest.mark.timeout(1500) # Contains many sub-tests, is slow in debug builds
@pytest.mark.parametrize("shard_count", [None, 4])
@pytest.mark.parametrize("reldir_type", ["v1", "v2"])
def test_isolation(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
@@ -204,6 +216,7 @@ def test_isolation(
base_dir: Path,
pg_distrib_dir: Path,
shard_count: int | None,
reldir_type: str,
):
DBNAME = "isolation_regression"
@@ -211,7 +224,8 @@ def test_isolation(
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type),
initial_tenant_shard_count=shard_count,
)
# Connect to postgres and create a database called "regression".
@@ -267,6 +281,7 @@ def test_isolation(
# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.
@pytest.mark.parametrize("shard_count", [None, 4])
@pytest.mark.parametrize("reldir_type", ["v1", "v2"])
def test_sql_regress(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
@@ -275,6 +290,7 @@ def test_sql_regress(
base_dir: Path,
pg_distrib_dir: Path,
shard_count: int | None,
reldir_type: str,
):
DBNAME = "regression"
@@ -282,7 +298,8 @@ def test_sql_regress(
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type),
initial_tenant_shard_count=shard_count,
)
# Connect to postgres and create a database called "regression".
@@ -345,9 +362,7 @@ def test_tx_abort_with_many_relations(
"""
env = neon_env_builder.init_start(
initial_tenant_conf={
"rel_size_v2_enabled": "true" if reldir_type == "v2" else "false",
}
initial_tenant_conf=patch_tenant_conf({}, reldir_type),
)
ep = env.endpoints.create_start(
"main",