diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index e663060d17..8bcc6d58ec 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1964,14 +1964,12 @@ impl DatadirModification<'_> { .context("deserialize db")? }; - // Add the new relation to the rel directory entry, and write it back - if !rel_dir.rels.insert((rel.relnode, rel.forknum)) { - return Err(RelationError::AlreadyExists); - } - let v2_enabled = self.maybe_enable_rel_size_v2()?; if v2_enabled { + if rel_dir.rels.contains(&(rel.relnode, rel.forknum)) { + return Err(RelationError::AlreadyExists); + } let sparse_rel_dir_key = rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum); // check if the rel_dir_key exists in v2 @@ -2006,6 +2004,10 @@ impl DatadirModification<'_> { self.pending_directory_entries .push((DirectoryKind::RelV2, MetricsUpdate::Add(1))); } else { + // Add the new relation to the rel directory entry, and write it back + if !rel_dir.rels.insert((rel.relnode, rel.forknum)) { + return Err(RelationError::AlreadyExists); + } if !dbdir_exists { self.pending_directory_entries .push((DirectoryKind::Rel, MetricsUpdate::Set(0))) @@ -2019,6 +2021,7 @@ impl DatadirModification<'_> { )), ); } + // Put size let size_key = rel_size_to_key(rel); let buf = nblocks.to_le_bytes(); @@ -2141,7 +2144,7 @@ impl DatadirModification<'_> { // Remove entry from relation size cache self.tline.remove_cached_rel_size(&rel_tag); - // Delete size entry, as well as all blocks + // Delete size entry, as well as all blocks; this is currently a no-op because we haven't implemented tombstones in storage. self.delete(rel_key_range(rel_tag)); } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3694381078..c78d15c9b5 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2451,6 +2451,7 @@ impl Tenant { create_guard, initdb_lsn, None, + None, ) .await } @@ -2782,6 +2783,7 @@ impl Tenant { timeline_create_guard, initdb_lsn, None, + None, ) .await } @@ -4869,6 +4871,7 @@ impl Tenant { timeline_create_guard, start_lsn + 1, Some(Arc::clone(src_timeline)), + Some(src_timeline.get_rel_size_v2_status()), ) .await?; @@ -5142,6 +5145,7 @@ impl Tenant { timeline_create_guard, pgdata_lsn, None, + None, ) .await?; @@ -5220,13 +5224,14 @@ impl Tenant { create_guard: TimelineCreateGuard, start_lsn: Lsn, ancestor: Option>, + rel_size_v2_status: Option, ) -> anyhow::Result> { let tenant_shard_id = self.tenant_shard_id; let resources = self.build_timeline_resources(new_timeline_id); resources .remote_client - .init_upload_queue_for_empty_remote(new_metadata)?; + .init_upload_queue_for_empty_remote(new_metadata, rel_size_v2_status.clone())?; let timeline_struct = self .create_timeline_struct( @@ -5238,7 +5243,7 @@ impl Tenant { CreateTimelineCause::Load, create_guard.idempotency.clone(), None, - None, + rel_size_v2_status, ) .context("Failed to create timeline data structure")?; diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 2ca482ca43..a784a05972 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -437,9 +437,13 @@ impl RemoteTimelineClient { /// Initialize the upload queue for the case where the remote storage is empty, /// i.e., it doesn't have an `IndexPart`. + /// + /// `rel_size_v2_status` needs to be carried over during branching, and that's why + /// it's passed in here. pub fn init_upload_queue_for_empty_remote( &self, local_metadata: &TimelineMetadata, + rel_size_v2_status: Option, ) -> anyhow::Result<()> { // Set the maximum number of inprogress tasks to the remote storage concurrency. There's // certainly no point in starting more upload tasks than this. @@ -449,7 +453,9 @@ impl RemoteTimelineClient { .as_ref() .map_or(0, |r| r.concurrency_limit()); let mut upload_queue = self.upload_queue.lock().unwrap(); - upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?; + let initialized_queue = + upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?; + initialized_queue.dirty.rel_size_migration = rel_size_v2_status; self.update_remote_physical_size_gauge(None); info!("initialized upload queue as empty"); Ok(()) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index ef9d8cb46f..8e3277a34a 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1197,6 +1197,9 @@ class NeonEnv: config.pageserver_default_tenant_config_compaction_algorithm ) + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config["rel_size_v2_enabled"] = True # Enable relsize_v2 by default in tests + if self.pageserver_remote_storage is not None: ps_cfg["remote_storage"] = remote_storage_to_toml_dict( self.pageserver_remote_storage diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index d2a78b16e4..1d9f385358 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -5,7 +5,7 @@ from __future__ import annotations from concurrent.futures import ThreadPoolExecutor from pathlib import Path -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, Any, cast import pytest from fixtures.log_helper import log @@ -118,10 +118,20 @@ def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: End pageserver.http_client().timeline_gc(shard, env.initial_timeline, None) +def patch_tenant_conf(tenant_conf: dict[str, Any], reldir_type: str) -> dict[str, Any]: + tenant_conf = tenant_conf.copy() + if reldir_type == "v2": + tenant_conf["rel_size_v2_enabled"] = "true" + else: + tenant_conf["rel_size_v2_enabled"] = "false" + return tenant_conf + + # Run the main PostgreSQL regression tests, in src/test/regress. # @pytest.mark.timeout(3000) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_pg_regress( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -130,6 +140,7 @@ def test_pg_regress( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "regression" @@ -142,7 +153,7 @@ def test_pg_regress( neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), initial_tenant_shard_count=shard_count, ) @@ -196,6 +207,7 @@ def test_pg_regress( # @pytest.mark.timeout(1500) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_isolation( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -204,6 +216,7 @@ def test_isolation( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "isolation_regression" @@ -211,7 +224,8 @@ def test_isolation( neon_env_builder.num_pageservers = shard_count neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), + initial_tenant_shard_count=shard_count, ) # Connect to postgres and create a database called "regression". @@ -267,6 +281,7 @@ def test_isolation( # Run extra Neon-specific pg_regress-based tests. The tests and their # schedule file are in the sql_regress/ directory. @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_sql_regress( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -275,6 +290,7 @@ def test_sql_regress( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "regression" @@ -282,7 +298,8 @@ def test_sql_regress( neon_env_builder.num_pageservers = shard_count neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), + initial_tenant_shard_count=shard_count, ) # Connect to postgres and create a database called "regression". @@ -345,9 +362,7 @@ def test_tx_abort_with_many_relations( """ env = neon_env_builder.init_start( - initial_tenant_conf={ - "rel_size_v2_enabled": "true" if reldir_type == "v2" else "false", - } + initial_tenant_conf=patch_tenant_conf({}, reldir_type), ) ep = env.endpoints.create_start( "main",