Merge branch 'main' into cicd/debug-regress-tests-on-arm

2026-01-16 18:02:56 +00:00 · 2024-08-20 15:45:42 +03:00
parent 6d9fd2bbb5 02a28c01ca
commit b66d4e0bdc
30 changed files with 99 additions and 201 deletions
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -43,7 +43,7 @@ inputs:
  pg_version:
    description: 'Postgres version to use for tests'
    required: false
-    default: 'v14'
+    default: 'v16'
  benchmark_durations:
    description: 'benchmark durations JSON'
    required: false
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -280,6 +280,7 @@ jobs:
          save_perf_report: ${{ github.ref_name == 'main' }}
          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
          benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
+          pg_version: v16
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
--- a/README.md
+++ b/README.md
@@ -262,7 +262,7 @@ By default, this runs both debug and release modes, and all supported postgres v
 testing locally, it is convenient to run just one set of permutations, like this:

 ```sh
-DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
+DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
 ```

 ## Flamegraphs
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -54,7 +54,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);

-const DEFAULT_PG_VERSION: &str = "15";
+const DEFAULT_PG_VERSION: &str = "16";

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -27,7 +27,7 @@ use crate::pageserver::PageServerNode;
 use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
 use crate::safekeeper::SafekeeperNode;

-pub const DEFAULT_PG_VERSION: u32 = 15;
+pub const DEFAULT_PG_VERSION: u32 = 16;

 //
 // This data structures represents neon_local CLI config
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -49,7 +49,7 @@ use tracing::{info, info_span};
 /// backwards-compatible changes to the metadata format.
 pub const STORAGE_FORMAT_VERSION: u16 = 3;

-pub const DEFAULT_PG_VERSION: u32 = 15;
+pub const DEFAULT_PG_VERSION: u32 = 16;

 // Magic constants used to identify different kinds of files
 pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -393,7 +393,7 @@ struct PageServerTask {

    /// Tasks may optionally be launched for a particular tenant/timeline, enabling
    /// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
-    tenant_shard_id: Option<TenantShardId>,
+    tenant_shard_id: TenantShardId,
    timeline_id: Option<TimelineId>,

    mutable: Mutex<MutableTaskState>,
@@ -405,7 +405,7 @@ struct PageServerTask {
 pub fn spawn<F>(
    runtime: &tokio::runtime::Handle,
    kind: TaskKind,
-    tenant_shard_id: Option<TenantShardId>,
+    tenant_shard_id: TenantShardId,
    timeline_id: Option<TimelineId>,
    name: &str,
    future: F,
@@ -550,7 +550,7 @@ pub async fn shutdown_tasks(
        let tasks = TASKS.lock().unwrap();
        for task in tasks.values() {
            if (kind.is_none() || Some(task.kind) == kind)
-                && (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
+                && (tenant_shard_id.is_none() || Some(task.tenant_shard_id) == tenant_shard_id)
                && (timeline_id.is_none() || task.timeline_id == timeline_id)
            {
                task.cancel.cancel();
@@ -573,13 +573,8 @@ pub async fn shutdown_tasks(
        };
        if let Some(mut join_handle) = join_handle {
            if log_all {
-                if tenant_shard_id.is_none() {
-                    // there are quite few of these
-                    info!(name = task.name, kind = ?task_kind, "stopping global task");
-                } else {
-                    // warn to catch these in tests; there shouldn't be any
-                    warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
-                }
+                // warn to catch these in tests; there shouldn't be any
+                warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
            }
            if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
                .await
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -798,7 +798,7 @@ impl Tenant {
        task_mgr::spawn(
            &tokio::runtime::Handle::current(),
            TaskKind::Attach,
-            Some(tenant_shard_id),
+            tenant_shard_id,
            None,
            "attach tenant",
            async move {
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -565,7 +565,7 @@ mod tests {
        );
        let expected_bytes = vec![
            /* TimelineMetadataHeader */
-            4, 37, 101, 34, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)
+            74, 104, 158, 105, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)
            /* TimelineMetadataBodyV2 */
            0, 0, 0, 0, 0, 0, 2, 0, // disk_consistent_lsn (8 bytes)
            1, 0, 0, 0, 0, 0, 0, 1, 0, // prev_record_lsn (9 bytes)
@@ -574,7 +574,7 @@ mod tests {
            0, 0, 0, 0, 0, 0, 0, 0, // ancestor_lsn (8 bytes)
            0, 0, 0, 0, 0, 0, 0, 0, // latest_gc_cutoff_lsn (8 bytes)
            0, 0, 0, 0, 0, 0, 0, 0, // initdb_lsn (8 bytes)
-            0, 0, 0, 15, // pg_version (4 bytes)
+            0, 0, 0, 16, // pg_version (4 bytes)
            /* padding bytes */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1728,7 +1728,7 @@ impl RemoteTimelineClient {
            task_mgr::spawn(
                &self.runtime,
                TaskKind::RemoteUploadTask,
-                Some(self.tenant_shard_id),
+                self.tenant_shard_id,
                Some(self.timeline_id),
                "remote upload",
                async move {
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -98,7 +98,7 @@ pub fn start_background_loops(
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
        TaskKind::Compaction,
-        Some(tenant_shard_id),
+        tenant_shard_id,
        None,
        &format!("compactor for tenant {tenant_shard_id}"),
        {
@@ -121,7 +121,7 @@ pub fn start_background_loops(
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
        TaskKind::GarbageCollector,
-        Some(tenant_shard_id),
+        tenant_shard_id,
        None,
        &format!("garbage collector for tenant {tenant_shard_id}"),
        {
@@ -144,7 +144,7 @@ pub fn start_background_loops(
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
        TaskKind::IngestHousekeeping,
-        Some(tenant_shard_id),
+        tenant_shard_id,
        None,
        &format!("ingest housekeeping for tenant {tenant_shard_id}"),
        {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2281,7 +2281,7 @@ impl Timeline {
        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            task_mgr::TaskKind::LayerFlushTask,
-            Some(self.tenant_shard_id),
+            self.tenant_shard_id,
            Some(self.timeline_id),
            "layer flush task",
            async move {
@@ -2635,7 +2635,7 @@ impl Timeline {
        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            task_mgr::TaskKind::InitialLogicalSizeCalculation,
-            Some(self.tenant_shard_id),
+            self.tenant_shard_id,
            Some(self.timeline_id),
            "initial size calculation",
            // NB: don't log errors here, task_mgr will do that.
@@ -2803,7 +2803,7 @@ impl Timeline {
        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            task_mgr::TaskKind::OndemandLogicalSizeCalculation,
-            Some(self.tenant_shard_id),
+            self.tenant_shard_id,
            Some(self.timeline_id),
            "ondemand logical size calculation",
            async move {
@@ -5162,7 +5162,7 @@ impl Timeline {
        let task_id = task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            task_mgr::TaskKind::DownloadAllRemoteLayers,
-            Some(self.tenant_shard_id),
+            self.tenant_shard_id,
            Some(self.timeline_id),
            "download all remote layers task",
            async move {
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -395,7 +395,7 @@ impl DeleteTimelineFlow {
        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id),
+            tenant_shard_id,
            Some(timeline_id),
            "timeline_delete",
            async move {
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -60,7 +60,7 @@ impl Timeline {
        task_mgr::spawn(
            BACKGROUND_RUNTIME.handle(),
            TaskKind::Eviction,
-            Some(self.tenant_shard_id),
+            self.tenant_shard_id,
            Some(self.timeline_id),
            &format!(
                "layer eviction for {}/{}",
--- a/scripts/ps_ec2_setup_instance_store
+++ b/scripts/ps_ec2_setup_instance_store
@@ -44,7 +44,7 @@ run the following commands from the top of the neon.git checkout

    # test suite run
    export TEST_OUTPUT="$TEST_OUTPUT"
-    DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest test_runner/performance/test_latency.py
+    DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest test_runner/performance/test_latency.py

    # for interactive use
    export NEON_REPO_DIR="$NEON_REPO_DIR"
--- a/storage_scrubber/src/checks.rs
+++ b/storage_scrubber/src/checks.rs
@@ -10,8 +10,8 @@ use utils::generation::Generation;
 use utils::id::TimelineId;

 use crate::cloud_admin_api::BranchData;
-use crate::metadata_stream::stream_listing_generic;
-use crate::{download_object_with_retries_generic, RootTarget, TenantShardTimelineId};
+use crate::metadata_stream::stream_listing;
+use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId};
 use futures_util::StreamExt;
 use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
 use pageserver::tenant::storage_layer::LayerName;
@@ -320,17 +320,17 @@ pub(crate) fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generati
    }
 }

-pub(crate) async fn list_timeline_blobs_generic(
+pub(crate) async fn list_timeline_blobs(
    remote_client: &GenericRemoteStorage,
    id: TenantShardTimelineId,
-    s3_root: &RootTarget,
+    root_target: &RootTarget,
 ) -> anyhow::Result<RemoteTimelineBlobData> {
    let mut s3_layers = HashSet::new();

    let mut errors = Vec::new();
    let mut unknown_keys = Vec::new();

-    let mut timeline_dir_target = s3_root.timeline_root(&id);
+    let mut timeline_dir_target = root_target.timeline_root(&id);
    timeline_dir_target.delimiter = String::new();

    let mut index_part_keys: Vec<ListingObject> = Vec::new();
@@ -341,7 +341,7 @@ pub(crate) async fn list_timeline_blobs_generic(
        .strip_prefix("/")
        .unwrap_or(&timeline_dir_target.prefix_in_bucket);

-    let mut stream = std::pin::pin!(stream_listing_generic(remote_client, &timeline_dir_target));
+    let mut stream = std::pin::pin!(stream_listing(remote_client, &timeline_dir_target));
    while let Some(obj) = stream.next().await {
        let (key, Some(obj)) = obj? else {
            panic!("ListingObject not specified");
@@ -421,7 +421,7 @@ pub(crate) async fn list_timeline_blobs_generic(

    if let Some(index_part_object_key) = index_part_object.as_ref() {
        let index_part_bytes =
-            download_object_with_retries_generic(remote_client, &index_part_object_key.key)
+            download_object_with_retries(remote_client, &index_part_object_key.key)
                .await
                .context("index_part.json download")?;

--- a/storage_scrubber/src/find_large_objects.rs
+++ b/storage_scrubber/src/find_large_objects.rs
@@ -6,7 +6,7 @@ use remote_storage::ListingMode;
 use serde::{Deserialize, Serialize};

 use crate::{
-    checks::parse_layer_object_name, init_remote_generic, metadata_stream::stream_tenants_generic,
+    checks::parse_layer_object_name, init_remote, metadata_stream::stream_tenants,
    stream_objects_with_retries, BucketConfig, NodeKind,
 };

@@ -50,9 +50,8 @@ pub async fn find_large_objects(
    ignore_deltas: bool,
    concurrency: usize,
 ) -> anyhow::Result<LargeObjectListing> {
-    let (remote_client, target) =
-        init_remote_generic(bucket_config.clone(), NodeKind::Pageserver).await?;
-    let tenants = pin!(stream_tenants_generic(&remote_client, &target));
+    let (remote_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
+    let tenants = pin!(stream_tenants(&remote_client, &target));

    let objects_stream = tenants.map_ok(|tenant_shard_id| {
        let mut tenant_root = target.tenant_root(&tenant_shard_id);
--- a/storage_scrubber/src/garbage.rs
+++ b/storage_scrubber/src/garbage.rs
@@ -19,8 +19,8 @@ use utils::id::TenantId;

 use crate::{
    cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData},
-    init_remote_generic, list_objects_with_retries_generic,
-    metadata_stream::{stream_tenant_timelines_generic, stream_tenants_generic},
+    init_remote, list_objects_with_retries,
+    metadata_stream::{stream_tenant_timelines, stream_tenants},
    BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth,
 };

@@ -153,7 +153,7 @@ async fn find_garbage_inner(
    node_kind: NodeKind,
 ) -> anyhow::Result<GarbageList> {
    // Construct clients for S3 and for Console API
-    let (remote_client, target) = init_remote_generic(bucket_config.clone(), node_kind).await?;
+    let (remote_client, target) = init_remote(bucket_config.clone(), node_kind).await?;
    let cloud_admin_api_client = Arc::new(CloudAdminApiClient::new(console_config));

    // Build a set of console-known tenants, for quickly eliminating known-active tenants without having
@@ -179,7 +179,7 @@ async fn find_garbage_inner(

    // Enumerate Tenants in S3, and check if each one exists in Console
    tracing::info!("Finding all tenants in bucket {}...", bucket_config.bucket);
-    let tenants = stream_tenants_generic(&remote_client, &target);
+    let tenants = stream_tenants(&remote_client, &target);
    let tenants_checked = tenants.map_ok(|t| {
        let api_client = cloud_admin_api_client.clone();
        let console_cache = console_cache.clone();
@@ -237,14 +237,13 @@ async fn find_garbage_inner(
        // Special case: If it's missing in console, check for known bugs that would enable us to conclusively
        // identify it as purge-able anyway
        if console_result.is_none() {
-            let timelines =
-                stream_tenant_timelines_generic(&remote_client, &target, tenant_shard_id)
-                    .await?
-                    .collect::<Vec<_>>()
-                    .await;
+            let timelines = stream_tenant_timelines(&remote_client, &target, tenant_shard_id)
+                .await?
+                .collect::<Vec<_>>()
+                .await;
            if timelines.is_empty() {
                // No timelines, but a heatmap: the deletion bug where we deleted everything but heatmaps
-                let tenant_objects = list_objects_with_retries_generic(
+                let tenant_objects = list_objects_with_retries(
                    &remote_client,
                    ListingMode::WithDelimiter,
                    &target.tenant_root(&tenant_shard_id),
@@ -265,7 +264,7 @@ async fn find_garbage_inner(

                for timeline_r in timelines {
                    let timeline = timeline_r?;
-                    let timeline_objects = list_objects_with_retries_generic(
+                    let timeline_objects = list_objects_with_retries(
                        &remote_client,
                        ListingMode::WithDelimiter,
                        &target.timeline_root(&timeline),
@@ -331,8 +330,7 @@ async fn find_garbage_inner(

    // Construct a stream of all timelines within active tenants
    let active_tenants = tokio_stream::iter(active_tenants.iter().map(Ok));
-    let timelines =
-        active_tenants.map_ok(|t| stream_tenant_timelines_generic(&remote_client, &target, *t));
+    let timelines = active_tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, *t));
    let timelines = timelines.try_buffer_unordered(S3_CONCURRENCY);
    let timelines = timelines.try_flatten();

@@ -507,7 +505,7 @@ pub async fn purge_garbage(
    );

    let (remote_client, _target) =
-        init_remote_generic(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?;
+        init_remote(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?;

    assert_eq!(
        &garbage_list.bucket_config.bucket,
--- a/storage_scrubber/src/lib.rs
+++ b/storage_scrubber/src/lib.rs
@@ -15,7 +15,7 @@ use std::fmt::Display;
 use std::sync::Arc;
 use std::time::Duration;

-use anyhow::{anyhow, Context};
+use anyhow::Context;
 use aws_config::retry::{RetryConfigBuilder, RetryMode};
 use aws_sdk_s3::config::Region;
 use aws_sdk_s3::error::DisplayErrorContext;
@@ -352,7 +352,7 @@ fn make_root_target(
    }
 }

-async fn init_remote(
+async fn init_remote_s3(
    bucket_config: BucketConfig,
    node_kind: NodeKind,
 ) -> anyhow::Result<(Arc<Client>, RootTarget)> {
@@ -369,7 +369,7 @@ async fn init_remote(
    Ok((s3_client, s3_root))
 }

-async fn init_remote_generic(
+async fn init_remote(
    bucket_config: BucketConfig,
    node_kind: NodeKind,
 ) -> anyhow::Result<(GenericRemoteStorage, RootTarget)> {
@@ -394,45 +394,10 @@ async fn init_remote_generic(

    // We already pass the prefix to the remote client above
    let prefix_in_root_target = String::new();
-    let s3_root = make_root_target(bucket_config.bucket, prefix_in_root_target, node_kind);
+    let root_target = make_root_target(bucket_config.bucket, prefix_in_root_target, node_kind);

    let client = GenericRemoteStorage::from_config(&storage_config).await?;
-    Ok((client, s3_root))
-}
-
-async fn list_objects_with_retries(
-    s3_client: &Client,
-    s3_target: &S3Target,
-    continuation_token: Option<String>,
-) -> anyhow::Result<aws_sdk_s3::operation::list_objects_v2::ListObjectsV2Output> {
-    for trial in 0..MAX_RETRIES {
-        match s3_client
-            .list_objects_v2()
-            .bucket(&s3_target.bucket_name)
-            .prefix(&s3_target.prefix_in_bucket)
-            .delimiter(&s3_target.delimiter)
-            .set_continuation_token(continuation_token.clone())
-            .send()
-            .await
-        {
-            Ok(response) => return Ok(response),
-            Err(e) => {
-                if trial == MAX_RETRIES - 1 {
-                    return Err(e)
-                        .with_context(|| format!("Failed to list objects {MAX_RETRIES} times"));
-                }
-                error!(
-                    "list_objects_v2 query failed: bucket_name={}, prefix={}, delimiter={}, error={}",
-                    s3_target.bucket_name,
-                    s3_target.prefix_in_bucket,
-                    s3_target.delimiter,
-                    DisplayErrorContext(e),
-                );
-                tokio::time::sleep(Duration::from_secs(1)).await;
-            }
-        }
-    }
-    Err(anyhow!("unreachable unless MAX_RETRIES==0"))
+    Ok((client, root_target))
 }

 /// Listing possibly large amounts of keys in a streaming fashion.
@@ -479,7 +444,7 @@ fn stream_objects_with_retries<'a>(

 /// If you want to list a bounded amount of prefixes or keys. For larger numbers of keys/prefixes,
 /// use [`stream_objects_with_retries`] instead.
-async fn list_objects_with_retries_generic(
+async fn list_objects_with_retries(
    remote_client: &GenericRemoteStorage,
    listing_mode: ListingMode,
    s3_target: &S3Target,
@@ -516,7 +481,7 @@ async fn list_objects_with_retries_generic(
    panic!("MAX_RETRIES is not allowed to be 0");
 }

-async fn download_object_with_retries_generic(
+async fn download_object_with_retries(
    remote_client: &GenericRemoteStorage,
    key: &RemotePath,
 ) -> anyhow::Result<Vec<u8>> {
@@ -552,7 +517,7 @@ async fn download_object_with_retries_generic(
    anyhow::bail!("Failed to download objects with key {key} {MAX_RETRIES} times")
 }

-async fn download_object_to_file(
+async fn download_object_to_file_s3(
    s3_client: &Client,
    bucket_name: &str,
    key: &str,
--- a/storage_scrubber/src/metadata_stream.rs
+++ b/storage_scrubber/src/metadata_stream.rs
@@ -2,20 +2,19 @@ use std::str::FromStr;

 use anyhow::{anyhow, Context};
 use async_stream::{stream, try_stream};
-use aws_sdk_s3::Client;
 use futures::StreamExt;
 use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePath};
 use tokio_stream::Stream;

 use crate::{
-    list_objects_with_retries, list_objects_with_retries_generic, stream_objects_with_retries,
-    RootTarget, S3Target, TenantShardTimelineId,
+    list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target,
+    TenantShardTimelineId,
 };
 use pageserver_api::shard::TenantShardId;
 use utils::id::{TenantId, TimelineId};

 /// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes
-pub fn stream_tenants_generic<'a>(
+pub fn stream_tenants<'a>(
    remote_client: &'a GenericRemoteStorage,
    target: &'a RootTarget,
 ) -> impl Stream<Item = anyhow::Result<TenantShardId>> + 'a {
@@ -36,44 +35,6 @@ pub fn stream_tenants_generic<'a>(
    }
 }

-/// Given an S3 bucket, output a stream of TenantIds discovered via ListObjectsv2
-pub fn stream_tenants<'a>(
-    s3_client: &'a Client,
-    target: &'a RootTarget,
-) -> impl Stream<Item = anyhow::Result<TenantShardId>> + 'a {
-    try_stream! {
-        let mut continuation_token = None;
-        let tenants_target = target.tenants_root();
-        loop {
-            let fetch_response =
-                list_objects_with_retries(s3_client, &tenants_target, continuation_token.clone()).await?;
-
-            let new_entry_ids = fetch_response
-                .common_prefixes()
-                .iter()
-                .filter_map(|prefix| prefix.prefix())
-                .filter_map(|prefix| -> Option<&str> {
-                    prefix
-                        .strip_prefix(&tenants_target.prefix_in_bucket)?
-                        .strip_suffix('/')
-                }).map(|entry_id_str| {
-                entry_id_str
-                    .parse()
-                    .with_context(|| format!("Incorrect entry id str: {entry_id_str}"))
-            });
-
-            for i in new_entry_ids {
-                yield i?;
-            }
-
-            match fetch_response.next_continuation_token {
-                Some(new_token) => continuation_token = Some(new_token),
-                None => break,
-            }
-        }
-    }
-}
-
 pub async fn stream_tenant_shards<'a>(
    remote_client: &'a GenericRemoteStorage,
    target: &'a RootTarget,
@@ -85,12 +46,9 @@ pub async fn stream_tenant_shards<'a>(
    let prefix_str = &strip_prefix.strip_prefix("/").unwrap_or(&strip_prefix);

    tracing::info!("Listing shards in {}", shards_target.prefix_in_bucket);
-    let listing = list_objects_with_retries_generic(
-        remote_client,
-        ListingMode::WithDelimiter,
-        &shards_target,
-    )
-    .await?;
+    let listing =
+        list_objects_with_retries(remote_client, ListingMode::WithDelimiter, &shards_target)
+            .await?;

    let tenant_shard_ids = listing
        .prefixes
@@ -118,7 +76,7 @@ pub async fn stream_tenant_shards<'a>(
 /// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered
 /// using a listing. The listing is done before the stream is built, so that this
 /// function can be used to generate concurrency on a stream using buffer_unordered.
-pub async fn stream_tenant_timelines_generic<'a>(
+pub async fn stream_tenant_timelines<'a>(
    remote_client: &'a GenericRemoteStorage,
    target: &'a RootTarget,
    tenant: TenantShardId,
@@ -173,7 +131,7 @@ pub async fn stream_tenant_timelines_generic<'a>(
    })
 }

-pub(crate) fn stream_listing_generic<'a>(
+pub(crate) fn stream_listing<'a>(
    remote_client: &'a GenericRemoteStorage,
    target: &'a S3Target,
 ) -> impl Stream<Item = anyhow::Result<(RemotePath, Option<ListingObject>)>> + 'a {
--- a/storage_scrubber/src/pageserver_physical_gc.rs
+++ b/storage_scrubber/src/pageserver_physical_gc.rs
@@ -2,9 +2,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::sync::Arc;
 use std::time::Duration;

-use crate::checks::{list_timeline_blobs_generic, BlobDataParseResult};
-use crate::metadata_stream::{stream_tenant_timelines_generic, stream_tenants_generic};
-use crate::{init_remote_generic, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
+use crate::checks::{list_timeline_blobs, BlobDataParseResult};
+use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
+use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
 use futures_util::{StreamExt, TryStreamExt};
 use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
 use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
@@ -352,7 +352,7 @@ async fn gc_ancestor(
    summary: &mut GcSummary,
 ) -> anyhow::Result<()> {
    // Scan timelines in the ancestor
-    let timelines = stream_tenant_timelines_generic(remote_client, root_target, ancestor).await?;
+    let timelines = stream_tenant_timelines(remote_client, root_target, ancestor).await?;
    let mut timelines = std::pin::pin!(timelines);

    // Build a list of keys to retain
@@ -360,7 +360,7 @@ async fn gc_ancestor(
    while let Some(ttid) = timelines.next().await {
        let ttid = ttid?;

-        let data = list_timeline_blobs_generic(remote_client, ttid, root_target).await?;
+        let data = list_timeline_blobs(remote_client, ttid, root_target).await?;

        let s3_layers = match data.blob_data {
            BlobDataParseResult::Parsed {
@@ -456,11 +456,10 @@ pub async fn pageserver_physical_gc(
    min_age: Duration,
    mode: GcMode,
 ) -> anyhow::Result<GcSummary> {
-    let (remote_client, target) =
-        init_remote_generic(bucket_config.clone(), NodeKind::Pageserver).await?;
+    let (remote_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;

    let tenants = if tenant_shard_ids.is_empty() {
-        futures::future::Either::Left(stream_tenants_generic(&remote_client, &target))
+        futures::future::Either::Left(stream_tenants(&remote_client, &target))
    } else {
        futures::future::Either::Right(futures::stream::iter(tenant_shard_ids.into_iter().map(Ok)))
    };
@@ -473,7 +472,7 @@ pub async fn pageserver_physical_gc(
    let accumulator = Arc::new(std::sync::Mutex::new(TenantRefAccumulator::default()));

    // Generate a stream of TenantTimelineId
-    let timelines = tenants.map_ok(|t| stream_tenant_timelines_generic(&remote_client, &target, t));
+    let timelines = tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, t));
    let timelines = timelines.try_buffered(CONCURRENCY);
    let timelines = timelines.try_flatten();

@@ -487,7 +486,7 @@ pub async fn pageserver_physical_gc(
        accumulator: &Arc<std::sync::Mutex<TenantRefAccumulator>>,
    ) -> anyhow::Result<GcSummary> {
        let mut summary = GcSummary::default();
-        let data = list_timeline_blobs_generic(remote_client, ttid, target).await?;
+        let data = list_timeline_blobs(remote_client, ttid, target).await?;

        let (index_part, latest_gen, candidates) = match &data.blob_data {
            BlobDataParseResult::Parsed {
--- a/storage_scrubber/src/scan_pageserver_metadata.rs
+++ b/storage_scrubber/src/scan_pageserver_metadata.rs
@@ -1,11 +1,11 @@
 use std::collections::{HashMap, HashSet};

 use crate::checks::{
-    branch_cleanup_and_check_errors, list_timeline_blobs_generic, BlobDataParseResult,
+    branch_cleanup_and_check_errors, list_timeline_blobs, BlobDataParseResult,
    RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis,
 };
-use crate::metadata_stream::{stream_tenant_timelines_generic, stream_tenants_generic};
-use crate::{init_remote_generic, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
+use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
+use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
 use futures_util::{StreamExt, TryStreamExt};
 use pageserver::tenant::remote_timeline_client::remote_layer_path;
 use pageserver_api::controller_api::MetadataHealthUpdateRequest;
@@ -120,10 +120,10 @@ pub async fn scan_pageserver_metadata(
    bucket_config: BucketConfig,
    tenant_ids: Vec<TenantShardId>,
 ) -> anyhow::Result<MetadataSummary> {
-    let (remote_client, target) = init_remote_generic(bucket_config, NodeKind::Pageserver).await?;
+    let (remote_client, target) = init_remote(bucket_config, NodeKind::Pageserver).await?;

    let tenants = if tenant_ids.is_empty() {
-        futures::future::Either::Left(stream_tenants_generic(&remote_client, &target))
+        futures::future::Either::Left(stream_tenants(&remote_client, &target))
    } else {
        futures::future::Either::Right(futures::stream::iter(tenant_ids.into_iter().map(Ok)))
    };
@@ -133,7 +133,7 @@ pub async fn scan_pageserver_metadata(
    const CONCURRENCY: usize = 32;

    // Generate a stream of TenantTimelineId
-    let timelines = tenants.map_ok(|t| stream_tenant_timelines_generic(&remote_client, &target, t));
+    let timelines = tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, t));
    let timelines = timelines.try_buffered(CONCURRENCY);
    let timelines = timelines.try_flatten();

@@ -143,7 +143,7 @@ pub async fn scan_pageserver_metadata(
        target: &RootTarget,
        ttid: TenantShardTimelineId,
    ) -> anyhow::Result<(TenantShardTimelineId, RemoteTimelineBlobData)> {
-        let data = list_timeline_blobs_generic(remote_client, ttid, target).await?;
+        let data = list_timeline_blobs(remote_client, ttid, target).await?;
        Ok((ttid, data))
    }
    let timelines = timelines.map_ok(|ttid| report_on_timeline(&remote_client, &target, ttid));
--- a/storage_scrubber/src/scan_safekeeper_metadata.rs
+++ b/storage_scrubber/src/scan_safekeeper_metadata.rs
@@ -14,9 +14,8 @@ use utils::{
 };

 use crate::{
-    cloud_admin_api::CloudAdminApiClient, init_remote_generic,
-    metadata_stream::stream_listing_generic, BucketConfig, ConsoleConfig, NodeKind, RootTarget,
-    TenantShardTimelineId,
+    cloud_admin_api::CloudAdminApiClient, init_remote, metadata_stream::stream_listing,
+    BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId,
 };

 /// Generally we should ask safekeepers, but so far we use everywhere default 16MB.
@@ -107,7 +106,7 @@ pub async fn scan_safekeeper_metadata(
    let timelines = client.query(&query, &[]).await?;
    info!("loaded {} timelines", timelines.len());

-    let (remote_client, target) = init_remote_generic(bucket_config, NodeKind::Safekeeper).await?;
+    let (remote_client, target) = init_remote(bucket_config, NodeKind::Safekeeper).await?;
    let console_config = ConsoleConfig::from_env()?;
    let cloud_admin_api_client = CloudAdminApiClient::new(console_config);

@@ -193,7 +192,7 @@ async fn check_timeline(
        .strip_prefix("/")
        .unwrap_or(&timeline_dir_target.prefix_in_bucket);

-    let mut stream = std::pin::pin!(stream_listing_generic(remote_client, &timeline_dir_target));
+    let mut stream = std::pin::pin!(stream_listing(remote_client, &timeline_dir_target));
    while let Some(obj) = stream.next().await {
        let (key, _obj) = obj?;

--- a/storage_scrubber/src/tenant_snapshot.rs
+++ b/storage_scrubber/src/tenant_snapshot.rs
@@ -1,10 +1,10 @@
 use std::collections::HashMap;
 use std::sync::Arc;

-use crate::checks::{list_timeline_blobs_generic, BlobDataParseResult, RemoteTimelineBlobData};
-use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines_generic};
+use crate::checks::{list_timeline_blobs, BlobDataParseResult, RemoteTimelineBlobData};
+use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines};
 use crate::{
-    download_object_to_file, init_remote, init_remote_generic, BucketConfig, NodeKind, RootTarget,
+    download_object_to_file_s3, init_remote, init_remote_s3, BucketConfig, NodeKind, RootTarget,
    TenantShardTimelineId,
 };
 use anyhow::Context;
@@ -36,7 +36,8 @@ impl SnapshotDownloader {
        output_path: Utf8PathBuf,
        concurrency: usize,
    ) -> anyhow::Result<Self> {
-        let (s3_client, s3_root) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?;
+        let (s3_client, s3_root) =
+            init_remote_s3(bucket_config.clone(), NodeKind::Pageserver).await?;
        Ok(Self {
            s3_client,
            s3_root,
@@ -93,7 +94,7 @@ impl SnapshotDownloader {
            let Some(version) = versions.versions.as_ref().and_then(|v| v.first()) else {
                return Err(anyhow::anyhow!("No versions found for {remote_layer_path}"));
            };
-            download_object_to_file(
+            download_object_to_file_s3(
                &self.s3_client,
                &self.bucket_config.bucket,
                &remote_layer_path,
@@ -218,7 +219,7 @@ impl SnapshotDownloader {

    pub async fn download(&self) -> anyhow::Result<()> {
        let (remote_client, target) =
-            init_remote_generic(self.bucket_config.clone(), NodeKind::Pageserver).await?;
+            init_remote(self.bucket_config.clone(), NodeKind::Pageserver).await?;

        // Generate a stream of TenantShardId
        let shards = stream_tenant_shards(&remote_client, &target, self.tenant_id).await?;
@@ -239,7 +240,7 @@ impl SnapshotDownloader {

        for shard in shards.into_iter().filter(|s| s.shard_count == shard_count) {
            // Generate a stream of TenantTimelineId
-            let timelines = stream_tenant_timelines_generic(&remote_client, &target, shard).await?;
+            let timelines = stream_tenant_timelines(&remote_client, &target, shard).await?;

            // Generate a stream of S3TimelineBlobData
            async fn load_timeline_index(
@@ -247,7 +248,7 @@ impl SnapshotDownloader {
                target: &RootTarget,
                ttid: TenantShardTimelineId,
            ) -> anyhow::Result<(TenantShardTimelineId, RemoteTimelineBlobData)> {
-                let data = list_timeline_blobs_generic(remote_client, ttid, target).await?;
+                let data = list_timeline_blobs(remote_client, ttid, target).await?;
                Ok((ttid, data))
            }
            let timelines =
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -71,8 +71,7 @@ a subdirectory for each version with naming convention `v{PG_VERSION}/`.
 Inside that dir, a `bin/postgres` binary should be present.
 `DEFAULT_PG_VERSION`: The version of Postgres to use,
 This is used to construct full path to the postgres binaries.
-Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION="14"`. Alternatively,
-you can use `--pg-version` argument.
+Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
 `TEST_OUTPUT`: Set the directory where test state and test output files
 should go.
 `TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
--- a/test_runner/fixtures/pg_version.py
+++ b/test_runner/fixtures/pg_version.py
@@ -3,8 +3,6 @@ import os
 from typing import Optional

 import pytest
-from _pytest.config import Config
-from _pytest.config.argparsing import Parser

 """
 This fixture is used to determine which version of Postgres to use for tests.
@@ -52,7 +50,7 @@ class PgVersion(str, enum.Enum):
        return None


-DEFAULT_VERSION: PgVersion = PgVersion.V15
+DEFAULT_VERSION: PgVersion = PgVersion.V16


 def skip_on_postgres(version: PgVersion, reason: str):
@@ -69,22 +67,8 @@ def xfail_on_postgres(version: PgVersion, reason: str):
    )


-def pytest_addoption(parser: Parser):
-    parser.addoption(
-        "--pg-version",
-        action="store",
-        type=PgVersion,
-        help="DEPRECATED: Postgres version to use for tests",
-    )
-
-
 def run_only_on_default_postgres(reason: str):
    return pytest.mark.skipif(
        PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is not DEFAULT_VERSION,
        reason=reason,
    )
-
-
-def pytest_configure(config: Config):
-    if config.getoption("--pg-version"):
-        raise Exception("--pg-version is deprecated, use DEFAULT_PG_VERSION env var instead")
--- a/test_runner/performance/README.md
+++ b/test_runner/performance/README.md
@@ -7,7 +7,7 @@ easier to see if you have compile errors without scrolling up.
 You may also need to run `./scripts/pysync`.

 Then run the tests
-`DEFAULT_PG_VERSION=15 NEON_BIN=./target/release poetry run pytest test_runner/performance`
+`DEFAULT_PG_VERSION=16 NEON_BIN=./target/release poetry run pytest test_runner/performance`

 Some handy pytest flags for local development:
 - `-x` tells pytest to stop on first error
--- a/test_runner/performance/pageserver/README.md
+++ b/test_runner/performance/pageserver/README.md
@@ -11,6 +11,6 @@ It supports mounting snapshots using overlayfs, which improves iteration time.
 Here's a full command line.

 ```
-RUST_BACKTRACE=1 NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 DEFAULT_PG_VERSION=15 BUILD_TYPE=release \
+RUST_BACKTRACE=1 NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release \
    ./scripts/pytest test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
 ````
--- a/test_runner/performance/pageserver/interactive/test_many_small_tenants.py
+++ b/test_runner/performance/pageserver/interactive/test_many_small_tenants.py
@@ -14,7 +14,7 @@ from performance.pageserver.util import ensure_pageserver_ready_for_benchmarking

 """
 Usage:
-DEFAULT_PG_VERSION=15 BUILD_TYPE=debug NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 INTERACTIVE=true \
+DEFAULT_PG_VERSION=16 BUILD_TYPE=debug NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=1 INTERACTIVE=true \
    ./scripts/pytest --timeout 0 test_runner/performance/pageserver/interactive/test_many_small_tenants.py
 """

--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -39,7 +39,7 @@ from fixtures.workload import Workload
 #
 # How to run `test_backward_compatibility` locally:
 #
-#    export DEFAULT_PG_VERSION=15
+#    export DEFAULT_PG_VERSION=16
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
 #    export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
@@ -61,7 +61,7 @@ from fixtures.workload import Workload
 #
 # How to run `test_forward_compatibility` locally:
 #
-#    export DEFAULT_PG_VERSION=15
+#    export DEFAULT_PG_VERSION=16
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
 #    export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}