From 018c8b0e2ba5a6875bd1e3ebfcfd7e75d3f908f0 Mon Sep 17 00:00:00 2001
From: Kirill Bulatov
Date: Fri, 17 Mar 2023 18:19:05 +0200
Subject: [PATCH] Use proper tokens and delimiters when listing S3

An S3 ListObjectsV2 response echoes the request's token in
`continuation_token` and carries the token for the next page in
`next_continuation_token`; reading the former stopped every listing after
the first page of 1000 keys. Also pass REMOTE_STORAGE_PREFIX_SEPARATOR as
the delimiter so that keys below the current level are grouped into common
prefixes instead of being returned individually.

---
 libs/remote_storage/src/s3_bucket.rs       |  5 ++-
 test_runner/regress/test_remote_storage.py | 46 +++++++++++++++++++++-
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs
index 93f5e0596e..a476ff32e0 100644
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -291,6 +291,7 @@ impl RemoteStorage for S3Bucket {
                 .list_objects_v2()
                 .bucket(self.bucket_name.clone())
                 .set_prefix(self.prefix_in_bucket.clone())
+                .delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
                 .set_continuation_token(continuation_token)
                 .send()
                 .await
@@ -306,7 +307,7 @@ impl RemoteStorage for S3Bucket {
                     .filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
             );
 
-            match fetch_response.continuation_token {
+            match fetch_response.next_continuation_token {
                 Some(new_token) => continuation_token = Some(new_token),
                 None => break,
             }
@@ -371,7 +372,7 @@ impl RemoteStorage for S3Bucket {
                     .filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
             );
 
-            match fetch_response.continuation_token {
+            match fetch_response.next_continuation_token {
                 Some(new_token) => continuation_token = Some(new_token),
                 None => break,
             }
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index f6600e8974..b9709d9b83 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -6,7 +6,7 @@ import shutil
 import threading
 import time
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List, Set, Tuple
 
 import pytest
 from fixtures.log_helper import log
@@ -717,6 +717,50 @@ def test_empty_branch_remote_storage_upload_on_restart(
     ), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
 
 
+# Test creates >1000 timelines and uploads them to the remote storage.
+# AWS S3 returns at most 1000 items per listing response and paginates the rest; ensure that the pageserver follows the continuation tokens correctly.
+@pytest.mark.skip("Too slow and requires too much disk space to run")
+@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.MOCK_S3])
+def test_thousands_of_branches(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
+):
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_thousands_of_branches",
+    )
+
+    env = neon_env_builder.init_start()
+    client = env.pageserver.http_client()
+    expected_timelines: Set[TimelineId] = set()
+    tenant_id = env.initial_tenant
+    pg = env.postgres.create_start("main", tenant_id=tenant_id)
+
+    max_timelines = 1500
+    for i in range(max_timelines):
+        new_timeline_id = TimelineId.generate()
+        log.info(f"Creating timeline {new_timeline_id}, {i + 1} out of {max_timelines}")
+        expected_timelines.add(new_timeline_id)
+
+        client.timeline_create(tenant_id, new_timeline_id=new_timeline_id)
+        client.timeline_checkpoint(tenant_id, new_timeline_id)
+        wait_for_last_flush_lsn(env, pg, tenant_id, new_timeline_id)
+        with pg.cursor() as cur:
+            current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
+        wait_for_upload(client, tenant_id, new_timeline_id, current_lsn)
+
+    client.tenant_detach(tenant_id=tenant_id)
+    client.tenant_attach(tenant_id=tenant_id)
+
+    timelines_after_reattach = {
+        TimelineId(timeline["timeline_id"]) for timeline in client.timeline_list(tenant_id=tenant_id)
+    }
+
+    assert expected_timelines == timelines_after_reattach, (
+        "Timelines after reattach do not match the ones created initially."
+        f" Missing timelines: {expected_timelines - timelines_after_reattach}, extra timelines: {timelines_after_reattach - expected_timelines}"
+    )
+
+
 def wait_upload_queue_empty(
     client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
 ):
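
For reference, the pagination contract that both Rust hunks rely on: each
ListObjectsV2 response echoes the token the request was made with
(ContinuationToken) and supplies the token for the next page
(NextContinuationToken), which is absent on the last page. A minimal sketch
of the same loop in Python with boto3 follows; it is not part of the patch,
and the bucket and prefix names are illustrative only.

import boto3

def list_keys_at_level(bucket: str, prefix: str) -> list[str]:
    """Collect the keys directly under `prefix`, following pagination tokens.

    The "/" delimiter keeps nested keys out of Contents, grouping them
    into CommonPrefixes, analogous to REMOTE_STORAGE_PREFIX_SEPARATOR above.
    """
    s3 = boto3.client("s3")
    keys: list[str] = []
    continuation_token = None
    while True:
        kwargs = {"Bucket": bucket, "Prefix": prefix, "Delimiter": "/"}
        if continuation_token is not None:
            kwargs["ContinuationToken"] = continuation_token
        response = s3.list_objects_v2(**kwargs)
        keys.extend(obj["Key"] for obj in response.get("Contents", []))
        # Reading "ContinuationToken" here (the echoed request token) is the
        # bug fixed above: it is unset on the first response, so the loop
        # would stop after the first 1000 keys.
        continuation_token = response.get("NextContinuationToken")
        if continuation_token is None:
            break
    return keys

keys = list_keys_at_level("my-bucket", "tenants/")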