Use proper tokens and delimiters when listing S3

Kirill Bulatov
2023-03-17 18:19:05 +02:00
committed by Kirill Bulatov
parent b52389f228
commit 018c8b0e2b
2 changed files with 48 additions and 3 deletions
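
Two listing bugs are fixed here. First, the list_objects_v2 request never set a delimiter, so S3 returned a flat key listing instead of grouping keys into common prefixes. Second, the pagination loop read the response's continuation_token, which merely echoes the token sent with the request, instead of next_continuation_token, the cursor for the next page; since the first request carries no token, the echoed field was always None and listing silently stopped after a single page of at most 1000 keys. A minimal sketch of the corrected loop, not the actual pageserver code, assuming an aws-sdk-s3 version with public output fields (as used in this diff) and illustrative client/bucket/prefix arguments:

// A minimal sketch: list the common prefixes ("directories") under `prefix`,
// following S3 pagination to the end. `client`, `bucket`, and `prefix` are
// illustrative, not names from this commit.
async fn list_prefixes(
    client: &aws_sdk_s3::Client,
    bucket: &str,
    prefix: &str,
) -> anyhow::Result<Vec<String>> {
    let mut prefixes = Vec::new();
    let mut continuation_token = None;
    loop {
        let response = client
            .list_objects_v2()
            .bucket(bucket)
            .prefix(prefix)
            // Group keys at '/' so S3 reports common prefixes instead of a flat listing.
            .delimiter("/")
            .set_continuation_token(continuation_token)
            .send()
            .await?;
        prefixes.extend(
            response
                .common_prefixes
                .unwrap_or_default()
                .into_iter()
                .filter_map(|p| p.prefix),
        );
        // next_continuation_token is the cursor for the following page;
        // continuation_token would only echo what we sent and end the loop
        // after the first page.
        match response.next_continuation_token {
            Some(token) => continuation_token = Some(token),
            None => break,
        }
    }
    Ok(prefixes)
}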


@@ -291,6 +291,7 @@ impl RemoteStorage for S3Bucket {
                 .list_objects_v2()
                 .bucket(self.bucket_name.clone())
                 .set_prefix(self.prefix_in_bucket.clone())
+                .delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
                 .set_continuation_token(continuation_token)
                 .send()
                 .await
@@ -306,7 +307,7 @@ impl RemoteStorage for S3Bucket {
                     .filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
             );
-            match fetch_response.continuation_token {
+            match fetch_response.next_continuation_token {
                 Some(new_token) => continuation_token = Some(new_token),
                 None => break,
             }
@@ -371,7 +372,7 @@ impl RemoteStorage for S3Bucket {
                     .filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
             );
-            match fetch_response.continuation_token {
+            match fetch_response.next_continuation_token {
                 Some(new_token) => continuation_token = Some(new_token),
                 None => break,
             }
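
Both hunks above thread the continuation token by hand. As a design note, recent releases of aws-sdk-s3 can drive the same loop through into_paginator(), which makes the continuation_token/next_continuation_token mix-up impossible. A rough sketch, assuming a recent SDK version where the paginator API is available; the helper and its arguments are illustrative:

// A sketch under the assumption of a recent aws-sdk-s3: the paginator threads
// next_continuation_token between requests internally.
async fn list_all_keys(client: &aws_sdk_s3::Client, bucket: &str) -> anyhow::Result<Vec<String>> {
    let mut keys = Vec::new();
    let mut pages = client
        .list_objects_v2()
        .bucket(bucket)
        .into_paginator()
        .send();
    // Each item yielded by the stream is one full ListObjectsV2 response page.
    while let Some(page) = pages.next().await {
        keys.extend(page?.contents.unwrap_or_default().into_iter().filter_map(|o| o.key));
    }
    Ok(keys)
}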


@@ -6,7 +6,7 @@ import shutil
 import threading
 import time
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List, Set, Tuple

 import pytest
 from fixtures.log_helper import log
@@ -717,6 +717,50 @@ def test_empty_branch_remote_storage_upload_on_restart(
), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
# Test creates >1000 timelines and upload them to the remote storage.
# AWS S3 does not return more than 1000 items and starts paginating, ensure that pageserver paginates correctly.
@pytest.mark.skip("Too slow to run, requires too much disk space to run")
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.MOCK_S3])
def test_thousands_of_branches(
neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_compaction_downloads_on_demand_without_image_creation",
)
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
expected_timelines: Set[TimelineId] = set([])
tenant_id = env.initial_tenant
pg = env.postgres.create_start("main", tenant_id=tenant_id)
max_timelines = 1500
for i in range(0, max_timelines):
new_timeline_id = TimelineId.generate()
log.info(f"Creating timeline {new_timeline_id}, {i + 1} out of {max_timelines}")
expected_timelines.add(new_timeline_id)
client.timeline_create(tenant_id, new_timeline_id=new_timeline_id)
client.timeline_checkpoint(tenant_id, new_timeline_id)
wait_for_last_flush_lsn(env, pg, tenant_id, new_timeline_id)
with pg.cursor() as cur:
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
wait_for_upload(client, tenant_id, new_timeline_id, current_lsn)
client.tenant_detach(tenant_id=tenant_id)
client.tenant_attach(tenant_id=tenant_id)
timelines_after_reattach = set(
[timeline["timeline_id"] for timeline in client.timeline_list(tenant_id=tenant_id)]
)
assert (
expected_timelines == timelines_after_reattach
), f"Timelines after reattach do not match the ones created initially. \
Missing timelines: {expected_timelines - timelines_after_reattach}, extra timelines: {timelines_after_reattach - expected_timelines}"
def wait_upload_queue_empty(
client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
):