remote_storage: fix prefix handling in remote storage & clean up (#7431)

## Problem

Split off from https://github.com/neondatabase/neon/pull/7399, which is
the first piece of code that does a WithDelimiter object listing using a
prefix that isn't a full directory name.

## Summary of changes

- Revise list function to not append a `/` to the prefix -- prefixes
don't have to end with a slash.
- Fix local_fs implementation of list to not assume that WithDelimiter
case will always use a directory as a prerfix.
- Remove `list_files`, `list_prefixes` wrappers, as they add little
value and obscure the underlying list function -- we need callers to
understand the semantics of what they're really calling (listobjectsv2)
This commit is contained in:
John Spray
2024-04-23 16:24:51 +01:00
committed by GitHub
parent 89f023e6b0
commit e22c072064
10 changed files with 305 additions and 268 deletions

View File

@@ -1,5 +1,6 @@
use anyhow::Context;
use camino::Utf8Path;
use remote_storage::ListingMode;
use remote_storage::RemotePath;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
@@ -54,9 +55,9 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))
.context("common_prefix construction")?;
let root_remote_prefixes = test_client
.list_prefixes(None, &cancel)
.await
.context("client list root prefixes failure")?
.list(None, ListingMode::WithDelimiter, None, &cancel)
.await?
.prefixes
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
@@ -65,9 +66,14 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
);
let nested_remote_prefixes = test_client
.list_prefixes(Some(&base_prefix), &cancel)
.await
.context("client list nested prefixes failure")?
.list(
Some(&base_prefix.add_trailing_slash()),
ListingMode::WithDelimiter,
None,
&cancel,
)
.await?
.prefixes
.into_iter()
.collect::<HashSet<_>>();
let remote_only_prefixes = nested_remote_prefixes
@@ -90,11 +96,13 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
///
/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`]
/// Then performs the following queries:
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
/// 1. `list(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
/// 2. `list("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
#[test_context(MaybeEnabledStorageWithSimpleTestBlobs)]
#[tokio::test]
async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> anyhow::Result<()> {
async fn list_no_delimiter_works(
ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs,
) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorageWithSimpleTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledStorageWithSimpleTestBlobs::Disabled => return Ok(()),
@@ -107,29 +115,36 @@ async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> a
let base_prefix =
RemotePath::new(Utf8Path::new("folder1")).context("common_prefix construction")?;
let root_files = test_client
.list_files(None, None, &cancel)
.list(None, ListingMode::NoDelimiter, None, &cancel)
.await
.context("client list root files failure")?
.keys
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_files,
ctx.remote_blobs.clone(),
"remote storage list_files on root mismatches with the uploads."
"remote storage list on root mismatches with the uploads."
);
// Test that max_keys limit works. In total there are about 21 files (see
// upload_simple_remote_data call in test_real_s3.rs).
let limited_root_files = test_client
.list_files(None, Some(NonZeroU32::new(2).unwrap()), &cancel)
.list(
None,
ListingMode::NoDelimiter,
Some(NonZeroU32::new(2).unwrap()),
&cancel,
)
.await
.context("client list root files failure")?;
assert_eq!(limited_root_files.len(), 2);
assert_eq!(limited_root_files.keys.len(), 2);
let nested_remote_files = test_client
.list_files(Some(&base_prefix), None, &cancel)
.list(Some(&base_prefix), ListingMode::NoDelimiter, None, &cancel)
.await
.context("client list nested files failure")?
.keys
.into_iter()
.collect::<HashSet<_>>();
let trim_remote_blobs: HashSet<_> = ctx
@@ -141,7 +156,7 @@ async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> a
.collect();
assert_eq!(
nested_remote_files, trim_remote_blobs,
"remote storage list_files on subdirrectory mismatches with the uploads."
"remote storage list on subdirrectory mismatches with the uploads."
);
Ok(())
}
@@ -199,7 +214,11 @@ async fn delete_objects_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<(
ctx.client.delete_objects(&[path1, path2], &cancel).await?;
let prefixes = ctx.client.list_prefixes(None, &cancel).await?;
let prefixes = ctx
.client
.list(None, ListingMode::WithDelimiter, None, &cancel)
.await?
.prefixes;
assert_eq!(prefixes.len(), 1);