mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-01 04:20:39 +00:00
test: Remote storage refactorings (#5243)
Remote storage cleanup split from #5198:

- pageserver, extensions, and safekeepers now have their separate remote storage
- RemoteStorageKind has the configuration code
- S3Storage has the cleanup code
- with MOCK_S3, pageserver, extensions, safekeepers use different buckets
- with LOCAL_FS, `repo_dir / "local_fs_remote_storage" / $user` is used as path, where $user is `pageserver`, `safekeeper`
- no more `NeonEnvBuilder.enable_xxx_remote_storage` but one `enable_{pageserver,extensions,safekeeper}_remote_storage`

Should not have any real changes. These will allow us to default to `LOCAL_FS` for pageserver on the next PR, remove `RemoteStorageKind.NOOP`, work towards #5172.

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
This commit is contained in:
@@ -24,7 +24,6 @@ from urllib.parse import urlparse
|
||||
|
||||
import asyncpg
|
||||
import backoff
|
||||
import boto3
|
||||
import jwt
|
||||
import psycopg2
|
||||
import pytest
|
||||
@@ -32,7 +31,6 @@ import requests
|
||||
from _pytest.config import Config
|
||||
from _pytest.config.argparsing import Parser
|
||||
from _pytest.fixtures import FixtureRequest
|
||||
from mypy_boto3_s3 import S3Client
|
||||
|
||||
# Type-related stuff
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
@@ -47,11 +45,10 @@ from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.port_distributor import PortDistributor
|
||||
from fixtures.remote_storage import (
|
||||
LocalFsStorage,
|
||||
MockS3Server,
|
||||
RemoteStorage,
|
||||
RemoteStorageKind,
|
||||
RemoteStorageUsers,
|
||||
RemoteStorageUser,
|
||||
S3Storage,
|
||||
remote_storage_to_toml_inline_table,
|
||||
)
|
||||
@@ -416,8 +413,7 @@ class NeonEnvBuilder:
|
||||
pg_version: PgVersion,
|
||||
test_name: str,
|
||||
test_output_dir: Path,
|
||||
remote_storage: Optional[RemoteStorage] = None,
|
||||
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
|
||||
pageserver_remote_storage: Optional[RemoteStorage] = None,
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
num_safekeepers: int = 1,
|
||||
# Use non-standard SK ids to check for various parsing bugs
|
||||
@@ -434,10 +430,14 @@ class NeonEnvBuilder:
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
self.port_distributor = port_distributor
|
||||
self.remote_storage = remote_storage
|
||||
|
||||
# Pageserver remote storage
|
||||
self.pageserver_remote_storage = pageserver_remote_storage
|
||||
# Extensions remote storage
|
||||
self.ext_remote_storage: Optional[S3Storage] = None
|
||||
self.remote_storage_client: Optional[S3Client] = None
|
||||
self.remote_storage_users = remote_storage_users
|
||||
# Safekeepers remote storage
|
||||
self.sk_remote_storage: Optional[RemoteStorage] = None
|
||||
|
||||
self.broker = broker
|
||||
self.run_id = run_id
|
||||
self.mock_s3_server: MockS3Server = mock_s3_server
|
||||
@@ -448,7 +448,6 @@ class NeonEnvBuilder:
|
||||
self.auth_enabled = auth_enabled
|
||||
self.default_branch_name = default_branch_name
|
||||
self.env: Optional[NeonEnv] = None
|
||||
self.remote_storage_prefix: Optional[str] = None
|
||||
self.keep_remote_storage_contents: bool = True
|
||||
self.neon_binpath = neon_binpath
|
||||
self.pg_distrib_dir = pg_distrib_dir
|
||||
@@ -501,150 +500,75 @@ class NeonEnvBuilder:
|
||||
the test didn't produce any invalid remote state.
|
||||
"""
|
||||
|
||||
if not isinstance(self.remote_storage, S3Storage):
|
||||
if not isinstance(self.pageserver_remote_storage, S3Storage):
|
||||
# The scrubber can't talk to e.g. LocalFS -- it needs
|
||||
# an HTTP endpoint (mock is fine) to connect to.
|
||||
raise RuntimeError(
|
||||
"Cannot scrub with remote_storage={self.remote_storage}, require an S3 endpoint"
|
||||
"Cannot scrub with remote_storage={self.pageserver_remote_storage}, require an S3 endpoint"
|
||||
)
|
||||
|
||||
self.scrub_on_exit = True
|
||||
|
||||
def enable_remote_storage(
|
||||
def enable_pageserver_remote_storage(
|
||||
self,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
force_enable: bool = True,
|
||||
enable_remote_extensions: bool = False,
|
||||
):
|
||||
bucket_name = re.sub(r"[_\[\]]", "-", self.test_name)[:63]
|
||||
assert self.pageserver_remote_storage is None, "remote storage is enabled already"
|
||||
ret = self._configure_and_create_remote_storage(
|
||||
remote_storage_kind, RemoteStorageUser.PAGESERVER
|
||||
)
|
||||
self.pageserver_remote_storage = ret
|
||||
|
||||
if remote_storage_kind == RemoteStorageKind.NOOP:
|
||||
return
|
||||
elif remote_storage_kind == RemoteStorageKind.LOCAL_FS:
|
||||
self.enable_local_fs_remote_storage(force_enable=force_enable)
|
||||
elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
|
||||
self.enable_mock_s3_remote_storage(
|
||||
bucket_name=bucket_name,
|
||||
force_enable=force_enable,
|
||||
enable_remote_extensions=enable_remote_extensions,
|
||||
)
|
||||
elif remote_storage_kind == RemoteStorageKind.REAL_S3:
|
||||
self.enable_real_s3_remote_storage(
|
||||
test_name=bucket_name,
|
||||
force_enable=force_enable,
|
||||
enable_remote_extensions=enable_remote_extensions,
|
||||
)
|
||||
else:
|
||||
raise RuntimeError(f"Unknown storage type: {remote_storage_kind}")
|
||||
def enable_extensions_remote_storage(self, kind: RemoteStorageKind):
|
||||
assert self.ext_remote_storage is None, "already configured extensions remote storage"
|
||||
|
||||
self.remote_storage_kind = remote_storage_kind
|
||||
# there is an assumption that REAL_S3 for extensions is never
|
||||
# cleaned up these are also special in that they have a hardcoded
|
||||
# bucket and region, which is most likely the same as our normal
|
||||
ext = self._configure_and_create_remote_storage(
|
||||
kind,
|
||||
RemoteStorageUser.EXTENSIONS,
|
||||
bucket_name="neon-dev-extensions-eu-central-1",
|
||||
bucket_region="eu-central-1",
|
||||
)
|
||||
assert isinstance(
|
||||
ext, S3Storage
|
||||
), "unsure why, but only MOCK_S3 and REAL_S3 are currently supported for extensions"
|
||||
ext.cleanup = False
|
||||
self.ext_remote_storage = ext
|
||||
|
||||
def enable_local_fs_remote_storage(self, force_enable: bool = True):
|
||||
"""
|
||||
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
|
||||
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
|
||||
"""
|
||||
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
|
||||
self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage"))
|
||||
def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
|
||||
assert self.sk_remote_storage is None, "sk_remote_storage already configured"
|
||||
|
||||
def enable_mock_s3_remote_storage(
|
||||
self.sk_remote_storage = self._configure_and_create_remote_storage(
|
||||
kind, RemoteStorageUser.SAFEKEEPER
|
||||
)
|
||||
|
||||
def _configure_and_create_remote_storage(
|
||||
self,
|
||||
bucket_name: str,
|
||||
force_enable: bool = True,
|
||||
enable_remote_extensions: bool = False,
|
||||
):
|
||||
"""
|
||||
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
|
||||
Starts up the mock server, if that does not run yet.
|
||||
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
|
||||
|
||||
Also creates the bucket for extensions, self.ext_remote_storage bucket
|
||||
"""
|
||||
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
|
||||
mock_endpoint = self.mock_s3_server.endpoint()
|
||||
mock_region = self.mock_s3_server.region()
|
||||
|
||||
self.remote_storage_client = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=mock_endpoint,
|
||||
region_name=mock_region,
|
||||
aws_access_key_id=self.mock_s3_server.access_key(),
|
||||
aws_secret_access_key=self.mock_s3_server.secret_key(),
|
||||
)
|
||||
self.remote_storage_client.create_bucket(Bucket=bucket_name)
|
||||
|
||||
self.remote_storage = S3Storage(
|
||||
kind: RemoteStorageKind,
|
||||
user: RemoteStorageUser,
|
||||
bucket_name: Optional[str] = None,
|
||||
bucket_region: Optional[str] = None,
|
||||
) -> Optional[RemoteStorage]:
|
||||
ret = kind.configure(
|
||||
self.repo_dir,
|
||||
self.mock_s3_server,
|
||||
str(self.run_id),
|
||||
self.test_name,
|
||||
user,
|
||||
bucket_name=bucket_name,
|
||||
endpoint=mock_endpoint,
|
||||
bucket_region=mock_region,
|
||||
access_key=self.mock_s3_server.access_key(),
|
||||
secret_key=self.mock_s3_server.secret_key(),
|
||||
prefix_in_bucket="pageserver",
|
||||
bucket_region=bucket_region,
|
||||
)
|
||||
|
||||
if enable_remote_extensions:
|
||||
self.ext_remote_storage = S3Storage(
|
||||
bucket_name=bucket_name,
|
||||
endpoint=mock_endpoint,
|
||||
bucket_region=mock_region,
|
||||
access_key=self.mock_s3_server.access_key(),
|
||||
secret_key=self.mock_s3_server.secret_key(),
|
||||
prefix_in_bucket="ext",
|
||||
)
|
||||
if kind == RemoteStorageKind.MOCK_S3:
|
||||
assert isinstance(ret, S3Storage)
|
||||
ret.client.create_bucket(Bucket=ret.bucket_name)
|
||||
elif kind == RemoteStorageKind.REAL_S3:
|
||||
assert isinstance(ret, S3Storage)
|
||||
assert ret.cleanup, "we should not leave files in REAL_S3"
|
||||
|
||||
def enable_real_s3_remote_storage(
|
||||
self,
|
||||
test_name: str,
|
||||
force_enable: bool = True,
|
||||
enable_remote_extensions: bool = False,
|
||||
):
|
||||
"""
|
||||
Sets up configuration to use real s3 endpoint without mock server
|
||||
"""
|
||||
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
|
||||
|
||||
access_key = os.getenv("AWS_ACCESS_KEY_ID")
|
||||
assert access_key, "no aws access key provided"
|
||||
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
|
||||
assert secret_key, "no aws access key provided"
|
||||
|
||||
# session token is needed for local runs with sso auth
|
||||
session_token = os.getenv("AWS_SESSION_TOKEN")
|
||||
|
||||
bucket_name = os.getenv("REMOTE_STORAGE_S3_BUCKET")
|
||||
assert bucket_name, "no remote storage bucket name provided"
|
||||
region = os.getenv("REMOTE_STORAGE_S3_REGION")
|
||||
assert region, "no remote storage region provided"
|
||||
|
||||
# do not leave data in real s3
|
||||
self.keep_remote_storage_contents = False
|
||||
|
||||
# construct a prefix inside bucket for the particular test case and test run
|
||||
self.remote_storage_prefix = f"{self.run_id}/{test_name}"
|
||||
|
||||
self.remote_storage_client = boto3.client(
|
||||
"s3",
|
||||
region_name=region,
|
||||
aws_access_key_id=access_key,
|
||||
aws_secret_access_key=secret_key,
|
||||
aws_session_token=session_token,
|
||||
)
|
||||
self.remote_storage = S3Storage(
|
||||
bucket_name=bucket_name,
|
||||
bucket_region=region,
|
||||
access_key=access_key,
|
||||
secret_key=secret_key,
|
||||
prefix_in_bucket=self.remote_storage_prefix,
|
||||
)
|
||||
|
||||
if enable_remote_extensions:
|
||||
self.ext_remote_storage = S3Storage(
|
||||
bucket_name="neon-dev-extensions-eu-central-1",
|
||||
bucket_region="eu-central-1",
|
||||
access_key=access_key,
|
||||
secret_key=secret_key,
|
||||
prefix_in_bucket=None,
|
||||
)
|
||||
return ret
|
||||
|
||||
def cleanup_local_storage(self):
|
||||
if self.preserve_database_files:
|
||||
@@ -669,60 +593,10 @@ class NeonEnvBuilder:
|
||||
directory_to_clean.rmdir()
|
||||
|
||||
def cleanup_remote_storage(self):
|
||||
# here wee check for true remote storage, no the local one
|
||||
# local cleanup is not needed after test because in ci all env will be destroyed anyway
|
||||
if self.remote_storage_prefix is None:
|
||||
log.info("no remote storage was set up, skipping cleanup")
|
||||
return
|
||||
|
||||
# Making mypy happy with allowing only `S3Storage` further.
|
||||
# `self.remote_storage_prefix` is coupled with `S3Storage` storage type,
|
||||
# so this line effectively a no-op
|
||||
assert isinstance(self.remote_storage, S3Storage)
|
||||
assert self.remote_storage_client is not None
|
||||
|
||||
if self.keep_remote_storage_contents:
|
||||
log.info("keep_remote_storage_contents skipping remote storage cleanup")
|
||||
return
|
||||
|
||||
log.info(
|
||||
"removing data from test s3 bucket %s by prefix %s",
|
||||
self.remote_storage.bucket_name,
|
||||
self.remote_storage_prefix,
|
||||
)
|
||||
paginator = self.remote_storage_client.get_paginator("list_objects_v2")
|
||||
pages = paginator.paginate(
|
||||
Bucket=self.remote_storage.bucket_name,
|
||||
Prefix=self.remote_storage_prefix,
|
||||
)
|
||||
|
||||
# Using Any because DeleteTypeDef (from boto3-stubs) doesn't fit our case
|
||||
objects_to_delete: Any = {"Objects": []}
|
||||
cnt = 0
|
||||
for item in pages.search("Contents"):
|
||||
# weirdly when nothing is found it returns [None]
|
||||
if item is None:
|
||||
break
|
||||
|
||||
objects_to_delete["Objects"].append({"Key": item["Key"]})
|
||||
|
||||
# flush once aws limit reached
|
||||
if len(objects_to_delete["Objects"]) >= 1000:
|
||||
self.remote_storage_client.delete_objects(
|
||||
Bucket=self.remote_storage.bucket_name,
|
||||
Delete=objects_to_delete,
|
||||
)
|
||||
objects_to_delete = {"Objects": []}
|
||||
cnt += 1
|
||||
|
||||
# flush rest
|
||||
if len(objects_to_delete["Objects"]):
|
||||
self.remote_storage_client.delete_objects(
|
||||
Bucket=self.remote_storage.bucket_name,
|
||||
Delete=objects_to_delete,
|
||||
)
|
||||
|
||||
log.info(f"deleted {cnt} objects from remote storage")
|
||||
# extensions are currently not cleaned up, disabled when creating
|
||||
for x in [self.pageserver_remote_storage, self.ext_remote_storage, self.sk_remote_storage]:
|
||||
if isinstance(x, S3Storage):
|
||||
x.do_cleanup()
|
||||
|
||||
def __enter__(self) -> "NeonEnvBuilder":
|
||||
return self
|
||||
@@ -817,14 +691,13 @@ class NeonEnv:
|
||||
self.endpoints = EndpointFactory(self)
|
||||
self.safekeepers: List[Safekeeper] = []
|
||||
self.broker = config.broker
|
||||
self.remote_storage = config.remote_storage
|
||||
self.remote_storage_users = config.remote_storage_users
|
||||
self.pageserver_remote_storage = config.pageserver_remote_storage
|
||||
self.ext_remote_storage = config.ext_remote_storage
|
||||
self.safekeepers_remote_storage = config.sk_remote_storage
|
||||
self.pg_version = config.pg_version
|
||||
self.neon_binpath = config.neon_binpath
|
||||
self.pg_distrib_dir = config.pg_distrib_dir
|
||||
self.endpoint_counter = 0
|
||||
self.remote_storage_client = config.remote_storage_client
|
||||
self.ext_remote_storage = config.ext_remote_storage
|
||||
|
||||
# generate initial tenant ID here instead of letting 'neon init' generate it,
|
||||
# so that we don't need to dig it out of the config file afterwards.
|
||||
@@ -907,13 +780,10 @@ class NeonEnv:
|
||||
auth_enabled = true
|
||||
"""
|
||||
)
|
||||
if (
|
||||
bool(self.remote_storage_users & RemoteStorageUsers.SAFEKEEPER)
|
||||
and self.remote_storage is not None
|
||||
):
|
||||
if config.sk_remote_storage is not None:
|
||||
toml += textwrap.dedent(
|
||||
f"""
|
||||
remote_storage = "{remote_storage_to_toml_inline_table(self.remote_storage)}"
|
||||
remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
|
||||
"""
|
||||
)
|
||||
safekeeper = Safekeeper(env=self, id=id, port=port)
|
||||
@@ -1339,18 +1209,17 @@ class NeonCli(AbstractNeonCli):
|
||||
|
||||
cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
|
||||
|
||||
storage = self.env.pageserver_remote_storage
|
||||
|
||||
append_pageserver_param_overrides(
|
||||
params_to_update=cmd,
|
||||
remote_storage=self.env.remote_storage,
|
||||
remote_storage_users=self.env.remote_storage_users,
|
||||
remote_storage=storage,
|
||||
pageserver_config_override=self.env.pageserver.config_override,
|
||||
)
|
||||
|
||||
s3_env_vars = None
|
||||
if self.env.remote_storage is not None and isinstance(
|
||||
self.env.remote_storage, S3Storage
|
||||
):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
if isinstance(storage, S3Storage):
|
||||
s3_env_vars = storage.access_env_vars()
|
||||
res = self.raw_cli(cmd, extra_env_vars=s3_env_vars)
|
||||
res.check_returncode()
|
||||
return res
|
||||
@@ -1371,15 +1240,15 @@ class NeonCli(AbstractNeonCli):
|
||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
start_args = ["pageserver", "start", *overrides]
|
||||
storage = self.env.pageserver_remote_storage
|
||||
append_pageserver_param_overrides(
|
||||
params_to_update=start_args,
|
||||
remote_storage=self.env.remote_storage,
|
||||
remote_storage_users=self.env.remote_storage_users,
|
||||
remote_storage=storage,
|
||||
pageserver_config_override=self.env.pageserver.config_override,
|
||||
)
|
||||
|
||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
if isinstance(storage, S3Storage):
|
||||
s3_env_vars = storage.access_env_vars()
|
||||
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
@@ -1396,8 +1265,8 @@ class NeonCli(AbstractNeonCli):
|
||||
self, id: int, extra_opts: Optional[List[str]] = None
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
s3_env_vars = None
|
||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
|
||||
|
||||
if extra_opts is not None:
|
||||
extra_opts = [f"-e={opt}" for opt in extra_opts]
|
||||
@@ -1485,9 +1354,10 @@ class NeonCli(AbstractNeonCli):
|
||||
if endpoint_id is not None:
|
||||
args.append(endpoint_id)
|
||||
|
||||
storage = self.env.ext_remote_storage
|
||||
s3_env_vars = None
|
||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
if isinstance(storage, S3Storage):
|
||||
s3_env_vars = storage.access_env_vars()
|
||||
|
||||
res = self.raw_cli(args, extra_env_vars=s3_env_vars)
|
||||
res.check_returncode()
|
||||
@@ -1762,10 +1632,9 @@ class NeonPageserver(PgProtocol):
|
||||
def append_pageserver_param_overrides(
|
||||
params_to_update: List[str],
|
||||
remote_storage: Optional[RemoteStorage],
|
||||
remote_storage_users: RemoteStorageUsers,
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
):
|
||||
if bool(remote_storage_users & RemoteStorageUsers.PAGESERVER) and remote_storage is not None:
|
||||
if remote_storage is not None:
|
||||
remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage)
|
||||
|
||||
params_to_update.append(
|
||||
@@ -2858,8 +2727,8 @@ class S3Scrubber:
|
||||
self.log_dir = log_dir
|
||||
|
||||
def scrubber_cli(self, args, timeout):
|
||||
assert isinstance(self.env.remote_storage, S3Storage)
|
||||
s3_storage = self.env.remote_storage
|
||||
assert isinstance(self.env.pageserver_remote_storage, S3Storage)
|
||||
s3_storage = self.env.pageserver_remote_storage
|
||||
|
||||
env = {
|
||||
"REGION": s3_storage.bucket_region,
|
||||
|
||||
@@ -260,15 +260,11 @@ def list_prefix(
|
||||
Note that this function takes into account prefix_in_bucket.
|
||||
"""
|
||||
# For local_fs we need to properly handle empty directories, which we currently dont, so for simplicity stick to s3 api.
|
||||
assert neon_env_builder.remote_storage_kind in (
|
||||
RemoteStorageKind.MOCK_S3,
|
||||
RemoteStorageKind.REAL_S3,
|
||||
)
|
||||
# For mypy
|
||||
assert isinstance(neon_env_builder.remote_storage, S3Storage)
|
||||
assert neon_env_builder.remote_storage_client is not None
|
||||
remote = neon_env_builder.pageserver_remote_storage
|
||||
assert isinstance(remote, S3Storage), "localfs is currently not supported"
|
||||
assert remote.client is not None
|
||||
|
||||
prefix_in_bucket = neon_env_builder.remote_storage.prefix_in_bucket or ""
|
||||
prefix_in_bucket = remote.prefix_in_bucket or ""
|
||||
if not prefix:
|
||||
prefix = prefix_in_bucket
|
||||
else:
|
||||
@@ -277,9 +273,9 @@ def list_prefix(
|
||||
prefix = "/".join((prefix_in_bucket, prefix))
|
||||
|
||||
# Note that this doesnt use pagination, so list is not guaranteed to be exhaustive.
|
||||
response = neon_env_builder.remote_storage_client.list_objects_v2(
|
||||
response = remote.client.list_objects_v2(
|
||||
Delimiter="/",
|
||||
Bucket=neon_env_builder.remote_storage.bucket_name,
|
||||
Bucket=remote.bucket_name,
|
||||
Prefix=prefix,
|
||||
)
|
||||
return response
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
import enum
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import boto3
|
||||
from mypy_boto3_s3 import S3Client
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
@@ -12,6 +17,20 @@ from fixtures.types import TenantId, TimelineId
|
||||
TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
|
||||
|
||||
|
||||
@enum.unique
class RemoteStorageUser(str, enum.Enum):
    """
    Closed set of components that can own a remote storage.

    A strict enum is used instead of free-form strings for the users.
    """

    PAGESERVER = "pageserver"
    EXTENSIONS = "ext"
    SAFEKEEPER = "safekeeper"

    def __str__(self) -> str:
        # Render as the bare value (e.g. "pageserver") instead of the
        # default "RemoteStorageUser.PAGESERVER" form.
        rendered: str = self.value
        return rendered
|
||||
|
||||
|
||||
class MockS3Server:
|
||||
"""
|
||||
Starts a mock S3 server for testing on a port given, errors if the server fails to start or exits prematurely.
|
||||
@@ -58,6 +77,124 @@ class MockS3Server:
|
||||
self.subprocess.kill()
|
||||
|
||||
|
||||
@dataclass
class LocalFsStorage:
    """
    Remote storage configuration that points at a directory on the local
    filesystem (`root`).
    """

    root: Path

    def tenant_path(self, tenant_id: TenantId) -> Path:
        """Return `<root>/tenants/<tenant_id>`."""
        return self.root.joinpath("tenants", str(tenant_id))

    def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
        """Return `<tenant_path>/timelines/<timeline_id>`."""
        tenant_dir = self.tenant_path(tenant_id)
        return tenant_dir.joinpath("timelines", str(timeline_id))

    def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
        """Return the path of the timeline's index part file."""
        timeline_dir = self.timeline_path(tenant_id, timeline_id)
        return timeline_dir / TIMELINE_INDEX_PART_FILE_NAME

    def index_content(self, tenant_id: TenantId, timeline_id: TimelineId):
        """Read the timeline's index part file and return it parsed as JSON."""
        index_file = self.index_path(tenant_id, timeline_id)
        with index_file.open("r") as f:
            return json.load(f)

    def to_toml_inline_table(self) -> str:
        """Return the body of the TOML inline table (without the braces)."""
        return "local_path='" + str(self.root) + "'"

    def cleanup(self):
        # Intentionally a no-op: NeonEnvBuilder.cleanup_local_storage removes
        # everything, including the localfs files.
        return None

    @staticmethod
    def component_path(repo_dir: Path, user: RemoteStorageUser) -> Path:
        """Return `<repo_dir>/local_fs_remote_storage/<user>`."""
        return repo_dir.joinpath("local_fs_remote_storage", str(user))
|
||||
|
||||
|
||||
@dataclass
class S3Storage:
    """
    Remote storage configuration for an S3 (or S3-compatible) bucket.

    Holds the bucket coordinates, the credentials, a client used for
    maintenance calls and a flag telling whether `do_cleanup` should delete
    the objects under `prefix_in_bucket`.
    """

    bucket_name: str
    bucket_region: str
    access_key: str
    secret_key: str
    prefix_in_bucket: str
    # Quoted so that the annotation is not evaluated when the class is created.
    client: "S3Client"
    # When False, `do_cleanup` leaves the bucket contents untouched.
    cleanup: bool
    endpoint: Optional[str] = None

    def access_env_vars(self) -> Dict[str, str]:
        """Return the credentials as the AWS environment variables."""
        return {
            "AWS_ACCESS_KEY_ID": self.access_key,
            "AWS_SECRET_ACCESS_KEY": self.secret_key,
        }

    def to_string(self) -> str:
        """Return bucket, region, endpoint and prefix serialized as JSON."""
        return json.dumps(
            {
                "bucket": self.bucket_name,
                "region": self.bucket_region,
                "endpoint": self.endpoint,
                "prefix": self.prefix_in_bucket,
            }
        )

    def to_toml_inline_table(self) -> str:
        """
        Return the body of the TOML inline table (without the braces).

        `prefix_in_bucket` and `endpoint` are emitted only when not None.
        """
        s = [
            f"bucket_name='{self.bucket_name}'",
            f"bucket_region='{self.bucket_region}'",
        ]

        if self.prefix_in_bucket is not None:
            s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")

        if self.endpoint is not None:
            s.append(f"endpoint='{self.endpoint}'")

        return ",".join(s)

    def do_cleanup(self):
        """
        Delete every object under `prefix_in_bucket` from the bucket.

        Does nothing when `cleanup` is False. Keys are listed with the
        `list_objects_v2` paginator and removed in batches.
        """
        if not self.cleanup:
            # handles previous keep_remote_storage_contents
            return

        log.info(
            "removing data from test s3 bucket %s by prefix %s",
            self.bucket_name,
            self.prefix_in_bucket,
        )
        paginator = self.client.get_paginator("list_objects_v2")
        pages = paginator.paginate(
            Bucket=self.bucket_name,
            Prefix=self.prefix_in_bucket,
        )

        # aws limit on the number of keys in a single delete_objects request
        max_keys_per_request = 1000

        # Using Any because DeleteTypeDef (from boto3-stubs) doesn't fit our case
        objects_to_delete: Any = {"Objects": []}
        cnt = 0
        for item in pages.search("Contents"):
            # weirdly when nothing is found it returns [None]
            if item is None:
                break

            objects_to_delete["Objects"].append({"Key": item["Key"]})
            # count every key, so that the final log line reports objects
            # rather than flushed batches
            cnt += 1

            # flush once aws limit reached
            if len(objects_to_delete["Objects"]) >= max_keys_per_request:
                self.client.delete_objects(
                    Bucket=self.bucket_name,
                    Delete=objects_to_delete,
                )
                objects_to_delete = {"Objects": []}

        # flush rest
        if objects_to_delete["Objects"]:
            self.client.delete_objects(
                Bucket=self.bucket_name,
                Delete=objects_to_delete,
            )

        log.info(f"deleted {cnt} objects from remote storage")
|
||||
|
||||
|
||||
RemoteStorage = Union[LocalFsStorage, S3Storage]
|
||||
|
||||
|
||||
@enum.unique
|
||||
class RemoteStorageKind(str, enum.Enum):
|
||||
LOCAL_FS = "local_fs"
|
||||
@@ -67,6 +204,104 @@ class RemoteStorageKind(str, enum.Enum):
|
||||
# to ensure the test pass with or without the remote storage
|
||||
NOOP = "noop"
|
||||
|
||||
def configure(
    self,
    repo_dir: Path,
    mock_s3_server,
    run_id: str,
    test_name: str,
    user: RemoteStorageUser,
    bucket_name: Optional[str] = None,
    bucket_region: Optional[str] = None,
) -> Optional[RemoteStorage]:
    """
    Build the remote storage description for `user` according to this kind.

    - NOOP: returns None.
    - LOCAL_FS: returns a LocalFsStorage rooted at the per-user component
      path under `repo_dir`.
    - MOCK_S3: returns an S3Storage that talks to `mock_s3_server`; the
      bucket name is derived from `user` and `test_name`, and the
      `bucket_name` / `bucket_region` arguments are not used. The bucket
      itself is not created here. `cleanup` is False.
    - REAL_S3: returns an S3Storage using credentials from
      AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and AWS_SESSION_TOKEN, if
      set). Bucket and region come from the arguments, falling back to
      REMOTE_STORAGE_S3_BUCKET / REMOTE_STORAGE_S3_REGION. The prefix is
      `run_id/test_name/user` and `cleanup` is True.

    Raises AssertionError when a required REAL_S3 setting is missing or empty.
    """
    if self == RemoteStorageKind.NOOP:
        return None

    if self == RemoteStorageKind.LOCAL_FS:
        return LocalFsStorage(LocalFsStorage.component_path(repo_dir, user))

    # real_s3 uses this as part of prefix, mock_s3 uses this as part of
    # bucket name, giving all users unique buckets because we have to
    # create them
    test_name = re.sub(r"[_\[\]]", "-", test_name)

    def to_bucket_name(user: str, test_name: str) -> str:
        s = f"{user}-{test_name}"

        if len(s) > 63:
            # Too long for a bucket name: keep a readable 30 character head
            # and make the rest unique with a 32 character hash of the test
            # name, which gives exactly 30 + 1 + 32 = 63 characters.
            prefix = s[:30]
            suffix = hashlib.sha256(test_name.encode()).hexdigest()[:32]
            s = f"{prefix}-{suffix}"
            assert len(s) == 63

        return s

    if self == RemoteStorageKind.MOCK_S3:
        # there's a single mock_s3 server for each process running the tests
        mock_endpoint = mock_s3_server.endpoint()
        mock_region = mock_s3_server.region()

        access_key, secret_key = mock_s3_server.access_key(), mock_s3_server.secret_key()

        client = boto3.client(
            "s3",
            endpoint_url=mock_endpoint,
            region_name=mock_region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        )

        bucket_name = to_bucket_name(user, test_name)
        log.info(
            f"using mock_s3 bucket name {bucket_name} for user={user}, test_name={test_name}"
        )

        return S3Storage(
            bucket_name=bucket_name,
            endpoint=mock_endpoint,
            bucket_region=mock_region,
            access_key=access_key,
            secret_key=secret_key,
            prefix_in_bucket="",
            client=client,
            cleanup=False,
        )

    assert self == RemoteStorageKind.REAL_S3

    env_access_key = os.getenv("AWS_ACCESS_KEY_ID")
    assert env_access_key, "no aws access key provided"
    env_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    assert env_secret_key, "no aws secret key provided"

    # session token is needed for local runs with sso auth
    session_token = os.getenv("AWS_SESSION_TOKEN")

    # Truthiness checks, so that an empty environment variable is reported
    # here instead of being passed on as an empty bucket name or region.
    bucket_name = bucket_name or os.getenv("REMOTE_STORAGE_S3_BUCKET")
    assert bucket_name, "no remote storage bucket name provided"
    bucket_region = bucket_region or os.getenv("REMOTE_STORAGE_S3_REGION")
    assert bucket_region, "no remote storage region provided"

    prefix_in_bucket = f"{run_id}/{test_name}/{user}"

    client = boto3.client(
        "s3",
        region_name=bucket_region,
        aws_access_key_id=env_access_key,
        aws_secret_access_key=env_secret_key,
        aws_session_token=session_token,
    )

    return S3Storage(
        bucket_name=bucket_name,
        bucket_region=bucket_region,
        access_key=env_access_key,
        secret_key=env_secret_key,
        prefix_in_bucket=prefix_in_bucket,
        client=client,
        cleanup=True,
    )
|
||||
|
||||
|
||||
def available_remote_storages() -> List[RemoteStorageKind]:
|
||||
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
|
||||
@@ -101,72 +336,9 @@ def s3_storage() -> RemoteStorageKind:
|
||||
return RemoteStorageKind.MOCK_S3
|
||||
|
||||
|
||||
@dataclass
|
||||
class LocalFsStorage:
|
||||
root: Path
|
||||
|
||||
def tenant_path(self, tenant_id: TenantId) -> Path:
|
||||
return self.root / "tenants" / str(tenant_id)
|
||||
|
||||
def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
|
||||
return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
|
||||
|
||||
def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
|
||||
return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
|
||||
|
||||
def index_content(self, tenant_id: TenantId, timeline_id: TimelineId):
|
||||
with self.index_path(tenant_id, timeline_id).open("r") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@dataclass
|
||||
class S3Storage:
|
||||
bucket_name: str
|
||||
bucket_region: str
|
||||
access_key: str
|
||||
secret_key: str
|
||||
endpoint: Optional[str] = None
|
||||
prefix_in_bucket: Optional[str] = ""
|
||||
|
||||
def access_env_vars(self) -> Dict[str, str]:
|
||||
return {
|
||||
"AWS_ACCESS_KEY_ID": self.access_key,
|
||||
"AWS_SECRET_ACCESS_KEY": self.secret_key,
|
||||
}
|
||||
|
||||
def to_string(self) -> str:
|
||||
return json.dumps(
|
||||
{
|
||||
"bucket": self.bucket_name,
|
||||
"region": self.bucket_region,
|
||||
"endpoint": self.endpoint,
|
||||
"prefix": self.prefix_in_bucket,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
RemoteStorage = Union[LocalFsStorage, S3Storage]
|
||||
|
||||
|
||||
# serialize as toml inline table
|
||||
def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
|
||||
if isinstance(remote_storage, LocalFsStorage):
|
||||
remote_storage_config = f"local_path='{remote_storage.root}'"
|
||||
elif isinstance(remote_storage, S3Storage):
|
||||
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
|
||||
bucket_region='{remote_storage.bucket_region}'"
|
||||
|
||||
if remote_storage.prefix_in_bucket is not None:
|
||||
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
|
||||
|
||||
if remote_storage.endpoint is not None:
|
||||
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
|
||||
else:
|
||||
if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
|
||||
raise Exception("invalid remote storage type")
|
||||
|
||||
return f"{{{remote_storage_config}}}"
|
||||
|
||||
|
||||
class RemoteStorageUsers(enum.Flag):
|
||||
PAGESERVER = enum.auto()
|
||||
SAFEKEEPER = enum.auto()
|
||||
return f"{{{remote_storage.to_toml_inline_table()}}}"
|
||||
|
||||
Reference in New Issue
Block a user