Remove large database files after every test suite (#3090)

Closes https://github.com/neondatabase/neon/issues/1984 
Closes https://github.com/neondatabase/neon/pull/2830

A follow-up of https://github.com/neondatabase/neon/pull/2830, I've
noticed that benchmarks failed again due to out of space issues.

Removes most of the pageserver and safekeeper files from disk after
every pytest suite run.

```
$ poetry run pytest -vvsk "test_tenant_redownloads_truncated_file_on_startup[local_fs]" 
# ...
$ du -h test_output/test_tenant_redownloads_truncated_file_on_startup\[local_fs\]  
# ...
104K    test_output/test_tenant_redownloads_truncated_file_on_startup[local_fs]

$ poetry run pytest -vvsk "test_tenant_redownloads_truncated_file_on_startup[local_fs]" --preserve-database-files
# ...
$ du -h test_output/test_tenant_redownloads_truncated_file_on_startup\[local_fs\]  
# ...
123M    test_output/test_tenant_redownloads_truncated_file_on_startup[local_fs]
```

Co-authored-by: Bojan Serafimov <bojan.serafimov7@gmail.com>
This commit is contained in:
Kirill Bulatov
2022-12-14 15:09:08 +02:00
committed by GitHub
parent d3787f9b47
commit 4d201619ed
4 changed files with 76 additions and 3 deletions

View File

@@ -76,9 +76,15 @@ Format is 2-digit major version number, i.e. `DEFAULT_PG_VERSION="14"`
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`NEON_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
`--pageserver-config-override=${value}` parameter values when neon_local cli is invoked
`RUST_LOG`: logging configuration to pass into Neon CLI
Useful parameters and commands:
`--pageserver-config-override=${value}` `-c` values to pass into pageserver through neon_local cli
`--preserve-database-files` to preserve pageserver (layer) and safekeeper (segment) timeline files on disk
after running a test suite. Such files might be large, so they are removed by default; but they might be useful for debugging or for creating svg images with layer file contents.
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
`./scripts/pytest -s --log-cli-level=INFO ...`
(Note many tests capture subprocess outputs separately, so this may not

View File

@@ -30,10 +30,17 @@ import psycopg2
import pytest
import requests
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
from fixtures.log_helper import log
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import Fn, allure_attach_from_dir, get_self_dir, subprocess_capture
from fixtures.utils import (
ATTACHMENT_NAME_REGEX,
Fn,
allure_attach_from_dir,
get_self_dir,
subprocess_capture,
)
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
@@ -590,6 +597,7 @@ class NeonEnvBuilder:
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name: str = DEFAULT_BRANCH_NAME,
preserve_database_files: bool = False,
):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
@@ -611,6 +619,7 @@ class NeonEnvBuilder:
self.neon_binpath = neon_binpath
self.pg_distrib_dir = pg_distrib_dir
self.pg_version = pg_version
self.preserve_database_files = preserve_database_files
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -718,6 +727,28 @@ class NeonEnvBuilder:
prefix_in_bucket=self.remote_storage_prefix,
)
def cleanup_local_storage(self):
    """Delete large database files under ``self.repo_dir`` and prune empty directories.

    Does nothing when ``self.preserve_database_files`` is set. Files whose
    names fully match ``ATTACHMENT_NAME_REGEX`` or ``SMALL_DB_FILE_NAME_REGEX``
    are left in place; every other regular file is unlinked. Afterwards each
    directory seen during the walk is removed if it has become empty.
    """
    if self.preserve_database_files:
        return

    kept_name_patterns = (ATTACHMENT_NAME_REGEX, SMALL_DB_FILE_NAME_REGEX)
    visited_dirs: List[Path] = []

    for entry in Path(self.repo_dir).glob("**/*"):
        if not entry.is_file():
            # Remember directories so they can be pruned once the files are gone.
            if entry.is_dir():
                visited_dirs.append(entry)
            continue

        if any(pattern.fullmatch(entry.name) for pattern in kept_name_patterns):
            continue

        log.debug(f"Removing large database {entry} file")
        entry.unlink()

    # Walk the collected directories in reverse discovery order, so nested
    # directories are examined before the directories that contain them.
    for candidate in visited_dirs[::-1]:
        if os.listdir(candidate):
            continue
        log.debug(f"Removing empty directory {candidate}")
        candidate.rmdir()
def cleanup_remote_storage(self):
# here we check for true remote storage, not the local one
# local cleanup is not needed after test because in ci all env will be destroyed anyway
@@ -783,7 +814,22 @@ class NeonEnvBuilder:
sk.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
self.cleanup_remote_storage()
# Run both cleanup steps even if the first one fails, then re-raise the
# earliest failure so that neither kind of error is silently dropped.
cleanup_error = None
try:
    self.cleanup_remote_storage()
except Exception as e:
    log.error(f"Error during remote storage cleanup: {e}")
    cleanup_error = e
try:
    self.cleanup_local_storage()
except Exception as e:
    log.error(f"Error during local storage cleanup: {e}")
    # Record the local failure only when nothing failed before it. The
    # previous check was inverted (`is not None`), so a local cleanup error
    # was logged but never raised when remote cleanup had succeeded.
    if cleanup_error is None:
        cleanup_error = e
if cleanup_error is not None:
    raise cleanup_error
self.env.pageserver.assert_no_errors()
@@ -949,6 +995,7 @@ class NeonEnv:
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
request: FixtureRequest,
pytestconfig: Config,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: NeonBroker,
@@ -980,6 +1027,7 @@ def _shared_simple_env(
pg_distrib_dir=pg_distrib_dir,
pg_version=pg_version,
run_id=run_id,
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
) as builder:
env = builder.init_start()
@@ -1006,6 +1054,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
@pytest.fixture(scope="function")
def neon_env_builder(
pytestconfig: Config,
test_output_dir: str,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
@@ -1041,6 +1090,7 @@ def neon_env_builder(
pg_version=pg_version,
broker=default_broker,
run_id=run_id,
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
) as builder:
yield builder
@@ -2735,6 +2785,20 @@ def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
return test_dir
def pytest_addoption(parser: Parser):
    """Register the ``--preserve-database-files`` command-line flag.

    The flag is a boolean switch that defaults to ``False``.
    """
    flag_settings = dict(
        action="store_true",
        default=False,
        help="Preserve timeline files after the test suite is over",
    )
    parser.addoption("--preserve-database-files", **flag_settings)
# Names of files that NeonEnvBuilder.cleanup_local_storage leaves on disk.
# Used with `fullmatch`, so it accepts exactly `config`, exactly `metadata`,
# or any name with at least one character before a `.toml`, `.pid`, `.json`
# or `.sql` suffix.
SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile(  # type: ignore[type-arg]
r"config|metadata|.+\.(?:toml|pid|json|sql)"
)
# This is autouse, so the test output directory always gets created, even
# if a test doesn't put anything there. It also solves a problem with the
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it

View File

@@ -16,6 +16,7 @@ Some handy pytest flags for local development:
- `-s` shows test output
- `-k` selects a test to run
- `--timeout=0` disables our default timeout of 300s (see `setup.cfg`)
- `--cleanup-test-ouput` cleans up after each test
# What performance tests do we have and how we run them

View File

@@ -47,6 +47,7 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
neon_env_builder.pg_version = "14"
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_local_fs_remote_storage()
neon_env_builder.preserve_database_files = True
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
@@ -331,6 +332,7 @@ def check_neon_works(
config.initial_tenant = snapshot_config["default_tenant_id"]
config.neon_binpath = neon_binpath
config.pg_distrib_dir = pg_distrib_dir
config.preserve_database_files = True
cli = NeonCli(config)
cli.raw_cli(["start"])