Files
neon/test_runner/performance/test_pageserver_pagebench.py
Christian Schwarz 77efb8b58b many_tenants: fix: waiting for all tenants to become active doesn't work with 20k tenants
While at it, also remove the returning of template_timeline and tenants
from the many_tenants return type. It wasn't correctly rehydrated
anyway.
2024-01-09 16:24:32 +00:00

97 lines
3.3 KiB
Python

import json
from pathlib import Path
from typing import List, Tuple
import pytest
from fixtures.benchmark_fixture import NeonBenchmarker
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
SnapshotDir,
last_flush_lsn_upload,
)
from fixtures.types import TenantId, TimelineId
import fixtures.pageserver.many_tenants
# NOTE: there is deliberately no `@pytest.mark.timeout(...)` on this fixture.
# pytest ignores marks applied to fixture functions (newer versions warn about
# them), so the 1000 second budget that used to be declared here never took
# effect. A timeout meant to cover this setup has to be set on the test that
# requests the fixture.
@pytest.fixture(scope="function")
def getpage_throughput_fixture(
    neon_env_builder: NeonEnvBuilder,
    pg_bin: PgBin,
    test_snapshot_dir: SnapshotDir,
) -> fixtures.pageserver.many_tenants.SingleTimeline:
    """
    Build the environment used by the getpage throughput benchmark.

    A template tenant is created by the nested `setup_template` callback and
    handed, together with the env builder, the snapshot dir and a count of
    20_000, to `fixtures.pageserver.many_tenants.single_timeline`, whose
    result is returned unchanged.

    `pg_bin` is currently only referenced by the disabled pgbench variant of
    the template setup; it stays in the signature so that variant can be
    re-enabled without touching the fixture's interface.
    """

    def setup_template(env: NeonEnv):
        """
        Create the template tenant and return
        `(tenant_id, timeline_id, tenant_config)`.
        """
        # Tenant config that the template tenant is re-attached with; it is
        # also returned to the caller as the third tuple element.
        config = {
            "gc_period": "0s",
            "checkpoint_timeout": "10 years",
            "compaction_period": "20 s",
            "compaction_threshold": 10,
            "compaction_target_size": 134217728,
            "checkpoint_distance": 268435456,
            "image_creation_threshold": 3,
        }

        # Create the template tenant, then detach and re-attach it so that it
        # runs with the config above.
        template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
        env.pageserver.tenant_detach(template_tenant)
        env.pageserver.tenant_attach(template_tenant, config)

        # Disabled alternative: populate the template with pgbench instead.
        # with env.endpoints.create_start("main", tenant_id=template_tenant) as ep:
        #     pg_bin.run_capture(["pgbench", "-i", "-s5", ep.connstr()])
        #     last_flush_lsn_upload(env, ep, template_tenant, template_timeline)

        # Populate the template with a tiny table: eight single-row inserts,
        # flushing and uploading after every insert.
        ep = env.endpoints.create_start("main", tenant_id=template_tenant)
        ep.safe_psql("create table foo(b text)")
        for _ in range(0, 8):
            ep.safe_psql("insert into foo(b) values ('some text')")
            last_flush_lsn_upload(env, ep, template_tenant, template_timeline)
        ep.stop_and_destroy()

        return (template_tenant, template_timeline, config)

    return fixtures.pageserver.many_tenants.single_timeline(
        neon_env_builder,
        test_snapshot_dir,
        setup_template,
        20_000,
    )
# The timeout mark belongs on the test: pytest ignores marks placed on fixture
# functions, while a timeout declared here also covers the (slow, 20k tenant)
# fixture setup under pytest-timeout's default settings.
@pytest.mark.timeout(1000)
def test_getpage_throughput(
    getpage_throughput_fixture: fixtures.pageserver.many_tenants.SingleTimeline,
    zenbenchmark: NeonBenchmarker,
    pg_bin: PgBin,
):
    """
    Run `pagebench get-page-latest-lsn` against the pageserver prepared by
    `getpage_throughput_fixture` and record the results with `zenbenchmark`.

    The pagebench JSON output is read back from the `.stdout` file written by
    `pg_bin.run_capture`, logged, and passed to
    `zenbenchmark.record_pagebench_results`.
    """
    env = getpage_throughput_fixture.env
    ps_http = env.pageserver.http_client()

    # Run the benchmark for a fixed wall-clock runtime; the command line
    # carries no per-client request count.
    duration = "10s"
    cmd = [
        str(env.neon_binpath / "pagebench"),
        "get-page-latest-lsn",
        "--mgmt-api-endpoint",
        ps_http.base_url,
        "--page-service-connstring",
        env.pageserver.connstr(password=None),
        "--runtime",
        duration,
        # "--per-target-rate-limit", "50",
        # don't specify the targets, our fixture prepares us exactly 20k tenants,
        # and pagebench will auto-discover them
    ]
    log.info(f"command: {' '.join(cmd)}")

    # run_capture returns a base path; the captured stdout lives next to it
    # with a ".stdout" suffix.
    basepath = pg_bin.run_capture(cmd, with_command_header=False)
    results_path = Path(basepath + ".stdout")
    log.info(f"Benchmark results at: {results_path}")

    # JSON is UTF-8 by specification, so do not depend on the locale default.
    with results_path.open("r", encoding="utf-8") as f:
        results = json.load(f)
    log.info(f"Results:\n{json.dumps(results, sort_keys=True, indent=2)}")

    env.pageserver.stop(
        immediate=True
    )  # with 20k tenants, we hit neon_local's shutdown timeout of 10 seconds

    zenbenchmark.record_pagebench_results("get-page-latest-lsn", results, duration)