From 9b9eff397367b9eca7e050163c0cfa60e77c9946 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 16 Feb 2024 14:24:52 +0000 Subject: [PATCH] test: add benchmark for tenants with large slrus The new bencmark generates tenants with around 300 SLRU blocks of 8KiB each. Both vectored get implementations are benchmarked. For the vectored implementation validation is disabled. --- .../pagebench/test_large_slru_basebackup.py | 196 ++++++++++++++++++ ...er_max_throughput_getpage_at_latest_lsn.py | 144 ++++++------- test_runner/performance/pageserver/util.py | 31 ++- 3 files changed, 293 insertions(+), 78 deletions(-) create mode 100644 test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py diff --git a/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py b/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py new file mode 100644 index 0000000000..1a7e54a652 --- /dev/null +++ b/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py @@ -0,0 +1,196 @@ +from pathlib import Path +import asyncio +import json +import pytest + +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, wait_for_last_flush_lsn, Endpoint +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from performance.pageserver.util import ( + ensure_pageserver_ready_for_benchmarking, + setup_pageserver_with_tenants, +) + +from fixtures.utils import get_scale_for_db, humantime_to_ms + + +@pytest.mark.parametrize("duration", [30]) +@pytest.mark.parametrize("pgbench_scale", [get_scale_for_db(200)]) +@pytest.mark.parametrize("n_tenants", [10]) +@pytest.mark.parametrize("get_vectored_impl", ["sequential", "vectored"]) +@pytest.mark.timeout(1000) +def test_basebackup_with_high_slru_count( + neon_env_builder: NeonEnvBuilder, + zenbenchmark: NeonBenchmarker, + pg_bin: PgBin, + get_vectored_impl: str, + n_tenants: int, + pgbench_scale: int, + duration: int, +): + def 
record(metric, **kwargs): + zenbenchmark.record(metric_name=f"pageserver_basebackup.{metric}", **kwargs) + + params: Dict[str, Tuple[Any, Dict[str, Any]]] = {} + + # params from fixtures + params.update( + { + "n_tenants": (n_tenants, {"unit": ""}), + "pgbench_scale": (pgbench_scale, {"unit": ""}), + "duration": (duration, {"unit": "s"}), + } + ) + + # configure cache sizes like in prod + page_cache_size = 16384 + max_file_descriptors = 500000 + neon_env_builder.pageserver_config_override = ( + f"page_cache_size={page_cache_size}; max_file_descriptors={max_file_descriptors}; " + f"get_vectored_impl='{get_vectored_impl}'; validate_vectored_get=false" + ) + params.update( + { + "pageserver_config_override.page_cache_size": ( + page_cache_size * 8192, + {"unit": "byte"}, + ), + "pageserver_config_override.max_file_descriptors": (max_file_descriptors, {"unit": ""}), + } + ) + + for param, (value, kwargs) in params.items(): + record(param, metric_value=value, report=MetricReport.TEST_PARAM, **kwargs) + + n_txns = 100000 + + def setup_wrapper(env: NeonEnv): + return setup_tenant_template(env, n_txns) + + env = setup_pageserver_with_tenants( + neon_env_builder, f"large_slru_count-{n_tenants}-{n_txns}", n_tenants, setup_wrapper + ) + run_benchmark(env, pg_bin, record, duration) + + +def setup_tenant_template(env: NeonEnv, n_txns: int): + config = { + "gc_period": "0s", # disable periodic gc + "checkpoint_timeout": "10 years", + "compaction_period": "0s", # disable periodic compaction + "compaction_threshold": 10, + "compaction_target_size": 134217728, + "checkpoint_distance": 268435456, + "image_creation_threshold": 3, + } + + template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) + env.pageserver.tenant_detach(template_tenant) + env.pageserver.allowed_errors.append( + # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + ".*Dropped remote consistent LSN updates.*", + ) + 
env.pageserver.tenant_attach(template_tenant, config) + + ps_http = env.pageserver.http_client() + + with env.endpoints.create_start( + "main", tenant_id=template_tenant, config_lines=["shared_buffers=1MB"] + ) as ep: + rels = 10 + + with ep.cursor() as cur: + asyncio.run(run_updates(ep, n_txns, rels)) + + wait_for_last_flush_lsn(env, ep, template_tenant, template_timeline) + ps_http.timeline_checkpoint(template_tenant, template_timeline) + ps_http.timeline_compact(template_tenant, template_timeline) + + return (template_tenant, template_timeline, config) + + +# Takes about 5 minutes and produces tenants with around 300 SLRU blocks +# of 8 KiB each. +async def run_updates(ep: Endpoint, n_txns: int, workers_count: int): + workers = [] + for i in range(workers_count): + workers.append(asyncio.create_task(run_update_loop_worker(ep, n_txns, i))) + + await asyncio.gather(*workers) + + +async def run_update_loop_worker(ep: Endpoint, n_txns: int, idx: int): + table = f"t_{idx}" + conn = await ep.connect_async() + await conn.execute(f"CREATE TABLE {table} (pk integer PRIMARY KEY, x integer)") + await conn.execute(f"ALTER TABLE {table} SET (autovacuum_enabled = false)") + await conn.execute(f"INSERT INTO {table} VALUES (1, 0)") + await conn.execute( + """ + CREATE PROCEDURE updating{0}() as + $$ + DECLARE + i integer; + BEGIN + FOR i IN 1..{1} LOOP + UPDATE {0} SET x = x + 1 WHERE pk=1; + COMMIT; + END LOOP; + END + $$ LANGUAGE plpgsql + """.format(table, n_txns) + ) + await conn.execute("SET statement_timeout=0") + await conn.execute(f"call updating{table}()") + + +def run_benchmark(env: NeonEnv, pg_bin: PgBin, record, duration_secs: int): + ps_http = env.pageserver.http_client() + cmd = [ + str(env.neon_binpath / "pagebench"), + "basebackup", + "--mgmt-api-endpoint", + ps_http.base_url, + "--page-service-connstring", + env.pageserver.connstr(password=None), + "--gzip-probability", + "1", + "--runtime", + f"{duration_secs}s", + # don't specify the targets explicitly, let 
pagebench auto-discover them + ] + + log.info(f"command: {' '.join(cmd)}") + basepath = pg_bin.run_capture(cmd, with_command_header=False) + results_path = Path(basepath + ".stdout") + log.info(f"Benchmark results at: {results_path}") + + with open(results_path, "r") as f: + results = json.load(f) + log.info(f"Results:\n{json.dumps(results, sort_keys=True, indent=2)}") + + total = results["total"] + metric = "request_count" + record( + metric, + metric_value=total[metric], + unit="", + report=MetricReport.HIGHER_IS_BETTER, + ) + + metric = "latency_mean" + record( + metric, + metric_value=humantime_to_ms(total[metric]), + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) + + metric = "latency_percentiles" + for k, v in total[metric].items(): + record( + f"{metric}.{k}", + metric_value=humantime_to_ms(v), + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) diff --git a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py index 1ed7e577b9..f8125920c8 100644 --- a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py +++ b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py @@ -14,7 +14,10 @@ from fixtures.neon_fixtures import ( ) from fixtures.utils import get_scale_for_db, humantime_to_ms -from performance.pageserver.util import ensure_pageserver_ready_for_benchmarking +from performance.pageserver.util import ( + ensure_pageserver_ready_for_benchmarking, + setup_pageserver_with_tenants, +) # For reference, the space usage of the snapshots: @@ -75,10 +78,72 @@ def test_pageserver_max_throughput_getpage_at_latest_lsn( for param, (value, kwargs) in params.items(): record(param, metric_value=value, report=MetricReport.TEST_PARAM, **kwargs) - env = setup_pageserver_with_pgbench_tenants(neon_env_builder, pg_bin, 
n_tenants, pgbench_scale) + + def setup_wrapper(env: NeonEnv): + return setup_tenant_template(env, pg_bin, pgbench_scale) + + env = setup_pageserver_with_tenants( + neon_env_builder, + f"max_throughput_latest_lsn-{n_tenants}-{pgbench_scale}", + n_tenants, + setup_wrapper, + ) run_benchmark_max_throughput_latest_lsn(env, pg_bin, record, duration) +def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int): + # use a config that makes production of on-disk state timing-insensitive + # as we ingest data into the tenant. + config = { + "gc_period": "0s", # disable periodic gc + "checkpoint_timeout": "10 years", + "compaction_period": "0s", # disable periodic compaction + "compaction_threshold": 10, + "compaction_target_size": 134217728, + "checkpoint_distance": 268435456, + "image_creation_threshold": 3, + } + template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) + env.pageserver.tenant_detach(template_tenant) + env.pageserver.allowed_errors.append( + # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + ".*Dropped remote consistent LSN updates.*", + ) + env.pageserver.tenant_attach(template_tenant, config) + ps_http = env.pageserver.http_client() + with env.endpoints.create_start("main", tenant_id=template_tenant) as ep: + pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", "-I", "dtGvp", ep.connstr()]) + wait_for_last_flush_lsn(env, ep, template_tenant, template_timeline) + ps_http.timeline_checkpoint(template_tenant, template_timeline) + ps_http.timeline_compact(template_tenant, template_timeline) + for _ in range( + 0, 17 + ): # some prime number to avoid potential resonances with the "_threshold" variables from the config + # the L0s produced by this appear to have size ~5MiB + num_txns = 10_000 + pg_bin.run_capture( + ["pgbench", "-N", "-c1", "--transactions", f"{num_txns}", ep.connstr()] + ) + wait_for_last_flush_lsn(env, ep, template_tenant, template_timeline) + 
ps_http.timeline_checkpoint(template_tenant, template_timeline) + ps_http.timeline_compact(template_tenant, template_timeline) + # for reference, the output at scale=6 looked like so (306M total) + # ls -sh test_output/shared-snapshots/max_throughput_latest_lsn-2-6/snapshot/pageserver_1/tenants/35c30b88ea16a7a09f82d9c6a115551b/timelines/da902b378eebe83dc8a4e81cd3dc1c59 + # total 306M + # 188M 000000000000000000000000000000000000-030000000000000000000000000000000003__000000000149F060-0000000009E75829 + # 4.5M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000009E75829-000000000A21E919 + # 33M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000A21E919-000000000C20CB71 + # 36M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000C20CB71-000000000E470791 + # 16M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000E470791-000000000F34AEF1 + # 8.2M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000F34AEF1-000000000FABA8A9 + # 6.0M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000FABA8A9-000000000FFE0639 + # 6.1M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000FFE0639-000000001051D799 + # 4.7M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000001051D799-0000000010908F19 + # 4.6M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000010908F19-0000000010CD3021 + + return (template_tenant, template_timeline, config) + + def run_benchmark_max_throughput_latest_lsn( env: NeonEnv, pg_bin: PgBin, record, duration_secs: int ): @@ -133,78 +198,3 @@ def run_benchmark_max_throughput_latest_lsn( unit="ms", report=MetricReport.LOWER_IS_BETTER, ) - - -def setup_pageserver_with_pgbench_tenants( - neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - n_tenants: int, - scale: int, -) -> NeonEnv: - """ - 
Utility function to set up a pageserver with a given number of identical tenants. - Each tenant is a pgbench tenant, initialize to a certain scale, and treated afterwards - with a repeat application of (pgbench simple-update workload, checkpoint, compact). - """ - - def setup_template(env: NeonEnv): - # use a config that makes production of on-disk state timing-insensitive - # as we ingest data into the tenant. - config = { - "gc_period": "0s", # disable periodic gc - "checkpoint_timeout": "10 years", - "compaction_period": "0s", # disable periodic compaction - "compaction_threshold": 10, - "compaction_target_size": 134217728, - "checkpoint_distance": 268435456, - "image_creation_threshold": 3, - } - template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) - env.pageserver.tenant_detach(template_tenant) - env.pageserver.allowed_errors.append( - # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely - ".*Dropped remote consistent LSN updates.*", - ) - env.pageserver.tenant_attach(template_tenant, config) - ps_http = env.pageserver.http_client() - with env.endpoints.create_start("main", tenant_id=template_tenant) as ep: - pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", "-I", "dtGvp", ep.connstr()]) - wait_for_last_flush_lsn(env, ep, template_tenant, template_timeline) - ps_http.timeline_checkpoint(template_tenant, template_timeline) - ps_http.timeline_compact(template_tenant, template_timeline) - for _ in range( - 0, 17 - ): # some prime number to avoid potential resonances with the "_threshold" variables from the config - # the L0s produced by this appear to have size ~5MiB - num_txns = 10_000 - pg_bin.run_capture( - ["pgbench", "-N", "-c1", "--transactions", f"{num_txns}", ep.connstr()] - ) - wait_for_last_flush_lsn(env, ep, template_tenant, template_timeline) - ps_http.timeline_checkpoint(template_tenant, template_timeline) - ps_http.timeline_compact(template_tenant, 
template_timeline) - # for reference, the output at scale=6 looked like so (306M total) - # ls -sh test_output/shared-snapshots/max_throughput_latest_lsn-2-6/snapshot/pageserver_1/tenants/35c30b88ea16a7a09f82d9c6a115551b/timelines/da902b378eebe83dc8a4e81cd3dc1c59 - # total 306M - # 188M 000000000000000000000000000000000000-030000000000000000000000000000000003__000000000149F060-0000000009E75829 - # 4.5M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000009E75829-000000000A21E919 - # 33M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000A21E919-000000000C20CB71 - # 36M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000C20CB71-000000000E470791 - # 16M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000E470791-000000000F34AEF1 - # 8.2M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000F34AEF1-000000000FABA8A9 - # 6.0M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000FABA8A9-000000000FFE0639 - # 6.1M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000000FFE0639-000000001051D799 - # 4.7M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000000001051D799-0000000010908F19 - # 4.6M 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000010908F19-0000000010CD3021 - - return (template_tenant, template_timeline, config) - - def doit(neon_env_builder: NeonEnvBuilder) -> NeonEnv: - return many_tenants.single_timeline(neon_env_builder, setup_template, n_tenants) - - env = neon_env_builder.build_and_use_snapshot( - f"max_throughput_latest_lsn-{n_tenants}-{scale}", doit - ) - env.start() - ensure_pageserver_ready_for_benchmarking(env, n_tenants) - return env diff --git a/test_runner/performance/pageserver/util.py b/test_runner/performance/pageserver/util.py index 45eb652362..590f759ffc 100644 --- 
a/test_runner/performance/pageserver/util.py
+++ b/test_runner/performance/pageserver/util.py
@@ -2,9 +2,17 @@
 Utilities used by all code in this sub-directory
 """
 
+from typing import Any, Callable, Dict, Tuple
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv
+from fixtures.neon_fixtures import (
+    NeonEnv,
+    NeonEnvBuilder,
+    PgBin,
+    wait_for_last_flush_lsn,
+)
+import fixtures.pageserver.many_tenants as many_tenants
 from fixtures.pageserver.utils import wait_until_all_tenants_state
+from fixtures.types import TenantId, TimelineId
 
 
 def ensure_pageserver_ready_for_benchmarking(env: NeonEnv, n_tenants: int):
@@ -27,3 +35,24 @@ def ensure_pageserver_ready_for_benchmarking(env: NeonEnv, n_tenants: int):
         assert not layer.remote
 
     log.info("ready")
+
+
+def setup_pageserver_with_tenants(
+    neon_env_builder: NeonEnvBuilder,
+    name: str,
+    n_tenants: int,
+    setup: Callable[[NeonEnv], Tuple[TenantId, TimelineId, Dict[str, Any]]],
+) -> NeonEnv:
+    """
+    Utility function to set up a pageserver with a given number of identical tenants.
+    The tenants' content is produced by the `setup` callback; the resulting state is
+    cached as a snapshot keyed by `name` and reused by later runs.
+    """
+
+    def doit(neon_env_builder: NeonEnvBuilder) -> NeonEnv:
+        return many_tenants.single_timeline(neon_env_builder, setup, n_tenants)
+
+    env = neon_env_builder.build_and_use_snapshot(name, doit)
+    env.start()
+    ensure_pageserver_ready_for_benchmarking(env, n_tenants)
+    return env