Merge branch 'main' into ps-trace

2026-05-23 16:10:37 +00:00 · 2022-10-05 13:24:06 -04:00
parent 5315614974 b99bed510d
commit d4065f2e85
165 changed files with 5246 additions and 2708 deletions
--- a/test_runner/performance/test_layer_map.py
+++ b/test_runner/performance/test_layer_map.py
@@ -0,0 +1,39 @@
+import time
+
+from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+# Benchmark searching the layer map when there are a lot of small layer files.
+# Inserts n_iters batches of n_records rows each into a fresh table, then records
+# the duration of a full-table count(*) under the name "test_query".
+def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
+
+    env = neon_env_builder.init_start()
+    n_iters = 10
+    n_records = 100000
+
+    # We want a lot of layer files to exercise the layer map. Make gc_horizon and
+    # checkpoint_distance very small, so that we get a lot of small layer files.
+    tenant, _ = env.neon_cli.create_tenant(
+        conf={
+            "gc_period": "100 m",
+            "gc_horizon": "1048576",
+            "checkpoint_distance": "8192",
+            "compaction_period": "1 s",
+            "compaction_threshold": "1",
+            "compaction_target_size": "8192",
+        }
+    )
+
+    env.neon_cli.create_timeline("test_layer_map", tenant_id=tenant)
+    pg = env.postgres.create_start("test_layer_map", tenant_id=tenant)
+    cur = pg.connect().cursor()
+    cur.execute("create table t(x integer)")
+    for i in range(n_iters):
+        cur.execute(f"insert into t values (generate_series(1,{n_records}))")
+        time.sleep(1)  # presumably gives the 1 s compaction_period time to fire; TODO confirm
+
+    cur.execute("vacuum t")
+    with zenbenchmark.record_duration("test_query"):
+        cur.execute("SELECT count(*) from t")
+        assert cur.fetchone() == (n_iters * n_records,)
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -4,7 +4,7 @@ import os
 import timeit
 from datetime import datetime
 from pathlib import Path
-from typing import List
+from typing import Dict, List

 import pytest
 from fixtures.benchmark_fixture import MetricReport, PgBenchInitResult, PgBenchRunResult
@@ -24,14 +24,18 @@ def utc_now_timestamp() -> int:
    return calendar.timegm(datetime.utcnow().utctimetuple())


-def init_pgbench(env: PgCompare, cmdline):
+def init_pgbench(env: PgCompare, cmdline, password: None):
+    environ: Dict[str, str] = {}
+    if password is not None:
+        environ["PGPASSWORD"] = password
+
    # calculate timestamps and durations separately
    # timestamp is intended to be used for linking to grafana and logs
    # duration is actually a metric and uses float instead of int for timestamp
    start_timestamp = utc_now_timestamp()
    t0 = timeit.default_timer()
    with env.record_pageserver_writes("init.pageserver_writes"):
-        out = env.pg_bin.run_capture(cmdline)
+        out = env.pg_bin.run_capture(cmdline, env=environ)
        env.flush()

    duration = timeit.default_timer() - t0
@@ -48,13 +52,15 @@ def init_pgbench(env: PgCompare, cmdline):
    env.zenbenchmark.record_pg_bench_init_result("init", res)


-def run_pgbench(env: PgCompare, prefix: str, cmdline):
+def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None):
+    environ: Dict[str, str] = {}
+    if password is not None:
+        environ["PGPASSWORD"] = password
+
    with env.record_pageserver_writes(f"{prefix}.pageserver_writes"):
        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
-        out = env.pg_bin.run_capture(
-            cmdline,
-        )
+        out = env.pg_bin.run_capture(cmdline, env=environ)
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()
        env.flush()
@@ -82,11 +88,14 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline):
 def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType):
    env.zenbenchmark.record("scale", scale, "", MetricReport.TEST_PARAM)

+    password = env.pg.default_options.get("password", None)
+    options = "-cstatement_timeout=1h " + env.pg.default_options.get("options", "")
+    # Keep the password out of the connection string (password=None); it is passed via PGPASSWORD.
+    connstr = env.pg.connstr(password=None, options=options)
+
    if workload_type == PgBenchLoadType.INIT:
        # Run initialize
-        init_pgbench(
-            env, ["pgbench", f"-s{scale}", "-i", env.pg.connstr(options="-cstatement_timeout=1h")]
-        )
+        init_pgbench(env, ["pgbench", f"-s{scale}", "-i", connstr], password=password)

    if workload_type == PgBenchLoadType.SIMPLE_UPDATE:
        # Run simple-update workload
@@ -100,8 +109,9 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
                f"-T{duration}",
                "-P2",
                "--progress-timestamp",
-                env.pg.connstr(),
+                connstr,
            ],
+            password=password,
        )

    if workload_type == PgBenchLoadType.SELECT_ONLY:
@@ -116,8 +126,9 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
                f"-T{duration}",
                "-P2",
                "--progress-timestamp",
-                env.pg.connstr(),
+                connstr,
            ],
+            password=password,
        )

    env.report_size()