mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 04:50:38 +00:00
it repros
This commit is contained in:
@@ -1,19 +1,13 @@
|
||||
# NB: there are benchmarks that double-serve as tests inside the `performance` directory.
|
||||
|
||||
import threading
|
||||
import time
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import requests.exceptions
|
||||
|
||||
import fixtures
|
||||
from fixtures.common_types import NodeId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, StorageControllerApiException
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def test_slow_flush(neon_env_builder: NeonEnvBuilder):
|
||||
tablesize_mib = 500
|
||||
|
||||
def test_slow_flush(neon_env_builder: NeonEnvBuilder, neon_binpath: Path):
|
||||
def patch_pageserver_toml(config):
|
||||
config["page_service_pipelining"] = {
|
||||
"mode": "pipelined",
|
||||
@@ -22,84 +16,30 @@ def test_slow_flush(neon_env_builder: NeonEnvBuilder):
|
||||
}
|
||||
|
||||
neon_env_builder.pageserver_config_override = patch_pageserver_toml
|
||||
neon_env_builder.num_pageservers = 2
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
ep = env.endpoints.create_start(
|
||||
"main",
|
||||
config_lines=[
|
||||
"max_parallel_workers_per_gather=0", # disable parallel backends
|
||||
"effective_io_concurrency=100", # give plenty of opportunity for pipelining
|
||||
"neon.readahead_buffer_size=128", # this is the default value at time of writing
|
||||
"shared_buffers=128MB", # keep lower than tablesize_mib
|
||||
# debug
|
||||
"log_statement=all",
|
||||
log.info("make flush appear slow")
|
||||
ps_http = env.pageserver.http_client()
|
||||
ps_http.configure_failpoints(("page_service:flush:pre", "return(10000000)"))
|
||||
|
||||
log.info("filling pipe")
|
||||
child = subprocess.Popen(
|
||||
[
|
||||
neon_binpath / "test_helper_slow_client_reads",
|
||||
env.pageserver.connstr(),
|
||||
str(env.initial_tenant),
|
||||
str(env.initial_timeline),
|
||||
],
|
||||
bufsize=0, # unbuffered
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
)
|
||||
buf = child.stdout.read(1)
|
||||
if len(buf) != 1:
|
||||
raise Exception("unexpected EOF")
|
||||
if buf != b"R":
|
||||
raise Exception(f"unexpected data: {buf!r}")
|
||||
log.info("helper reports pipe filled")
|
||||
|
||||
conn = ep.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute("CREATE EXTENSION IF NOT EXISTS neon;")
|
||||
cur.execute("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
|
||||
|
||||
log.info("Filling the table")
|
||||
cur.execute("CREATE TABLE t (data char(1000)) with (fillfactor=10)")
|
||||
tablesize = tablesize_mib * 1024 * 1024
|
||||
npages = tablesize // (8 * 1024)
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1, %s)", (npages,))
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
def workload(stop: threading.Event, max_iters=None):
    """Run sequential scans of table ``t`` until told to stop.

    Every iteration opens a fresh connection through ``ep.connect()``, calls
    ``clear_buffer_cache()`` and then sums the ``data`` column. The table was
    filled from ``generate_series(1, npages)``, so the expected sum is the
    closed form ``npages * (npages + 1) // 2``.

    Args:
        stop: event checked before each iteration; once it is set the loop
            exits.
        max_iters: upper bound on the number of scans, or ``None`` to keep
            scanning until ``stop`` is set.

    Raises:
        AssertionError: if a scan returns a sum other than the expected one.
    """
    iters = 0
    while not stop.is_set() and (max_iters is None or iters < max_iters):
        log.info("Seqscan %d", iters)
        conn = ep.connect()
        try:
            cur = conn.cursor()
            try:
                # TODO: what about LFC? doesn't matter right now because LFC
                # isn't enabled by default in tests
                cur.execute("select clear_buffer_cache()")
                cur.execute("select sum(data::bigint) from t")
                assert cur.fetchall()[0][0] == npages * (npages + 1) // 2
            finally:
                cur.close()
        finally:
            # Close explicitly: an unbounded loop would otherwise accumulate
            # one open connection per iteration until garbage collection.
            conn.close()
        iters += 1
    log.info("workload done")
|
||||
|
||||
stop = threading.Event()
|
||||
|
||||
log.info("calibrating workload duration")
|
||||
workload(stop, 1)
|
||||
before = time.time()
|
||||
workload(stop, 1)
|
||||
after = time.time()
|
||||
duration = after - before
|
||||
log.info("duration: %f", duration)
|
||||
assert(duration > 3)
|
||||
|
||||
log.info("begin")
|
||||
threading.Thread(target=workload, args=[stop]).start()
|
||||
|
||||
# make flush appear slow
|
||||
ps_http = [p.http_client() for p in env.pageservers]
|
||||
ps_http[0].configure_failpoints(("page_service:flush:pre", "return(10000000)"))
|
||||
ps_http[1].configure_failpoints(("page_service:flush:pre", "return(10000000)"))
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
# try to shut down the tenant
|
||||
for i in range(1, 10):
|
||||
log.info(f"start migration {i}")
|
||||
try:
|
||||
env.storage_controller.tenant_shard_migrate(env.initial_tenant, (i % 2)+1)
|
||||
except StorageControllerApiException as e:
|
||||
log.info(f"shard migrate request failed: {e}")
|
||||
while True:
|
||||
node_id = NodeId(env.storage_controller.tenant_describe(env.initial_tenant)["node_id"])
|
||||
if node_id == NodeId(i % 2)+1:
|
||||
break
|
||||
log.info(f"waiting for migration to complete")
|
||||
time.sleep(1)
|
||||
log.info(f"migration done")
|
||||
time.sleep(1)
|
||||
|
||||
log.info("try to shut down the tenant")
|
||||
env.pageserver.tenant_detach(env.initial_tenant)
|
||||
|
||||
Reference in New Issue
Block a user