mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
This PR commits the benchmarks I ran to qualify concurrent IO before we
released it.
Changes:
- Add `l0stack` fixture; a reusable abstraction for creating a stack of
L0 deltas
each of which has 1 Value::Delta per page.
- Such a stack of L0 deltas is a good and understandable demo for
concurrent IO
because to reconstruct any page, `$layer_stack_height` Values need to be
read.
Before concurrent IO, the reads were sequential.
With concurrent IO, they are executed concurrently.
- So, switch `test_latency` to use the l0stack.
- Teach `pagebench`, which is used by `test_latency`, to limit itself to
the blocks of the relation created by the l0stack abstraction.
- Additional parametrization of `test_latency` over dimensions
`ps_io_concurrency,l0_stack_height,queue_depth`
- Use better names for the tests to reflect what they do, leave
interpretation of the (now quite high-dimensional) results to the reader
- `test_{throughput => postgres_seqscan}`
- `test_{latency => random_reads}`
- Cut down on permutations to those we use in production. Runtime is
about 2min.
Refs
- concurrent IO epic https://github.com/neondatabase/neon/issues/9378
- batching task: fixes https://github.com/neondatabase/neon/issues/9837
---------
Co-authored-by: Peter Bendel <peterbendel@neon.tech>
149 lines
5.6 KiB
Python
149 lines
5.6 KiB
Python
from dataclasses import dataclass
|
|
|
|
from psycopg2.extensions import connection as PgConnection
|
|
|
|
from fixtures.common_types import Lsn, TenantShardId, TimelineId
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import Endpoint
|
|
from fixtures.pageserver.http import PageserverHttpClient
|
|
from fixtures.pageserver.utils import wait_for_last_record_lsn
|
|
|
|
|
|
@dataclass
class L0StackShape:
    """Shape of the L0 delta stack that `make_l0_stack` builds over table `data`."""

    # Target size of the table in MiB; converted into a number of 8 KiB pages.
    logical_table_size_mib: int = 50
    # Number of update rounds; each round is checkpointed after touching
    # one tuple in every page of the table.
    delta_stack_height: int = 20
|
|
|
|
|
|
def make_l0_stack(endpoint: Endpoint, shape: L0StackShape):
    """
    Creates stack of L0 deltas each of which should have 1 Value::Delta per page in table `data`.

    Looks up the tenant's (single) shard through the storage controller,
    reconfigures the endpoint with `full_page_writes=off`, and then delegates
    the actual work to `make_l0_stack_standalone`.

    Args:
        endpoint: compute endpoint whose tenant/timeline receives the stack.
            Its config is changed to `full_page_writes=off` as a side effect.
        shape: table size and stack height to create.

    Raises:
        AssertionError: if the tenant does not have exactly one shard.
    """
    env = endpoint.env

    # TODO: wait for storcon to finish any reconciles before jumping to action here?
    description = env.storage_controller.tenant_describe(endpoint.tenant_id)
    shards = description["shards"]
    assert len(shards) == 1, "does not support sharding"
    shard = shards[0]
    tenant_shard_id = TenantShardId.parse(shard["tenant_shard_id"])

    endpoint.config(["full_page_writes=off"])
    endpoint.reconfigure()

    ps = env.get_pageserver(shard["node_attached"])

    timeline_id = endpoint.show_timeline_id()

    vps_http = env.storage_controller.pageserver_api()
    ps_http = ps.http_client()
    endpoint_conn = endpoint.connect()
    try:
        make_l0_stack_standalone(
            vps_http, ps_http, tenant_shard_id, timeline_id, endpoint_conn, shape
        )
    finally:
        # The connection is private to this function; release it even if
        # building the stack fails so we don't leak a backend.
        endpoint_conn.close()
|
|
|
|
|
|
def make_l0_stack_standalone(
    vps_http: PageserverHttpClient,
    ps_http: PageserverHttpClient,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    endpoint_conn: PgConnection,
    shape: L0StackShape,
):
    """
    See make_l0_stack for details.

    This function is a standalone version of make_l0_stack, usable from not-test code.

    Args:
        vps_http: client used to set the tenant config and to read the layer map.
        ps_http: client used to wait for WAL ingest and to trigger checkpoints.
        tenant_shard_id: the (unsharded) tenant shard to operate on.
        timeline_id: timeline in which the L0 stack is created.
        endpoint_conn: connection to the compute; `autocommit` is switched on
            and table `data` is dropped and recreated through it. The
            connection is not closed here; the caller owns it.
        shape: table size and stack height to create.

    Raises:
        AssertionError: if the tenant is sharded, `full_page_writes` is not
            off, or the resulting table / layer layout is not the expected one.
    """

    assert not tenant_shard_id.shard_index.is_sharded, (
        "the current implementation only supports unsharded tenants"
    )

    tenant_id = tenant_shard_id.tenant_id
    conn = endpoint_conn
    desired_size = shape.logical_table_size_mib * 1024 * 1024

    config = {
        "gc_period": "0s",  # disable periodic gc
        "checkpoint_timeout": "10 years",
        "compaction_period": "1h",  # doesn't matter, but 0 value will kill walredo every 10s
        "compaction_threshold": 100000,  # we just want L0s
        "compaction_target_size": 134217728,
        "checkpoint_distance": 268435456,
        "image_creation_threshold": 100000,  # we just want L0s
    }

    vps_http.set_tenant_config(tenant_id, config)

    conn.autocommit = True
    # Use the cursor as a context manager so it is closed even if one of the
    # assertions below fails.
    with conn.cursor() as cur:
        # Ensure full_page_writes are disabled so that all Value::Delta in
        # pageserver are !will_init, and therefore a getpage needs to read
        # the entire delta stack.
        cur.execute("SHOW full_page_writes")
        assert cur.fetchall()[0][0] == "off", "full_page_writes should be off"

        # each tuple is 23 (header) + 100 bytes = 123 bytes
        # page header is 24 bytes
        # 8k page size
        # (8k-24bytes) / 123 bytes = 63 tuples per page
        # set fillfactor to 10 to have 6 tuples per page
        cur.execute("DROP TABLE IF EXISTS data")
        cur.execute("CREATE TABLE data(id bigint, row char(92)) with (fillfactor=10)")
        need_pages = desired_size // 8192
        need_rows = need_pages * 6
        log.info(f"Need {need_pages} pages, {need_rows} rows")
        cur.execute(
            f"INSERT INTO data SELECT i,'row'||i FROM generate_series(1, {need_rows}) as i"
        )
        # Raise fillfactor to 100% so that all updates are HOT updates.
        # We assert they're hot updates by checking fetch_id_to_page_mapping remains the same.
        cur.execute("ALTER TABLE data SET (fillfactor=100)")

        def settle_and_flush():
            # Wait until the pageserver has ingested everything the compute
            # flushed so far, then checkpoint the timeline.
            cur.execute("SELECT pg_current_wal_flush_lsn()")
            flush_lsn = Lsn(cur.fetchall()[0][0])
            wait_for_last_record_lsn(ps_http, tenant_shard_id, timeline_id, flush_lsn)
            ps_http.timeline_checkpoint(tenant_id, timeline_id)

        # create an L0 for the initial data we just inserted
        settle_and_flush()

        # assert we wrote what we think we wrote
        cur.execute("""
            with ntuples_per_page as (
                select (ctid::text::point)[0]::bigint pageno,count(*) ntuples from data group by pageno
            )
            select ntuples, count(*) npages from ntuples_per_page group by ntuples order by ntuples;
        """)
        rows = cur.fetchall()
        log.info(f"initial table layout: {rows}")
        assert len(rows) == 1, f"expected a uniform tuples-per-page layout, got {rows}"
        assert rows[0][0] == 6, f"expected 6 tuples per page, got {rows[0][0]}"
        assert rows[0][1] == need_pages, f"expected {need_pages} pages, got {rows[0][1]}"

        def fetch_id_to_page_mapping():
            # (id, page number) for every row; stays constant as long as
            # updates do not move tuples to a different page.
            cur.execute("""
                SELECT id,(ctid::text::point)[0]::bigint pageno FROM data ORDER BY id
            """)
            return cur.fetchall()

        initial_mapping = fetch_id_to_page_mapping()

        # every iteration updates one tuple in each page
        delta_stack_height = shape.delta_stack_height
        for i in range(delta_stack_height):
            log.info(f"creating L0 delta {i + 1}/{delta_stack_height}")
            # ids are consecutive with 6 rows per page, so each residue class
            # modulo 6 selects exactly one row in every page.
            cur.execute(f"UPDATE data set row = row||',u' where id % 6 = {i % 6}")
            log.info(f"modified rows: {cur.rowcount}")
            assert cur.rowcount == need_pages, (
                f"expected to modify {need_pages} rows, modified {cur.rowcount}"
            )
            settle_and_flush()
            post_update_mapping = fetch_id_to_page_mapping()
            assert initial_mapping == post_update_mapping, "Postgres should be doing HOT updates"

    # Assert the layer count is what we expect it is
    layer_map = vps_http.layer_map_info(tenant_id, timeline_id)
    l0_count = len(layer_map.delta_l0_layers())
    # +1 for the initdb layer + 1 for the table creation & fill
    expected_l0_count = delta_stack_height + 1 + 1
    assert l0_count == expected_l0_count, (
        f"expected {expected_l0_count} L0 delta layers, got {l0_count}"
    )
    # it's all L0s
    assert l0_count == len(layer_map.delta_layers()), "expected all delta layers to be L0s"
    # no images
    assert len(layer_map.image_layers()) == 0, "expected no image layers"
|