mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 04:22:56 +00:00
Replace the layer array and linear search with an R-tree. So far, the in-memory layer map that holds information about the layer files that exist has used a simple Vec, in no particular order, to hold information about all the layers. That obviously doesn't scale very well; with thousands of layer files, the linear search was consuming a lot of CPU. Replace it with a two-dimensional R-tree, with Key and LSN ranges as the dimensions. For the R-tree, use the 'rstar' crate. To be able to use that, we convert the Keys and LSNs into 256-bit integers. 64 bits would be enough to represent LSNs, and 128 bits would be enough to represent Keys. However, we use 256 bits, because rstar internally performs multiplication to calculate the area of rectangles, and the result of multiplying two 128-bit integers doesn't necessarily fit in 128 bits, causing integer overflow and, if overflow-checks are enabled, a panic. To avoid that, we use 256-bit integers. Add a performance test that creates a lot of layer files, to demonstrate the benefit.
40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import time
|
|
|
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
|
|
|
|
|
#
|
|
# Benchmark searching the layer map, when there are a lot of small layer files.
|
|
#
|
|
def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
    """Benchmark a layer-map search when there are many small layer files.

    Starts an environment, creates a tenant whose settings are tuned to
    produce lots of small layer files, loads a table in several batches,
    and then records the duration of a ``count(*)`` query under the
    ``"test_query"`` benchmark label. The query result is checked against
    the number of rows inserted.
    """
    batch_count = 10
    rows_per_batch = 100000
    branch_name = "test_layer_map"

    env = neon_env_builder.init_start()

    # We want a lot of layer files to exercise the layer map. gc_horizon and
    # checkpoint_distance are made very small so that we end up with many
    # small layer files.
    tenant_conf = {
        "gc_period": "100 m",
        "gc_horizon": "1048576",
        "checkpoint_distance": "8192",
        "compaction_period": "1 s",
        "compaction_threshold": "1",
        "compaction_target_size": "8192",
    }
    tenant, _ = env.neon_cli.create_tenant(conf=tenant_conf)

    env.neon_cli.create_timeline(branch_name, tenant_id=tenant)
    pg = env.postgres.create_start(branch_name, tenant_id=tenant)

    conn = pg.connect()
    cur = conn.cursor()
    cur.execute("create table t(x integer)")

    # The statement is the same for every batch, so build it once.
    insert_batch = f"insert into t values (generate_series(1,{rows_per_batch}))"
    for _ in range(batch_count):
        cur.execute(insert_batch)
        # NOTE(review): presumably this pause lets background work (the
        # compaction period above is "1 s") run between batches -- confirm.
        time.sleep(1)

    cur.execute("vacuum t")

    expected_row = (batch_count * rows_per_batch,)
    with zenbenchmark.record_duration("test_query"):
        cur.execute("SELECT count(*) from t")
        assert cur.fetchone() == expected_row
|