Files
neon/test_runner/performance/test_gc_feedback.py
Alex Chi Z 9b98823d61 bottom-most-compaction: use in test_gc_feedback + fix bugs (#8103)
Adds a manual compaction trigger and adds gc compaction to test_gc_feedback.

Part of https://github.com/neondatabase/neon/issues/8002

```
test_gc_feedback[debug-pg15].logical_size: 50 Mb
test_gc_feedback[debug-pg15].physical_size: 2269 Mb
test_gc_feedback[debug-pg15].physical/logical ratio: 44.5302 
test_gc_feedback[debug-pg15].max_total_num_of_deltas: 7 
test_gc_feedback[debug-pg15].max_num_of_deltas_above_image: 2 
test_gc_feedback[debug-pg15].logical_size_after_bottom_most_compaction: 50 Mb
test_gc_feedback[debug-pg15].physical_size_after_bottom_most_compaction: 287 Mb
test_gc_feedback[debug-pg15].physical/logical ratio after bottom_most_compaction: 5.6312 
test_gc_feedback[debug-pg15].max_total_num_of_deltas_after_bottom_most_compaction: 4 
test_gc_feedback[debug-pg15].max_num_of_deltas_above_image_after_bottom_most_compaction: 1
```

## Summary of changes

* Add the manual compaction trigger
* Use in test_gc_feedback
* Add a guard to avoid running it with retain_lsns
* Fix: Do `schedule_compaction_update` after compaction
* Fix: Supply deltas in the correct order to reconstruct value

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-06-25 23:00:14 +00:00

152 lines
6.2 KiB
Python

import json
import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
def _max_delta_counts(key_ranges):
    """Return the largest delta-layer counts found across ``key_ranges``.

    ``key_ranges`` is an iterable of mappings that each carry the keys
    ``"total_num_of_deltas"`` and ``"num_of_deltas_above_image"`` (the entries
    this test reads from ``client.perf_info``).

    Returns a ``(max_total_num_of_deltas, max_num_of_deltas_above_image)``
    pair. Both maxima start at 0, so an empty input yields ``(0, 0)``.
    """
    max_total_num_of_deltas = 0
    max_num_of_deltas_above_image = 0
    for key_range in key_ranges:
        max_total_num_of_deltas = max(max_total_num_of_deltas, key_range["total_num_of_deltas"])
        max_num_of_deltas_above_image = max(
            max_num_of_deltas_above_image, key_range["num_of_deltas_above_image"]
        )
    return max_total_num_of_deltas, max_num_of_deltas_above_image


@pytest.mark.timeout(10000)
def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
    """
    Test that GC is able to collect all old layers even if they are forming
    "stairs" and there are not three delta layers since the last image layer.

    Information about the image layers needed to collect old layers should
    be propagated by GC to the compaction task, which should take it into
    account when deciding which new image layers need to be created.

    NB: this test demonstrates the problem. The source tree contained the
    `gc_feedback` mechanism for about 9 months, but there were problems
    with it and it wasn't enabled at runtime.
    This PR removed the code: https://github.com/neondatabase/neon/pull/6863

    After the regular GC/compaction measurements are recorded, the test
    triggers one more compaction with `enhanced_gc_bottom_most_compaction=True`
    and records the same metrics again under `*after_bottom_most_compaction`
    names. Finally it reads the table back and dumps the layer map to
    `layer-map.json` in the repo directory.
    """
    MB = 1024 * 1024

    def record(name, value, unit=""):
        # Every metric reported by this test uses the same report kind.
        zenbenchmark.record(name, value, unit, MetricReport.LOWER_IS_BETTER)

    env = neon_env_builder.init_start()
    client = env.pageserver.http_client()

    tenant_id, _ = env.neon_cli.create_tenant(
        conf={
            # disable default GC and compaction
            "gc_period": "1000 m",
            "compaction_period": "0 s",
            "gc_horizon": f"{1024 ** 2}",
            "checkpoint_distance": f"{1024 ** 2}",
            "compaction_target_size": f"{1024 ** 2}",
            # set PITR interval to be small, so we can do GC
            "pitr_interval": "60 s",
            # "compaction_threshold" and "image_creation_threshold" are
            # deliberately not overridden here.
        }
    )
    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]

    n_steps = 10
    n_update_iters = 100
    step_size = 10000

    # NOTE(review): the listing this was restored from had lost its
    # indentation. The nesting below (vacuum after every update; checkpoint,
    # GC and compaction once per step) is reconstructed -- confirm against
    # the repository copy.
    with endpoint.cursor() as cur:
        cur.execute("SET statement_timeout='1000s'")
        cur.execute(
            "CREATE TABLE t(step bigint, count bigint default 0, payload text default repeat(' ', 100)) with (fillfactor=50)"
        )
        cur.execute("CREATE INDEX ON t(step)")
        # In each step, we insert 'step_size' new rows, and update the newly inserted rows
        # 'n_update_iters' times. This creates a lot of churn and generates lots of WAL at the end of the table,
        # without modifying the earlier parts of the table.
        for step in range(n_steps):
            cur.execute(f"INSERT INTO t (step) SELECT {step} FROM generate_series(1, {step_size})")
            for _ in range(n_update_iters):
                cur.execute(f"UPDATE t set count=count+1 where step = {step}")
                cur.execute("vacuum t")

            # The logical size is taken from the pageserver's timeline detail
            # (not from pg_table_size('t') on the endpoint).
            logical_size = client.timeline_detail(tenant_id, timeline_id)["current_logical_size"]
            log.info(f"Logical storage size {logical_size}")

            client.timeline_checkpoint(tenant_id, timeline_id)

            # Do compaction and GC
            client.timeline_gc(tenant_id, timeline_id, 0)
            client.timeline_compact(tenant_id, timeline_id)
            # One more iteration to check that no excessive image layers are generated
            client.timeline_gc(tenant_id, timeline_id, 0)
            client.timeline_compact(tenant_id, timeline_id)

    physical_size = client.timeline_detail(tenant_id, timeline_id)["current_physical_size"]
    log.info(f"Physical storage size {physical_size}")

    max_total_num_of_deltas, max_num_of_deltas_above_image = _max_delta_counts(
        client.perf_info(tenant_id, timeline_id)
    )

    record("logical_size", logical_size // MB, "Mb")
    record("physical_size", physical_size // MB, "Mb")
    record("physical/logical ratio", physical_size / logical_size)
    record("max_total_num_of_deltas", max_total_num_of_deltas)
    record("max_num_of_deltas_above_image", max_num_of_deltas_above_image)

    # Second measurement: run the bottom-most compaction and report the same
    # metrics again so the two sets can be compared.
    client.timeline_compact(tenant_id, timeline_id, enhanced_gc_bottom_most_compaction=True)
    tline_detail = client.timeline_detail(tenant_id, timeline_id)
    logical_size = tline_detail["current_logical_size"]
    physical_size = tline_detail["current_physical_size"]

    max_total_num_of_deltas, max_num_of_deltas_above_image = _max_delta_counts(
        client.perf_info(tenant_id, timeline_id)
    )

    record("logical_size_after_bottom_most_compaction", logical_size // MB, "Mb")
    record("physical_size_after_bottom_most_compaction", physical_size // MB, "Mb")
    record(
        "physical/logical ratio after bottom_most_compaction",
        physical_size / logical_size,
    )
    record(
        "max_total_num_of_deltas_after_bottom_most_compaction",
        max_total_num_of_deltas,
    )
    record(
        "max_num_of_deltas_above_image_after_bottom_most_compaction",
        max_num_of_deltas_above_image,
    )

    with endpoint.cursor() as cur:
        cur.execute("SELECT * FROM t")  # ensure data is not corrupted

    layer_map_path = env.repo_dir / "layer-map.json"
    log.info(f"Writing layer map to {layer_map_path}")
    layer_map_path.write_text(json.dumps(client.timeline_layer_map_info(tenant_id, timeline_id)))