This PR commits the benchmarks I ran to qualify concurrent IO before we
released it.
Changes:
- Add an `l0stack` fixture: a reusable abstraction for creating a stack of
  L0 deltas, each of which has one `Value::Delta` per page.
- Such a stack of L0 deltas is a good and understandable demo for
  concurrent IO, because reconstructing any page requires reading
  `$layer_stack_height` Values.
  Before concurrent IO, these reads were executed sequentially.
  With concurrent IO, they are executed concurrently.
- So, switch `test_latency` to use the l0stack.
- Teach `pagebench`, which is used by `test_latency`, to limit itself to
  the blocks of the relation created by the l0stack abstraction.
- Additionally parametrize `test_latency` over the dimensions
  `ps_io_concurrency`, `l0_stack_height`, and `queue_depth`
  (see the sketch after this list).
- Use better names for the tests to reflect what they do; leave
  interpretation of the (now quite high-dimensional) results to the reader:
  - `test_{throughput => postgres_seqscan}`
  - `test_{latency => random_reads}`
- Cut down the permutations to those we use in production. Runtime is
  about 2 minutes.
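
For illustration only, a minimal sketch of how such a parametrization could be expressed with `pytest.mark.parametrize`. The concrete values, the mode names, and the test body below are assumptions, not the PR's actual code:

```python
import pytest

# Hypothetical sketch: parameter values and concurrency-mode names are
# illustrative assumptions, not taken from this PR.
@pytest.mark.parametrize("queue_depth", [1, 32])
@pytest.mark.parametrize("l0_stack_height", [10, 20])
@pytest.mark.parametrize("ps_io_concurrency", ["sequential", "sidecar-task"])
def test_random_reads(ps_io_concurrency: str, l0_stack_height: int, queue_depth: int):
    # Build a stack of `l0_stack_height` L0 deltas, then drive random
    # getpage requests at `queue_depth` using the given IO concurrency mode.
    ...
```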
Refs
- concurrent IO epic https://github.com/neondatabase/neon/issues/9378
- batching task: fixes https://github.com/neondatabase/neon/issues/9837
---------
Co-authored-by: Peter Bendel <peterbendel@neon.tech>
# Tool to convert the JSON output from running a perf test with `--out-dir` to a CSV that
# can be easily pasted into a spreadsheet for quick viz & analysis.
# Check the `./README.md` in this directory for `--out-dir`.
#
# TODO: add the pytest.mark.parametrize to the json and make them columns here
# https://github.com/neondatabase/neon/issues/11878

import csv
import json
import os
import sys


def json_to_csv(json_file):
    with open(json_file) as f:
        data = json.load(f)

    # Collect all possible metric names to form headers
    all_metrics = set()
    for result in data.get("result", []):
        for metric in result.get("data", []):
            all_metrics.add(metric["name"])

    # Sort metrics for consistent output
    metrics = sorted(list(all_metrics))

    # Create headers
    headers = ["suit"] + metrics

    # Prepare rows
    rows = []
    for result in data.get("result", []):
        row = {"suit": result["suit"]}

        # Initialize all metrics to empty
        for metric in metrics:
            row[metric] = ""

        # Fill in available metrics
        for item in result.get("data", []):
            row[item["name"]] = item["value"]

        rows.append(row)

    # Write to stdout as CSV
    writer = csv.DictWriter(sys.stdout, fieldnames=headers)
    writer.writeheader()
    writer.writerows(rows)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Usage: python {os.path.basename(__file__)} <json_file>")
        sys.exit(1)

    json_file = sys.argv[1]
    json_to_csv(json_file)
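
For reference, the input shape the script expects can be read off the code above: a top-level "result" list whose entries carry a "suit" name and a "data" list of {"name", "value"} metrics. The entry below is made up for illustration; the suit and metric names are not real benchmark output.

example = {
    "result": [
        {
            "suit": "test_random_reads[example-params]",  # made-up suit name
            "data": [
                {"name": "latency_mean", "value": 0.42},  # made-up metrics
                {"name": "latency_p99", "value": 1.30},
            ],
        }
    ]
}
# Saved to a file and passed as the script's single argument, this would
# print a CSV with a "suit" column plus one column per metric name.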