Compare commits


214 Commits

Author SHA1 Message Date
Christian Schwarz
9d0405b798 Merge remote-tracking branch 'origin/main' into problame/batching-sidecar-task 2024-11-30 00:03:30 +01:00
Christian Schwarz
60b9238383 switch back to serial mode by default 2024-11-29 20:55:22 +01:00
Christian Schwarz
ce83418aa4 spsc_fold: fix missing wakeup on receiver or sender drop while other side is waiting 2024-11-29 20:54:28 +01:00
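For illustration, a minimal sketch of this bug class, assuming a Notify-style wakeup (simplified types, not the actual spsc_fold internals): if one side parks waiting while the other side drops without signalling, the waiter hangs forever, so Drop must also signal.

```rust
use std::sync::Arc;
use tokio::sync::Notify;

struct Shared {
    // in reality: the folded value slot plus a "closed" flag, behind a lock
    notify: Notify,
}

struct Sender {
    shared: Arc<Shared>,
}

impl Drop for Sender {
    fn drop(&mut self) {
        // Mark the channel closed first, then wake any parked receiver.
        // Without this wakeup, a receiver blocked on `notified().await`
        // never observes that the sender is gone.
        self.shared.notify.notify_waiters();
    }
}
```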
Christian Schwarz
1cd333ce38 read_messages => batcher 2024-11-29 20:40:51 +01:00
Matthias van de Meent
973a8d2680 Fix timeout value used in XLogWaitForReplayOf (#9937)
The previous value assumed microsecond precision, while the timeout is actually
interpreted in milliseconds, causing replica backends to wait (potentially) many
hours for WAL replay without the expected progress reports in the logs.

This fixes the issue.

Reported-By: Alexander Lakhin <exclusion@gmail.com>

## Problem


https://github.com/neondatabase/postgres/pull/279#issuecomment-2507671817

The timeout value was configured on the assumption that it would be interpreted
as microseconds, when it is actually interpreted as milliseconds. That makes the
backend wait far longer than intended (2h46m40s) before it emits the "I'm
waiting for recovery" message. While we do have wait events configured
for this, it's not great to have stuck backends without clear logs, so
this fixes the timeout value in all our PostgreSQL branches.
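
For concreteness, the unit mix-up in plain Rust arithmetic (an illustration only; the actual fix lives in the PostgreSQL branches linked below, and the 10,000,000 figure is inferred from the 2h46m40s quoted above):

```rust
use std::time::Duration;

fn main() {
    // The raw timeout constant implied by the 2h46m40s figure above.
    let configured: u64 = 10_000_000;
    let intended = Duration::from_micros(configured); // what was assumed: 10 seconds
    let actual = Duration::from_millis(configured);   // what a millisecond-based timeout API sees
    println!("intended: {intended:?}"); // 10s
    println!("actual:   {actual:?}");   // 10000s, i.e. 2h46m40s
}
```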

## PG PRs

* PG14: https://github.com/neondatabase/postgres/pull/542
* PG15: https://github.com/neondatabase/postgres/pull/543
* PG16: https://github.com/neondatabase/postgres/pull/544
* PG17: https://github.com/neondatabase/postgres/pull/545
2024-11-29 19:10:26 +00:00
Gleb Novikov
c848f25ec2 Fixed fast_import pgbin in calling get_pg_version (#9933)
While working on https://github.com/neondatabase/cloud/pull/20795, I
discovered that fast_import was not working correctly.
2024-11-29 17:58:36 +00:00
Christian Schwarz
1cf5b1463f benchmark results on hetzner box
Benchmark results

All throughput runs: release-pg16, tablesize_mib = 50 MiB, readhead_buffer_size = 128. Test ids follow the pattern test_throughput[release-pg16-50-pipelining_configN-30-<effective_io_concurrency>-128-<workload> {<pipelining_config>}].

| config | workload | effective_io_concurrency | mode | execution | max_batch_size | time | pageserver_getpage_count | pageserver_vectored_get_count | compute_getpage_count | pageserver_cpu_seconds_total | batching_factor |
|---|---|---|---|---|---|---|---|---|---|---|---|
| config0 | not batchable | 1 | serial | - | - | 0.9880 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.8227 | 1.0000 |
| config1 | not batchable | 1 | pipelined | concurrent-futures | 1 | 1.0653 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.9743 | 1.0000 |
| config2 | not batchable | 1 | pipelined | tasks | 1 | 1.0309 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.8490 | 1.0000 |
| config3 | not batchable | 1 | pipelined | concurrent-futures | 32 | 1.0774 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.9896 | 1.0000 |
| config4 | not batchable | 1 | pipelined | tasks | 32 | 1.0229 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.8428 | 1.0000 |
| config5 | batchable | 100 | serial | - | - | 0.7338 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.7370 | 1.0000 |
| config6 | batchable | 100 | pipelined | concurrent-futures | 1 | 0.6314 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.7970 | 1.0000 |
| config7 | batchable | 100 | pipelined | tasks | 1 | 0.7640 | 6,403.0000 | 6,403.0000 | 6,403.0000 | 0.7692 | 1.0000 |
| config8 | batchable | 100 | pipelined | concurrent-futures | 2 | 0.4609 | 6,402.7188 | 3,206.7344 | 6,402.7188 | 0.5242 | 1.9966 |
| config9 | batchable | 100 | pipelined | tasks | 2 | 0.4988 | 6,402.9500 | 3,206.9667 | 6,402.9500 | 0.4975 | 1.9966 |
| config10 | batchable | 100 | pipelined | concurrent-futures | 4 | 0.3504 | 6,402.0824 | 1,651.0941 | 6,402.0824 | 0.3765 | 3.8775 |
| config11 | batchable | 100 | pipelined | tasks | 4 | 0.3973 | 6,402.3600 | 1,651.3733 | 6,402.3600 | 0.3847 | 3.8770 |
| config12 | batchable | 100 | pipelined | concurrent-futures | 8 | 0.3140 | 6,401.8632 | 876.8737 | 6,401.8632 | 0.3249 | 7.3008 |
| config13 | batchable | 100 | pipelined | tasks | 8 | 0.3275 | 6,401.9451 | 876.9560 | 6,401.9451 | 0.3145 | 7.3002 |
| config14 | batchable | 100 | pipelined | concurrent-futures | 16 | 0.2905 | 6,401.7184 | 490.7282 | 6,401.7184 | 0.2897 | 13.0453 |
| config15 | batchable | 100 | pipelined | tasks | 16 | 0.3148 | 6,401.8632 | 490.8737 | 6,401.8632 | 0.3002 | 13.0418 |
| config16 | batchable | 100 | pipelined | concurrent-futures | 32 | 0.2587 | 6,401.5259 | 297.5345 | 6,401.5259 | 0.2539 | 21.5152 |
| config17 | batchable | 100 | pipelined | tasks | 32 | 0.2804 | 6,401.6698 | 297.6792 | 6,401.6698 | 0.2674 | 21.5053 |

Latency results (test ids test_latency[release-pg16-pipelining_configN-{<pipelining_config>}]; all values in ms):

| config | mode | execution | max_batch_size | latency_mean | p95 | p99 | p99.9 | p99.99 |
|---|---|---|---|---|---|---|---|---|
| config0 | serial | - | - | 0.128 | 0.153 | 0.169 | 0.257 | 0.588 |
| config1 | pipelined | concurrent-futures | 1 | 0.128 | 0.153 | 0.169 | 0.283 | 0.556 |
| config2 | pipelined | tasks | 1 | 0.148 | 0.178 | 0.193 | 0.308 | 0.632 |
| config3 | pipelined | concurrent-futures | 32 | 0.126 | 0.150 | 0.173 | 0.268 | 0.631 |
| config4 | pipelined | tasks | 32 | 0.130 | 0.150 | 0.165 | 0.267 | 0.530 |
2024-11-29 17:42:50 +01:00
Christian Schwarz
2cab051921 fix escaping of lfc path (exposed by the benchmark) 2024-11-29 17:25:54 +01:00
Christian Schwarz
9b65b268ed stop Box'ing stuff & clean up the passing-through of errors (remove enum Batch) 2024-11-29 16:14:03 +01:00
Christian Schwarz
53e18b24fc less repetitive match arms; https://github.com/neondatabase/neon/pull/9851#discussion_r1860535860 2024-11-29 16:12:19 +01:00
John Spray
d5624cc505 pageserver: download small objects using a smaller timeout (#9938)
## Problem

It appears that the Azure storage API tends to hang TCP connections more
than S3 does.

Currently we use a 2 minute timeout for all downloads. This is large
because some of the objects we download are large. However, waiting 2
minutes for something like a manifest download on tenant attach is
problematic: when someone is doing a "create tenant, create timeline"
workflow, 2 minutes is long enough that they may reasonably give up on
creating that timeline.

Rather than propagate oversized timeouts further up the stack, we should
use a different timeout for objects that we expect to be small.

Closes: https://github.com/neondatabase/neon/issues/9836

## Summary of changes

- Add a `small_timeout` configuration attribute to remote storage,
defaulting to 30 seconds (still a very generous period to do something
like download an index)
- Add a `DownloadKind` parameter to `DownloadOpts`, so that callers can
indicate whether they expect the object to be small or large.
- In the Azure client, use the small timeout for HEAD requests, and for GET
requests if `DownloadKind::Small` is used.
- Use `DownloadKind::Small` for manifest, index, and heatmap downloads (a
rough sketch of the resulting shape is included below).

This PR intentionally does not make the equivalent change to the S3
client, to reduce the blast radius in case this has unexpected consequences
(we could accomplish the same thing by editing lots of configs, but just
skipping the code change is simpler for now).
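
A rough sketch of the shape described in the summary above, with approximate names (`Timeouts` and `for_download` are illustrative, not the exact remote_storage API):

```rust
use std::time::Duration;

#[derive(Clone, Copy)]
pub enum DownloadKind {
    Small, // indices, manifests, heatmaps
    Large, // layer files
}

pub struct DownloadOpts {
    pub kind: DownloadKind,
    // byte range, etag, etc. elided
}

pub struct Timeouts {
    pub timeout: Duration,       // e.g. 2 minutes for large objects
    pub small_timeout: Duration, // e.g. 30 seconds for small objects
}

impl Timeouts {
    // Pick the per-request timeout based on the expected object size.
    pub fn for_download(&self, opts: &DownloadOpts) -> Duration {
        match opts.kind {
            DownloadKind::Small => self.small_timeout,
            DownloadKind::Large => self.timeout,
        }
    }
}
```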
2024-11-29 15:11:44 +00:00
Christian Schwarz
0d28084985 merge brought back test_pageserver_getpage_merge.py 2024-11-29 15:23:47 +01:00
Alexey Kondratov
538e2312a6 feat(compute_ctl): Always set application_name (#9934)
## Problem

It was not always possible to judge what exactly some `cloud_admin`
connections were doing because we didn't consistently set
`application_name` everywhere.

## Summary of changes

Unify the way we connect to Postgres:
1. Switch to building configs everywhere
2. Always set `application_name` and make naming consistent (sketched below)

Follow-up for #9919
Part of neondatabase/cloud#20948
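
A hypothetical sketch of point 2, assuming a tokio_postgres-style client (compute_ctl's actual helpers may differ): build the connection config in one place and always set `application_name`.

```rust
// Illustrative only; the function name and connection parameters are assumptions.
fn admin_conn_config(host: &str, app_name: &str) -> tokio_postgres::Config {
    let mut cfg = tokio_postgres::Config::new();
    cfg.host(host)
        .port(5432)
        .user("cloud_admin")
        .dbname("postgres")
        // Every cloud_admin connection identifies itself, e.g. "compute_ctl:migrations".
        .application_name(app_name);
    cfg
}
```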
2024-11-29 13:55:56 +00:00
Erik Grinaker
a6073b5013 safekeeper: use jemalloc (#9780)
## Problem

To add Safekeeper heap profiling in #9778, we need to switch to an
allocator that supports it. Pageserver and proxy already use jemalloc.

Touches #9534.

## Summary of changes

Use jemalloc in Safekeeper.
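
For reference, the usual way a Rust binary opts into jemalloc, assuming the tikv-jemallocator crate (a sketch; the commit itself only says jemalloc is used):

```rust
use tikv_jemallocator::Jemalloc;

// Route all heap allocations in this binary through jemalloc.
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;

fn main() {
    // safekeeper startup ...
}
```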
2024-11-29 13:38:04 +00:00
John Spray
ea3798e3b3 storage controller: use proper ScheduleContext when evacuating a node (#9908)
## Problem

When picking locations for a shard, we should use a ScheduleContext that
includes all the other shards in the tenant, so that we apply proper
anti-affinity between shards. If we don't do this, then it can lead to
unstable scheduling, where we place a shard somewhere that the optimizer
will then immediately move it away from.

We didn't always do this, because it was a bit awkward to accumulate the
context for a tenant rather than just walking tenants.

This was a TODO in `handle_node_availability_transition`:
```
                        // TODO: populate a ScheduleContext including all shards in the same tenant_id (only matters
                        // for tenants without secondary locations: if they have a secondary location, then this
                        // schedule() call is just promoting an existing secondary)
```

This is a precursor to https://github.com/neondatabase/neon/issues/8264,
where the current imperfect scheduling during node evacuation hampers
testing.

## Summary of changes

- Add an iterator type that yields each shard along with a
`ScheduleContext` that includes all the other shards from the same tenant
(a rough sketch of the idea follows below)
- Use the iterator to replace hand-crafted logic in `optimize_all_plan`
(functionally identical)
- Use the iterator in `handle_node_availability_transition` to apply
proper anti-affinity during node evacuation.
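
A loose sketch of the iterator idea with simplified types (not the storage controller's actual ones): each shard is yielded together with a context that has already seen every sibling shard of the same tenant, so placement can apply anti-affinity between them.

```rust
use std::collections::BTreeMap;

#[derive(Default, Clone)]
struct ScheduleContext {
    // Nodes already used by sibling shards of the same tenant.
    nodes_used_by_siblings: Vec<u64>,
}

struct Shard {
    tenant_id: u128,
    attached_to: Option<u64>,
}

/// Yield each shard with a context built from *all* shards of its tenant,
/// rather than a context accumulated while walking shards one by one.
fn shards_with_context<'a>(
    shards: &'a [Shard],
) -> impl Iterator<Item = (&'a Shard, ScheduleContext)> + 'a {
    let mut by_tenant: BTreeMap<u128, ScheduleContext> = BTreeMap::new();
    for s in shards {
        let ctx = by_tenant.entry(s.tenant_id).or_default();
        if let Some(node) = s.attached_to {
            ctx.nodes_used_by_siblings.push(node);
        }
    }
    shards.iter().map(move |s| (s, by_tenant[&s.tenant_id].clone()))
}
```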
2024-11-29 13:27:49 +00:00
Christian Schwarz
199a4bd6c1 Merge remote-tracking branch 'origin/main' into problame/batching-sidecar-task 2024-11-29 14:26:37 +01:00
Christian Schwarz
bab3dd009d Merge remote-tracking branch 'origin/main' into problame/batching-sidecar-task 2024-11-29 14:19:06 +01:00
Christian Schwarz
90ef03c3b5 benchmarks on hetzner box
--------------------------------------------------------------------------- Benchmark results ---------------------------------------------------------------------------
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].pipelining_config.mode: serial
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].counters.time: 0.8920
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 0.7482
test_throughput[release-pg16-50-pipelining_config0-30-1-128-not batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.9038
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.8488
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.9325
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.7834
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.9282
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.8647
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.9148
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.7734
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].pipelining_config.mode: serial
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].counters.time: 0.6814
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 0.6663
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.6394
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.7907
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.7021
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.6926
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.4521
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,402.6818
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 3,206.6970
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,402.6818
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.4967
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.9967
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.4691
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,402.7619
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 3,206.7778
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,402.7619
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.4438
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 1.9966
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.3613
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,402.1585
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 1,651.1707
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,402.1585
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3716
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 3.8773
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.3925
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,402.3289
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 1,651.3421
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,402.3289
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3608
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 3.8770
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.3002
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.7879
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 876.7980
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.7879
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2960
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 7.3013
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.3337
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.9888
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 877.0112
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.9888
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3044
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 7.2998
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2844
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.6857
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 490.6952
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.6857
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2677
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 13.0462
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.2959
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.7525
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 490.7624
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.7525
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2654
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 13.0445
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.execution: concurrent-futures
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2662
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.5804
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 297.5893
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.5804
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2445
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.5115
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.execution: tasks
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].pipelining_config.mode: pipelined
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.time: 0.2798
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_getpage_count: 6,401.6542
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_vectored_get_count: 297.6636
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.6542
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2504
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.5063
test_latency[release-pg16-pipelining_config0-{'mode': 'serial'}].latency_mean: 0.123 ms
test_latency[release-pg16-pipelining_config0-{'mode': 'serial'}].latency_percentiles.p95: 0.159 ms
test_latency[release-pg16-pipelining_config0-{'mode': 'serial'}].latency_percentiles.p99: 0.170 ms
test_latency[release-pg16-pipelining_config0-{'mode': 'serial'}].latency_percentiles.p99.9: 0.306 ms
test_latency[release-pg16-pipelining_config0-{'mode': 'serial'}].latency_percentiles.p99.99: 0.579 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.122 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.163 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.181 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.290 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.623 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].latency_mean: 0.143 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p95: 0.175 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99: 0.188 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.322 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.636 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.122 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.161 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.175 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.281 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.605 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].latency_mean: 0.117 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p95: 0.132 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99: 0.154 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.402 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'execution': 'tasks', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.569 ms
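For reference, the perfmetric.batching_factor values reported above match the ratio pageserver_getpage_count / pageserver_vectored_get_count in every run. A minimal Python sketch, assuming that derivation (the helper name is illustrative, not the benchmark's actual code), reproduces the reported value for the batchable max_batch_size=32, concurrent-futures run:

```python
# Minimal sketch, assuming batching_factor is derived as
# getpage_count / vectored_get_count (consistent with all rows above).
def batching_factor(pageserver_getpage_count: float,
                    pageserver_vectored_get_count: float) -> float:
    """Average number of getpage requests served per vectored read."""
    return pageserver_getpage_count / pageserver_vectored_get_count

# Counter values taken from the max_batch_size=32, concurrent-futures run above.
assert abs(batching_factor(6401.5804, 297.5893) - 21.5115) < 1e-3
```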
2024-11-29 13:50:28 +01:00
Christian Schwarz
dfcbb139fb the None configuration in the benchmark would use the default instead of the serial configuration; fix that
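A hypothetical sketch of the fix this commit describes, under the assumption that the benchmark maps the None parametrization to an explicit serial pipelining config instead of leaving the pageserver default in place (names are illustrative, not the repository's actual code):

```python
# Hypothetical: when the test is parametrized with None, pin the pageserver
# to serial mode explicitly rather than inheriting whatever the default is.
from typing import Optional


def effective_pipelining_config(pipelining_config: Optional[dict]) -> dict:
    return pipelining_config if pipelining_config is not None else {"mode": "serial"}


assert effective_pipelining_config(None) == {"mode": "serial"}
```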
2024-11-29 13:35:24 +01:00
Christian Schwarz
27c72e4ff3 benchmark on hetzner box
--------------------------------------------------------------------------- Benchmark results ---------------------------------------------------------------------------
test_throughput[release-pg16-50-None-30-1-128-not batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-1-128-not batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-1-128-not batchable None].effective_io_concurrency: 1
test_throughput[release-pg16-50-None-30-1-128-not batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.time: 0.8864
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_cpu_seconds_total: 0.8297
test_throughput[release-pg16-50-None-30-1-128-not batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.9974
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.9223
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9171
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7762
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.8903
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8303
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9611
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.8074
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-None-30-100-128-batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-100-128-batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-100-128-batchable None].effective_io_concurrency: 100
test_throughput[release-pg16-50-None-30-100-128-batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.time: 0.2695
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_getpage_count: 6,401.5946
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_vectored_get_count: 297.5946
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.compute_getpage_count: 6,401.5946
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_cpu_seconds_total: 0.2469
test_throughput[release-pg16-50-None-30-100-128-batchable None].perfmetric.batching_factor: 21.5111
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.6611
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8180
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.7554
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7308
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.4535
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.6364
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 3,206.6515
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.6364
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.4974
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.9967
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.4630
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.7656
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 3,206.7812
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.7656
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.4397
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.9966
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3465
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.0581
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 1,651.0698
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.0581
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3615
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 3.8775
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3628
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.1585
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 1,651.1707
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.1585
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3394
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 3.8773
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2961
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.7525
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 876.7525
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.7525
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2923
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 7.3017
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3317
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.9667
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 876.9778
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.9667
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3008
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 7.3000
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2885
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.6893
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 490.7087
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.6893
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2701
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 13.0458
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3042
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.8061
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 490.8163
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.8061
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2699
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 13.0432
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2704
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.6091
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 297.6182
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.6091
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2476
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 21.5095
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.2706
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 297.5946
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2425
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 21.5111
test_latency[release-pg16-None-None].latency_mean: 0.136 ms
test_latency[release-pg16-None-None].latency_percentiles.p95: 0.172 ms
test_latency[release-pg16-None-None].latency_percentiles.p99: 0.194 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.9: 0.319 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.99: 0.637 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.121 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.150 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.168 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.317 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.607 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.124 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.161 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.170 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.294 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.592 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.122 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.157 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.170 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.267 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.606 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.125 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.161 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.170 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.287 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.610 ms
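
A note on reading these counters (inferred from the rows above, not taken
from the benchmark source): the batching factor appears to be the ratio of
getpage requests (pageserver_getpage_count) to vectored get executions
(pageserver_vectored_get_count), i.e. how many getpage requests are served
per batch on average.

```
/// Inferred relationship, checked against the max_batch_size=32,
/// concurrent-futures row above: 6,401.6091 / 297.6182 ≈ 21.5095.
fn batching_factor(getpage_count: f64, vectored_get_count: f64) -> f64 {
    getpage_count / vectored_get_count
}

fn main() {
    println!("{:.4}", batching_factor(6401.6091, 297.6182));
}
```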
2024-11-29 12:08:05 +01:00
Conrad Ludgate
1d642d6a57 chore(proxy): vendor a subset of rust-postgres (#9930)
Our rust-postgres fork is getting messy, mostly because proxy wants more
control over the raw protocol than tokio-postgres provides. As such,
it's diverging more and more from upstream. Storage and compute also
make use of rust-postgres, but in a more conventional way, so they don't
need our invasive changes.

Idea:
* proxy maintains its own subset
* other teams use a minimal patch set against upstream rust-postgres

Reviewing this code will be difficult. To implement it, I:
1. Copied tokio-postgres, postgres-protocol and postgres-types from
00940fcdb5
2. Updated their package names with the `2` suffix to make them compile
in the workspace.
3. Updated proxy to use those packages
4. Copied in the code from tokio-postgres-rustls 0.13 (with some patches
applied https://github.com/jbg/tokio-postgres-rustls/pull/32
https://github.com/jbg/tokio-postgres-rustls/pull/33)
5. Removed as much dead code as I could find in the vendored libraries
6. Updated the tokio-postgres-rustls code to use our existing channel
binding implementation
2024-11-29 11:08:01 +00:00
Christian Schwarz
9a5611a5ef merge reader&batcher stages, update docs 2024-11-29 11:39:16 +01:00
Erik Grinaker
3ffe6de0b9 test_runner/performance: add logical message ingest benchmark (#9749)
Adds a benchmark for logical message WAL ingestion throughput
end-to-end. Logical messages are essentially no-ops, and thus ignored by
the Pageserver.

Example results from my MacBook, with fsync enabled:

```
postgres_ingest: 14.445 s
safekeeper_ingest: 29.948 s
pageserver_ingest: 30.013 s
pageserver_recover_ingest: 8.633 s
wal_written: 10,340 MB
message_count: 1310720 messages
postgres_throughput: 715 MB/s
safekeeper_throughput: 345 MB/s
pageserver_throughput: 344 MB/s
pageserver_recover_throughput: 1197 MB/s
```

See
https://github.com/neondatabase/neon/issues/9642#issuecomment-2475995205
for the ongoing analysis.

Touches #9642.
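
For reference, a minimal sketch of the shape of this workload (an
assumption for illustration, not the benchmark's actual code under
test_runner/performance): each `pg_logical_emit_message()` call produces
a WAL record that carries a payload but modifies no pages. Payload size,
message count, and connection string are illustrative only.

```
use tokio_postgres::NoTls;

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    let (client, connection) =
        tokio_postgres::connect("host=localhost user=postgres", NoTls).await?;
    // Drive the connection on a background task.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {e}");
        }
    });

    // Emit non-transactional logical messages with an ~8 KiB payload each;
    // they generate WAL but no page changes for the pageserver to apply.
    let payload = "x".repeat(8 * 1024);
    for _ in 0..1024 {
        client
            .execute(
                "SELECT pg_logical_emit_message(false, 'bench', $1::text)",
                &[&payload],
            )
            .await?;
    }
    Ok(())
}
```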
2024-11-29 09:40:08 +00:00
Christian Schwarz
f44bfcc7f4 benchmark on hetzner runner
-------------------------------------------------------------------------------------------------------------------- Benchmark results ---------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-None-30-1-128-not batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-1-128-not batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-1-128-not batchable None].effective_io_concurrency: 1
test_throughput[release-pg16-50-None-30-1-128-not batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.time: 0.8905
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_cpu_seconds_total: 0.8585
test_throughput[release-pg16-50-None-30-1-128-not batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.8965
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8694
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9287
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7891
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.8859
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8582
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9158
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7703
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-None-30-100-128-batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-100-128-batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-100-128-batchable None].effective_io_concurrency: 100
test_throughput[release-pg16-50-None-30-100-128-batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.time: 0.2526
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_getpage_count: 6,401.5000
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_vectored_get_count: 307.8475
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.compute_getpage_count: 6,401.5000
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_cpu_seconds_total: 0.2999
test_throughput[release-pg16-50-None-30-100-128-batchable None].perfmetric.batching_factor: 20.7944
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.6182
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.7483
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.6925
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.6863
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.4250
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.5286
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 3,207.5714
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.5286
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.5241
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.9961
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.4981
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.7500
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 3,300.7500
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.7500
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.4903
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.9398
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3438
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.0345
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 1,660.0230
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.0345
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.4123
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 3.8566
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3766
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.2405
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 1,752.2405
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.2405
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3699
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 3.6537
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3048
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.8061
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 886.7755
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.8061
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3583
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 7.2192
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3517
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.0824
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 978.0941
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.0824
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3421
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 6.5455
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2682
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 500.5495
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3187
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 12.7891
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3163
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.8830
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 591.5851
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.8830
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3117
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 10.8216
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2504
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.4874
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 307.8067
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.4874
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2972
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 20.7971
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.2899
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.7184
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 398.4466
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.7184
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2901
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 16.0667
test_latency[release-pg16-None-None].latency_mean: 0.138 ms
test_latency[release-pg16-None-None].latency_percentiles.p95: 0.173 ms
test_latency[release-pg16-None-None].latency_percentiles.p99: 0.193 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.9: 0.302 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.99: 0.667 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.116 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.137 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.165 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.406 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.560 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.140 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.172 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.189 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.315 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.705 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.133 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.170 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.192 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.337 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.653 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.128 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.166 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.176 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.284 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.616 ms
2024-11-28 22:40:53 +01:00
Alexey Kondratov
42fb3c4d30 fix(compute_ctl): Allow usage of DB names with whitespaces (#9919)
## Problem

We used `set_path()` to replace the database name in the connection
string. It automatically applies URL-safe encoding if the path is not
already encoded, but it does so per the URL standard, which assumes
that tabs can be safely removed from the path without changing the
meaning of the URL. See, e.g.,
https://url.spec.whatwg.org/#concept-basic-url-parser. It also breaks
for DBs whose names already contain %-encoded sequences like `%20`:
those are kept intact, even though they should actually be escaped
again.

Yet the tab assumption does not hold for Postgres, where it's completely
valid to have trailing tabs in a database name.

I think this is the PR that caused this regression
https://github.com/neondatabase/neon/pull/9717, as it switched from
`postgres::config::Config` back to `set_path()`.

This was already fixed a while ago [1], btw; I just hadn't added a test
back then to catch this regression :(

## Summary of changes

This commit changes the code back to use
`postgres/tokio_postgres::Config` everywhere.

While at it, a few related changes, since I had to touch this code anyway:
1. Bump some logging from `debug` to `info` in the spec apply path. We
do not use `debug` in prod, and it was tricky to understand what was
going on with this bug in prod.
2. Refactor the configuration concurrency calculation code so it is
reusable, but still keep `1` in the reconfiguration case. The database
can be actively used at that moment, so we cannot guarantee that there
will be enough spare connection slots, and the underlying code won't
handle connection errors properly.
3. Simplify the installed extensions code. It was spawning a blocking
task inside an async function, which doesn't make much sense. Instead,
have a plain sync function and call it with `spawn_blocking` in the API
code -- the only place where we need it to be async.
4. Add a Python regression test to cover this and related problems in
the future. Also add more extensive testing of the schema dump and the
DB and role listing APIs.

[1]:
4d1e48f3b9
[2]:
https://www.postgresql.org/message-id/flat/20151023003445.931.91267%40wrigleys.postgresql.org

Resolves neondatabase/cloud#20869
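
For illustration, a minimal sketch of the approach, assuming plain
`tokio_postgres::Config` usage (the helper and its parameters are
hypothetical, not the actual compute_ctl code): the database name is
passed to `dbname()` verbatim, so whitespace, tabs, and literal
`%`-sequences are not re-encoded the way a URL path would be.

```rust
// Minimal sketch (hypothetical helper): build the connection config with
// tokio_postgres::Config instead of rewriting the URL path, so DB names
// containing whitespace, tabs, or a literal "%20" are passed through
// verbatim rather than URL-(re)encoded.
use tokio_postgres::Config;

fn connection_config(host: &str, port: u16, user: &str, dbname: &str) -> Config {
    let mut config = Config::new();
    config
        .host(host)
        .port(port)
        .user(user)
        // stored as-is: "db with\ttab" stays exactly "db with\ttab"
        .dbname(dbname);
    config
}
```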
2024-11-28 21:38:30 +00:00
Christian Schwarz
a2a3613185 reintroduce task-based execution 2024-11-28 20:50:06 +01:00
Christian Schwarz
6bd39f95f5 rn benchmark on hetzner runner
-------------------------------------------------------------------------------------------------------------------- Benchmark results ---------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-None-30-1-128-not batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-1-128-not batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-1-128-not batchable None].effective_io_concurrency: 1
test_throughput[release-pg16-50-None-30-1-128-not batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.time: 0.8905
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_cpu_seconds_total: 0.8633
test_throughput[release-pg16-50-None-30-1-128-not batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].counters.time: 0.9195
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].counters.pageserver_cpu_seconds_total: 0.8925
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].counters.time: 0.8724
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].counters.pageserver_cpu_seconds_total: 0.8406
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 32}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-None-30-100-128-batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-100-128-batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-100-128-batchable None].effective_io_concurrency: 100
test_throughput[release-pg16-50-None-30-100-128-batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.time: 0.2576
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_getpage_count: 6,401.5259
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_vectored_get_count: 307.8534
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.compute_getpage_count: 6,401.5259
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_cpu_seconds_total: 0.3043
test_throughput[release-pg16-50-None-30-100-128-batchable None].perfmetric.batching_factor: 20.7941
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].counters.time: 0.6187
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].counters.pageserver_cpu_seconds_total: 0.7473
test_throughput[release-pg16-50-pipelining_config4-30-100-128-batchable {'max_batch_size': 1}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].counters.time: 0.4419
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].counters.pageserver_getpage_count: 6,402.6418
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].counters.pageserver_vectored_get_count: 3,207.7015
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].counters.compute_getpage_count: 6,402.6418
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].counters.pageserver_cpu_seconds_total: 0.5391
test_throughput[release-pg16-50-pipelining_config5-30-100-128-batchable {'max_batch_size': 2}].perfmetric.batching_factor: 1.9960
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].counters.time: 0.3569
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].counters.pageserver_getpage_count: 6,402.1071
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].counters.pageserver_vectored_get_count: 1,660.0952
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].counters.compute_getpage_count: 6,402.1071
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].counters.pageserver_cpu_seconds_total: 0.4244
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 4}].perfmetric.batching_factor: 3.8565
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].counters.time: 0.2977
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].counters.pageserver_getpage_count: 6,401.7700
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].counters.pageserver_vectored_get_count: 886.6900
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].counters.compute_getpage_count: 6,401.7700
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].counters.pageserver_cpu_seconds_total: 0.3511
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 8}].perfmetric.batching_factor: 7.2199
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].counters.time: 0.2697
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].counters.pageserver_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].counters.pageserver_vectored_get_count: 500.5766
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].counters.compute_getpage_count: 6,401.5946
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].counters.pageserver_cpu_seconds_total: 0.3195
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 16}].perfmetric.batching_factor: 12.7884
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].counters.time: 0.2548
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].counters.pageserver_getpage_count: 6,401.5128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].counters.pageserver_vectored_get_count: 307.7692
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].counters.compute_getpage_count: 6,401.5128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].counters.pageserver_cpu_seconds_total: 0.3015
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 32}].perfmetric.batching_factor: 20.7997
test_latency[release-pg16-None-None].latency_mean: 0.127 ms
test_latency[release-pg16-None-None].latency_percentiles.p95: 0.166 ms
test_latency[release-pg16-None-None].latency_percentiles.p99: 0.187 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.9: 0.292 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.99: 0.624 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1}].latency_mean: 0.139 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1}].latency_percentiles.p95: 0.175 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1}].latency_percentiles.p99: 0.200 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1}].latency_percentiles.p99.9: 0.444 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1}].latency_percentiles.p99.99: 0.658 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 32}].latency_mean: 0.119 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 32}].latency_percentiles.p95: 0.155 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 32}].latency_percentiles.p99: 0.172 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 32}].latency_percentiles.p99.9: 0.267 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 32}].latency_percentiles.p99.99: 0.587 ms
2024-11-28 20:24:01 +01:00
Christian Schwarz
07358dea89 converge on approach that pushes read Result through pipeline 2024-11-28 20:06:15 +01:00
Alexander Bayandin
e04dd3be0b test_runner: rerun all failed tests (#9917)
## Problem

Currently, we rerun only known flaky tests. This approach was chosen to
reduce the number of failures that go unnoticed (by forcing people to
take a look at failed tests and rerun the job manually), but it has some
drawbacks:
- In PRs, people tend to push new changes without checking the failed
tests (that's ok)
- On main, tests are just restarted without checking (understandable)
- Parametrised tests become flaky one by one, i.e. if `test[1]` is
flaky, `test[2]` is not marked as flaky automatically (even though it
may well be flaky too).

I suggest rerunning all failed tests to increase the stability of GitHub
jobs and using the Grafana Dashboard with flaky tests for deeper
analysis.

## Summary of changes
- Rerun all failed tests, at most twice
2024-11-28 19:02:57 +00:00
Vlad Lazar
eb520a14ce pageserver: return correct LSN for interpreted proto keep alive responses (#9928)
## Problem

For the interpreted protocol, the pageserver was not returning the
correct LSN in replies to keep-alive requests, because the interpreted
protocol arm was not updating `last_rec_lsn`.

## Summary of changes

* Return correct LSN in keep-alive responses
* Fix shard field in wal sender traces
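
A minimal sketch of the shape of the fix, with hypothetical types (the
real pageserver code differs): the interpreted-protocol arm advances
`last_rec_lsn` as record batches are applied, and keep-alive replies
read that value.

```rust
// Hypothetical types; illustrates the shape of the fix only.
struct WalIngestState {
    last_rec_lsn: u64, // end LSN of the last record batch applied
}

impl WalIngestState {
    // The interpreted-protocol arm must do this too; before the fix it
    // didn't, so keep-alive replies reported a stale LSN.
    fn on_interpreted_batch_applied(&mut self, batch_end_lsn: u64) {
        self.last_rec_lsn = batch_end_lsn;
    }

    fn keepalive_reply_lsn(&self) -> u64 {
        self.last_rec_lsn
    }
}
```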
2024-11-28 17:38:47 +00:00
Arpad Müller
eb5d832e6f Update rust to 1.83.0, also update cargo adjacent tools (#9926)
We keep up the practice of keeping the compiler up to date, pointing to
the latest release. Many other projects in the Rust ecosystem do the
same.

[Release notes](https://releases.rs/docs/1.83.0/).

Also update `cargo-hakari`, `cargo-deny`, `cargo-hack` and
`cargo-nextest` to their latest versions.

Prior update was in #9445.
2024-11-28 15:49:30 +00:00
Erik Grinaker
70780e310c Makefile: build pg_visibility (#9922)
Build the `pg_visibility` extension for use with `neon_local`. This is
useful to inspect the visibility map for debugging.

Touches #9914.
2024-11-28 15:48:18 +00:00
Vlad Lazar
e82f7f0dfc remote_storage/abs: count 404 and 304 for get as ok for metrics (#9912)
## Problem

We currently see elevated levels of errors for GetBlob requests. This is
because 404 and 304 are counted as errors for metric reporting.

## Summary of Changes

Bring the implementation in line with the S3 client and treat 404 and
304 responses as ok for metric purposes.

Related: https://github.com/neondatabase/cloud/issues/20666
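
As a sketch of the classification rule (hypothetical helper, not the
actual `remote_storage` code): GetBlob responses with status 404 (not
found) or 304 (not modified) are counted as ok for metrics, matching the
S3 client.

```rust
// Hypothetical helper illustrating the metric classification only.
fn get_blob_metric_outcome(http_status: u16) -> &'static str {
    match http_status {
        // success, "not modified", and "not found" are expected outcomes
        200..=299 | 304 | 404 => "ok",
        _ => "error",
    }
}
```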
2024-11-28 10:11:08 +00:00
Ivan Efremov
8173dc600a proxy: spawn cancellation checks in the background (#9918)
## Problem
For cancellation, the client connection previously stayed open for the
duration of all the cancel checks.
## Summary of changes
Spawn the cancellation checks in the background and close the connection
immediately.
Use a task tracker for the cancellation checks.
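
A minimal sketch of the idea, assuming `tokio_util::task::TaskTracker`
(function names are hypothetical; the actual proxy code differs): the
check is spawned on the tracker and the handler returns immediately, so
the connection can be closed without waiting for the check to finish.

```rust
// Hypothetical shape; the real proxy code differs.
use tokio_util::task::TaskTracker;

async fn handle_cancel_request(tracker: &TaskTracker, cancel_key: u64) {
    // Spawn the check in the background; the tracker lets shutdown wait
    // for in-flight checks without keeping the client connection open.
    tracker.spawn(async move {
        run_cancellation_check(cancel_key).await;
    });
    // return immediately; the caller closes the connection
}

async fn run_cancellation_check(_cancel_key: u64) {
    // placeholder: verify the cancel key and forward the cancellation
}
```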
2024-11-28 06:32:22 +00:00
Erik Grinaker
da1daa2426 pageserver: only apply ClearVmBits on relevant shards (#9895)
# Problem

VM (visibility map) pages are stored and managed like any regular
relation page, in the VM fork of the main relation. They are also sharded like
other pages. Regular WAL writes to the VM pages (typically performed by
vacuum) are routed to the correct shard as usual. However, VM pages are
also updated via `ClearVmBits` metadata records emitted when main
relation pages are updated. These metadata records were sent to all
shards, like other metadata records. This had the following effects:

* On shards responsible for VM pages, the `ClearVmBits` applies as
expected.

* On shard 0, which knows about the VM relation and its size but doesn't
necessarily have any VM pages, the `ClearVmBits` writes may have been
applied without also having applied the explicit WAL writes to VM pages.

* If VM pages are spread across multiple shards (unlikely with 256MB
stripe size), all shards may have applied `ClearVmBits` if the pages
fall within their local view of the relation size, even for pages they
do not own.

* On other shards, this caused a relation size cache miss and a DbDir
and RelDir lookup before dropping the `ClearVmBits`. With many
relations, this could cause significant CPU overhead.

This is not believed to be a correctness problem, but this will be
verified in #9914.

Resolves #9855.

# Changes

Route `ClearVmBits` metadata records only to the shards responsible for
the VM pages.

Verification of the current VM handling and cleanup of incomplete VM
pages on shard 0 (and potentially elsewhere) is left as follow-up work.
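
A sketch of the routing rule with hypothetical types (the real code uses
the pageserver's key-to-shard mapping): the heap block number is mapped
to its VM block, and the `ClearVmBits` record is sent only to shards
that own that VM page.

```rust
// Hypothetical types; illustrates "send only to the owning shard".
struct Shard {
    index: u32,
    count: u32, // total number of shards, >= 1
}

impl Shard {
    // Stand-in ownership rule (round-robin striping) purely for
    // illustration; the real mapping goes through the key/shard layout.
    fn owns_vm_page(&self, vm_blkno: u32) -> bool {
        vm_blkno % self.count == self.index
    }
}

// Two visibility bits per heap block, ignoring the VM page header:
// one 8 KiB VM page covers roughly this many heap blocks.
const HEAPBLOCKS_PER_VM_PAGE: u32 = 8192 * 8 / 2;

fn clear_vm_bits_targets(heap_blkno: u32, shards: &[Shard]) -> Vec<u32> {
    let vm_blkno = heap_blkno / HEAPBLOCKS_PER_VM_PAGE;
    shards
        .iter()
        .filter(|shard| shard.owns_vm_page(vm_blkno))
        .map(|shard| shard.index)
        .collect()
}
```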
2024-11-27 19:44:24 +00:00
Alex Chi Z.
9e3cb75bc7 fix(pageserver): flush deletion queue in reload shutdown mode (#9884)
## Problem

close https://github.com/neondatabase/neon/issues/9859

## Summary of changes

Ensure that the deletion queue gets fully flushed (i.e., the deletion
lists get applied) during a graceful shutdown.

It is still possible that an incomplete shutdown leaves a deletion list
behind and causes a race upon the next startup, but we assume this is
unlikely to happen, and even if it does, the pageserver should already
be in a tainted state and the tenant should be moved elsewhere with a
new generation number.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-27 18:30:54 +00:00
Folke Behrens
5c41707bee proxy: promote two logs to error, fix multiline log (#9913)
* Promote two logs for mpsc send errors to error level. The channels
are unbounded, so there shouldn't be any send errors.
* Fix one multiline log from anyhow::Error. Use Debug instead of
Display.
2024-11-27 18:05:46 +00:00
Erik Grinaker
cc37fa0f33 pageserver: add metrics for unknown ClearVmBits pages (#9911)
## Problem

When ingesting implicit `ClearVmBits` operations, we silently drop the
writes if the relation or page is unknown. There are implicit
assumptions around VM pages wrt. explicit/implicit updates, sharding,
and relation sizes, and these assumptions may cause writes to be dropped
incorrectly. Adding a few metrics will allow us to investigate further
and tighten up the logic.

Touches #9855.

## Summary of changes

Add a `pageserver_wal_ingest_clear_vm_bits_unknown` metric to record
dropped `ClearVmBits` writes.

Also add comments clarifying the behavior of relation sizes on non-zero
shards.
2024-11-27 17:16:41 +00:00
Alex Chi Z.
23f5a27146 fix(storage-scrubber): valid layermap error degrades to warning (#9902)
The "valid layer map" assumption checked here is a stronger condition,
imposed by gc-compaction, than the actual definition of a valid layer
map. The system can work as long as there are no overlapping layers, so
we degrade this error into a warning.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-27 16:07:39 +00:00
Erik Grinaker
e4f437a354 pageserver: add relsize cache metrics (#9890)
## Problem

We don't have any observability for the relation size cache. We have
seen cache misses cause significant performance impact with high
relation counts.

Touches #9855.

## Summary of changes

Adds the following metrics:

* `pageserver_relsize_cache_entries`
* `pageserver_relsize_cache_hits`
* `pageserver_relsize_cache_misses`
* `pageserver_relsize_cache_misses_old`
2024-11-27 13:54:14 +00:00
Vlad Lazar
8fdf786217 pageserver: add tenant config override for wal receiver proto (#9888)
## Problem

The WAL receiver protocol can't be changed at tenant granularity.

## Summary of changes

Add a tenant-config-level override for the WAL receiver protocol.

## Links

Related: https://github.com/neondatabase/neon/issues/9336
Epic: https://github.com/neondatabase/neon/issues/9329
2024-11-27 13:46:23 +00:00
Vlad Lazar
9e0148de11 safekeeper: use protobuf for sending compressed records to pageserver (#9821)
## Problem

https://github.com/neondatabase/neon/pull/9746 lifted decoding and
interpretation of WAL to the safekeeper. This reduced the ingested
amount on the pageservers by around 10x for a tenant with 8 shards, but
doubled the ingested amount for single-sharded tenants.

Also, https://github.com/neondatabase/neon/pull/9746 uses bincode, which
doesn't support schema evolution. Technically the schema can be evolved,
but it's very cumbersome.

## Summary of changes

This patch set addresses both problems by adding protobuf support for
the interpreted WAL records, along with compression. Compressed protobuf
reduced the ingested amount by 100x in the 32-shard
`test_sharded_ingest` case (compared to the non-interpreted protocol).
For the 1-shard case the reduction is 5x.

Sister change to `rust-postgres` is
[here](https://github.com/neondatabase/rust-postgres/pull/33).

## Links

Related: https://github.com/neondatabase/neon/issues/9336
Epic: https://github.com/neondatabase/neon/issues/9329
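
A sketch of the encoding path under assumptions (prost for protobuf,
zstd for compression, made-up message fields; the real wire format
differs): records are serialized to the protobuf wire format, which
tolerates adding fields later, and the serialized bytes are compressed
before being sent to the pageserver.

```rust
// Assumed crates (prost, zstd) and made-up fields; shape only.
use prost::Message;

#[derive(Clone, PartialEq, Message)]
pub struct InterpretedWalRecordsProto {
    // Protobuf fields are tagged, so new fields can be added later
    // without breaking old readers (unlike the previous bincode format).
    #[prost(bytes = "vec", tag = "1")]
    pub records: Vec<u8>,
    #[prost(uint64, tag = "2")]
    pub next_record_lsn: u64,
}

fn encode_compressed(batch: &InterpretedWalRecordsProto) -> std::io::Result<Vec<u8>> {
    let wire = batch.encode_to_vec(); // protobuf wire format
    zstd::encode_all(&wire[..], 3)    // compress the serialized bytes
}
```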
2024-11-27 12:12:21 +00:00
Christian Schwarz
82e1fa3f83 WIP 2024-11-27 12:31:56 +01:00
Alexander Bayandin
7b41ee872e CI(pre-merge-checks): build only one build-tools-image (#9718)
## Problem

The `pre-merge-checks` workflow relies on the `build-tools` image.
If changes to the `build-tools` image have been merged into the main
branch since the last CI run for a PR (that also touches `build-tools`),
the image gets rebuilt during the merge queue run; otherwise, cached
images are used. Rebuilding the image adds approximately 10 minutes on
x86-64 and 20 minutes on arm64 to the process.

## Summary of changes
- Parametrise the `build-build-tools-image` job with arch and Debian version
- Run `pre-merge-checks` only on Debian 12 x86-64 image
2024-11-27 10:42:26 +00:00
Peter Bendel
277c33ba3f ingest benchmark: after effective_io_concurrency = 100 we can increase compute side parallelism (#9904)
## Problem

The ingest benchmark tests project migration to Neon, involving these
steps:
- COPY relation data
- create indexes
- create constraints

Previously we used only 4 copy jobs, 4 create-index jobs, and 7
maintenance workers. After increasing effective_io_concurrency on the
compute, we see that we can sustain more parallelism in the ingest
benchmark.

## Summary of changes

Increase copy jobs to 8, create-index jobs to 8, and maintenance workers
to 16.
2024-11-27 10:09:01 +00:00
Christian Schwarz
7fb3d95596 review & identified a cast that isn't handled, document that 2024-11-27 10:33:53 +01:00
Christian Schwarz
e0123c8a80 explain the pipeline cancellation story 2024-11-27 10:13:51 +01:00
Christian Schwarz
18ffaba975 fix pipeline cancellation 2024-11-26 20:44:36 +01:00
Tristan Partin
2b788cb53f Bump neon.logical_replication_max_snap_files default to 10000 (#9896)
This bump comes from a recommendation from Chi.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-11-26 17:49:37 +00:00
Christian Schwarz
41ddc6772c benchmark
non-package-mode-py3.10christian@neon-hetzner-dev-christian:[~/src/neon]: DEFAULT_PG_VERSION=16 BUILD_TYPE=release poetry run pytest --alluredir ~/tmp/alluredir --clean-alluredir 'test_runner/performance/pageserver/test_page_service_batching.py' --maxfail=1

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-None-30-1-128-not batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-1-128-not batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-1-128-not batchable None].effective_io_concurrency: 1
test_throughput[release-pg16-50-None-30-1-128-not batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.time: 0.9443
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-30-1-128-not batchable None].counters.pageserver_cpu_seconds_total: 0.9010
test_throughput[release-pg16-50-None-30-1-128-not batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.9273
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8844
test_throughput[release-pg16-50-pipelining_config1-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9105
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7669
test_throughput[release-pg16-50-pipelining_config2-30-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.8828
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8512
test_throughput[release-pg16-50-pipelining_config3-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.9431
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7971
test_throughput[release-pg16-50-pipelining_config4-30-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-None-30-100-128-batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-30-100-128-batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-30-100-128-batchable None].effective_io_concurrency: 100
test_throughput[release-pg16-50-None-30-100-128-batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.time: 0.2604
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_getpage_count: 6,401.5391
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_vectored_get_count: 307.7217
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.compute_getpage_count: 6,401.5391
test_throughput[release-pg16-50-None-30-100-128-batchable None].counters.pageserver_cpu_seconds_total: 0.3023
test_throughput[release-pg16-50-None-30-100-128-batchable None].perfmetric.batching_factor: 20.8030
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.6268
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.7596
test_throughput[release-pg16-50-pipelining_config6-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.6696
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.6684
test_throughput[release-pg16-50-pipelining_config7-30-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.4530
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.6515
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 3,207.7121
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.6515
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.5427
test_throughput[release-pg16-50-pipelining_config8-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.9960
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.5434
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 3,301.0000
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.5318
test_throughput[release-pg16-50-pipelining_config9-30-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.9397
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3455
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.0581
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 1,660.0349
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.0581
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.4078
test_throughput[release-pg16-50-pipelining_config10-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 3.8566
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3785
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.2785
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 1,752.2785
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.2785
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3705
test_throughput[release-pg16-50-pipelining_config11-30-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 3.6537
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3063
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.8247
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 886.7629
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.8247
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3537
test_throughput[release-pg16-50-pipelining_config12-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 7.2193
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3365
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.9888
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 978.0000
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.9888
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3256
test_throughput[release-pg16-50-pipelining_config13-30-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 6.5460
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2730
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.6239
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 500.2936
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.6239
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3162
test_throughput[release-pg16-50-pipelining_config14-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 12.7957
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3091
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.8438
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 591.5312
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.8438
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3022
test_throughput[release-pg16-50-pipelining_config15-30-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 10.8225
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2609
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.5391
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 307.6174
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.5391
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3014
test_throughput[release-pg16-50-pipelining_config16-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 20.8101
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.2910
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.7184
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 398.4660
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.7184
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2903
test_throughput[release-pg16-50-pipelining_config17-30-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 16.0659
test_latency[release-pg16-None-None].latency_mean: 0.120 ms
test_latency[release-pg16-None-None].latency_percentiles.p95: 0.151 ms
test_latency[release-pg16-None-None].latency_percentiles.p99: 0.172 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.9: 0.276 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.99: 0.609 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.128 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.167 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.186 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.294 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.642 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.136 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.170 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.185 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.294 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.623 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.117 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.156 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.174 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.279 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.598 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.121 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.141 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.156 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.256 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.518 ms
2024-11-26 13:56:46 +01:00
Christian Schwarz
a23abb2cc0 adopt spsc_fold 2024-11-26 13:30:40 +01:00
Christian Schwarz
9bf2618c45 implement spsc_fold 2024-11-26 13:27:28 +01:00
Peter Bendel
13feda0669 track how much time the flush loop is stalled waiting for uploads (#9885)
## Problem

We don't know how much time the pageserver (PS) loses during ingest while
waiting for remote storage uploads in the flush frozen layer loop.
Also, we don't know how many remote storage requests get a permit
without waiting (i.e., are not throttled by the remote_storage concurrency_limit).

## Summary of changes

- Add a metric that accumulates the time waited per shard/PS
- in [remote storage semaphore wait
seconds](https://neonprod.grafana.net/d/febd9732-9bcf-4992-a821-49b1f6b02724/remote-storage?orgId=1&var-datasource=HUNg6jvVk&var-instance=pageserver-26.us-east-2.aws.neon.build&var-instance=pageserver-27.us-east-2.aws.neon.build&var-instance=pageserver-28.us-east-2.aws.neon.build&var-instance=pageserver-29.us-east-2.aws.neon.build&var-instance=pageserver-30.us-east-2.aws.neon.build&var-instance=pageserver-31.us-east-2.aws.neon.build&var-instance=pageserver-36.us-east-2.aws.neon.build&var-instance=pageserver-37.us-east-2.aws.neon.build&var-instance=pageserver-38.us-east-2.aws.neon.build&var-instance=pageserver-39.us-east-2.aws.neon.build&var-instance=pageserver-40.us-east-2.aws.neon.build&var-instance=pageserver-41.us-east-2.aws.neon.build&var-request_type=put_object&from=1731961336340&to=1731964762933&viewPanel=3)
add a first bucket of 100 microseconds to count requests that do not
need to wait on the semaphore

Update: created a new version that uses a Gauge (one increasing value
per PS/shard) instead of a histogram, as suggested by review
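
For illustration, a minimal sketch of the metric's shape (the metric name, the `prometheus` crate usage, and the surrounding types are assumptions, not the actual pageserver code):

```rust
// Hedged sketch: accumulate the seconds spent waiting for a remote-storage
// permit into one monotonically growing gauge, instead of a histogram.
use std::time::Instant;

use prometheus::{register_gauge, Gauge};
use tokio::sync::Semaphore;

struct RemoteStorageLimiter {
    semaphore: Semaphore,
    wait_seconds_total: Gauge, // grows by the time spent waiting for a permit
}

impl RemoteStorageLimiter {
    fn new(concurrency_limit: usize) -> Self {
        Self {
            semaphore: Semaphore::new(concurrency_limit),
            wait_seconds_total: register_gauge!(
                "remote_storage_semaphore_wait_seconds_total",
                "Accumulated time spent waiting for a remote storage permit"
            )
            .expect("metric registration"),
        }
    }

    async fn run_upload<T>(&self, op: impl std::future::Future<Output = T>) -> T {
        let started = Instant::now();
        let _permit = self.semaphore.acquire().await.expect("semaphore closed");
        // Requests that get a permit without waiting contribute ~0 here.
        self.wait_seconds_total.add(started.elapsed().as_secs_f64());
        op.await
    }
}
```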
2024-11-26 11:46:58 +00:00
Conrad Ludgate
96a1b71c84 chore(proxy): discard request context span during passthrough (#9882)
## Problem

The RequestContext::span shouldn't live for the entire postgres
connection, only the handshake.

## Summary of changes

* Slight refactor to the RequestContext to discard the span upon
handshake completion.
* Make sure the temporary future for the handshake is dropped (not bound
to a variable)
* Runs our nightly fmt script
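
For illustration, a hedged sketch of the idea (function names and the span fields are assumptions, not proxy's actual types): the span is attached only to the handshake future, so it is gone by the time passthrough starts.

```rust
// Hedged sketch: keep the request span only for the duration of the
// handshake, then drop it so the long-lived passthrough phase does not
// hold on to it.
use tracing::{info_span, Instrument};

async fn handle_connection() {
    let span = info_span!("client_connection", peer = "example");

    // The handshake future is awaited immediately (not bound to a variable),
    // so it is dropped as soon as it completes.
    do_handshake().instrument(span).await;

    // `span` was moved into `instrument` above and no longer exists here;
    // passthrough runs without the per-request span attached.
    do_passthrough().await;
}

async fn do_handshake() { /* ... */ }
async fn do_passthrough() { /* ... */ }
```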
2024-11-25 21:32:53 +00:00
Arpad Müller
a74ab9338d fast_import: remove hardcoding of pg_version (#9878)
Before, we hardcoded the pg_version to 140000, while the code expected
version numbers like 14. Now we use an enum, plus code from
`extension_server.rs`, to auto-detect the correct version. The enum helps
when we add support for a new version: enums ensure that compilation fails
if one forgets to add the version to one of the `match` locations.
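
As a hedged sketch of that point (variant and function names are illustrative, not the actual neon definitions): an exhaustive `match` over the enum turns a forgotten version into a compile error.

```rust
// Adding a new variant to the enum makes every non-exhaustive `match`
// fail to compile, instead of silently misbehaving at runtime.
#[derive(Clone, Copy, Debug)]
enum PgMajorVersion {
    V14,
    V15,
    V16,
    V17,
}

fn pg_distrib_subdir(version: PgMajorVersion) -> &'static str {
    match version {
        PgMajorVersion::V14 => "v14",
        PgMajorVersion::V15 => "v15",
        PgMajorVersion::V16 => "v16",
        PgMajorVersion::V17 => "v17",
    }
}

fn main() {
    println!("{}", pg_distrib_subdir(PgMajorVersion::V16));
}
```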

cc https://github.com/neondatabase/neon/pull/9218
2024-11-25 20:23:42 +00:00
Folke Behrens
7404887b81 proxy: Demote errors from cplane request routines to debug (#9886)
## Problem

Any errors from these async blocks are unconditionally logged at error
level
even though we already handle such errors based on context.

## Summary of changes

* Log raw errors from creating and executing cplane requests at debug
level.
* Inline macro calls to retain the correct callsite.
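
As a hedged illustration of why the macro call stays inline (function names are assumptions): `tracing` records file/line/target at the macro invocation site, so logging inline at the failure point keeps the callsite accurate, while routing every error through a shared helper would attribute all log lines to the helper.

```rust
use tracing::debug;

async fn execute_cplane_request() -> Result<(), std::io::Error> {
    let result = send_request().await;
    if let Err(err) = &result {
        // Demoted to debug: the caller already handles this error with context.
        // Calling the macro right here keeps the recorded callsite correct.
        debug!(error = %err, "cplane request failed");
    }
    result
}

async fn send_request() -> Result<(), std::io::Error> {
    Err(std::io::Error::new(std::io::ErrorKind::Other, "example"))
}
```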
2024-11-25 19:35:32 +00:00
Folke Behrens
87e4dd23a1 proxy: Demote all cplane error replies to info log level (#9880)
## Problem

The vast majority of the error/warn logs from cplane are about time or
data transfer quotas exceeded or endpoint-not-found errors and not
operational errors in proxy or cplane.

## Summary of changes

* Demote cplane error replies to info level.
* Raise other errors from warn back to error.
2024-11-25 17:53:26 +00:00
Vlad Lazar
7a2f0ed8d4 safekeeper: lift decoding and interpretation of WAL to the safekeeper (#9746)
## Problem

For any given tenant shard, pageservers receive all of the tenant's WAL
from the safekeeper.
This soft-blocks us from using larger shard counts due to bandwidth
concerns and CPU overhead of filtering
out the records.

## Summary of changes

This PR lifts the decoding and interpretation of WAL from the pageserver
into the safekeeper.

A customised PG replication protocol is used where instead of sending
raw WAL, the safekeeper sends
filtered, interpreted records. The receiver drives the protocol
selection, so, on the pageserver side, usage
of the new protocol is gated by a new pageserver config:
`wal_receiver_protocol`.
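
For illustration, a hedged sketch of the gating (the variant names and the way the option is threaded into replication startup are assumptions, not the actual neon types): the receiver chooses the protocol and the default stays on raw WAL, so the interpreted-records path is opt-in.

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
enum WalReceiverProtocol {
    /// Stream raw WAL, as before.
    #[default]
    Vanilla,
    /// Safekeeper decodes and interprets records before sending them.
    Interpreted,
}

fn start_replication_options(protocol: WalReceiverProtocol) -> Vec<(&'static str, &'static str)> {
    match protocol {
        // No extra options: the safekeeper sends raw WAL.
        WalReceiverProtocol::Vanilla => vec![],
        // Only the new protocol injects the protocol name (and, in the real
        // implementation, the shard identity) into the replication arguments.
        WalReceiverProtocol::Interpreted => vec![("proto", "interpreted")],
    }
}

fn main() {
    println!("{:?}", start_replication_options(WalReceiverProtocol::default()));
}
```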

 More granularly, the changes are:
1. Optionally inject the protocol and shard identity into the arguments
used for starting replication
2. On the safekeeper side, implement a new wal sending primitive which
decodes and interprets records
 before sending them over
3. On the pageserver side, implement the ingestion of this new
replication message type. It's very similar
 to what we already have for raw wal (minus decoding and interpreting).
 
 ## Notes
 
* This PR currently uses my [branch of
rust-postgres](https://github.com/neondatabase/rust-postgres/tree/vlad/interpreted-wal-record-replication-support)
which includes the deserialization logic for the new replication message
type. PR for that is open
[here](https://github.com/neondatabase/rust-postgres/pull/32).
* This PR contains changes for both pageservers and safekeepers. It's
safe to merge because the new protocol is disabled by default on the
pageserver side. We can gradually start enabling it in subsequent
releases.
* CI tests are running on https://github.com/neondatabase/neon/pull/9747
 
 ## Links
 
 Related: https://github.com/neondatabase/neon/issues/9336
 Epic: https://github.com/neondatabase/neon/issues/9329
2024-11-25 17:29:28 +00:00
Christian Schwarz
5c2356988e page_service: add benchmark for batching (#9820)
This PR adds two benchmark to demonstrate the effect of server-side
getpage request batching added in
https://github.com/neondatabase/neon/pull/9321.

For the CPU usage, I found that the `prometheus` crate's built-in CPU
usage metric accounts the seconds at integer granularity. That's not
enough once you reduce the target benchmark runtime for local iteration.
So, add a new `libmetrics` metric and report that.
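
For illustration only, a hedged sketch of one way to get process CPU time at sub-second granularity via getrusage(2). This is not the actual `libmetrics` implementation; it just shows why a float-valued metric matters for short benchmark runs.

```rust
fn process_cpu_seconds_total() -> f64 {
    // Ask the kernel for user + system CPU time of this process.
    let mut usage: libc::rusage = unsafe { std::mem::zeroed() };
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut usage) };
    assert_eq!(rc, 0, "getrusage failed");
    let tv_to_secs =
        |tv: libc::timeval| tv.tv_sec as f64 + tv.tv_usec as f64 / 1_000_000.0;
    tv_to_secs(usage.ru_utime) + tv_to_secs(usage.ru_stime)
}

fn main() {
    // With ~1s benchmark runtimes, integer-second granularity would round
    // the CPU usage to 0 or 1; microsecond granularity keeps the signal.
    println!("cpu seconds so far: {:.6}", process_cpu_seconds_total());
}
```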

The benchmarks are disabled because [on our benchmark nodes, timer
resolution isn't high
enough](https://neondb.slack.com/archives/C059ZC138NR/p1732264223207449).
They work (no statement about quality) on my bare-metal devbox.

They will be refined and enabled once we find a fix. Candidates at time
of writing are:
- https://github.com/neondatabase/neon/pull/9822
- https://github.com/neondatabase/neon/pull/9851


Refs:

- Epic: https://github.com/neondatabase/neon/issues/9376
- Extracted from https://github.com/neondatabase/neon/pull/9792
2024-11-25 15:52:39 +00:00
Konstantin Knizhnik
441612c1ce Prefetch on macos (#9875)
## Problem

Prefetch is disabled on macOS because `posix_fadvise` is not available.
But Neon's prefetch does not use this function, and for testing on macOS
it is very convenient to have prefetch available.

## Summary of changes

Define `USE_PREFETCH` in Makefile.

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-11-25 15:21:52 +00:00
Arpad Müller
77630e5408 Address beta clippy lint needless_lifetimes (#9877)
The 1.82.0 version of Rust will be stable soon; let's get the clippy
lint fixes in before the compiler version upgrade.
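
A small, generic example of the kind of change this lint asks for (not taken from the actual diff): explicit lifetimes that the compiler can infer get elided.

```rust
struct Wrapper<'a> {
    inner: &'a str,
}

// Before (clippy: needless_lifetimes):
// impl<'a> Wrapper<'a> {
//     fn inner<'b>(&'b self) -> &'b str { self.inner }
// }

// After: lifetimes elided where the compiler can infer them.
impl Wrapper<'_> {
    fn inner(&self) -> &str {
        self.inner
    }
}

fn main() {
    let w = Wrapper { inner: "hello" };
    println!("{}", w.inner());
}
```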
2024-11-25 14:59:12 +00:00
Alexander Bayandin
3d380acbd1 Bump default Debian version to Bookworm everywhere (#9863)
## Problem

We have a couple of CI workflows that still run on Debian Bullseye, and
the default Debian version in images is Bullseye as well (we have to
explicitly opt in to building on Bookworm).

## Summary of changes
- Run `pgbench-pgvector` on Bookworm (fix a couple of packages)
- Run `trigger_bench_on_ec2_machine_in_eu_central_1` on Bookworm
- Change default `DEBIAN_VERSION` in Dockerfiles to Bookworm
- Make `pinned` docker tag an alias to `pinned-bookworm`
2024-11-25 14:43:32 +00:00
Alex Chi Z.
4630b70962 fix(pageserver): ensure all layers are flushed before measuring RSS (#9861)
## Problem

close https://github.com/neondatabase/neon/issues/9761

The test assumed that no new L0 layers are flushed throughout the
process, which is not true.

## Summary of changes

Fix the test case `test_compaction_l0_memory` by flushing in-memory
layers before compaction.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-25 14:25:18 +00:00
Conrad Ludgate
6f6749c4a9 chore: update rustls (#9871) 2024-11-25 12:01:30 +00:00
Folke Behrens
0d1e82f0a7 Bump futures-* crates, drop unused license, hide duplicate crate warnings (#9858)
* The futures-util crate we use was yanked. Bump it and its siblings to a
new patch release.
https://github.com/rust-lang/futures-rs/releases/tag/0.3.31
* cargo-deny: Drop an unused license.
* cargo-deny: Don't warn about duplicate crates. Duplicate crates are
unavoidable and the noise just hides real warnings.
2024-11-25 10:59:49 +00:00
Christian Schwarz
99b664c9ed expand fix to tasks mode; add some comments 2024-11-25 11:51:58 +01:00
Christian Schwarz
b9477aa945 fix: batcher wouldn't shut down after executor exits 2024-11-25 11:28:30 +01:00
Christian Schwarz
0bb037240d logging to debug test_pageserver_restarts_under_workload 2024-11-25 10:36:48 +01:00
Alexander Bayandin
6f7aeaa1c5 test_runner: use LFC by default (#8613)
## Problem
LFC is not enabled by default in tests, but it is enabled in production.
This increases the risk of errors reaching the production environment
that were not found during the routine workflow.
However, enabling LFC for all the tests may overload the disk on our
servers and increase the number of failures.
So, we try enabling LFC in one case to evaluate the possible risk.

## Summary of changes
A new environment variable, USE_LFC, is introduced. If it is set to true,
LFC is enabled by default in all the tests.
In our workflow, we enable LFC for PG17, release, x86-64, and disable it
for all other combinations.

---------

Co-authored-by: Alexey Masterov <alexeymasterov@neon.tech>
Co-authored-by: a-masterov <72613290+a-masterov@users.noreply.github.com>
2024-11-25 09:01:05 +00:00
Christian Schwarz
6ec5ac1ac8 DO NOT MERGE: enable pipelining (32,concurrent-futures) by default so we get test suite coverage 2024-11-25 09:16:09 +01:00
Christian Schwarz
450be26bbb fast imports: initial Importer and Storage changes (#9218)
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
Co-authored-by: Stas Kelvic <stas@neon.tech>

# Context

This PR contains PoC-level changes for a product feature that allows
onboarding large databases into Neon without going through the regular
data path.

# Changes

This internal RFC provides all the context
* https://github.com/neondatabase/cloud/pull/19799

In the language of the RFC, this PR covers

* the Importer code (`fast_import`) 
* all the Pageserver changes (mgmt API changes, flow implementation,
etc)
* a basic test for the Pageserver changes

# Reviewing

As acknowledged in the RFC, the code added in this PR is not ready for
general availability.
Also, the **architecture is not to be discussed in this PR**, but in the
RFC and associated Slack channel instead.

Reviewers of this PR should take that into consideration.
The quality bar to apply during review depends on what area of the code
is being reviewed:

* Importer code (`fast_import`): practically anything goes
* Core flow (`flow.rs`):
* Malicious input data must be expected and the existing threat models
apply.
* The code need only be safe to execute on *dedicated* Pageserver
instances:
* This means in particular that tenants *on other* Pageserver instances
must not be affected negatively wrt data confidentiality, integrity or
availability.
* Other code: the usual quality bar
* Pay special attention to correct use of gate guards, timeline
cancellation in all places during shutdown & migration, etc.
* Consider the broader system impact; if you find potentially
problematic interactions with Storage features that were not covered in
the RFC, bring that up during the review.

I recommend submitting three separate reviews, for the three high-level
areas with different quality bars.


# References

(Internal-only)

* refs https://github.com/neondatabase/cloud/issues/17507
* refs https://github.com/neondatabase/company_projects/issues/293
* refs https://github.com/neondatabase/company_projects/issues/309
* refs https://github.com/neondatabase/cloud/issues/20646

---------

Co-authored-by: Stas Kelvich <stas.kelvich@gmail.com>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
Co-authored-by: John Spray <john@neon.tech>
2024-11-22 22:47:06 +00:00
Anastasia Lubennikova
3245f7b88d Rename 'installed_extensions' metric to 'compute_installed_extensions' (#9759)
to keep it consistent with existing compute metrics.

flux-fleet change is not needed, because it doesn't have any filter by
metric name for compute metrics.
2024-11-22 19:27:04 +00:00
Alex Chi Z.
c1937d073f fix(pageserver): ensure upload happens after delete (#9844)
## Problem

Follow up of https://github.com/neondatabase/neon/pull/9682, that patch
didn't fully address the problem: what if shutdown fails due to whatever
reason and then we reattach the tenant? Then we will still remove the
future layer. The underlying problem is that the fix for #5878 gets
voided because of the generation optimizations.

Of course, we also need to ensure that delete happens after uploads, but
note that we only schedule deletes when there are no ongoing upload
tasks, so that's fine.

## Summary of changes

* Add a test case to reproduce the behavior (by changing the original
test case to attach the same generation).
* If layer upload happens after the deletion, drain the deletion queue
before uploading.
* If blocked_deletion is enabled, directly remove it from the
blocked_deletion queue.
* Local fs backend fix to avoid race between deletion and preload.
* test_emergency_mode does not need to wait for uploads (and it's
generally not possible to wait for uploads).
* ~~Optimize deletion executor to skip validation if there are no files
to delete.~~ this doesn't work

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-22 18:30:53 +00:00
Alex Chi Z.
6f8b1eb5a6 test(pageserver): add detach ancestor smoke test (#9842)
## Problem

Follow up to https://github.com/neondatabase/neon/pull/9682, hopefully
we can detect some issues or assure ourselves that this is ready for
production.

## Summary of changes

* Add a compaction-detach-ancestor smoke test.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-22 18:21:51 +00:00
Erik Grinaker
e939d36dd4 safekeeper,pageserver: fix CPU profiling allowlists (#9856)
## Problem

The HTTP router allowlists matched both on the path and the query
string. This meant that only `/profile/cpu` would be allowed without
auth, while `/profile/cpu?format=svg` would require auth.

Follows #9764.

## Summary of changes

* Match allowlists on URI path, rather than the entire URI.
* Fix the allowlist for Safekeeper to use `/profile/cpu` rather than the
old `/pprof/profile`.
* Just use a constant slice for the allowlist; it's only a handful of
items, and these handlers are not on hot paths.
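
For illustration, a hedged sketch of the allowlist change described above (the entry names and the exact router types are assumptions): match against `uri.path()` so the query string never affects the decision, and keep the allowlist as a const slice.

```rust
const AUTH_ALLOWLIST: &[&str] = &["/v1/status", "/profile/cpu", "/profile/heap"];

fn request_needs_auth(uri: &http::Uri) -> bool {
    // `uri.path()` excludes the query string, so `/profile/cpu?format=svg`
    // is treated the same as `/profile/cpu`.
    !AUTH_ALLOWLIST.contains(&uri.path())
}

fn main() {
    let uri: http::Uri = "/profile/cpu?format=svg".parse().unwrap();
    assert!(!request_needs_auth(&uri));

    let uri: http::Uri = "/v1/tenant".parse().unwrap();
    assert!(request_needs_auth(&uri));
}
```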
2024-11-22 17:50:33 +00:00
Alex Chi Z.
211e4174d2 fix(pageserver): preempt and retry azure list operation (#9840)
## Problem

close https://github.com/neondatabase/neon/issues/9836

Looking at the Azure SDK, the only related issue I can find is
https://github.com/azure/azure-sdk-for-rust/issues/1549. Azure uses
reqwest as the backend, so I assume there's some underlying magic
unknown to us that might have caused the hang in #9836.

The observation is:
* We didn't get an explicit out of resource HTTP error from Azure.
* The connection simply gets stuck and times out.
* But when we retry after we reach the timeout, it succeeds.

This issue is hard to pin down -- maybe something went wrong on the ABS
side, or something went wrong on our side. But we know that a retry will
usually succeed if we give up the stuck connection.

Therefore, I propose a fix where we preempt stuck HTTP operations and
actively retry. This mitigates the problem; in the long run, we need to
keep an eye on ABS usage and see if we can fully resolve it.

The reasoning behind this timeout mechanism: we use a much smaller timeout
than before so we can preempt, but a normal listing operation may
legitimately take longer than the initial timeout if it returns a lot of
keys. Therefore, after we terminate the connection, we double the timeout,
so that such requests eventually succeed.
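
For illustration, a hedged sketch of the preempt-and-retry shape (the starting timeout, the retry cap, and the function names are illustrative, not the actual remote_storage code):

```rust
use std::time::Duration;

async fn list_with_preemption<F, Fut, T, E>(mut op: F) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt_timeout = Duration::from_secs(5);
    let mut preemptions = 0;
    loop {
        if preemptions >= 3 {
            // After a few preempted attempts, stop preempting and let the
            // caller-visible timeout/error handling take over.
            return op().await;
        }
        match tokio::time::timeout(attempt_timeout, op()).await {
            // The attempt finished (successfully or with a real error).
            Ok(result) => return result,
            Err(_elapsed) => {
                // The connection looked stuck: give up on it and retry with
                // a doubled timeout so large listings can still complete.
                preemptions += 1;
                attempt_timeout *= 2;
            }
        }
    }
}
```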

## Summary of changes

* Use exponential growth for ABS list timeout.
* Rather than using a fixed timeout, use a timeout that starts small and
grows
* Rather than exposing timeouts to the list_streaming caller as soon as
we see them, only do so after we have retried a few times

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-22 17:50:00 +00:00
Ivan Efremov
3b1ac8b14a proxy: Implement cancellation rate limiting (#9739)
Implement cancellation rate limiting and ip allowlist checks. Add
ip_allowlist to the cancel closure

Fixes [#16456](https://github.com/neondatabase/cloud/issues/16456)
2024-11-22 16:46:38 +00:00
Alexander Bayandin
b3b579b45e test_bulk_insert: fix typing for PgVersion (#9854)
## Problem

Along with the migration to Python 3.11, I replaced `C(str, Enum)` with
`C(StrEnum)`; one such example is the `PgVersion` enum.
This required more changes in `PgVersion` itself (before, it accepted both
`str` and `int`; after, it supports only `str`), which caused the
`test_bulk_insert` test to fail.

## Summary of changes
- `test_bulk_insert`: explicitly cast pg_version from `timeline_detail`
to str
2024-11-22 16:13:53 +00:00
Conrad Ludgate
8ab96cc71f chore(proxy/jwks): reduce the rightward drift of jwks renewal (#9853)
I found the rightward drift of the `renew_jwks` function hard to review.

This PR splits out some major logic and uses early returns to make the
happy path more linear.
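
A generic illustration of the early-return style (not the actual `renew_jwks` code; the inputs and names are made up): bail out on the exceptional cases first so the happy path reads top-to-bottom without nesting.

```rust
fn renew(token: Option<&str>, expired: bool) -> Result<String, &'static str> {
    // Before, this logic would be nested:
    //   if let Some(token) = token { if expired { ... } else { ... } } else { ... }
    let token = token.ok_or("no token configured")?;
    if !expired {
        return Ok(token.to_string());
    }
    // Happy path: actually renew.
    Ok(format!("{token}-renewed"))
}

fn main() {
    assert_eq!(renew(Some("abc"), false).unwrap(), "abc");
    assert_eq!(renew(Some("abc"), true).unwrap(), "abc-renewed");
    assert!(renew(None, true).is_err());
}
```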
2024-11-22 14:51:32 +00:00
Alexander Bayandin
51d26a261b build(deps): bump mypy from 1.3.0 to 1.13.0 (#9670)
## Problem
We use a pretty old version of `mypy`, 1.3 (released 1.5 years ago), which
produces false positives for `typing.Self`.

## Summary of changes
- Bump `mypy` from 1.3 to 1.13
- Fix new warnings and errors
- Use `typing.Self` whenever we `return self`
2024-11-22 14:31:36 +00:00
Christian Schwarz
bd31f42f52 run benchmarks 2024-11-22 15:06:11 +01:00
Tristan Partin
c10b7f7de9 Write a newline after adding dynamic_shared_memory_type to PG conf (#9843)
Without adding a newline, we can end up with a conf line that looks like
the following:

dynamic_shared_memory_type = mmap# Managed by compute_ctl: begin

This leads to Postgres logging:

LOG: configuration file
"/var/db/postgres/compute/pgdata/postgresql.conf" contains errors;
unaffected changes were applied
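
For illustration, a minimal sketch of the fix's shape (the function and variable names are assumptions, not the actual compute_ctl code): always terminate an appended setting with a newline so a later "# Managed by compute_ctl: begin" marker starts on its own line.

```rust
use std::fmt::Write as _;

fn append_setting(conf: &mut String, name: &str, value: &str) {
    // Using writeln! (rather than write!) guarantees the trailing newline.
    writeln!(conf, "{name} = {value}").expect("writing to a String cannot fail");
}

fn main() {
    let mut conf = String::new();
    append_setting(&mut conf, "dynamic_shared_memory_type", "mmap");
    conf.push_str("# Managed by compute_ctl: begin\n");
    // Without the newline this would have produced:
    //   dynamic_shared_memory_type = mmap# Managed by compute_ctl: begin
    print!("{conf}");
}
```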

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-11-22 13:37:06 +00:00
Christian Schwarz
990e44dda4 longer target runtime 2024-11-22 14:37:01 +01:00
Christian Schwarz
cbe18393d3 Benchmark results (metal node, AMD Ryzen 9 7950X3D 16-Core Processor)
------------------------------------------ Benchmark results -------------------------------------------
test_throughput[release-pg16-50-None-5-1-128-not batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-5-1-128-not batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-5-1-128-not batchable None].effective_io_concurrency: 1
test_throughput[release-pg16-50-None-5-1-128-not batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-5-1-128-not batchable None].counters.time: 0.8347
test_throughput[release-pg16-50-None-5-1-128-not batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-5-1-128-not batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-5-1-128-not batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-5-1-128-not batchable None].counters.pageserver_cpu_seconds_total: 0.7400
test_throughput[release-pg16-50-None-5-1-128-not batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.8589
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.8740
test_throughput[release-pg16-50-pipelining_config1-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.7986
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7067
test_throughput[release-pg16-50-pipelining_config2-5-1-128-not batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.7606
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.7700
test_throughput[release-pg16-50-pipelining_config3-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 1
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.7943
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.7083
test_throughput[release-pg16-50-pipelining_config4-5-1-128-not batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-None-5-100-128-batchable None].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-None-5-100-128-batchable None].pipelining_enabled: 0
test_throughput[release-pg16-50-None-5-100-128-batchable None].effective_io_concurrency: 100
test_throughput[release-pg16-50-None-5-100-128-batchable None].readhead_buffer_size: 128
test_throughput[release-pg16-50-None-5-100-128-batchable None].counters.time: 0.5889
test_throughput[release-pg16-50-None-5-100-128-batchable None].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-5-100-128-batchable None].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-None-5-100-128-batchable None].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-None-5-100-128-batchable None].counters.pageserver_cpu_seconds_total: 0.5875
test_throughput[release-pg16-50-None-5-100-128-batchable None].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.5360
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.6700
test_throughput[release-pg16-50-pipelining_config6-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 1
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.6398
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.6386
test_throughput[release-pg16-50-pipelining_config7-5-100-128-batchable {'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.4210
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,402.4545
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 3,207.0909
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,402.4545
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.5109
test_throughput[release-pg16-50-pipelining_config8-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 1.9963
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 2
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.4619
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.7000
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 3,300.7000
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.7000
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.4650
test_throughput[release-pg16-50-pipelining_config9-5-100-128-batchable {'max_batch_size': 2, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 1.9398
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.3333
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.9286
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 1,659.2857
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.9286
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3914
test_throughput[release-pg16-50-pipelining_config10-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 3.8582
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 4
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3526
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.1429
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 1,752.1429
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.1429
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3507
test_throughput[release-pg16-50-pipelining_config11-5-100-128-batchable {'max_batch_size': 4, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 3.6539
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2921
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.7647
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 885.9412
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.6471
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.3353
test_throughput[release-pg16-50-pipelining_config12-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 7.2259
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 8
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.3317
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,402.0000
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 978.0000
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,402.0000
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.3300
test_throughput[release-pg16-50-pipelining_config13-5-100-128-batchable {'max_batch_size': 8, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 6.5460
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2409
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.5000
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 499.8000
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.5000
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2820
test_throughput[release-pg16-50-pipelining_config14-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 12.8081
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 16
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.2807
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.5882
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 590.6471
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.5882
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2882
test_throughput[release-pg16-50-pipelining_config15-5-100-128-batchable {'max_batch_size': 16, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 10.8383
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].pipelining_config.protocol_pipelining_mode: concurrent-futures
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.time: 0.2510
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_getpage_count: 6,401.4211
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_vectored_get_count: 307.3684
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.compute_getpage_count: 6,401.4211
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].counters.pageserver_cpu_seconds_total: 0.2889
test_throughput[release-pg16-50-pipelining_config16-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].perfmetric.batching_factor: 20.8265
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].tablesize_mib: 50 MiB
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_enabled: 1
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].effective_io_concurrency: 100
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].readhead_buffer_size: 128
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.max_batch_size: 32
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].pipelining_config.protocol_pipelining_mode: tasks
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.time: 0.2517
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_getpage_count: 6,401.4211
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_vectored_get_count: 397.4737
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.compute_getpage_count: 6,401.4211
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].counters.pageserver_cpu_seconds_total: 0.2658
test_throughput[release-pg16-50-pipelining_config17-5-100-128-batchable {'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].perfmetric.batching_factor: 16.1053
test_latency[release-pg16-None-None].latency_mean: 0.117 ms
test_latency[release-pg16-None-None].latency_percentiles.p95: 0.153 ms
test_latency[release-pg16-None-None].latency_percentiles.p99: 0.162 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.9: 0.248 ms
test_latency[release-pg16-None-None].latency_percentiles.p99.99: 0.429 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.127 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.161 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.181 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.295 ms
test_latency[release-pg16-pipelining_config1-{'max_batch_size': 1, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.553 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.123 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.162 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.174 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.264 ms
test_latency[release-pg16-pipelining_config2-{'max_batch_size': 1, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.499 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_mean: 0.120 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p95: 0.159 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99: 0.175 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.9: 0.258 ms
test_latency[release-pg16-pipelining_config3-{'max_batch_size': 32, 'protocol_pipelining_mode': 'concurrent-futures'}].latency_percentiles.p99.99: 0.522 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_mean: 0.116 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p95: 0.131 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99: 0.150 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.9: 0.404 ms
test_latency[release-pg16-pipelining_config4-{'max_batch_size': 32, 'protocol_pipelining_mode': 'tasks'}].latency_percentiles.p99.99: 0.499 ms
2024-11-22 14:33:55 +01:00
Heikki Linnakangas
7372312a73 Avoid unnecessary send_replace calls in seqwait (#9852)
The notifications need to be sent whenever the waiters heap changes, per
the comment in `update_status`. But if 'advance' is called when there
are no waiters, or the new LSN is lower than the waiters so that no one
needs to be woken up, there's no need to send notifications. This saves
some CPU cycles in the common case that there are no waiters.
2024-11-22 13:29:49 +00:00
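A minimal sketch of the guard described in the entry above, using a toy heap of waiters; this is a Python illustration under assumed names, not the actual Rust seqwait code:

```python
import heapq
import itertools
from typing import Callable

class SeqWait:
    """Toy model of an LSN wait queue; waiters live in a min-heap keyed by LSN."""

    def __init__(self) -> None:
        self.current_lsn = 0
        self._tiebreak = itertools.count()  # keeps heap entries orderable when LSNs collide
        self.waiters: list[tuple[int, int, Callable[[], None]]] = []

    def wait_for(self, lsn: int, wake_fn: Callable[[], None]) -> None:
        heapq.heappush(self.waiters, (lsn, next(self._tiebreak), wake_fn))

    def advance(self, new_lsn: int) -> None:
        if new_lsn <= self.current_lsn:
            return
        self.current_lsn = new_lsn
        heap_changed = False
        # Pop and wake every waiter whose target LSN has been reached.
        while self.waiters and self.waiters[0][0] <= new_lsn:
            _, _, wake_fn = heapq.heappop(self.waiters)
            wake_fn()
            heap_changed = True
        # Broadcast only if the waiters heap actually changed; with no waiters
        # (the common case) the notification is skipped entirely.
        if heap_changed:
            self.broadcast_status()

    def broadcast_status(self) -> None:
        pass  # stands in for the watch-channel send_replace in the real code
```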
Christian Schwarz
5796f3ba57 fix test 2024-11-22 14:27:59 +01:00
Christian Schwarz
11dc7135b1 rename test file to test_page_service_batching 2024-11-22 13:19:12 +01:00
Christian Schwarz
d6e5a46015 eliminate the word batch and stale doc comments 2024-11-22 12:46:52 +01:00
Christian Schwarz
a28c54dac1 cosmetics 2024-11-22 12:44:31 +01:00
Christian Schwarz
ef502f8311 remove async-timer heritage 2024-11-22 12:43:55 +01:00
Christian Schwarz
39e45f9e51 improve tests 2024-11-22 12:27:38 +01:00
Christian Schwarz
c1e8347160 make configurable whether pipelining should use concurrent futures or tasks 2024-11-22 11:27:23 +01:00
John Spray
d9de65ee8f pageserver: permit reads behind GC cutoff during LSN grace period (#9833)
## Problem

In https://github.com/neondatabase/neon/issues/9754 and the flakiness of
`test_readonly_node_gc`, we saw that although our logic for controlling
GC was sound, the validation of getpage requests was not, because it
could not consider LSN leases when requests arrived shortly after
restart.

Closes https://github.com/neondatabase/neon/issues/9754

## Summary of changes

This is the "Option 3" discussed verbally -- rather than holding back gc
cutoff, we waive the usual validation of request LSN if we are still
waiting for leases to be sent after startup

- When validating LSN in `wait_or_get_last_lsn`, skip the validation
relative to GC cutoff if the timeline is still in its LSN lease grace
period
- Re-enable test_readonly_node_gc
2024-11-22 09:24:23 +00:00
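A rough sketch of the validation waiver described above; the field and function names here are hypothetical stand-ins for the pageserver's `wait_or_get_last_lsn` logic:

```python
import time

class Timeline:
    """Toy model: only the fields needed to illustrate the waiver (names are hypothetical)."""
    def __init__(self, gc_cutoff_lsn: int, lease_grace_period_s: float, started_at: float):
        self.gc_cutoff_lsn = gc_cutoff_lsn
        self.lease_grace_period_s = lease_grace_period_s
        self.started_at = started_at

    def in_lease_grace_period(self) -> bool:
        # Right after startup we may not have received all LSN leases yet.
        return time.monotonic() - self.started_at < self.lease_grace_period_s

def validate_request_lsn(tl: Timeline, request_lsn: int) -> None:
    # Outside the grace period, reads behind the GC cutoff are rejected as usual.
    if not tl.in_lease_grace_period() and request_lsn < tl.gc_cutoff_lsn:
        raise ValueError(
            f"request LSN {request_lsn:#x} is behind GC cutoff {tl.gc_cutoff_lsn:#x}"
        )
```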
Fedor Dikarev
83b73fc24e Batch scrape workflows up to last 30 days and stop ad-hoc (#9846)
Comparing the Batch and Ad-hoc collectors shows no big difference; we just
need to scrape for a longer duration to catch retries.
Dashboard with comparison:

https://neonprod.grafana.net/d/be3pjm7c9ne2oe/compare-ad-hoc-and-batch?orgId=1&from=1731345095814&to=1731946295814

I should still raise a support case with GitHub about this; meanwhile, this
should be a working solution and should save us some cost, so it is worth
switching to Batch now.

Ref: https://github.com/neondatabase/cloud/issues/17503
2024-11-22 09:06:00 +00:00
Christian Schwarz
093674b2fb implement the serial mode 2024-11-22 09:53:08 +01:00
Christian Schwarz
0fa8ae3c0a WIP refactor to allow truly serial mode 2024-11-22 09:47:49 +01:00
Christian Schwarz
c1040bc25d task-based mode 2024-11-22 09:36:45 +01:00
Peter Bendel
1e05e3a6e2 minor PostgreSQL update in benchmarking (#9845)
## Problem

In the benchmarking.yml pgvector job we install Postgres from deb packages.
After the minor Postgres update, the referenced packages no longer exist.

[Failing job:
](https://github.com/neondatabase/neon/actions/runs/11965785323/job/33360391115#step:4:41)

## Summary of changes

Reference and install the updated packages.

[Successful job after this
fix](https://github.com/neondatabase/neon/actions/runs/11967959920/job/33366011934#step:4:45)
2024-11-22 08:31:54 +00:00
Christian Schwarz
a3d1cf636b config changes to express pipelining config (not respected yet) 2024-11-22 08:36:17 +01:00
Christian Schwarz
89d9d16130 cherry-pick from problame/batching-benchmark while it's waiting for merge 2024-11-22 08:17:30 +01:00
Christian Schwarz
88fd8aed52 watch-based approach 2024-11-21 23:03:21 +01:00
Christian Schwarz
db9093f938 revert back to 'span fixes' commit 2024-11-21 22:07:05 +01:00
Christian Schwarz
240e48df59 improvements 2024-11-21 21:57:53 +01:00
Christian Schwarz
7680aa12a8 draft 2024-11-21 21:34:58 +01:00
Tristan Partin
37962e729e Fix panic in compute_ctl metrics collection (#9831)
Calling unwrap on the encoder is a little overzealous. One of the errors
that can be returned by the encode function in particular is the
non-existence of metrics for a metric family, so we should filter such
instances out up front. I believe this panic was caused by a race condition
between the Prometheus collector and the compute collecting the installed
extensions metric for the first time. The HTTP server is spawned on a
separate thread before we even start bringing up Postgres.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-11-21 20:19:02 +00:00
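A loose Python illustration of the fix above: skip metric families that carry no samples instead of letting the encoder error out and unwrapping it. This is a conceptual sketch, not the Rust prometheus-crate code compute_ctl actually uses, and the data shapes are assumptions:

```python
def encode_metrics(families: list[dict]) -> str:
    """Encode only families that actually carry samples; never abort on empty ones."""
    # Families with no samples are filtered out up front; trying to encode such a
    # family is one of the error cases that previously ended in an unwrap panic.
    non_empty = [f for f in families if f["samples"]]
    lines = []
    for family in non_empty:
        for name, value in family["samples"]:
            lines.append(f"{name} {value}")
    return "\n".join(lines)

# The installed-extensions family can be empty on the very first scrape, before the
# compute has collected it; here it is simply skipped instead of aborting.
print(encode_metrics([
    {"name": "installed_extensions", "samples": []},
    {"name": "compute_up", "samples": [("compute_up", 1)]},
]))
```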
Christian Schwarz
56de07154e fruitless debugging 2024-11-21 20:46:56 +01:00
Christian Schwarz
73046fdf5b span fixes 2024-11-21 20:21:55 +01:00
Erik Grinaker
190e8cebac safekeeper,pageserver: add CPU profiling (#9764)
## Problem

We don't have a convenient way to gather CPU profiles from a running
binary, e.g. during production incidents or end-to-end benchmarks, nor
during microbenchmarks (particularly on macOS).

We would also like to have continuous profiling in production, likely
using [Grafana Cloud
Profiles](https://grafana.com/products/cloud/profiles-for-continuous-profiling/).
We may choose to use either eBPF profiles or pprof profiles for this
(pending testing and discussion with SREs), but pprof profiles appear
useful regardless for the reasons listed above. See
https://github.com/neondatabase/cloud/issues/14888.

This PR is intended as a proof of concept, to try it out in staging and
drive further discussions about profiling more broadly.

Touches #9534.
Touches https://github.com/neondatabase/cloud/issues/14888.

## Summary of changes

Adds an HTTP route `/profile/cpu` that takes a CPU profile and returns
it. Defaults to a 5-second pprof Protobuf profile for use with e.g.
`pprof` or Grafana Alloy, but can also emit an SVG flamegraph. Query
parameters:

* `format`: output format (`pprof` or `svg`)
* `frequency`: sampling frequency in microseconds (default 100)
* `seconds`: number of seconds to profile (default 5)

Also integrates pprof profiles into Criterion benchmarks, such that
flamegraph reports can be taken with `cargo bench ... --profile-duration
<seconds>`. Output under `target/criterion/*/profile/flamegraph.svg`.

Example profiles:

* pprof profile (use [`pprof`](https://github.com/google/pprof)):
[profile.pb.gz](https://github.com/user-attachments/files/17756788/profile.pb.gz)
  * Web interface: `pprof -http :6060 profile.pb.gz`
* Interactive flamegraph:
[profile.svg.gz](https://github.com/user-attachments/files/17756782/profile.svg.gz)
2024-11-21 18:59:46 +00:00
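A hedged usage sketch for the route described above, using only the query parameters named in the PR (`format`, `frequency`, `seconds`); the host and port are placeholders, not a documented default:

```python
import urllib.parse
import urllib.request

# Placeholder address; point this at a pageserver's or safekeeper's HTTP listener.
BASE_URL = "http://localhost:9898"

def fetch_cpu_profile(seconds: int = 5, fmt: str = "svg", frequency: int = 100) -> bytes:
    """Fetch a CPU profile; format, seconds and frequency are the documented query params."""
    query = urllib.parse.urlencode({"format": fmt, "seconds": seconds, "frequency": frequency})
    with urllib.request.urlopen(f"{BASE_URL}/profile/cpu?{query}") as resp:
        return resp.read()

if __name__ == "__main__":
    svg = fetch_cpu_profile(seconds=5, fmt="svg")
    with open("flamegraph.svg", "wb") as f:
        f.write(svg)
```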
Christian Schwarz
408bc8fc71 cleanups 2024-11-21 19:42:43 +01:00
Christian Schwarz
345f8b6c3b fix ready_for_next_batch order 2024-11-21 19:11:57 +01:00
Christian Schwarz
aa1032aeff no need for cancel & ctx in pagestream_do_batch 2024-11-21 18:40:22 +01:00
Christian Schwarz
a1bb2e7bb0 WIP: pipelined batching 2024-11-21 18:33:34 +01:00
Conrad Ludgate
725a5ff003 fix(proxy): CancelKeyData display log masking (#9838)
Fixes the masking for the CancelKeyData display format. Because a negative
i32 was cast to u64, the top bits were all set (an `0xffffffff` prefix), and
in the bitwise OR that followed those bits took priority.

This PR also compresses 3 logs during sql-over-http into 1 log with
durations as label fields, as previously discussed.
2024-11-21 16:46:30 +00:00
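The sign-extension pitfall described above can be reproduced with plain integer arithmetic; this is a worked illustration of the effect, not the proxy's actual code, and the example value is made up:

```python
# A negative 32-bit value, e.g. the signed half of a cancel key.
backend_pid = -0x12345678

# What `i32 as u64` produces in Rust: sign extension fills the top 32 bits with 1s.
sign_extended = backend_pid & 0xFFFF_FFFF_FFFF_FFFF
# What the masking code expected: only the low 32 bits, zero-extended.
zero_extended = backend_pid & 0xFFFF_FFFF

print(hex(sign_extended))  # 0xffffffffedcba988 -- the 0xffffffff prefix dominates a bitwise OR
print(hex(zero_extended))  # 0xedcba988
```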
Alexander Bayandin
8d1c44039e Python 3.11 (#9515)
## Problem

On Debian 12 (Bookworm), Python 3.11 is the latest available version.

## Summary of changes
- Update Python to 3.11 in build-tools
- Fix ruff check / format
- Fix mypy
- Use `StrEnum` instead of pair `str`, `Enum`
- Update docs
2024-11-21 16:25:31 +00:00
Konstantin Knizhnik
0713ff3176 Bump Postgres version (#9808)
## Problem

I have made a mistake in merging Postgres PRs.

## Summary of changes

Restore consistency of submodule references.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-11-21 14:56:56 +00:00
Christian Schwarz
09e7485004 Merge branch 'problame/merge-getpage-test' into problame/batching-timer 2024-11-21 11:28:12 +01:00
Christian Schwarz
058b35f884 Merge branch 'problame/batching-benchmark' into problame/merge-getpage-test 2024-11-21 11:27:16 +01:00
Christian Schwarz
ff0aa152f1 Merge remote-tracking branch 'origin/main' into problame/batching-benchmark 2024-11-21 11:25:23 +01:00
Christian Schwarz
3375f28990 pytest.approx; https://github.com/neondatabase/neon/pull/9820#discussion_r1850679974 2024-11-21 11:21:50 +01:00
Christian Schwarz
e82deb2ccc high-resolution CPU usage 2024-11-21 11:16:00 +01:00
Christian Schwarz
fa7ce2ca07 the final choice: async-timer 1.0beta15 with features=["tokio1"] 2024-11-21 11:15:02 +01:00
John Spray
42bda5d632 pageserver: revise metrics lifetime for SecondaryTenant (#9818)
## Problem

We saw a scale test failure when one shard went
secondary->attached->secondary in a short period of time -- the metrics
for the shard failed a validation assertion that is meant to ensure the
size metric matches the sum of layer sizes in the SecondaryDetail
struct.

This appears to be due to two SecondaryTenants being alive at the same
time -- the first one was shut down but still had its contributions to
the metrics.

Closes: https://github.com/neondatabase/neon/issues/9628

## Summary of changes

- Refactor code for validating metrics and call it in shutdown as well
as during downloads
- Move code for dropping per-tenant secondary metrics from drop() into
shutdown(), so that once shutdown() completes it is definitely safe to
instantiate another SecondaryTenant for the same tenant.
2024-11-21 08:31:24 +00:00
Arpad Müller
59c2c3f8ad compute_ctl: print OpenTelemetry errors via tracing, not stdout (#9830)
Before, `OpenTelemetry` errors were printed to stdout/stderr directly,
causing one of the few log lines without a timestamp, like:

```
OpenTelemetry trace error occurred. error sending request for url (http://localhost:4318/v1/traces)
```

Now, we print:

```
2024-11-21T02:24:20.511160Z  INFO OpenTelemetry error: error sending request for url (http://localhost:4318/v1/traces)
```

I found this while investigating #9731.
2024-11-21 04:46:01 +00:00
Ivan Efremov
2d6bf176a0 proxy: Refactor http conn pool (#9785)
- Use the same ConnPoolEntry for http connection pool.
- Rename EndpointConnPool to the HttpConnPool.
- Narrow clone bound for client

Fixes #9284
2024-11-20 19:36:29 +00:00
Vadim Kharitonov
313ebfdb88 [proxy] chore: allow bypassing empty params to /sql endpoint (#9827)
## Problem

```
curl -H "Neon-Connection-String: postgresql://neondb_owner:PASSWORD@ep-autumn-rain-a58lubg0.us-east-2.aws.neon.tech/neondb?sslmode=require" https://ep-autumn-rain-a58lubg0.us-east-2.aws.neon.tech/sql -d '{"query":"SELECT 1","params":[]}'
```

For such a query, I also need to send `params`. Do I really need it?

## Summary of changes
I've marked `params` as optional
2024-11-20 19:36:23 +00:00
Arpad Müller
811fab136f scrubber: allow restricting find_garbage to a partial tenant id prefix (#9814)
Adds support to the `find_garbage` command to restrict itself to a
partial tenant ID prefix, say `a`, and then it only traverses tenants
with IDs starting with `a`. One can now pass the `--tenant-id-prefix`
parameter.

That way, one can shard the `find_garbage` command and make it run in
parallel.

The PR also changes how `remote_storage` first removes trailing `/`s, only
to then add them back in the listing function. It turns out that this isn't
necessary, and it prevents the prefix functionality from working. S3 doesn't
do this either.
2024-11-20 19:31:02 +00:00
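A small sketch of how the new `--tenant-id-prefix` flag could be used to shard `find_garbage` runs in parallel; the scrubber binary name and subcommand spelling here are assumptions, only the flag comes from the PR text:

```python
import subprocess

# Tenant IDs are hex, so 16 single-character prefixes cover the whole keyspace.
PREFIXES = "0123456789abcdef"

def run_sharded_find_garbage() -> None:
    procs = []
    for prefix in PREFIXES:
        # Hypothetical invocation; only --tenant-id-prefix is taken from the PR.
        cmd = ["storage_scrubber", "find-garbage", "--tenant-id-prefix", prefix]
        procs.append(subprocess.Popen(cmd))
    for p in procs:
        p.wait()

if __name__ == "__main__":
    run_sharded_find_garbage()
```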
Christian Schwarz
89b6cb8eba Revert "vanilla tokio based timer impl based on tokio::time::Sleep"
This reverts commit 517dda849f.
2024-11-20 20:17:49 +01:00
Christian Schwarz
c68661dfb3 Revert "undo local modifications to benchmark"
This reverts commit 7be13bc5a6.
2024-11-20 19:53:06 +01:00
Christian Schwarz
517dda849f vanilla tokio based timer impl based on tokio::time::Sleep 2024-11-20 19:52:47 +01:00
Christian Schwarz
f22ad868cf Revert "tokio_timerfd::Delay based impl"
This reverts commit fcda7a72c6.
2024-11-20 19:45:37 +01:00
Christian Schwarz
fcda7a72c6 tokio_timerfd::Delay based impl
Performs identically to the async-timer::Timer features=tokio1 impl, which
makes sense because it's the same thing happening under the hood.

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e004780ea9decc82281f6b8d1
2024-11-20 19:42:00 +01:00
Christian Schwarz
469ce810fc Revert "async-timer based approach (again, with data)"
This reverts commit 689788cbba.
2024-11-20 19:40:24 +01:00
Christian Schwarz
21866faa8a Revert "try async-timer 1.0.0-beta15 (still signal-based timers)"
This reverts commit c73e9e40e9.
2024-11-20 19:37:51 +01:00
Christian Schwarz
cbb5817997 Revert "async-timer 1.0.0-beta15 with features=tokio1"
This reverts commit 68550f0f50.
2024-11-20 19:37:44 +01:00
Vlad Lazar
ee26f09e45 pageserver: remove shard split hard link assertion (#9829)
## Problem

We were hitting this assertion in debug mode tests sometimes.

This case was hit when the parent shard had no resident layers. For
instance, this is the case on a split retry where the previous attempt
shut down the parent and deleted local state for it. If the logical size
calculation does not download some layers before we get to the
hardlinking, then the assertion is hit.

## Summary of Changes

Remove the assertion. It's fine for the ancestor to not have any
resident layers at the time of the split.

Closes https://github.com/neondatabase/neon/issues/9412
2024-11-20 18:33:05 +00:00
Christian Schwarz
5f3e6f398c Revert "try interval-based impl to cross-check"
This reverts commit 721643beed.
2024-11-20 18:52:55 +01:00
Christian Schwarz
721643beed try interval-based impl to cross-check
=> zero batching

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e00478065a9b3e51726082885
2024-11-20 18:50:48 +01:00
Conrad Ludgate
f36f0068b8 chore(proxy): demote more logs during successful connection attempts (#9828)
Follow up to #9803 

See https://github.com/neondatabase/cloud/issues/14378

In collaboration with @cloneable and @awarus, we sifted through logs and
simply demoted some logs to debug. This is not at all finished and there
are more logs to review, but we ran out of time in the session we
organised. In any slightly more nuanced cases, we didn't touch the log,
instead leaving a TODO comment.

I've also slightly refactored the sql-over-http body read/length reject
code. I can split that into a separate PR. It just felt natural after I
switched to `read_body_with_limit` as we discussed during the meet.
2024-11-20 17:50:39 +00:00
Christian Schwarz
68550f0f50 async-timer 1.0.0-beta15 with features=tokio1
Best batching factor so far with no worse degradation of
un-batchable workloads than the other candidates.

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e004780c0921fe99e1da0e8c9
2024-11-20 18:41:31 +01:00
Christian Schwarz
c73e9e40e9 try async-timer 1.0.0-beta15 (still signal-based timers)
Results unchanged compared to 0.7.4

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e004780e18416cc0faf2aca65
2024-11-20 18:32:53 +01:00
John Spray
5ff2f1ee7d pageserver: enable compaction to proceed while live-migrating (#5397)
## Problem

Long ago, in #5299 the tenant states for migration were added, but
respected only in a coarse-grained way: when hinted not to do deletions,
tenants would just avoid doing all GC or compaction.

Skipping compaction is not necessary for AttachedMulti, as we will soon
become the primary attached location, and it is not a waste of resources
to proceed with compaction. Instead, per the RFC
(https://github.com/neondatabase/neon/pull/5029/files), deletions should
be queued up in this state, and executed later when we switch to
AttachedSingle.

Avoiding compaction in AttachedMulti can have an operational impact if a
tenant is under significant write load, as a long-running migration can
result in a large accumulation of delta layers with commensurate impact
on read latency.

Closes: https://github.com/neondatabase/neon/issues/5396

## Summary of changes

- Add a 'config' part to RemoteTimelineClient so that it can be aware of
the mode of the tenant it belongs to, and wire this through for
construction + updates
- Add a special buffer for delayed deletions, and when in AttachedMulti
route deletions here instead of into the main remote client queue. This
is drained when transitioning to AttachedSingle. If the tenant is
detached or our process dies before then, then these objects are leaked.
- As a quality of life improvement, also use the remote timeline
client's knowledge of the tenant state to avoid submitting remote
consistent LSN updates for validation when in AttachedStale (as we know
these will fail)

2024-11-20 17:31:55 +00:00
John Spray
67f5f83edc pageserver: avoid reading SLRU blocks for GC on shards >0 (#9423)
## Problem

SLRU blocks, which can add up to several gigabytes, are currently
ingested by all shards, multiplying their capacity cost by the shard
count and slowing down ingest. We do this because all shards need the
SLRU pages to do timestamp->LSN lookup for GC.

Related: https://github.com/neondatabase/neon/issues/7512

## Summary of changes

- On non-zero shards, learn the GC offset from shard 0's index instead
of calculating it.
- Add a test `test_sharding_gc` that exercises this
- Do GC in test_pg_regress as a general smoke test that GC functions run
(e.g. this would fail if we were using SLRUs we didn't have)

In this PR we are still ingesting SLRUs everywhere, but not using them
any more. Part 2 PR (https://github.com/neondatabase/neon/pull/9786)
makes the change to not store them at all.

2024-11-20 15:56:14 +00:00
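A conceptual sketch of the change described above: non-zero shards no longer compute the GC cutoff from an SLRU timestamp-to-LSN lookup but read it from shard 0's index. All names and values here are illustrative:

```python
def gc_cutoff_for_shard(shard_number: int, local_timestamp_to_lsn, read_shard_zero_index) -> int:
    """Toy model: where does a shard get its GC cutoff LSN from?"""
    if shard_number == 0:
        # Shard 0 keeps the SLRU pages and can do the timestamp -> LSN lookup itself.
        return local_timestamp_to_lsn()
    # Non-zero shards learn the cutoff from shard 0's index instead of ingesting SLRUs.
    return read_shard_zero_index()["gc_cutoff_lsn"]

# Illustrative use: shard 3 never touches SLRU data.
print(gc_cutoff_for_shard(3, local_timestamp_to_lsn=lambda: 0x1000000,
                          read_shard_zero_index=lambda: {"gc_cutoff_lsn": 0x0FFF000}))
```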
Christian Schwarz
7be13bc5a6 undo local modifications to benchmark 2024-11-20 16:00:19 +01:00
John Spray
593e35027a tests: use fewer pageservers in test_sharding_split_smoke (#9804)
## Problem

This test uses a gratuitous number of pageservers (16). This works fine
when there are plenty of system resources, but causes issues on test
runners that have limited resources and run many tests concurrently.

Related: https://github.com/neondatabase/neon/issues/9802

## Summary of changes

- Split from 2 shards to 4, instead of 4 to 8
- Don't give every shard a separate pageserver, let two locations share
each pageserver.

Net result is 4 pageservers instead of 16
2024-11-20 14:57:59 +00:00
Christian Schwarz
689788cbba async-timer based approach (again, with data)
Yep, it's clearly the best one with best batching factor at lowest CPU
usage.

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e004780d0a205e081458b46db
2024-11-20 15:36:10 +01:00
Christian Schwarz
f9bf038d2c Revert "tokio_timerfd::Interval"
This reverts commit 12124b28d0.
2024-11-20 15:25:52 +01:00
Christian Schwarz
12124b28d0 tokio_timerfd::Interval
Resolution not high enough to do _any_ batching at 10us or 20us

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e0047800fb74bd8f4ab6cf8e2
2024-11-20 15:25:14 +01:00
Christian Schwarz
1d85bec0ea Revert "tokio::time::Interval based approach"
This reverts commit 81d99704ee.
2024-11-20 15:13:26 +01:00
Christian Schwarz
81d99704ee tokio::time::Interval based approach
batching at 10us doesn't work well enough, probably because the future is
ready too soon. The batching factor is just 1.5.

https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e004780b79c8dd6d007dbb120
2024-11-20 15:13:11 +01:00
Christian Schwarz
f3ed5692ea Revert "async-timer based approach"
This reverts commit 1639b26002.
2024-11-20 14:49:09 +01:00
Christian Schwarz
1639b26002 async-timer based approach
With this, 10us batching timeout works, but it has some other wrinkles:

- it uses the signal-based timer APIs instead of going through epoll (=> timerfd)
- it needs to make a syscall for each batch, which costs around 1-2us, so, probably significant CPU time wasted on this.
2024-11-20 14:49:01 +01:00
Christian Schwarz
af95320a8c Revert "Revert "switch back to tokio::time::sleep, to get the numbers""
This reverts commit aa695b2ad7.
2024-11-20 14:25:05 +01:00
Christian Schwarz
b299eb19e2 fixup whitespace stuff 2024-11-20 14:23:55 +01:00
Christian Schwarz
88d52b31b7 Merge branch 'problame/batching-benchmark' into problame/merge-getpage-test 2024-11-20 14:23:22 +01:00
Christian Schwarz
aa695b2ad7 Revert "switch back to tokio::time::sleep, to get the numbers"
This reverts commit b9746168ff.
2024-11-20 14:22:31 +01:00
Christian Schwarz
b695907752 page_service: add benchmark for batching
This PR adds a benchmark to demonstrate the effect of server-side
getpage request batching added in https://github.com/neondatabase/neon/pull/9321.

Refs:

- Epic: https://github.com/neondatabase/neon/issues/9376
- Extracted from https://github.com/neondatabase/neon/pull/9792
2024-11-20 14:18:42 +01:00
Christian Schwarz
75041cb61b bench fixups 2024-11-20 13:59:21 +01:00
Christian Schwarz
e80ce970f7 collect CPU utilization 2024-11-20 13:55:05 +01:00
Christian Schwarz
f2de5b504f make it a proper benchmark 2024-11-20 13:52:05 +01:00
Folke Behrens
bf7d859a8b proxy: Rename RequestMonitoring to RequestContext (#9805)
## Problem

It is called context/ctx everywhere and the Monitoring suffix needlessly
confuses with proper monitoring code.

## Summary of changes

* Rename RequestMonitoring to RequestContext
* Rename RequestMonitoringInner to RequestContextInner
2024-11-20 12:50:36 +00:00
Alexander Bayandin
899933e159 scan_log_for_errors: check that regex is correct (#9815)
## Problem

I've noticed that we have 2 flaky tests which failed with error:
```
re.error: missing ), unterminated subpattern at position 21
```

- `test_timeline_archival_chaos` — has already been fixed
- `test_sharded_tad_interleaved_after_partial_success` — I didn't manage
to find the incorrect regex

[Internal link](https://neonprod.grafana.net/goto/yfmVHV7NR?orgId=1) 

## Summary of changes
- Wrap `re.match` in a `try..except` block and print the incorrect regex
2024-11-20 12:48:21 +00:00
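A minimal sketch of the wrapping described above; the function and variable names are illustrative, not the actual test-harness code:

```python
import re

def scan_log_for_errors(log_lines, allowed_error_patterns):
    """Return log lines that look like errors and are not covered by an allowed pattern."""
    unexpected = []
    for line in log_lines:
        if "ERROR" not in line and "WARN" not in line:
            continue
        allowed = False
        for pattern in allowed_error_patterns:
            try:
                if re.match(pattern, line):
                    allowed = True
                    break
            except re.error as e:
                # Surface the offending pattern so a broken allow-list entry is easy to spot.
                raise AssertionError(f"invalid allowed-error regex {pattern!r}: {e}") from e
        if not allowed:
            unexpected.append(line)
    return unexpected
```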
Alexander Bayandin
46beecacce CI(benchmarking): route test failures to on-call-qa-staging-stream (#9813)
## Problem

We want to keep `#on-call-staging-stream` channel close to the prod one
and redirect notifications from failing benchmarks to another channel
for investigation.

## Summary of changes
- Send notifications regarding failures in `benchmarking` job to
`#on-call-staging-stream`
- Send notifications regarding failures in `periodic_pagebench` job to
`#on-call-staging-stream`
2024-11-20 12:23:41 +00:00
Fedor Dikarev
94e4a0e2a0 update macos version for runner (#9817)
Closes: https://github.com/neondatabase/neon/issues/9816

Run macOS builds on `macos-15`.
As `pkg-config` is bundled in the runner image, don't install it with `brew`.
2024-11-20 13:04:14 +01:00
Christian Schwarz
b9746168ff switch back to tokio::time::sleep, to get the numbers
=> https://www.notion.so/neondatabase/benchmarking-notes-143f189e004780c4a630cb5f426e39ba?pvs=4#144f189e00478054b8a3e325735ffa19

=> unacceptable
2024-11-20 12:50:29 +01:00
Christian Schwarz
5cc0059088 parametrize more test 2024-11-20 12:48:47 +01:00
John Spray
33dce25af8 safekeeper: block deletion on protocol handler shutdown (#9364)
## Problem

Two recently observed log errors indicate safekeeper tasks for a
timeline running after that timeline's deletion has started.
- https://github.com/neondatabase/neon/issues/8972
- https://github.com/neondatabase/neon/issues/8974

These code paths do not have a mechanism that coordinates task shutdown
with the overall shutdown of the timeline.

## Summary of changes

- Add a `Gate` to `Timeline`
- Take the gate as part of resident timeline guard: any code that holds
a guard over a timeline staying resident should also hold a guard over
the timeline's total lifetime.
- Take the gate from the wal removal task
- Respect Timeline::cancel in WAL send/recv code, so that we do not
block shutdown indefinitely.
- Add a test that deletes timelines with open pageserver+compute
connections, to check these get torn down as expected.

There is some risk to introducing gates: if there is code holding a gate
which does not properly respect a cancellation token, it can cause
shutdown hangs. The risk of this for safekeepers is lower in practice
than it is for other services, because in a healthy timeline deletion,
the compute is shutdown first, then the timeline is deleted on the
pageserver, and finally it is deleted on the safekeepers -- that makes
it much less likely that some protocol handler will still be running.

Closes: #8972
Closes: #8974
2024-11-20 11:07:45 +00:00
Conrad Ludgate
3ae0b2149e chore(proxy): demote a ton of logs for successful connection attempts (#9803)
See https://github.com/neondatabase/cloud/issues/14378

In collaboration with @cloneable and @awarus, we sifted through logs and
simply demoted some logs to debug. This is not at all finished and there
are more logs to review, but we ran out of time in the session we
organised. In any slightly more nuanced cases, we didn't touch the log,
instead leaving a TODO comment.
2024-11-20 10:14:28 +00:00
Arpad Müller
0a499a3176 Don't preload offloaded timelines (#9646)
In timeline preloading, we also do a preload for offloaded timelines.
This includes the download of `index-part.json`. Ultimately, such a
download is wasteful, therefore avoid it. Same goes for the remote
client, we just discard it immediately thereafter.

Part of #8088

---------

Co-authored-by: Christian Schwarz <christian@neon.tech>
2024-11-20 05:44:23 +00:00
Matthias van de Meent
ea1858e3b6 compute_ctl: Streamline and Pipeline startup SQL (#9717)
Before, compute_ctl didn't have a good registry for what command would
run when, depending exclusively on sync code to apply changes. When
users have many databases/roles to manage, this step can take a
substantial amount of time, breaking assumptions about low (re)start
times in other systems.

This commit reduces the time compute_ctl takes to restart when changes
must be applied, by making all commands more or less blind writes, and
applying these commands in an asynchronous context, only waiting for
completion once we know the commands have all been sent.

Additionally, this reduces time spent by batching per-database
operations where previously we would create a new SQL connection for
every user-database operation we planned to execute.
2024-11-20 02:14:58 +01:00
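An asyncio sketch of the approach described above: issue the per-database commands concurrently as blind writes and only wait for completion once at the end. The connection type is a stand-in, since the real compute_ctl code is Rust:

```python
import asyncio

class FakeConn:
    """Stand-in for an async SQL connection; the real code is Rust, not Python."""
    def __init__(self, dbname: str) -> None:
        self.dbname = dbname
    async def execute(self, stmt: str) -> None:
        await asyncio.sleep(0.01)   # pretend the statement takes a little while
    async def close(self) -> None:
        pass

async def connect(dbname: str) -> FakeConn:
    return FakeConn(dbname)

async def apply_spec(databases: list[str]) -> None:
    """Apply per-database statements concurrently instead of one sync connection at a time."""
    async def apply_one(db: str) -> None:
        conn = await connect(db)
        try:
            await conn.execute("ALTER ROLE app_user WITH LOGIN")  # example blind write
        finally:
            await conn.close()
    # Fire everything off, then wait for completion once at the end.
    await asyncio.gather(*(apply_one(db) for db in databases))

asyncio.run(apply_spec([f"db{i}" for i in range(100)]))
```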
Alexander Bayandin
2281a02c49 CODEOWNERS: add developer-productivity team (#9810)
Notify @neondatabase/developer-productivity team about changes in CI
(i.e. in `.github/` directory)
2024-11-20 00:30:24 +00:00
Alexander Bayandin
725e0a1ac9 CI(release): create reusable workflow for releases (#9806)
## Problem

We have a bunch of duplicated code for automated releases. There will be
even more, once we have `release-compute` branch
(https://github.com/neondatabase/neon/pull/9637).

Another issue with the current `release` workflow is that it creates a
PR from main as-is. If we create 2 different releases from the
same commit, GitHub could mix up results from different PRs.

## Summary of changes
- Create a reusable workflow for releases
- Create an empty commit to differentiate releases
2024-11-19 23:03:15 +00:00
Konstantin Knizhnik
770ac34ae6 Register custom xlog reader callbacks for on-demand WAL download in StartupDecodingContext (#9007)
## Problem

See https://github.com/neondatabase/neon/issues/8931
On-demand WAL download is not set up in all cases where WAL is accessed by
logical replication.

## Summary of changes

Set custom xlog reader handles in StartupDecodingContext

Related changes in Postgres modules:

https://github.com/neondatabase/postgres/pull/495
https://github.com/neondatabase/postgres/pull/496
https://github.com/neondatabase/postgres/pull/497
https://github.com/neondatabase/postgres/pull/498

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-11-19 22:29:57 +02:00
Alex Chi Z.
b22a84a7bf feat(pageserver): support key range for manual compaction trigger (#9723)
part of https://github.com/neondatabase/neon/issues/9114, we want to be
able to run partial gc-compaction in tests. In the future, we can also
expand this functionality to legacy compaction, so that we can trigger
compaction for a specific key range.

## Summary of changes

* Support passing compaction key range through pageserver routes.
* Refactor input parameters of compact related function to take the new
`CompactOptions`.
* Add tests for partial compaction. Note that the test may or may not
trigger compaction based on GC horizon. We need to improve the test case
to ensure things always get below the gc_horizon and the gc-compaction
can be triggered.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-19 19:38:41 +00:00
Arpad Müller
b092126c94 scrubber: fix parsing issue with Azure (#9797)
Apparently Azure returns timelines ending with `/` which confuses the
parsing. So remove all trailing `/`s before attempting to parse.

Part of https://github.com/neondatabase/cloud/issues/19963
2024-11-19 20:10:53 +01:00
Alex Chi Z.
5e3fbef721 fix(pageserver): queue stopped error should be ignored during create timeline (#9767)
close https://github.com/neondatabase/neon/issues/9730

The test case tests what happens when the pageserver restarts while
timeline creation is not yet complete. In that case, a "queue is stopped"
error is normal, except that it should be categorized as a shutdown error
instead of a real error.

## Summary of changes

* More comments for the test case.
* Queue stopped error will now be forwarded as
CreateTimelineError::ShuttingDown.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-19 14:10:09 -05:00
dependabot[bot]
15468cd23c build(deps): bump aiohttp from 3.10.2 to 3.10.11 (#9794) 2024-11-19 19:08:00 +00:00
Christian Schwarz
911946a3cd fixes 2024-11-19 19:01:55 +01:00
Christian Schwarz
61ff84a3a2 compiles 2024-11-19 18:40:56 +01:00
Peter Bendel
a8ac895b83 re-acquire S3 OIDC token after long running tests for report upload to S3 (#9799)
## Problem

If a benchmark or test case runs longer than the AWS OIDC token lifetime,
subsequent uploads of test reports to S3 fail - example:


https://github.com/neondatabase/neon/actions/runs/11905529176/job/33176168174#step:9:243

## Summary of changes

In actions that require access to S3 and are invoked after a long-running
Python testcase, we re-acquire the OIDC token explicitly.
Note that we need to pass down the aws_oicd_role_arn from the workflow
to the action because actions have no access to GitHub vars for security
reasons.

Sample run
https://github.com/neondatabase/neon/actions/runs/11912328276/job/33195676867
2024-11-19 18:22:51 +01:00
Heikki Linnakangas
ada84400b7 PostgreSQL minor version updates (17.2, 16.6, 15.10, 14.15) (#9795)
The community decided to make a new off-schedule release due to ABI
breakage in last week's release. We're not affected by the ABI
breakage because we rebuild all extensions in our docker images, but
let's stay up-to-date. There were a few other fixes in the release
too.
2024-11-19 17:01:05 +02:00
Conrad Ludgate
191f745c81 fix(proxy/auth_broker): ignore -pooler suffix (#9800)
Fixes https://github.com/neondatabase/cloud/issues/20400

We cannot mix local_proxy and pgbouncer, so we are filtering out the
`-pooler` suffix prior to calling wake_compute.
2024-11-19 13:58:26 +00:00
Conrad Ludgate
37b97b3a68 chore(local_proxy): reduce some startup logging (#9798)
Currently, local_proxy will write an error log if it doesn't find the
config file. This is expected for startup, so it's just noise. It is an
error if we do receive an explicit SIGHUP though.

I've also demoted the build info logs to be debug level. We don't need
them in the compute image since we have other ways to determine what
code is running.

Lastly, I've demoted SIGHUP signal handling from warn to info, since
it's not really a warning event.

See https://github.com/neondatabase/cloud/issues/10880 for more details
2024-11-19 13:58:11 +00:00
Konstantin Knizhnik
c9acd214ae Do not create DSM segment for wal_redo_postgres (#9793)
## Problem

See  https://github.com/neondatabase/neon/issues/9738

## Summary of changes

Do not create DSM segment for wal_redo Postgres

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-11-19 11:56:40 +02:00
Peter Bendel
982cb1c15d Move logic for ingest benchmark from GitHub workflow into python testcase (#9762)
## Problem

The first version of the ingest benchmark had some parsing and reporting
logic in a shell script inside the GitHub workflow.
It is better to move that logic into a Python testcase so that we can
also run it locally.

## Summary of changes

- Create new python testcase
- invoke pgcopydb inside python test case
- move the following logic into python testcase
  - determine backpressure
  - invoke pgcopydb and report its progress
  - parse pgcopydb log and extract metrics
  - insert metrics into perf test database
 
- add an additional column to the perf test database that records the
endpoint ID used for the pgcopydb run, so it is available in the Grafana
dashboard when retrieving other metrics for an endpoint

## Example run


https://github.com/neondatabase/neon/actions/runs/11860622170/job/33056264386
2024-11-19 09:46:46 +00:00
Christian Schwarz
15e21c714b got it working and turn it more into a benchmark 2024-11-18 23:57:14 +01:00
Christian Schwarz
0689965282 WIP: page_service: add basic testcase for merging
The steps in the test work in neon_local + psql
but for some reason they don't work in the test.

Asked compute team on Slack for help:
https://neondb.slack.com/archives/C04DGM6SMTM/p1731952688386789
2024-11-18 23:57:14 +01:00
Arpad Müller
9b6af2bcad Add the ability to configure GenericRemoteStorage for the scrubber (#9652)
Earlier work (#7547) has made the scrubber internally generic, but one
could only configure it to use S3 storage.

This is the final piece to make the scrubber (mostly; snapshotting still
requires S3) configurable via GenericRemoteStorage.

I.e. you can now set an env var like:

```
REMOTE_STORAGE_CONFIG='remote_storage = { bucket_name = "neon-dev-safekeeper-us-east-2d", bucket_region = "us-east-2" }'
```

and the scrubber will read it instead.
2024-11-18 21:01:48 +00:00
Arpad Müller
4fc3af15dd Remove at most one retain_lsn entry from (possibly offloaded) timelne's parent (#9791)
There is a potential data corruption issue, not one I've encountered,
but it's still not hard to hit with some correct-looking code given our
current architecture. It has to do with how the timeline's in-memory object
is stored via reference-counted `Arc`s, and with the removal of `retain_lsn`
entries at the drop of the last `Arc` reference.

The corruption steps are as follows:

1. timeline gets offloaded. timeline object A doesn't get dropped
though, because some long-running task accesses it
2. the same timeline gets unoffloaded again. timeline object B gets
created for it, timeline object A still referenced. both point to the
same timeline.
3. the task keeping the reference to timeline object A exits. destructor
for object A runs, removing `retain_lsn` in the timeline's parent.
4. the timeline's parent runs gc without the `retain_lsn` of the still
extant child timeline, leading to data corruption.

In general we are susceptible each time we recreate a `Timeline`
object in the same process, which happens both during a timeline
offload/unoffload cycle, as well as during an ancestor detach operation.

The solution this PR implements is to make the destructor for a timeline
as well as an offloaded timeline remove at most one `retain_lsn`.

PR #9760 has added a log line to print the refcounts at timeline
offload, but this only detects one of the places where we do such a
recycle operation. Plus it doesn't prevent the actual issue.

I doubt that this occurs in practice. It is more a defense-in-depth measure.
Usually I'd assume that the timeline gets dropped immediately in step 1,
as there are no background tasks referencing it after its shutdown.
But one never knows, and reducing the stakes of step 1 actually occurring,
from potential data corruption down to wasted CPU time, is a really good idea.

Part of #8088
2024-11-18 21:42:19 +01:00
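A toy illustration of the "remove at most one entry" rule the PR describes; the data structures are simplified stand-ins for the Rust ones:

```python
class ParentTimeline:
    """Keeps one retain_lsn entry per (possibly duplicated) child Timeline object."""

    def __init__(self) -> None:
        self.retain_lsns: list[tuple[int, str]] = []   # (lsn, child_timeline_id)

    def register_child(self, lsn: int, child_id: str) -> None:
        self.retain_lsns.append((lsn, child_id))

    def on_child_dropped(self, lsn: int, child_id: str) -> None:
        # Remove at most one matching entry. If two Timeline objects for the same
        # child existed (offload/unoffload cycle), dropping the stale one no longer
        # wipes the entry the live object still relies on.
        try:
            self.retain_lsns.remove((lsn, child_id))
        except ValueError:
            pass
```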
Vlad Lazar
d7662fdc7b feat(page_service): timeout-based batching of requests (#9321)
## Problem

We don't take advantage of queue depth generated by the compute
on the pageserver. We can process getpage requests more efficiently
by batching them. 

## Summary of changes

Batch up incoming getpage requests that arrive within a configurable
time window (`server_side_batch_timeout`).
Then process the entire batch via one `get_vectored` timeline operation.
By default, no merging takes place.

## Testing

* **Functional**: https://github.com/neondatabase/neon/pull/9792
* **Performance**: will be done in staging/pre-prod

# Refs

* https://github.com/neondatabase/neon/issues/9377
* https://github.com/neondatabase/neon/issues/9376

Co-authored-by: Christian Schwarz <christian@neon.tech>
2024-11-18 20:24:03 +00:00
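An asyncio sketch of the batching idea described above: requests arriving within the batch timeout are collected and then served by a single vectored read. The name `server_side_batch_timeout` comes from the PR; the queue shape, `max_batch_size` limit and `get_vectored` callable are illustrative assumptions:

```python
import asyncio

async def batching_loop(queue: asyncio.Queue, server_side_batch_timeout: float,
                        max_batch_size: int, get_vectored):
    """Collect getpage requests for up to server_side_batch_timeout, then serve them as one batch."""
    loop = asyncio.get_running_loop()
    while True:
        batch = [await queue.get()]                 # block until the first request arrives
        deadline = loop.time() + server_side_batch_timeout
        while len(batch) < max_batch_size:
            remaining = deadline - loop.time()
            if remaining <= 0:
                break
            try:
                batch.append(await asyncio.wait_for(queue.get(), timeout=remaining))
            except asyncio.TimeoutError:
                break
        # One vectored read answers the whole batch.
        pages = await get_vectored([req["key"] for req in batch])
        for req, page in zip(batch, pages):
            req["reply"].set_result(page)
```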
Alex Chi Z.
e5c89f3da3 feat(pageserver): drop disposable keys during gc-compaction (#9765)
close https://github.com/neondatabase/neon/issues/9552, close
https://github.com/neondatabase/neon/issues/8920, part of
https://github.com/neondatabase/neon/issues/9114

## Summary of changes

* Drop keys not belonging to this shard during gc-compaction to avoid
constructing history that might have been truncated during shard
compaction.
* Run gc-compaction at the end of shard compaction test.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2024-11-18 19:27:52 +00:00
Alexey Kondratov
5f0e9c9a94 feat(compute/tests): Report successful replication test runs as well (#9787)
It should increase the visibility of whether they run and pass.
2024-11-18 16:05:09 +00:00
Alexander Bayandin
44f33b2bd6 Bump default Postgres version for tests to v17 (#9777)
## Problem
Tests that are marked with `run_only_on_default_postgres` do not run on
debug builds on CI because we run debug builds only for the latest
Postgres version (which is 17)

## Summary of changes
- Bump `PgVersion.DEFAULT` to `v17`
- Skip `test_timeline_archival_chaos` in debug builds
2024-11-18 15:06:24 +00:00
Alexander Bayandin
913b5b7027 CI: remove separate check-build-tools-image workflow (#9708)
## Problem

We call `check-build-tools-image` twice for each workflow whenever we
use it, along with `build-build-tools-image`, once as a workflow itself,
and the second time from `build-build-tools-image`. This is not
necessary.

## Summary of changes
- Inline `check-build-tools-image` into `build-build-tools-image`
- Remove separate `check-build-tools-image` workflow
2024-11-18 13:14:28 +00:00
John Spray
3f401a328f tests: mitigate bug to stabilize test_storage_controller_many_tenants (#9771)
## Problem

Due to #9471 , the scale test occasionally gets 404s while trying to
modify the config of a timeline that belongs to a tenant being migrated.
We rarely see this narrow race in the field, but the test is quite good
at reproducing it.

## Summary of changes

- Ignore 404 errors in this test.
2024-11-18 11:33:27 +00:00
Peter Bendel
c3eecf6763 adapt pgvector bench to minor version upgrades of PostgreSQL (#9784)
## Problem

The pgvector benchmark is failing because, after the PostgreSQL minor
version upgrade, the previous version's packages are no longer available
in the deb repository.

[example
failure](https://github.com/neondatabase/neon/actions/runs/11875503070/job/33092787149#step:4:40)

## Summary of changes

Update postgres minor version of packages to current version

[Example run after this
change](https://github.com/neondatabase/neon/actions/runs/11888978279/job/33124614605)
2024-11-18 10:47:43 +00:00
Konstantin Knizhnik
6fa9b0cd8c Use DATA_DIR instead of current working directory in restore_from_wal script (#9729)
## Problem

See https://github.com/neondatabase/neon/issues/7750

test_wal_restore.sh copies a file to the current working directory, which
can cause interference between test_wal_restore.py tests spawned with
different configurations.

## Summary of changes

Copy file to $DATA_DIR

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-11-18 11:55:38 +02:00
a-masterov
10bc1903e1 Fix the regression test running against the staging instance (#9773)
## Problem
The Postgres version was updated. The patch has to be updated
accordingly.
## Summary of changes
The patch of the regression test was updated.
2024-11-18 10:30:50 +01:00
John Spray
261d065e6f pageserver: respect no_sync in VirtualFile (#9772)
## Problem

`no_sync` initially just skipped syncfs on startup (#9677). I'm also
interested in flaky tests that time out during pageserver shutdown while
flushing L0s, so, to eliminate disk throughput as a source of issues
there, this change makes `VirtualFile` respect `no_sync` as well.

## Summary of changes

- Drive-by change for test timeouts: add a couple more ::info logs
during pageserver startup so it's obvious which part got stuck.
- Add a SyncMode enum to configure VirtualFile and respect it in
sync_all and sync_data functions
- During pageserver startup, set SyncMode according to `no_sync`
2024-11-18 08:59:05 +00:00
Christian Schwarz
b6154b03f4 build(deps): bump smallvec to 1.13.2 to get UB fix (#9781)
Smallvec 1.13.2 contains [an UB
fix](https://github.com/servo/rust-smallvec/pull/345).

Upstream opened [a
request](https://github.com/rustsec/advisory-db/issues/1960)
for this in the advisory-db but it never got acted upon.

Found while working on https://github.com/neondatabase/neon/pull/9321.
2024-11-17 21:25:16 +01:00
Erik Grinaker
8880134171 Cargo.toml: upgrade tikv-jemallocator to 0.6.0 (#9779) 2024-11-17 19:52:05 +01:00
Erik Grinaker
de7e4a34ca safekeeper: send AppendResponse on segment flush (#9692)
## Problem

When processing pipelined `AppendRequest`s, we explicitly flush the WAL
every second and return an `AppendResponse`. However, the WAL is also
implicitly flushed on segment bounds, but this does not result in an
`AppendResponse`. Because of this, concurrent transactions may take up
to 1 second to commit, and writes may take up to 1 second before being sent
to the pageserver.

## Summary of changes

Advance `flush_lsn` when a WAL segment is closed and flushed, and emit
an `AppendResponse`. To accommodate this, track the `flush_lsn` in
addition to the `flush_record_lsn`.
2024-11-17 18:19:14 +01:00
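A toy model of the change described above: tracking `flush_lsn` separately from `flush_record_lsn` and emitting a response whenever a segment boundary forces an implicit flush. This is a simplified sketch, not safekeeper code, and the callback name is invented:

```python
WAL_SEGMENT_SIZE = 16 * 1024 * 1024   # bytes; illustrative

class WalWriter:
    """Toy model of the two flush positions the PR distinguishes."""

    def __init__(self) -> None:
        self.write_lsn = 0          # how far WAL has been buffered
        self.flush_lsn = 0          # how far WAL is durably flushed (may sit mid-record)
        self.flush_record_lsn = 0   # last fully flushed record boundary

    def append(self, record_len: int, send_append_response) -> None:
        start = self.write_lsn
        self.write_lsn += record_len
        # Crossing a segment boundary closes and flushes the segment implicitly.
        if start // WAL_SEGMENT_SIZE != self.write_lsn // WAL_SEGMENT_SIZE:
            self.flush_lsn = (self.write_lsn // WAL_SEGMENT_SIZE) * WAL_SEGMENT_SIZE
            # Emit a response right away instead of waiting for the 1-second flush tick;
            # flush_record_lsn still only advances when whole records are flushed.
            send_append_response(self.flush_lsn)
```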
Vlad Lazar
ac689ab014 wal_decoder: rename end_lsn to next_record_lsn (#9776)
## Problem

It turns out that `WalStreamDecoder::poll_decode` returns the start LSN
of the next record and not the end LSN of the current record. They are
not always equal. For example, they're not equal when the record in
question is an XLOG SWITCH record.

## Summary of changes

Rename things to reflect that.
2024-11-15 21:53:11 +00:00
Tristan Partin
23eabb9919 Fix PG_MAJORVERSION_NUM typo
In ea32f1d0a3, Matthias added a feature to
our extension to expose more granular wait events. However, due to a
typo, those wait events were never registered, so we used the more
generic wait events instead.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-11-15 15:17:23 -06:00
Vlad Lazar
2af791ba83 wal_decoder: make InterpretedWalRecord serde (#9775)
## Problem

We want to serialize interpreted records to send them over the wire from
safekeeper to pageserver.

## Summary of changes

Make `InterpretedWalRecord` ser/de. This is a temporary change to get
the bulk of the lift merged in
https://github.com/neondatabase/neon/pull/9746. For going to prod, we
don't want to use bincode since we can't evolve the schema.
Questions on serialization will be tackled separately.
2024-11-15 20:34:48 +00:00
Mikhail Kot
e12628fe93 Collect max_connections metric (#9770)
This will further allow us to expose this metric to users
2024-11-15 17:42:41 +00:00
Arpad Müller
7880c246f1 Correct mistakes in offloaded timeline retain_lsn management (#9760)
PR #9308 has modified tenant activation code to take offloaded child
timelines into account for populating the list of `retain_lsn` values.
However, there is more places than just tenant activation where one
needs to update the `retain_lsn`s.

This PR fixes some bugs of the current code that could lead to
corruption in the worst case:

1. Deleting of an offloaded timeline would not get its `retain_lsn`
purged from its parent. With the patch we now do it, but as the parent
can be offloaded as well, the situation is a bit trickier than for
non-offloaded timelines which can just keep a pointer to their parent.
Here we can't keep a pointer because the parent might get offloaded,
then unoffloaded again, creating a dangling pointer situation. Keeping a
pointer to the *tenant* is not good either, because we might drop the
offloaded timeline in a context where a `offloaded_timelines` lock is
already held: so we don't want to acquire a lock in the drop code of
OffloadedTimeline.
2. Unoffloading a timeline would not get its `retain_lsn` values populated, which could lead to it garbage-collecting data that its children might still need. We now call `initialize_gc_info` on the parent.
3. Offloading a timeline would not get its `retain_lsn` values registered as offloaded at the parent. So if we drop the `Timeline` object and its registration is removed, the parent would not have any of the child's `retain_lsn`s around. Also, previously the `Timeline` object would delete anything related to its timeline ID; now it only deletes `retain_lsn`s that have `MaybeOffloaded::No` set (a compressed sketch of this bookkeeping follows below).
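
A compressed sketch of the bookkeeping described in point 3, using hypothetical stand-in types rather than the pageserver's real GC structures:

```rust
// Illustrative only: hypothetical stand-ins for the pageserver's GC bookkeeping.
type TimelineId = u128;
type Lsn = u64;

#[derive(Clone, Copy, PartialEq)]
enum MaybeOffloaded {
    Yes,
    No,
}

#[derive(Default)]
struct GcInfo {
    /// retain_lsn entries contributed by child timelines (branch points).
    retain_lsns: Vec<(Lsn, TimelineId, MaybeOffloaded)>,
}

impl GcInfo {
    fn insert_child(&mut self, child: TimelineId, at: Lsn, kind: MaybeOffloaded) {
        self.retain_lsns.push((at, child, kind));
    }

    /// Called when a (non-offloaded) `Timeline` object is dropped: remove only
    /// the child's `MaybeOffloaded::No` entries, keeping any entry that was
    /// registered as offloaded.
    fn remove_child_not_offloaded(&mut self, child: TimelineId) {
        self.retain_lsns
            .retain(|(_, id, kind)| !(*id == child && *kind == MaybeOffloaded::No));
    }
}
```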

Incorporates Chi's reproducer from #9753. cc
https://github.com/neondatabase/cloud/issues/20199

The `test_timeline_retain_lsn` test is extended:

1. it gains a new dimension, duplicating each mode, to either have the "main" branch be the direct parent of the timeline we archive, or to have the "test_archived_parent" branch as an intermediary, creating a three-timeline structure. This doesn't test anything fixed by this PR in particular, it just explores the vast space of possible configurations a little bit more.
2. it gains two new modes, `offload-parent`, which tests the second
point, and `offload-no-restart` which tests the third point.

It's easy to verify that the test is actually "sharp" by removing one of the respective `self.initialize_gc_info()`, `gc_info.insert_child()`, or `ancestor_children.push()` calls.

Part of #8088

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Alex Chi Z <chi@neon.tech>
2024-11-15 14:22:29 +01:00
John Spray
04938d9d55 tests: tolerate pageserver 500s in test_timeline_archival_chaos (#9769)
## Problem

The test exposes cases where the pageserver gives 500 responses, causing failures like
https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9766/11844529470/index.html#suites/d1acc79950edeb0563fc86236c620898/3546be2ffed99ba6

## Summary of changes

- Tolerate such messages, and link an issue to clean up the pageserver so that it does not return such 500s.
2024-11-15 13:22:05 +00:00
Erik Grinaker
19f7d40c1d deny.toml: allow CDDL-1.0 license (#9766)
#9764, which adds profiling support to Safekeeper, pulls in the
dependency [`inferno`](https://crates.io/crates/inferno) via
[`pprof-rs`](https://crates.io/crates/pprof). This is licenced under the
[Common Development and Distribution License
1.0](https://spdx.org/licenses/CDDL-1.0.html), which is not allowed by
`cargo-deny`.

This patch allows the CDDL-1.0 license. It is a derivative of the
Mozilla Public License, which we already allow, but avoids some issues
around European copyright law that the MPL had. As such, I don't expect
this to be problematic.
2024-11-15 10:41:43 +00:00
John Spray
38563de7dd storcon: exclude non-Active tenants from shard autosplitting (#9743)
## Problem

We didn't have a neat way to prevent auto-splitting of tenants. Being able to do so would be useful during incidents or for testing.

Closes https://github.com/neondatabase/neon/issues/9332

## Summary of changes

- Filter splitting candidates by scheduling policy (a toy sketch follows below)
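
Conceptually the change is just a predicate on the scheduling policy; a toy sketch with hypothetical types, not the storage controller's actual API:

```rust
// Illustrative only: hypothetical types, not the storage controller's real ones.
#[derive(Clone, Copy, PartialEq)]
enum ShardSchedulingPolicy {
    Active,
    Paused,
    Stop,
}

struct SplitCandidate {
    tenant_id: u64,
    policy: ShardSchedulingPolicy,
}

/// Keep only tenants whose scheduling policy is Active; paused/stopped tenants
/// are excluded from autosplitting (useful during incidents or for testing).
fn filter_split_candidates(mut candidates: Vec<SplitCandidate>) -> Vec<SplitCandidate> {
    candidates.retain(|c| c.policy == ShardSchedulingPolicy::Active);
    candidates
}
```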
2024-11-14 19:41:10 +00:00
John Spray
93939f123f tests: add test_timeline_archival_chaos (#9609)
## Problem

- We lack test coverage of cases where multiple timelines fight for updates to the same manifest (https://github.com/neondatabase/neon/pull/9557), and of timeline archival changes while dual-attached (https://github.com/neondatabase/neon/pull/9555)

## Summary of changes

- Add a chaos test for timeline creation->archival->offload->deletion
2024-11-14 17:31:35 +00:00
Tristan Partin
49b599c113 Remove the replication slot in test_snap_files at the end of the test
Analysis of the LR benchmarking tests indicates that over the duration of test_subscriber_lag, a leftover 'slotter' replication slot can lead to retained WAL growing on the publisher. This replication slot is not used by any subscriber; its only purpose is to generate snapshot files for test_snap_files.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-11-14 10:59:15 -06:00
Yuchen Liang
8cde37bc0b test: disable test_readonly_node_gc until proper fix (#9755)
## Problem

After investigation, we think that making `test_readonly_node_gc` less flaky requires a proper fix (likely involving persisting part of the lease state). See https://github.com/neondatabase/neon/issues/9754 for details.

## Summary of changes

- Skip the test until a proper fix lands.

Signed-off-by: Yuchen Liang <yuchen@neon.tech>
2024-11-14 15:26:58 +00:00
Konstantin Knizhnik
f70611c8df Correctly truncate VM (#9342)
## Problem

https://github.com/neondatabase/neon/issues/9240

## Summary of changes

Correctly truncate the VM (visibility map) page instead of just replacing it with a zero page (a toy illustration follows below).
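
A toy illustration of the difference; real visibility-map pages carry a page header and use 2 bits per heap block, while the helper below ignores the header and is not the actual pageserver code:

```rust
// Illustrative only. When a relation is truncated, the bits for the removed
// heap blocks in the *last surviving* VM page must be cleared, while the bits
// for the blocks that remain must be preserved -- replacing the whole page
// with zeros would wrongly clear those too.
const BITS_PER_HEAPBLOCK: usize = 2; // all-visible + all-frozen

fn truncate_vm_page(page: &mut [u8], remaining_blocks_in_page: usize) {
    let keep_bits = remaining_blocks_in_page * BITS_PER_HEAPBLOCK;
    for (i, byte) in page.iter_mut().enumerate() {
        let first_bit = i * 8;
        if first_bit + 8 <= keep_bits {
            // byte entirely covers surviving blocks: keep as-is
        } else if first_bit >= keep_bits {
            // byte entirely covers truncated blocks: clear it
            *byte = 0;
        } else {
            // byte straddles the boundary: keep the low bits, clear the rest
            let mask = (1u16 << (keep_bits - first_bit)) as u8 - 1;
            *byte &= mask;
        }
    }
}
```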

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
2024-11-14 17:19:13 +02:00
Vlad Lazar
21282aa113 cargo: use neon branch of rust-postgres (#9757)
## Problem

We are pinning our fork of rust-postgres to a commit hash, and that prevents us from making further changes to it. The latest commit in rust-postgres requires https://github.com/neondatabase/neon/pull/8747, but that seems to have gone stale. I reverted the rust-postgres `neon` branch to the pinned commit in https://github.com/neondatabase/rust-postgres/pull/31.

## Summary of changes

Switch back to using the `neon` branch of the rust-postgres fork.
2024-11-14 15:16:43 +00:00
380 changed files with 25936 additions and 5014 deletions

View File

@@ -46,6 +46,9 @@ workspace-members = [
"utils",
"wal_craft",
"walproposer",
"postgres-protocol2",
"postgres-types2",
"tokio-postgres2",
]
# Write out exact versions rather than a semver range. (Defaults to false.)

View File

@@ -7,6 +7,10 @@ inputs:
type: boolean
required: false
default: false
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''
outputs:
base-url:
@@ -79,6 +83,14 @@ runs:
ALLURE_VERSION: 2.27.0
ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
- name: Acquire lock
shell: bash -euxo pipefail {0}

View File

@@ -8,6 +8,10 @@ inputs:
unique-key:
description: 'string to distinguish different results in the same run'
required: true
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''
runs:
using: "composite"
@@ -31,6 +35,14 @@ runs:
env:
REPORT_DIR: ${{ inputs.report-dir }}
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results
shell: bash -euxo pipefail {0}
run: |

View File

@@ -36,8 +36,8 @@ inputs:
description: 'Region name for real s3 tests'
required: false
default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
rerun_failed:
description: 'Whether to rerun failed tests'
required: false
default: 'false'
pg_version:
@@ -48,6 +48,10 @@ inputs:
description: 'benchmark durations JSON'
required: false
default: '{}'
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''
runs:
using: "composite"
@@ -104,7 +108,7 @@ runs:
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
RERUN_FAILED: ${{ inputs.rerun_failed }}
PG_VERSION: ${{ inputs.pg_version }}
shell: bash -euxo pipefail {0}
run: |
@@ -150,15 +154,8 @@ runs:
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
if [ "${RERUN_FLAKY}" == "true" ]; then
mkdir -p $TEST_OUTPUT
poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" \
--days 7 \
--output "$TEST_OUTPUT/flaky.json" \
--pg-version "${DEFAULT_PG_VERSION}" \
--build-type "${BUILD_TYPE}"
EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
if [ "${RERUN_FAILED}" == "true" ]; then
EXTRA_PARAMS="--reruns 2 $EXTRA_PARAMS"
fi
# We use pytest-split plugin to run benchmarks in parallel on different CI runners
@@ -222,6 +219,13 @@ runs:
# (for example if we didn't run the test for non build-and-test workflow)
skip-if-does-not-exist: true
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store

View File

@@ -19,8 +19,8 @@ on:
description: 'debug or release'
required: true
type: string
pg-versions:
description: 'a json array of postgres versions to run regression tests on'
test-cfg:
description: 'a json object of postgres versions and lfc states to run regression tests on'
required: true
type: string
@@ -276,14 +276,14 @@ jobs:
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
pg_version: ${{ fromJson(inputs.pg-versions) }}
matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
with:
@@ -293,13 +293,14 @@ jobs:
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_flaky: true
rerun_failed: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540

View File

@@ -0,0 +1,79 @@
name: Create Release PR
on:
workflow_call:
inputs:
component-name:
description: 'Component name'
required: true
type: string
release-branch:
description: 'Release branch'
required: true
type: string
secrets:
ci-access-token:
description: 'CI access token'
required: true
defaults:
run:
shell: bash -euo pipefail {0}
jobs:
create-storage-release-branch:
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
steps:
- uses: actions/checkout@v4
with:
ref: main
- name: Set variables
id: vars
env:
COMPONENT_NAME: ${{ inputs.component-name }}
RELEASE_BRANCH: ${{ inputs.release-branch }}
run: |
today=$(date +'%Y-%m-%d')
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Create RC branch
env:
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
git checkout -b "${RC_BRANCH}"
# create an empty commit to distinguish workflow runs
# from other possible releases from the same commit
git commit --allow-empty -m "${TITLE}"
git push origin "${RC_BRANCH}"
- name: Create a PR into ${{ inputs.release-branch }}
env:
GH_TOKEN: ${{ secrets.ci-access-token }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
RELEASE_BRANCH: ${{ inputs.release-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
cat << EOF > body.md
## ${TITLE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RC_BRANCH}" \
--base "${RELEASE_BRANCH}"

View File

@@ -122,6 +122,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
@@ -133,6 +134,7 @@ jobs:
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
--ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -149,12 +151,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic perf testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -210,6 +214,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -226,6 +231,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -237,11 +243,13 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
# Post both success and failure to the Slack channel
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
if: ${{ github.event.schedule }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
@@ -444,6 +452,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -458,6 +467,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -472,6 +482,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -488,12 +499,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -528,7 +541,7 @@ jobs:
runs-on: ${{ matrix.RUNNER }}
container:
image: neondatabase/build-tools:pinned
image: neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -545,12 +558,12 @@ jobs:
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg120+1_${arch}.deb"
dpkg -x libpq5_17.2-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-16_16.6-1.pgdg120+1_${arch}.deb pg
dpkg -x postgresql-client-16_16.6-1.pgdg120+1_${arch}.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
@@ -598,6 +611,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -612,6 +626,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -621,12 +636,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -722,6 +739,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -734,12 +752,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -836,6 +856,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -846,12 +867,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -934,6 +957,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -943,12 +967,14 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>

View File

@@ -3,17 +3,23 @@ name: Build build-tools image
on:
workflow_call:
inputs:
image-tag:
description: "build-tools image tag"
required: true
archs:
description: "Json array of architectures to build"
# Default values are set in `check-image` job, `set-variables` step
type: string
required: false
debians:
description: "Json array of Debian versions to build"
# Default values are set in `check-image` job, `set-variables` step
type: string
required: false
outputs:
image-tag:
description: "build-tools tag"
value: ${{ inputs.image-tag }}
value: ${{ jobs.check-image.outputs.tag }}
image:
description: "build-tools image"
value: neondatabase/build-tools:${{ inputs.image-tag }}
value: neondatabase/build-tools:${{ jobs.check-image.outputs.tag }}
defaults:
run:
@@ -35,7 +41,48 @@ permissions: {}
jobs:
check-image:
uses: ./.github/workflows/check-build-tools-image.yml
runs-on: ubuntu-22.04
outputs:
archs: ${{ steps.set-variables.outputs.archs }}
debians: ${{ steps.set-variables.outputs.debians }}
tag: ${{ steps.set-variables.outputs.image-tag }}
everything: ${{ steps.set-more-variables.outputs.everything }}
found: ${{ steps.set-more-variables.outputs.found }}
steps:
- uses: actions/checkout@v4
- name: Set variables
id: set-variables
env:
ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "archs=${ARCHS}" | tee -a ${GITHUB_OUTPUT}
echo "debians=${DEBIANS}" | tee -a ${GITHUB_OUTPUT}
echo "image-tag=${IMAGE_TAG}" | tee -a ${GITHUB_OUTPUT}
- name: Set more variables
id: set-more-variables
env:
IMAGE_TAG: ${{ steps.set-variables.outputs.image-tag }}
EVERYTHING: |
${{ contains(fromJson(steps.set-variables.outputs.archs), 'x64') &&
contains(fromJson(steps.set-variables.outputs.archs), 'arm64') &&
contains(fromJson(steps.set-variables.outputs.debians), 'bullseye') &&
contains(fromJson(steps.set-variables.outputs.debians), 'bookworm') }}
run: |
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
found=true
else
found=false
fi
echo "everything=${EVERYTHING}" | tee -a ${GITHUB_OUTPUT}
echo "found=${found}" | tee -a ${GITHUB_OUTPUT}
build-image:
needs: [ check-image ]
@@ -43,25 +90,12 @@ jobs:
strategy:
matrix:
debian-version: [ bullseye, bookworm ]
arch: [ x64, arm64 ]
arch: ${{ fromJson(needs.check-image.outputs.archs) }}
debian: ${{ fromJson(needs.check-image.outputs.debians) }}
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
env:
IMAGE_TAG: ${{ inputs.image-tag }}
steps:
- name: Check `input.tag` is correct
env:
INPUTS_IMAGE_TAG: ${{ inputs.image-tag }}
CHECK_IMAGE_TAG : ${{ needs.check-image.outputs.image-tag }}
run: |
if [ "${INPUTS_IMAGE_TAG}" != "${CHECK_IMAGE_TAG}" ]; then
echo "'inputs.image-tag' (${INPUTS_IMAGE_TAG}) does not match the tag of the latest build-tools image 'inputs.image-tag' (${CHECK_IMAGE_TAG})"
exit 1
fi
- uses: actions/checkout@v4
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
@@ -88,14 +122,14 @@ jobs:
push: true
pull: true
build-args: |
DEBIAN_VERSION=${{ matrix.debian-version }}
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian-version }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian-version, matrix.arch) || '' }}
DEBIAN_VERSION=${{ matrix.debian }}
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian, matrix.arch) || '' }}
tags: |
neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.debian-version }}-${{ matrix.arch }}
neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian }}-${{ matrix.arch }}
merge-images:
needs: [ build-image ]
needs: [ check-image, build-image ]
runs-on: ubuntu-22.04
steps:
@@ -106,16 +140,22 @@ jobs:
- name: Create multi-arch image
env:
DEFAULT_DEBIAN_VERSION: bullseye
IMAGE_TAG: ${{ inputs.image-tag }}
DEFAULT_DEBIAN_VERSION: bookworm
ARCHS: ${{ join(fromJson(needs.check-image.outputs.archs), ' ') }}
DEBIANS: ${{ join(fromJson(needs.check-image.outputs.debians), ' ') }}
EVERYTHING: ${{ needs.check-image.outputs.everything }}
IMAGE_TAG: ${{ needs.check-image.outputs.tag }}
run: |
for debian_version in bullseye bookworm; do
tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian_version}")
if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
for debian in ${DEBIANS}; do
tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian}")
if [ "${EVERYTHING}" == "true" ] && [ "${debian}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
tags+=("-t" "neondatabase/build-tools:${IMAGE_TAG}")
fi
docker buildx imagetools create "${tags[@]}" \
neondatabase/build-tools:${IMAGE_TAG}-${debian_version}-x64 \
neondatabase/build-tools:${IMAGE_TAG}-${debian_version}-arm64
for arch in ${ARCHS}; do
tags+=("neondatabase/build-tools:${IMAGE_TAG}-${debian}-${arch}")
done
docker buildx imagetools create "${tags[@]}"
done

View File

@@ -77,15 +77,9 @@ jobs:
shell: bash
id: build-tag
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
needs: [ check-permissions ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
check-codestyle-python:
@@ -259,7 +253,14 @@ jobs:
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
# run without LFC on v17 release only
test-cfg: |
${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
{"pg_version":"v15", "lfc_state": "without-lfc"},
{"pg_version":"v16", "lfc_state": "without-lfc"},
{"pg_version":"v17", "lfc_state": "without-lfc"},
{"pg_version":"v17", "lfc_state": "with-lfc"}]'
|| '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
secrets: inherit
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking

View File

@@ -1,51 +0,0 @@
name: Check build-tools image
on:
workflow_call:
outputs:
image-tag:
description: "build-tools image tag"
value: ${{ jobs.check-image.outputs.tag }}
found:
description: "Whether the image is found in the registry"
value: ${{ jobs.check-image.outputs.found }}
defaults:
run:
shell: bash -euo pipefail {0}
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
jobs:
check-image:
runs-on: ubuntu-22.04
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}
steps:
- uses: actions/checkout@v4
- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT
- name: Check if such tag found in the registry
id: check-image
env:
IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }}
run: |
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
found=true
else
found=false
fi
echo "found=${found}" | tee -a $GITHUB_OUTPUT

View File

@@ -1,4 +1,4 @@
name: Benchmarking
name: benchmarking ingest
on:
# uncomment to run on push for debugging your PR
@@ -74,18 +74,16 @@ jobs:
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon project and retrieve current backpressure seconds
- name: Initialize Neon project
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
run: |
echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
- name: Create Neon Branch for large tenant
if: ${{ matrix.target_project == 'large_existing_project' }}
@@ -95,266 +93,55 @@ jobs:
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon project and retrieve current backpressure seconds
- name: Initialize Neon project
if: ${{ matrix.target_project == 'large_existing_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
run: |
echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Extract the part before the database name
base_connstr="${NEW_PROJECT_CONNSTR%/*}"
base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${NEW_PROJECT_CONNSTR#*\?}"
query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}"
# Reconstruct the new connection string
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
new_connstr="${base_connstr}/neondb?${query_params}"
else
new_connstr="${base_connstr}/neondb"
fi
${PSQL} "${new_connstr}" -c "drop database ludicrous;"
${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}"
else
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous"
fi
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
- name: Create pgcopydb filter file
run: |
cat << EOF > /tmp/pgcopydb_filter.txt
[include-only-table]
public.events
public.emails
public.email_transmissions
public.payments
public.editions
public.edition_modules
public.sp_content
public.email_broadcasts
public.user_collections
public.devices
public.user_accounts
public.lessons
public.lesson_users
public.payment_methods
public.orders
public.course_emails
public.modules
public.users
public.module_users
public.courses
public.payment_gateway_keys
public.accounts
public.roles
public.payment_gateways
public.management
public.event_names
EOF
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
- name: Invoke pgcopydb
- name: Invoke pgcopydb
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: performance/test_perf_ingest_using_pgcopydb.py
run_in_parallel: false
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v16
save_perf_report: true
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
${PG_CONFIG} --bindir
${PGCOPYDB} --version
${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
--index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
--use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
# we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant
PLATFORM: "${{ matrix.target_project }}-us-east-2-staging"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
# create dummy pgcopydb log to test parsing
# - name: create dummy log for parser test
# run: |
# cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
# 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb"
# 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database
# 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
# 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
# 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
# 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints)
# 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences
# 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions
# 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database
# 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
# 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
# 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
# 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes
# 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes
# 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found.
# 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values
# 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database
# 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
# 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed
# 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes
# OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
# ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
# 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m
# 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m
# 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s
# 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s
# 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s
# 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s
# 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s
# 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s
# 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s
# 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s
# 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms
# 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s
# 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s
# 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749
# 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms
# 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696
# 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032
# 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms
# 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms
# 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms
# 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms
# 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms
# 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms
# 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms
# 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms
# 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms
# Step Connection Duration Transfer Concurrency
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Catalog Queries (table ordering, filtering, etc) source 12s 1
# Dump Schema source 765ms 1
# Prepare Schema target 466ms 1
# COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12
# COPY (cumulative) both 7h46m 1225 GB 4
# CREATE INDEX (cumulative) target 4h36m 4
# CONSTRAINTS (cumulative) target 8s493 4
# VACUUM (cumulative) target 0ms 4
# Reset Sequences both 60ms 1
# Large Objects (cumulative) (null) 0ms 0
# Finalize Schema both 14h01m 4
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Total Wall Clock Duration both 16h49m 20
# EOF
- name: show tables sizes and retrieve current backpressure seconds
- name: show tables sizes after ingest
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV
- name: Parse pgcopydb log and report performance metrics
env:
PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Define the log file path
LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"
# Get the current git commit hash
git config --global --add safe.directory /__w/neon/neon
COMMIT_HASH=$(git rev-parse --short HEAD)
# Define the platform and test suite
PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
SUIT="pgcopydb_ingest_bench"
# Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
convert_to_seconds() {
local duration=$1
local total_seconds=0
# Check for hours (h)
if [[ "$duration" =~ ([0-9]+)h ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
fi
# Check for seconds (s)
if [[ "$duration" =~ ([0-9]+)s ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
fi
# Check for milliseconds (ms) (if applicable)
if [[ "$duration" =~ ([0-9]+)ms ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m
fi
# Check for minutes (m) - must be checked after ms because m is contained in ms
if [[ "$duration" =~ ([0-9]+)m ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
fi
echo $total_seconds
}
# Calculate the backpressure difference in seconds
BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")
# Insert the backpressure time difference into the performance database
if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
eval $PSQL_CMD
fi
# Extract and process log lines
while IFS= read -r line; do
METRIC_NAME=""
# Match each desired line and extract the relevant information
if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
METRIC_NAME="COPY (cumulative)"
elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
METRIC_NAME="CREATE INDEX (cumulative)"
elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
METRIC_NAME="CONSTRAINTS (cumulative)"
elif [[ "$line" =~ Finalize\ Schema.* ]]; then
METRIC_NAME="Finalize Schema"
elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
METRIC_NAME="Total Wall Clock Duration"
fi
# If a metric was matched, insert it into the performance database
if [ -n "$METRIC_NAME" ]; then
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
METRIC_VALUE=$(convert_to_seconds "$DURATION")
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
eval $PSQL_CMD
fi
done < "$LOG_FILE"
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
- name: Delete Neon Project
if: ${{ always() && matrix.target_project == 'new_empty_project' }}

View File

@@ -26,15 +26,9 @@ jobs:
with:
github-event-name: ${{ github.event_name}}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
needs: [ check-permissions ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
check-macos-build:
@@ -44,7 +38,7 @@ jobs:
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
timeout-minutes: 90
runs-on: macos-14
runs-on: macos-15
env:
# Use release build only, to have less debug info around
@@ -58,7 +52,7 @@ jobs:
submodules: true
- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf icu4c pkg-config
run: brew install flex bison openssl protobuf icu4c
- name: Set pg 14 revision for caching
id: pg_v14_rev

View File

@@ -29,7 +29,7 @@ jobs:
trigger_bench_on_ec2_machine_in_eu_central_1:
runs-on: [ self-hosted, small ]
container:
image: neondatabase/build-tools:pinned
image: neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -72,7 +72,7 @@ jobs:
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
fi
- name: Start Bench with run_id
- name: Start Bench with run_id
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
@@ -116,7 +116,7 @@ jobs:
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
@@ -134,13 +134,13 @@ jobs:
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
if: always()
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \

View File

@@ -39,15 +39,9 @@ jobs:
with:
github-event-name: ${{ github.event_name }}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
needs: [ check-permissions ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
test-logical-replication:

View File

@@ -94,7 +94,7 @@ jobs:
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
env:
DEFAULT_DEBIAN_VERSION: bullseye
DEFAULT_DEBIAN_VERSION: bookworm
run: |
for debian_version in bullseye bookworm; do
tags=()

View File

@@ -23,6 +23,8 @@ jobs:
id: python-src
with:
files: |
.github/workflows/_check-codestyle-python.yml
.github/workflows/build-build-tools-image.yml
.github/workflows/pre-merge-checks.yml
**/**.py
poetry.lock
@@ -34,16 +36,14 @@ jobs:
run: |
echo "${PYTHON_CHANGED_FILES}"
check-build-tools-image:
build-build-tools-image:
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
# Build only one combination to save time
archs: '["x64"]'
debians: '["bookworm"]'
secrets: inherit
check-codestyle-python:
@@ -51,7 +51,8 @@ jobs:
needs: [ get-changed-files, build-build-tools-image ]
uses: ./.github/workflows/_check-codestyle-python.yml
with:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
# `-bookworm-x64` suffix should match the combination in `build-build-tools-image`
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm-x64
secrets: inherit
# To get items from the merge queue merged into main we need to satisfy "Status checks that are required".

View File

@@ -26,82 +26,26 @@ defaults:
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
contents: write
steps:
- name: Check out code
uses: actions/checkout@v4
with:
ref: main
- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
- name: Create release branch
run: git checkout -b $RELEASE_BRANCH
- name: Push new branch
run: git push origin $RELEASE_BRANCH
- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Storage & Compute release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release"
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Storage & Compute'
release-branch: 'release'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
contents: write
steps:
- name: Check out code
uses: actions/checkout@v4
with:
ref: main
- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
- name: Create release branch
run: git checkout -b $RELEASE_BRANCH
- name: Push new branch
run: git push origin $RELEASE_BRANCH
- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Proxy release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release-proxy"
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Proxy'
release-branch: 'release-proxy'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}

View File

@@ -4,10 +4,12 @@ on:
schedule:
- cron: '*/15 * * * *'
- cron: '25 0 * * *'
- cron: '25 1 * * 6'
jobs:
gh-workflow-stats-batch:
name: GitHub Workflow Stats Batch
gh-workflow-stats-batch-2h:
name: GitHub Workflow Stats Batch 2 hours
if: github.event.schedule == '*/15 * * * *'
runs-on: ubuntu-22.04
permissions:
actions: read
@@ -16,14 +18,36 @@ jobs:
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
db_table: "gh_workflow_stats_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '2h'
- name: Export Workflow Run for the past 24 hours
if: github.event.schedule == '25 0 * * *'
gh-workflow-stats-batch-48h:
name: GitHub Workflow Stats Batch 48 hours
if: github.event.schedule == '25 0 * * *'
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 48 hours
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
db_table: "gh_workflow_stats_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '24h'
duration: '48h'
gh-workflow-stats-batch-30d:
name: GitHub Workflow Stats Batch 30 days
if: github.event.schedule == '25 1 * * 6'
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 30 days
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '720h'

View File

@@ -1,42 +0,0 @@
name: Report Workflow Stats
on:
workflow_run:
workflows:
- Add `external` label to issues and PRs created by external users
- Benchmarking
- Build and Test
- Build and Test Locally
- Build build-tools image
- Check Permissions
- Check build-tools image
- Check neon with extra platform builds
- Cloud Regression Test
- Create Release Branch
- Handle `approved-for-ci-run` label
- Lint GitHub Workflows
- Notify Slack channel about upcoming release
- Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
- Pin build-tools image
- Prepare benchmarking databases by restoring dumps
- Push images to ACR
- Test Postgres client libraries
- Trigger E2E Tests
- cleanup caches by a branch
- Pre-merge checks
types: [completed]
jobs:
gh-workflow-stats:
name: Github Workflow Stats
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export GH Workflow Stats
uses: neondatabase/gh-workflow-stats-action@v0.1.4
with:
DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
DB_TABLE: "gh_workflow_stats_neon"
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_RUN_ID: ${{ github.event.workflow_run.id }}

View File

@@ -1,6 +1,5 @@
/.github/ @neondatabase/developer-productivity
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
/libs/remote_storage/ @neondatabase/storage
@@ -11,4 +10,6 @@
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
/proxy/ @neondatabase/proxy
/safekeeper/ @neondatabase/storage
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/vendor/ @neondatabase/compute

Cargo.lock (generated, 402 changes)
View File

@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "RustyXML"
@@ -46,6 +46,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "aligned-vec"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a"
dependencies = [
"equator",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
@@ -146,6 +155,12 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "asn1-rs"
version = "0.6.2"
@@ -359,6 +374,28 @@ dependencies = [
"tracing",
]
[[package]]
name = "aws-sdk-kms"
version = "1.47.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "564a597a3c71a957d60a2e4c62c93d78ee5a0d636531e15b760acad983a5c18e"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"http 0.2.9",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-s3"
version = "1.52.0"
@@ -575,9 +612,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.7.1"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87"
checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -742,7 +779,7 @@ dependencies = [
"once_cell",
"paste",
"pin-project",
"quick-xml",
"quick-xml 0.31.0",
"rand 0.8.5",
"reqwest 0.11.19",
"rustc_version",
@@ -1220,6 +1257,10 @@ name = "compute_tools"
version = "0.1.0"
dependencies = [
"anyhow",
"aws-config",
"aws-sdk-kms",
"aws-sdk-s3",
"base64 0.13.1",
"bytes",
"camino",
"cfg-if",
@@ -1237,13 +1278,16 @@ dependencies = [
"opentelemetry",
"opentelemetry_sdk",
"postgres",
"postgres_initdb",
"prometheus",
"regex",
"remote_storage",
"reqwest 0.12.4",
"rlimit",
"rust-ini",
"serde",
"serde_json",
"serde_with",
"signal-hook",
"tar",
"thiserror",
@@ -1381,6 +1425,15 @@ version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "cpp_demangle"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d"
dependencies = [
"cfg-if",
]
[[package]]
name = "cpufeatures"
version = "0.2.9"
@@ -1664,6 +1717,12 @@ dependencies = [
"utils",
]
[[package]]
name = "diatomic-waker"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"
[[package]]
name = "diesel"
version = "2.2.3"
@@ -1904,6 +1963,26 @@ dependencies = [
"termcolor",
]
[[package]]
name = "equator"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea"
dependencies = [
"equator-macro",
]
[[package]]
name = "equator-macro"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.52",
]
[[package]]
name = "equivalent"
version = "1.0.1"
@@ -2011,6 +2090,18 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "findshlibs"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
dependencies = [
"cc",
"lazy_static",
"libc",
"winapi",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
@@ -2089,9 +2180,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
@@ -2099,9 +2190,9 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-executor"
@@ -2116,9 +2207,9 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-lite"
@@ -2137,9 +2228,9 @@ dependencies = [
[[package]]
name = "futures-macro"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
@@ -2148,15 +2239,15 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
[[package]]
name = "futures-task"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
@@ -2166,9 +2257,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
[[package]]
name = "futures-util"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
@@ -2714,6 +2805,24 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac"
[[package]]
name = "inferno"
version = "0.11.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash",
"indexmap 2.0.1",
"is-terminal",
"itoa",
"log",
"num-format",
"once_cell",
"quick-xml 0.26.0",
"rgb",
"str_stack",
]
[[package]]
name = "inotify"
version = "0.9.6"
@@ -2764,9 +2873,9 @@ dependencies = [
[[package]]
name = "ipnet"
version = "2.9.0"
version = "2.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
[[package]]
name = "is-terminal"
@@ -3053,6 +3162,15 @@ version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memmap2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.7.1"
@@ -3278,6 +3396,16 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-format"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
dependencies = [
"arrayvec",
"itoa",
]
[[package]]
name = "num-integer"
version = "0.1.45"
@@ -3578,7 +3706,6 @@ dependencies = [
"thiserror",
"tokio",
"tokio-util",
"toml_edit",
"utils",
"workspace_hack",
]
@@ -3620,6 +3747,7 @@ dependencies = [
"num_cpus",
"once_cell",
"pageserver_api",
"pageserver_client",
"pageserver_compaction",
"pin-project-lite",
"postgres",
@@ -3628,6 +3756,7 @@ dependencies = [
"postgres_backend",
"postgres_connection",
"postgres_ffi",
"postgres_initdb",
"pq_proto",
"procfs",
"rand 0.8.5",
@@ -3642,6 +3771,7 @@ dependencies = [
"serde_json",
"serde_path_to_error",
"serde_with",
"smallvec",
"storage_broker",
"strum",
"strum_macros",
@@ -4009,7 +4139,7 @@ dependencies = [
[[package]]
name = "postgres"
version = "0.19.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796"
dependencies = [
"bytes",
"fallible-iterator",
@@ -4022,7 +4152,7 @@ dependencies = [
[[package]]
name = "postgres-protocol"
version = "0.6.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796"
dependencies = [
"base64 0.20.0",
"byteorder",
@@ -4038,16 +4168,40 @@ dependencies = [
"tokio",
]
[[package]]
name = "postgres-protocol2"
version = "0.1.0"
dependencies = [
"base64 0.20.0",
"byteorder",
"bytes",
"fallible-iterator",
"hmac",
"md-5",
"memchr",
"rand 0.8.5",
"sha2",
"stringprep",
"tokio",
]
[[package]]
name = "postgres-types"
version = "0.2.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796"
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol",
"serde",
"serde_json",
]
[[package]]
name = "postgres-types2"
version = "0.1.0"
dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol2",
]
[[package]]
@@ -4058,7 +4212,7 @@ dependencies = [
"bytes",
"once_cell",
"pq_proto",
"rustls 0.23.16",
"rustls 0.23.18",
"rustls-pemfile 2.1.1",
"serde",
"thiserror",
@@ -4102,12 +4256,48 @@ dependencies = [
"utils",
]
[[package]]
name = "postgres_initdb"
version = "0.1.0"
dependencies = [
"anyhow",
"camino",
"thiserror",
"tokio",
"workspace_hack",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "pprof"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0"
dependencies = [
"aligned-vec",
"backtrace",
"cfg-if",
"criterion",
"findshlibs",
"inferno",
"libc",
"log",
"nix 0.26.4",
"once_cell",
"parking_lot 0.12.1",
"protobuf",
"protobuf-codegen-pure",
"smallvec",
"symbolic-demangle",
"tempfile",
"thiserror",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
@@ -4260,6 +4450,31 @@ dependencies = [
"prost",
]
[[package]]
name = "protobuf"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "protobuf-codegen"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
dependencies = [
"protobuf",
]
[[package]]
name = "protobuf-codegen-pure"
version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
dependencies = [
"protobuf",
"protobuf-codegen",
]
[[package]]
name = "proxy"
version = "0.1.0"
@@ -4316,7 +4531,7 @@ dependencies = [
"parquet_derive",
"pbkdf2",
"pin-project-lite",
"postgres-protocol",
"postgres-protocol2",
"postgres_backend",
"pq_proto",
"prometheus",
@@ -4333,7 +4548,7 @@ dependencies = [
"rsa",
"rstest",
"rustc-hash",
"rustls 0.23.16",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"rustls-pemfile 2.1.1",
"scopeguard",
@@ -4351,8 +4566,7 @@ dependencies = [
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-postgres2",
"tokio-rustls 0.26.0",
"tokio-tungstenite",
"tokio-util",
@@ -4371,6 +4585,15 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "quick-xml"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
dependencies = [
"memchr",
]
[[package]]
name = "quick-xml"
version = "0.31.0"
@@ -4853,6 +5076,15 @@ dependencies = [
"subtle",
]
[[package]]
name = "rgb"
version = "0.8.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
dependencies = [
"bytemuck",
]
[[package]]
name = "ring"
version = "0.17.6"
@@ -5028,9 +5260,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.23.16"
version = "0.23.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e"
checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f"
dependencies = [
"log",
"once_cell",
@@ -5161,11 +5393,13 @@ dependencies = [
"itertools 0.10.5",
"metrics",
"once_cell",
"pageserver_api",
"parking_lot 0.12.1",
"postgres",
"postgres-protocol",
"postgres_backend",
"postgres_ffi",
"pprof",
"pq_proto",
"rand 0.8.5",
"regex",
@@ -5181,6 +5415,7 @@ dependencies = [
"strum",
"strum_macros",
"thiserror",
"tikv-jemallocator",
"tokio",
"tokio-io-timeout",
"tokio-postgres",
@@ -5191,6 +5426,7 @@ dependencies = [
"tracing-subscriber",
"url",
"utils",
"wal_decoder",
"walproposer",
"workspace_hack",
]
@@ -5663,9 +5899,9 @@ dependencies = [
[[package]]
name = "smallvec"
version = "1.13.1"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "smol_str"
@@ -5712,6 +5948,12 @@ dependencies = [
"der 0.7.8",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
@@ -5738,7 +5980,7 @@ dependencies = [
"once_cell",
"parking_lot 0.12.1",
"prost",
"rustls 0.23.16",
"rustls 0.23.18",
"tokio",
"tonic",
"tonic-build",
@@ -5821,7 +6063,7 @@ dependencies = [
"postgres_ffi",
"remote_storage",
"reqwest 0.12.4",
"rustls 0.23.16",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"serde",
"serde_json",
@@ -5858,6 +6100,12 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "str_stack"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
[[package]]
name = "stringprep"
version = "0.1.2"
@@ -5905,6 +6153,29 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"
[[package]]
name = "symbolic-common"
version = "12.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77"
dependencies = [
"debugid",
"memmap2",
"stable_deref_trait",
"uuid",
]
[[package]]
name = "symbolic-demangle"
version = "12.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8"
dependencies = [
"cpp_demangle",
"rustc-demangle",
"symbolic-common",
]
[[package]]
name = "syn"
version = "1.0.109"
@@ -6074,9 +6345,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c"
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
dependencies = [
"libc",
"paste",
@@ -6085,9 +6356,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d"
dependencies = [
"cc",
"libc",
@@ -6095,9 +6366,9 @@ dependencies = [
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865"
dependencies = [
"libc",
"tikv-jemalloc-sys",
@@ -6180,6 +6451,7 @@ dependencies = [
"libc",
"mio",
"num_cpus",
"parking_lot 0.12.1",
"pin-project-lite",
"signal-hook-registry",
"socket2",
@@ -6227,7 +6499,7 @@ dependencies = [
[[package]]
name = "tokio-postgres"
version = "0.7.7"
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#00940fcdb57a8e99e805297b75839e7c4c7b1796"
dependencies = [
"async-trait",
"byteorder",
@@ -6254,13 +6526,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
dependencies = [
"ring",
"rustls 0.23.16",
"rustls 0.23.18",
"tokio",
"tokio-postgres",
"tokio-rustls 0.26.0",
"x509-certificate",
]
[[package]]
name = "tokio-postgres2"
version = "0.1.0"
dependencies = [
"async-trait",
"byteorder",
"bytes",
"fallible-iterator",
"futures-util",
"log",
"parking_lot 0.12.1",
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol2",
"postgres-types2",
"tokio",
"tokio-util",
]
[[package]]
name = "tokio-rustls"
version = "0.24.0"
@@ -6288,7 +6580,7 @@ version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls 0.23.16",
"rustls 0.23.18",
"rustls-pki-types",
"tokio",
]
@@ -6697,7 +6989,7 @@ dependencies = [
"base64 0.22.1",
"log",
"once_cell",
"rustls 0.23.16",
"rustls 0.23.18",
"rustls-pki-types",
"url",
"webpki-roots 0.26.1",
@@ -6759,6 +7051,7 @@ dependencies = [
"chrono",
"const_format",
"criterion",
"diatomic-waker",
"fail",
"futures",
"git-version",
@@ -6772,15 +7065,18 @@ dependencies = [
"once_cell",
"pin-project-lite",
"postgres_connection",
"pprof",
"pq_proto",
"rand 0.8.5",
"regex",
"routerify",
"scopeguard",
"sentry",
"serde",
"serde_assert",
"serde_json",
"serde_path_to_error",
"serde_with",
"signal-hook",
"strum",
"strum_macros",
@@ -6877,10 +7173,16 @@ name = "wal_decoder"
version = "0.1.0"
dependencies = [
"anyhow",
"async-compression",
"bytes",
"pageserver_api",
"postgres_ffi",
"prost",
"serde",
"thiserror",
"tokio",
"tonic",
"tonic-build",
"tracing",
"utils",
"workspace_hack",
@@ -7306,6 +7608,7 @@ dependencies = [
"anyhow",
"axum",
"axum-core",
"base64 0.13.1",
"base64 0.21.1",
"base64ct",
"bytes",
@@ -7340,13 +7643,13 @@ dependencies = [
"libc",
"log",
"memchr",
"nix 0.26.4",
"nom",
"num-bigint",
"num-integer",
"num-traits",
"once_cell",
"parquet",
"postgres-types",
"prettyplease",
"proc-macro2",
"prost",
@@ -7356,7 +7659,7 @@ dependencies = [
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"reqwest 0.12.4",
"rustls 0.23.16",
"rustls 0.23.18",
"scopeguard",
"serde",
"serde_json",
@@ -7371,7 +7674,6 @@ dependencies = [
"time",
"time-macros",
"tokio",
"tokio-postgres",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-util",

View File

@@ -34,6 +34,10 @@ members = [
"libs/vm_monitor",
"libs/walproposer",
"libs/wal_decoder",
"libs/postgres_initdb",
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
]
[workspace.package]
@@ -57,6 +61,7 @@ async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-sdk-kms = "1.47.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.2"
aws-credential-types = "1.2.0"
@@ -73,11 +78,12 @@ bytes = "1.0"
camino = "1.1.6"
cfg-if = "1.0.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = { version = "4.0", features = ["derive"] }
clap = { version = "4.0", features = ["derive", "env"] }
comfy-table = "7.1"
const_format = "0.2"
crc32c = "0.6"
dashmap = { version = "5.5.0", features = ["raw-api"] }
diatomic-waker = { version = "0.2.3" }
either = "1.8"
enum-map = "2.4.2"
enumset = "1.0.12"
@@ -106,7 +112,7 @@ hyper-util = "0.1"
tokio-tungstenite = "0.21.0"
indexmap = "2"
indoc = "2"
ipnet = "2.9.0"
ipnet = "2.10.0"
itertools = "0.10"
itoa = "1.0.11"
jsonwebtoken = "9"
@@ -130,6 +136,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.13"
@@ -153,7 +160,7 @@ sentry = { version = "0.32", default-features = false, features = ["backtrace",
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"
serde_with = "2.0"
serde_with = { version = "2.0", features = [ "base64" ] }
serde_assert = "0.5.0"
sha2 = "0.10.2"
signal-hook = "0.3"
@@ -168,8 +175,8 @@ sync_wrapper = "0.1.2"
tar = "0.4"
test-context = "0.3"
thiserror = "1.0"
tikv-jemallocator = "0.5"
tikv-jemalloc-ctl = "0.5"
tikv-jemallocator = { version = "0.6", features = ["stats"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
@@ -203,32 +210,23 @@ env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
# We want to use the 'neon' branch for these, but there's currently one
# incompatible change on the branch. See:
#
# - PR #8076 which contained changes that depended on the new changes in
# the rust-postgres crate, and
# - PR #8654 which reverted those changes and made the code in proxy incompatible
# with the tip of the 'neon' branch again.
#
# When those proxy changes are re-applied (see PR #8747), we can switch using
# the tip of the 'neon' branch again.
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
pageserver = { path = "./pageserver" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
pageserver_client = { path = "./pageserver/client" }
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
postgres_initdb = { path = "./libs/postgres_initdb" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
@@ -255,7 +253,7 @@ tonic-build = "0.12"
[patch.crates-io]
# Needed to get `tokio-postgres-rustls` to depend on our fork.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
################# Binary contents sections
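The Cargo.toml hunks above add the `pprof` crate (0.14, with the `flamegraph` and protobuf features) to the workspace and bump the tikv-jemalloc crates to 0.6 with the `stats` feature. As a rough, generic sketch of how `pprof` is commonly driven, not code taken from this change, and with an arbitrary output path and sampling frequency:

```rust
// Generic pprof usage sketch; assumes pprof 0.14 with the "flamegraph" feature enabled.
use std::fs::File;

fn profile_briefly() -> Result<(), Box<dyn std::error::Error>> {
    // Sample the process roughly 100 times per second while the guard is alive.
    let guard = pprof::ProfilerGuardBuilder::default()
        .frequency(100)
        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
        .build()?;

    // Placeholder for the code being profiled.
    let _ = (0..1_000_000u64).sum::<u64>();

    // Turn the collected samples into a flamegraph SVG.
    let report = guard.report().build()?;
    report.flamegraph(File::create("/tmp/profile.svg")?)?;
    Ok(())
}
```

The guard only samples while it is alive, so scoping it around the code of interest bounds the profiling overhead.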

View File

@@ -7,7 +7,7 @@ ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
ARG DEBIAN_VERSION=bullseye
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
# Build Postgres

View File

@@ -38,6 +38,7 @@ ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
PG_CFLAGS += -DUSE_PREFETCH
ifndef DISABLE_HOMEBREW
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
@@ -146,6 +147,8 @@ postgres-%: postgres-configure-% \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
+@echo "Compiling pg_visibility $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
+@echo "Compiling pageinspect $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+@echo "Compiling amcheck $*"

View File

@@ -132,7 +132,7 @@ make -j`sysctl -n hw.logicalcpu` -s
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
Python (3.11 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
#### Running neon database

View File

@@ -1,4 +1,4 @@
ARG DEBIAN_VERSION=bullseye
ARG DEBIAN_VERSION=bookworm
FROM debian:bookworm-slim AS pgcopydb_builder
ARG DEBIAN_VERSION
@@ -57,9 +57,9 @@ RUN mkdir -p /pgcopydb/bin && \
mkdir -p /pgcopydb/lib && \
chmod -R 755 /pgcopydb && \
chown -R nonroot:nonroot /pgcopydb
COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/pgcopydb
COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
# System deps
#
@@ -234,7 +234,7 @@ USER nonroot:nonroot
WORKDIR /home/nonroot
# Python
ENV PYTHON_VERSION=3.9.19 \
ENV PYTHON_VERSION=3.11.10 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \
@@ -258,14 +258,14 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.82.0
ENV RUSTC_VERSION=1.83.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.30
ARG CARGO_DENY_VERSION=0.16.1
ARG CARGO_HACK_VERSION=0.6.31
ARG CARGO_NEXTEST_VERSION=0.9.72
ARG CARGO_HAKARI_VERSION=0.9.33
ARG CARGO_DENY_VERSION=0.16.2
ARG CARGO_HACK_VERSION=0.6.33
ARG CARGO_NEXTEST_VERSION=0.9.85
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -289,7 +289,7 @@ RUN whoami \
&& cargo --version --verbose \
&& rustup --version --verbose \
&& rustc --version --verbose \
&& clang --version
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
LD_LIBRARY_PATH=/pgcopydb/lib /pgcopydb/bin/pgcopydb --version; \

View File

@@ -3,7 +3,7 @@ ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
ARG DEBIAN_VERSION=bullseye
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
#########################################################################################
@@ -1243,7 +1243,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
@@ -1264,6 +1264,7 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import
#########################################################################################
#
@@ -1458,6 +1459,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import
# pgbouncer and its config
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
@@ -1533,6 +1535,25 @@ RUN apt update && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
# used by fast_import
ARG TARGETARCH
ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
RUN set -ex; \
\
# Determine the expected checksum based on TARGETARCH
if [ "${TARGETARCH}" = "amd64" ]; then \
CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
elif [ "${TARGETARCH}" = "arm64" ]; then \
CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
else \
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
fi; \
\
# Compute and validate the checksum
echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c -
RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
ENV LANG=en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -6,6 +6,7 @@
import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet',
import 'sql_exporter/compute_current_lsn.libsonnet',
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
import 'sql_exporter/compute_max_connections.libsonnet',
import 'sql_exporter/compute_receive_lsn.libsonnet',
import 'sql_exporter/compute_subscriptions_count.libsonnet',
import 'sql_exporter/connection_counts.libsonnet',

View File

@@ -0,0 +1,10 @@
{
metric_name: 'compute_max_connections',
type: 'gauge',
help: 'Max connections allowed for Postgres',
key_labels: null,
values: [
'max_connections',
],
query: importstr 'sql_exporter/compute_max_connections.sql',
}

View File

@@ -0,0 +1 @@
SELECT current_setting('max_connections') as max_connections;

View File

@@ -147,7 +147,7 @@ index 542c2e098c..0062d3024f 100644
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;
ALTER TABLE ptnowner OWNER TO regress_ptnowner;
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 97bbe53b64..eac3d42a79 100644
index 3f9a8f539c..0a51b52940 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC';
@@ -309,7 +309,7 @@ index b48365ec98..a6ef910055 100644
-- the wrong partition. This test is *not* guaranteed to trigger that bug, but
-- does so when shared_buffers is small enough. To test if we encountered the
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index faf1a4d1b0..a44c97db52 100644
index 9a74820ee8..22400a5551 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -553,8 +553,8 @@ select * from check_con_tbl;
@@ -573,7 +573,7 @@ index 93302a07ef..1a73f083ac 100644
-- that does not match with what's expected.
-- This checks all the object types that include schema qualifications.
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index f3f8c7b5a2..3e3e54ff4c 100644
index f551624afb..57f1e432d4 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -18,7 +18,8 @@ CREATE TABLE real_city (
@@ -700,12 +700,12 @@ index 6ed50fdcfa..caa00a345d 100644
COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';
CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator;
diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out
index 12e523c737..8872e23935 100644
index 6b8c2f2414..8e13b7fa46 100644
--- a/src/test/regress/expected/foreign_key.out
+++ b/src/test/regress/expected/foreign_key.out
@@ -1968,7 +1968,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2
FOR VALUES IN (1600);
-- leave these tables around intentionally
@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES
ERROR: cannot ALTER TABLE "fk_partitioned_pk_61" because it is being used by active queries in this session
DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;
-- test the case when the referenced table is owned by a different user
-create role regress_other_partitioned_fk_owner;
+create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;
@@ -713,7 +713,7 @@ index 12e523c737..8872e23935 100644
set role regress_other_partitioned_fk_owner;
create table other_partitioned_fk(a int, b int) partition by list (a);
diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out
index 0f623f7119..b48588a54e 100644
index 5881420388..4ae21aa43c 100644
--- a/src/test/regress/expected/generated.out
+++ b/src/test/regress/expected/generated.out
@@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR
@@ -762,7 +762,7 @@ index a2036a1597..805d73b9d2 100644
-- fields, leading to long bucket chains and lots of table expansion.
-- this is therefore a stress test of the bucket overflow code (unlike
diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out
index cc7772349f..98a08eb48d 100644
index 1b74958de9..078187b542 100644
--- a/src/test/regress/expected/identity.out
+++ b/src/test/regress/expected/identity.out
@@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT;
@@ -775,10 +775,10 @@ index cc7772349f..98a08eb48d 100644
GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;
SET ROLE regress_identity_user1;
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 4943429e9b..0257f22b15 100644
index 8f831c95c3..ec681b52af 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -2606,7 +2606,7 @@ create index on permtest_parent (left(c, 3));
@@ -2636,7 +2636,7 @@ create index on permtest_parent (left(c, 3));
insert into permtest_parent
select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;
analyze permtest_parent;
@@ -1133,7 +1133,7 @@ index 8475231735..1afae5395f 100644
SELECT rolname, rolpassword
FROM pg_authid
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
index fbb0489a4f..2905194e2c 100644
index 5b9dba7b32..cc408dad42 100644
--- a/src/test/regress/expected/privileges.out
+++ b/src/test/regress/expected/privileges.out
@@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3
@@ -1185,7 +1185,7 @@ index fbb0489a4f..2905194e2c 100644
GRANT pg_read_all_data TO regress_priv_user6;
GRANT pg_write_all_data TO regress_priv_user7;
GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION;
@@ -145,8 +145,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;
@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;
DROP USER regress_priv_user10;
DROP USER regress_priv_user9;
DROP USER regress_priv_user8;
@@ -1196,7 +1196,7 @@ index fbb0489a4f..2905194e2c 100644
ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;
GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1;
SET SESSION AUTHORIZATION regress_priv_user1;
@@ -172,12 +172,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre
@@ -239,12 +239,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre
ERROR: permission denied to grant privileges as role "regress_priv_role"
DETAIL: The grantor must have the ADMIN option on role "regress_priv_role".
GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE;
@@ -1213,7 +1213,7 @@ index fbb0489a4f..2905194e2c 100644
DROP ROLE regress_priv_role;
SET SESSION AUTHORIZATION regress_priv_user1;
SELECT session_user, current_user;
@@ -1709,7 +1713,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
@@ -1776,7 +1780,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
-- security-restricted operations
\c -
@@ -1222,7 +1222,7 @@ index fbb0489a4f..2905194e2c 100644
-- Check that index expressions and predicates are run as the table's owner
-- A dummy index function checking current_user
CREATE FUNCTION sro_ifun(int) RETURNS int AS $$
@@ -2601,8 +2605,8 @@ drop cascades to function testns.priv_testagg(integer)
@@ -2668,8 +2672,8 @@ drop cascades to function testns.priv_testagg(integer)
drop cascades to function testns.priv_testproc(integer)
-- Change owner of the schema & and rename of new schema owner
\c -
@@ -1233,7 +1233,7 @@ index fbb0489a4f..2905194e2c 100644
SET SESSION ROLE regress_schemauser1;
CREATE SCHEMA testns;
SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid;
@@ -2725,7 +2729,7 @@ DROP USER regress_priv_user7;
@@ -2792,7 +2796,7 @@ DROP USER regress_priv_user7;
DROP USER regress_priv_user8; -- does not exist
ERROR: role "regress_priv_user8" does not exist
-- permissions with LOCK TABLE
@@ -1242,7 +1242,7 @@ index fbb0489a4f..2905194e2c 100644
CREATE TABLE lock_table (a int);
-- LOCK TABLE and SELECT permission
GRANT SELECT ON lock_table TO regress_locktable_user;
@@ -2807,7 +2811,7 @@ DROP USER regress_locktable_user;
@@ -2874,7 +2878,7 @@ DROP USER regress_locktable_user;
-- pg_backend_memory_contexts.
-- switch to superuser
\c -
@@ -1251,7 +1251,7 @@ index fbb0489a4f..2905194e2c 100644
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
has_table_privilege
---------------------
@@ -2851,10 +2855,10 @@ RESET ROLE;
@@ -2918,10 +2922,10 @@ RESET ROLE;
-- clean up
DROP ROLE regress_readallstats;
-- test role grantor machinery
@@ -1266,7 +1266,7 @@ index fbb0489a4f..2905194e2c 100644
GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;
GRANT regress_group_direct_manager TO regress_group_indirect_manager;
SET SESSION AUTHORIZATION regress_group_direct_manager;
@@ -2883,9 +2887,9 @@ DROP ROLE regress_group_direct_manager;
@@ -2950,9 +2954,9 @@ DROP ROLE regress_group_direct_manager;
DROP ROLE regress_group_indirect_manager;
DROP ROLE regress_group_member;
-- test SET and INHERIT options with object ownership changes
@@ -1813,7 +1813,7 @@ index 5e6969b173..2c4d52237f 100644
-- clean up roles
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 97ca9bf72c..b2a7a6f710 100644
index 218c0c2863..f7af0cfb12 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2;
@@ -1917,6 +1917,19 @@ index b79fe9a1c0..e29fab88ab 100644
ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user
REVOKE INSERT ON TABLES FROM regress_selinto_user;
GRANT ALL ON SCHEMA selinto_schema TO public;
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index afc6ab08c2..dfcd891af3 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -1220,7 +1220,7 @@ SELECT 1 FROM tenk1_vw_sec
rollback;
-- test that function option SET ROLE works in parallel workers.
-create role regress_parallel_worker;
+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;
create function set_and_report_role() returns text as
$$ select current_setting('role') $$ language sql parallel safe
set role = regress_parallel_worker;
diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out
index 1aeed8452b..7d9427d070 100644
--- a/src/test/regress/expected/select_views.out
@@ -2369,7 +2382,7 @@ index 6cb9c926c0..5e689e4062 100644
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;
ALTER TABLE ptnowner OWNER TO regress_ptnowner;
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 3db9e25913..c66d5aa2c2 100644
index 8aa902d5ab..24bb823b86 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -353,7 +353,7 @@ reset enable_seqscan;
@@ -2532,7 +2545,7 @@ index 43d2e906dd..6c993d70f0 100644
-- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from
-- the wrong partition. This test is *not* guaranteed to trigger that bug, but
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index d759635068..d58e50dcc5 100644
index cf3828c16e..cf3ca38175 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -365,8 +365,8 @@ copy check_con_tbl from stdin;
@@ -2774,7 +2787,7 @@ index 1b7064247a..be5b662ce1 100644
-- Cases where schema creation fails as objects are qualified with a schema
-- that does not match with what's expected.
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 3a78be1b0c..617d2dc8d6 100644
index ae6841308b..47bc792e30 100644
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -23,7 +23,8 @@ CREATE TABLE real_city (
@@ -2901,11 +2914,11 @@ index aa147b14a9..370e0dd570 100644
CREATE FOREIGN DATA WRAPPER dummy;
COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';
diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql
index 22e177f89b..7138d5e1d4 100644
index 45c7a534cb..32dd26b8cd 100644
--- a/src/test/regress/sql/foreign_key.sql
+++ b/src/test/regress/sql/foreign_key.sql
@@ -1418,7 +1418,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2
-- leave these tables around intentionally
@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES
DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;
-- test the case when the referenced table is owned by a different user
-create role regress_other_partitioned_fk_owner;
@@ -2963,7 +2976,7 @@ index 527024f710..de49c0b85f 100644
-- the data in this file has a lot of duplicates in the index key
-- fields, leading to long bucket chains and lots of table expansion.
diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql
index 91d2e443b4..241c93f373 100644
index 7537258a75..9041e35e34 100644
--- a/src/test/regress/sql/identity.sql
+++ b/src/test/regress/sql/identity.sql
@@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART;
@@ -2976,10 +2989,10 @@ index 91d2e443b4..241c93f373 100644
GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;
SET ROLE regress_identity_user1;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index fe699c54d5..bdd5993f45 100644
index b5b554a125..109889ad24 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -950,7 +950,7 @@ create index on permtest_parent (left(c, 3));
@@ -958,7 +958,7 @@ create index on permtest_parent (left(c, 3));
insert into permtest_parent
select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;
analyze permtest_parent;
@@ -3218,7 +3231,7 @@ index 53e86b0b6c..f07cf1ec54 100644
CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index 3f68cafcd1..004b26831d 100644
index 249df17a58..b258e7f26a 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -24,18 +24,18 @@ RESET client_min_messages;
@@ -3269,7 +3282,7 @@ index 3f68cafcd1..004b26831d 100644
GRANT pg_read_all_data TO regress_priv_user6;
GRANT pg_write_all_data TO regress_priv_user7;
@@ -130,8 +130,8 @@ DROP USER regress_priv_user10;
@@ -163,8 +163,8 @@ DROP USER regress_priv_user10;
DROP USER regress_priv_user9;
DROP USER regress_priv_user8;
@@ -3280,7 +3293,7 @@ index 3f68cafcd1..004b26831d 100644
ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;
@@ -1124,7 +1124,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
@@ -1157,7 +1157,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
-- security-restricted operations
\c -
@@ -3289,7 +3302,7 @@ index 3f68cafcd1..004b26831d 100644
-- Check that index expressions and predicates are run as the table's owner
@@ -1620,8 +1620,8 @@ DROP SCHEMA testns CASCADE;
@@ -1653,8 +1653,8 @@ DROP SCHEMA testns CASCADE;
-- Change owner of the schema & and rename of new schema owner
\c -
@@ -3300,7 +3313,7 @@ index 3f68cafcd1..004b26831d 100644
SET SESSION ROLE regress_schemauser1;
CREATE SCHEMA testns;
@@ -1715,7 +1715,7 @@ DROP USER regress_priv_user8; -- does not exist
@@ -1748,7 +1748,7 @@ DROP USER regress_priv_user8; -- does not exist
-- permissions with LOCK TABLE
@@ -3309,7 +3322,7 @@ index 3f68cafcd1..004b26831d 100644
CREATE TABLE lock_table (a int);
-- LOCK TABLE and SELECT permission
@@ -1803,7 +1803,7 @@ DROP USER regress_locktable_user;
@@ -1836,7 +1836,7 @@ DROP USER regress_locktable_user;
-- switch to superuser
\c -
@@ -3318,7 +3331,7 @@ index 3f68cafcd1..004b26831d 100644
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no
@@ -1823,10 +1823,10 @@ RESET ROLE;
@@ -1856,10 +1856,10 @@ RESET ROLE;
DROP ROLE regress_readallstats;
-- test role grantor machinery
@@ -3333,7 +3346,7 @@ index 3f68cafcd1..004b26831d 100644
GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;
GRANT regress_group_direct_manager TO regress_group_indirect_manager;
@@ -1848,9 +1848,9 @@ DROP ROLE regress_group_indirect_manager;
@@ -1881,9 +1881,9 @@ DROP ROLE regress_group_indirect_manager;
DROP ROLE regress_group_member;
-- test SET and INHERIT options with object ownership changes
@@ -3625,7 +3638,7 @@ index c961b2d730..0859b89c4f 100644
-- clean up roles
DROP ROLE regress_test_def_superuser;
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index dec7340538..cdbc03a5cc 100644
index d3bfd53e23..919ce1d0c6 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;
@@ -3701,6 +3714,19 @@ index 689c448cc2..223ceb1d75 100644
ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user
REVOKE INSERT ON TABLES FROM regress_selinto_user;
GRANT ALL ON SCHEMA selinto_schema TO public;
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index 33d78e16dc..cb193c9b27 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -464,7 +464,7 @@ SELECT 1 FROM tenk1_vw_sec
rollback;
-- test that function option SET ROLE works in parallel workers.
-create role regress_parallel_worker;
+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;
create function set_and_report_role() returns text as
$$ select current_setting('role') $$ language sql parallel safe
diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql
index e742f13699..7bd0255df8 100644
--- a/src/test/regress/sql/select_views.sql

View File

@@ -10,6 +10,10 @@ default = []
testing = []
[dependencies]
base64.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true
aws-sdk-kms.workspace = true
anyhow.workspace = true
camino.workspace = true
chrono.workspace = true
@@ -27,6 +31,8 @@ opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
postgres.workspace = true
regex.workspace = true
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
tar.workspace = true
@@ -43,6 +49,7 @@ thiserror.workspace = true
url.workspace = true
prometheus.workspace = true
postgres_initdb.workspace = true
compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true

View File

@@ -37,6 +37,7 @@ use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
use std::process::exit;
use std::str::FromStr;
use std::sync::atomic::Ordering;
use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
use std::{thread, time::Duration};
@@ -58,7 +59,7 @@ use compute_tools::compute::{
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
};
use compute_tools::configurator::launch_configurator;
use compute_tools::extension_server::get_pg_version;
use compute_tools::extension_server::get_pg_version_string;
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
@@ -105,6 +106,11 @@ fn main() -> Result<()> {
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
opentelemetry::global::set_error_handler(|err| {
tracing::info!("OpenTelemetry error: {err}");
})
.expect("global error handler lock poisoned");
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
thread::spawn(move || {
for sig in signals.forever() {
@@ -317,11 +323,18 @@ fn wait_spec(
} else {
spec_set = false;
}
let connstr = Url::parse(connstr).context("cannot parse connstr as a URL")?;
let conn_conf = postgres::config::Config::from_str(connstr.as_str())
.context("cannot build postgres config from connstr")?;
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
.context("cannot build tokio postgres config from connstr")?;
let compute_node = ComputeNode {
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
connstr,
conn_conf,
tokio_conn_conf,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
pgversion: get_pg_version(pgbin),
pgversion: get_pg_version_string(pgbin),
live_config_allowed,
state: Mutex::new(new_state),
state_changed: Condvar::new(),
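For context on the `conn_conf` and `tokio_conn_conf` fields introduced in the hunk above: the same connection string is parsed twice via `FromStr`, once into the blocking `postgres` config and once into the `tokio_postgres` config. A minimal standalone sketch of that pattern, using a made-up connection string:

```rust
// Minimal sketch of the parsing pattern above; the connection string is a made-up example.
use std::str::FromStr;

fn parse_both(
    connstr: &str,
) -> anyhow::Result<(postgres::config::Config, tokio_postgres::config::Config)> {
    let conn_conf = postgres::config::Config::from_str(connstr)?;
    let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)?;
    Ok((conn_conf, tokio_conn_conf))
}

fn main() -> anyhow::Result<()> {
    let (_blocking_conf, _tokio_conf) =
        parse_both("postgresql://cloud_admin@localhost:5432/postgres")?;
    Ok(())
}
```

Parsing both configs from the one URL keeps the blocking and async clients pointed at the same server.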

View File

@@ -0,0 +1,346 @@
//! This program dumps a remote Postgres database into a local Postgres database
//! and uploads the resulting PGDATA into object storage for import into a Timeline.
//!
//! # Context, Architecture, Design
//!
//! See cloud.git Fast Imports RFC (<https://github.com/neondatabase/cloud/pull/19799>)
//! for the full picture.
//! The RFC describing the storage pieces of importing the PGDATA dump into a Timeline
//! is publicly accessible at <https://github.com/neondatabase/neon/pull/9538>.
//!
//! # This is a Prototype!
//!
//! This program is part of a prototype feature and not yet used in production.
//!
//! The cloud.git RFC contains lots of suggestions for improving e2e throughput
//! of this step of the timeline import process.
//!
//! # Local Testing
//!
//! - Comment out most of the pgxns in the Dockerfile.compute-tools to speed up the build.
//! - Build the image with the following command:
//!
//! ```bash
//! docker buildx build --platform linux/amd64 --build-arg DEBIAN_VERSION=bullseye --build-arg GIT_VERSION=local --build-arg PG_VERSION=v14 --build-arg BUILD_TAG="$(date --iso-8601=s -u)" -t localhost:3030/localregistry/compute-node-v14:latest -f compute/compute-node.Dockerfile .
//! docker push localhost:3030/localregistry/compute-node-v14:latest
//! ```
use anyhow::Context;
use aws_config::BehaviorVersion;
use camino::{Utf8Path, Utf8PathBuf};
use clap::Parser;
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
use nix::unistd::Pid;
use tracing::{info, info_span, warn, Instrument};
use utils::fs_ext::is_directory_empty;
#[path = "fast_import/child_stdio_to_log.rs"]
mod child_stdio_to_log;
#[path = "fast_import/s3_uri.rs"]
mod s3_uri;
#[path = "fast_import/s5cmd.rs"]
mod s5cmd;
#[derive(clap::Parser)]
struct Args {
#[clap(long)]
working_directory: Utf8PathBuf,
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
s3_prefix: s3_uri::S3Uri,
#[clap(long)]
pg_bin_dir: Utf8PathBuf,
#[clap(long)]
pg_lib_dir: Utf8PathBuf,
}
#[serde_with::serde_as]
#[derive(serde::Deserialize)]
struct Spec {
encryption_secret: EncryptionSecret,
#[serde_as(as = "serde_with::base64::Base64")]
source_connstring_ciphertext_base64: Vec<u8>,
}
#[derive(serde::Deserialize)]
enum EncryptionSecret {
#[allow(clippy::upper_case_acronyms)]
KMS { key_id: String },
}
#[tokio::main]
pub(crate) async fn main() -> anyhow::Result<()> {
utils::logging::init(
utils::logging::LogFormat::Plain,
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::Output::Stdout,
)?;
info!("starting");
let Args {
working_directory,
s3_prefix,
pg_bin_dir,
pg_lib_dir,
} = Args::parse();
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let spec: Spec = {
let spec_key = s3_prefix.append("/spec.json");
let s3_client = aws_sdk_s3::Client::new(&aws_config);
let object = s3_client
.get_object()
.bucket(&spec_key.bucket)
.key(spec_key.key)
.send()
.await
.context("get spec from s3")?
.body
.collect()
.await
.context("download spec body")?;
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
};
match tokio::fs::create_dir(&working_directory).await {
Ok(()) => {}
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
if !is_directory_empty(&working_directory)
.await
.context("check if working directory is empty")?
{
anyhow::bail!("working directory is not empty");
} else {
// ok
}
}
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
}
let pgdata_dir = working_directory.join("pgdata");
tokio::fs::create_dir(&pgdata_dir)
.await
.context("create pgdata directory")?;
//
// Setup clients
//
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let kms_client = aws_sdk_kms::Client::new(&aws_config);
//
// Initialize pgdata
//
let pgbin = pg_bin_dir.join("postgres");
let pg_version = match get_pg_version(pgbin.as_ref()) {
PostgresMajorVersion::V14 => 14,
PostgresMajorVersion::V15 => 15,
PostgresMajorVersion::V16 => 16,
PostgresMajorVersion::V17 => 17,
};
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser,
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
pg_version,
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
pgdata: &pgdata_dir,
})
.await
.context("initdb")?;
let nproc = num_cpus::get();
//
// Launch postgres process
//
let mut postgres_proc = tokio::process::Command::new(pgbin)
.arg("-D")
.arg(&pgdata_dir)
.args(["-c", "wal_level=minimal"])
.args(["-c", "shared_buffers=10GB"])
.args(["-c", "max_wal_senders=0"])
.args(["-c", "fsync=off"])
.args(["-c", "full_page_writes=off"])
.args(["-c", "synchronous_commit=off"])
.args(["-c", "maintenance_work_mem=8388608"])
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
.args(["-c", &format!("max_parallel_workers={nproc}")])
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
.args(["-c", &format!("max_worker_processes={nproc}")])
.args(["-c", "effective_io_concurrency=100"])
.env_clear()
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn postgres")?;
info!("spawned postgres, waiting for it to become ready");
tokio::spawn(
child_stdio_to_log::relay_process_output(
postgres_proc.stdout.take(),
postgres_proc.stderr.take(),
)
.instrument(info_span!("postgres")),
);
let restore_pg_connstring =
format!("host=localhost port=5432 user={superuser} dbname=postgres");
loop {
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
if res.is_ok() {
info!("postgres is ready, could connect to it");
break;
}
}
//
// Decrypt connection string
//
let source_connection_string = {
match spec.encryption_secret {
EncryptionSecret::KMS { key_id } => {
let mut output = kms_client
.decrypt()
.key_id(key_id)
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
spec.source_connstring_ciphertext_base64,
))
.send()
.await
.context("decrypt source connection string")?;
let plaintext = output
.plaintext
.take()
.context("get plaintext source connection string")?;
String::from_utf8(plaintext.into_inner())
.context("parse source connection string as utf8")?
}
}
};
//
// Start the work
//
let dumpdir = working_directory.join("dumpdir");
let common_args = [
// schema mapping (prob suffices to specify them on one side)
"--no-owner".to_string(),
"--no-privileges".to_string(),
"--no-publications".to_string(),
"--no-security-labels".to_string(),
"--no-subscriptions".to_string(),
"--no-tablespaces".to_string(),
// format
"--format".to_string(),
"directory".to_string(),
// concurrency
"--jobs".to_string(),
num_cpus::get().to_string(),
// progress updates
"--verbose".to_string(),
];
info!("dump into the working directory");
{
let mut pg_dump = tokio::process::Command::new(pg_bin_dir.join("pg_dump"))
.args(&common_args)
.arg("-f")
.arg(&dumpdir)
.arg("--no-sync")
// POSITIONAL args
// source db (db name included in connection string)
.arg(&source_connection_string)
// how we run it
.env_clear()
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn pg_dump")?;
info!(pid=%pg_dump.id().unwrap(), "spawned pg_dump");
tokio::spawn(
child_stdio_to_log::relay_process_output(pg_dump.stdout.take(), pg_dump.stderr.take())
.instrument(info_span!("pg_dump")),
);
let st = pg_dump.wait().await.context("wait for pg_dump")?;
info!(status=?st, "pg_dump exited");
if !st.success() {
warn!(status=%st, "pg_dump failed, restore will likely fail as well");
}
}
// TODO: do it in a streaming way, plenty of internal research done on this already
// TODO: do the unlogged table trick
info!("restore from working directory into vanilla postgres");
{
let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
.args(&common_args)
.arg("-d")
.arg(&restore_pg_connstring)
// POSITIONAL args
.arg(&dumpdir)
// how we run it
.env_clear()
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn pg_restore")?;
info!(pid=%pg_restore.id().unwrap(), "spawned pg_restore");
tokio::spawn(
child_stdio_to_log::relay_process_output(
pg_restore.stdout.take(),
pg_restore.stderr.take(),
)
.instrument(info_span!("pg_restore")),
);
let st = pg_restore.wait().await.context("wait for pg_restore")?;
info!(status=?st, "pg_restore exited");
if !st.success() {
warn!(status=%st, "pg_restore failed, restore will likely fail as well");
}
}
info!("shutdown postgres");
{
nix::sys::signal::kill(
Pid::from_raw(
i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
),
nix::sys::signal::SIGTERM,
)
.context("signal postgres to shut down")?;
postgres_proc
.wait()
.await
.context("wait for postgres to shut down")?;
}
info!("upload pgdata");
s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
.await
.context("sync dump directory to destination")?;
info!("write status");
{
let status_dir = working_directory.join("status");
std::fs::create_dir(&status_dir).context("create status directory")?;
let status_file = status_dir.join("status");
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
.context("write status file")?;
s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
.await
.context("sync status directory to destination")?;
}
Ok(())
}
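The readiness check in the file above retries `tokio_postgres::connect` in a tight loop until a connection succeeds. For comparison, a minimal hedged sketch (not part of this commit; the helper name and limits are made up) of the same wait with a short pause between attempts:

```rust
use std::time::Duration;

/// Hypothetical variant of the readiness wait in fast_import: retry the
/// connection with a short sleep between attempts and give up eventually.
async fn wait_for_postgres(connstr: &str, max_attempts: usize) -> anyhow::Result<()> {
    for _ in 0..max_attempts {
        if tokio_postgres::connect(connstr, tokio_postgres::NoTls)
            .await
            .is_ok()
        {
            return Ok(());
        }
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
    anyhow::bail!("postgres did not become ready after {max_attempts} attempts")
}
```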

View File

@@ -0,0 +1,35 @@
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::{ChildStderr, ChildStdout};
use tracing::info;
/// Asynchronously relays the output from a child process's `stdout` and `stderr` to the tracing log.
/// Each line is read and logged individually, with lossy UTF-8 conversion.
///
/// # Arguments
///
/// * `stdout`: An `Option<ChildStdout>` from the child process.
/// * `stderr`: An `Option<ChildStderr>` from the child process.
///
pub(crate) async fn relay_process_output(stdout: Option<ChildStdout>, stderr: Option<ChildStderr>) {
let stdout_fut = async {
if let Some(stdout) = stdout {
let reader = BufReader::new(stdout);
let mut lines = reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
info!(fd = "stdout", "{}", line);
}
}
};
let stderr_fut = async {
if let Some(stderr) = stderr {
let reader = BufReader::new(stderr);
let mut lines = reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
info!(fd = "stderr", "{}", line);
}
}
};
tokio::join!(stdout_fut, stderr_fut);
}
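For reference, a hedged usage sketch of `relay_process_output`, mirroring how fast_import wires a child's stdio into the tracing log above; the `echo` command and span name are placeholders, not part of the commit:

```rust
use std::process::Stdio;
use tracing::{info_span, Instrument};

async fn run_and_relay() -> anyhow::Result<()> {
    // Spawn a child with piped stdio, then relay its output line by line
    // into the tracing log on a background task.
    let mut child = tokio::process::Command::new("echo")
        .arg("hello")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;
    tokio::spawn(
        relay_process_output(child.stdout.take(), child.stderr.take())
            .instrument(info_span!("echo")),
    );
    child.wait().await?;
    Ok(())
}
```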

View File

@@ -0,0 +1,75 @@
use anyhow::Result;
use std::str::FromStr;
/// Struct to hold parsed S3 components
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct S3Uri {
pub bucket: String,
pub key: String,
}
impl FromStr for S3Uri {
type Err = anyhow::Error;
/// Parse an S3 URI into a bucket and key
fn from_str(uri: &str) -> Result<Self> {
// Ensure the URI starts with "s3://"
if !uri.starts_with("s3://") {
return Err(anyhow::anyhow!("Invalid S3 URI scheme"));
}
// Remove the "s3://" prefix
let stripped_uri = &uri[5..];
// Split the remaining string into bucket and key parts
if let Some((bucket, key)) = stripped_uri.split_once('/') {
Ok(S3Uri {
bucket: bucket.to_string(),
key: key.to_string(),
})
} else {
Err(anyhow::anyhow!(
"Invalid S3 URI format, missing bucket or key"
))
}
}
}
impl S3Uri {
pub fn append(&self, suffix: &str) -> Self {
Self {
bucket: self.bucket.clone(),
key: format!("{}{}", self.key, suffix),
}
}
}
impl std::fmt::Display for S3Uri {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "s3://{}/{}", self.bucket, self.key)
}
}
impl clap::builder::TypedValueParser for S3Uri {
type Value = Self;
fn parse_ref(
&self,
_cmd: &clap::Command,
_arg: Option<&clap::Arg>,
value: &std::ffi::OsStr,
) -> Result<Self::Value, clap::Error> {
let value_str = value.to_str().ok_or_else(|| {
clap::Error::raw(
clap::error::ErrorKind::InvalidUtf8,
"Invalid UTF-8 sequence",
)
})?;
S3Uri::from_str(value_str).map_err(|e| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Failed to parse S3 URI: {}", e),
)
})
}
}
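A hedged example of the parsing and `append` behaviour defined above (bucket and key are illustrative):

```rust
use std::str::FromStr;

fn main() -> anyhow::Result<()> {
    let uri = S3Uri::from_str("s3://my-bucket/imports/job-1")?;
    assert_eq!(uri.bucket, "my-bucket");
    assert_eq!(uri.key, "imports/job-1");
    // `append` concatenates onto the key, and Display re-adds the scheme.
    let spec = uri.append("/spec.json");
    assert_eq!(spec.to_string(), "s3://my-bucket/imports/job-1/spec.json");
    Ok(())
}
```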

View File

@@ -0,0 +1,27 @@
use anyhow::Context;
use camino::Utf8Path;
use super::s3_uri::S3Uri;
pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
let mut builder = tokio::process::Command::new("s5cmd");
// s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
builder.arg("--endpoint-url").arg(val);
}
builder
.arg("sync")
.arg(local.as_str())
.arg(remote.to_string());
let st = builder
.spawn()
.context("spawn s5cmd")?
.wait()
.await
.context("wait for s5cmd")?;
if st.success() {
Ok(())
} else {
Err(anyhow::anyhow!("s5cmd failed"))
}
}

View File

@@ -1,38 +1,37 @@
use compute_api::{
responses::CatalogObjects,
spec::{Database, Role},
};
use futures::Stream;
use postgres::{Client, NoTls};
use postgres::NoTls;
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::Command,
task,
spawn,
};
use tokio_stream::{self as stream, StreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tracing::warn;
use crate::{
compute::ComputeNode,
pg_helpers::{get_existing_dbs, get_existing_roles},
};
use crate::compute::ComputeNode;
use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db};
use compute_api::responses::CatalogObjects;
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
let connstr = compute.connstr.clone();
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let roles: Vec<Role>;
{
let mut xact = client.transaction()?;
roles = get_existing_roles(&mut xact)?;
}
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles"));
let (client, connection): (tokio_postgres::Client, _) = conf.connect(NoTls).await?;
Ok(CatalogObjects { roles, databases })
})
.await?
spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
let roles = get_existing_roles_async(&client).await?;
let databases = get_existing_dbs_async(&client)
.await?
.into_values()
.collect();
Ok(CatalogObjects { roles, databases })
}
#[derive(Debug, thiserror::Error)]
@@ -41,6 +40,8 @@ pub enum SchemaDumpError {
DatabaseDoesNotExist,
#[error("Failed to execute pg_dump.")]
IO(#[from] std::io::Error),
#[error("Unexpected error.")]
Unexpected,
}
// It uses the pg_dump utility to dump the schema of the specified database.
@@ -58,11 +59,38 @@ pub async fn get_database_schema(
let pgbin = &compute.pgbin;
let basepath = Path::new(pgbin).parent().unwrap();
let pgdump = basepath.join("pg_dump");
let mut connstr = compute.connstr.clone();
connstr.set_path(dbname);
// Replace the DB in the connection string and disassemble it into parts.
// This is the only option to handle DBs with special characters.
let conf =
postgres_conf_for_db(&compute.connstr, dbname).map_err(|_| SchemaDumpError::Unexpected)?;
let host = conf
.get_hosts()
.first()
.ok_or(SchemaDumpError::Unexpected)?;
let host = match host {
tokio_postgres::config::Host::Tcp(ip) => ip.to_string(),
#[cfg(unix)]
tokio_postgres::config::Host::Unix(path) => path.to_string_lossy().to_string(),
};
let port = conf
.get_ports()
.first()
.ok_or(SchemaDumpError::Unexpected)?;
let user = conf.get_user().ok_or(SchemaDumpError::Unexpected)?;
let dbname = conf.get_dbname().ok_or(SchemaDumpError::Unexpected)?;
let mut cmd = Command::new(pgdump)
// XXX: this seems to be the only option to deal with DBs with `=` in the name
// See <https://www.postgresql.org/message-id/flat/20151023003445.931.91267%40wrigleys.postgresql.org>
.env("PGDATABASE", dbname)
.arg("--host")
.arg(host)
.arg("--port")
.arg(port.to_string())
.arg("--username")
.arg(user)
.arg("--schema-only")
.arg(connstr.as_str())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true)

View File

@@ -1,43 +1,16 @@
use anyhow::{anyhow, Ok, Result};
use postgres::Client;
use tokio_postgres::NoTls;
use tracing::{error, instrument, warn};
use crate::compute::ComputeNode;
/// Create a special service table for availability checks
/// only if it does not exist already.
pub fn create_availability_check_data(client: &mut Client) -> Result<()> {
let query = "
DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM pg_catalog.pg_tables
WHERE tablename = 'health_check'
)
THEN
CREATE TABLE health_check (
id serial primary key,
updated_at timestamptz default now()
);
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();
END IF;
END
$$;";
client.execute(query, &[])?;
Ok(())
}
/// Update timestamp in a row in a special service table to check
/// that we can actually write some data in this particular timeline.
#[instrument(skip_all)]
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
// Connect to the database.
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
let conf = compute.get_tokio_conn_conf(Some("compute_ctl:availability_checker"));
let (client, connection) = conf.connect(NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}

View File

@@ -1,26 +1,28 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::env;
use std::fs;
use std::iter::once;
use std::os::unix::fs::{symlink, PermissionsExt};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use std::sync::{Condvar, Mutex, RwLock};
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::thread;
use std::time::Duration;
use std::time::Instant;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use compute_api::spec::PgIdent;
use compute_api::spec::{PgIdent, Role};
use futures::future::join_all;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use nix::unistd::Pid;
use postgres;
use postgres::error::SqlState;
use postgres::{Client, NoTls};
use postgres::NoTls;
use tracing::{debug, error, info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
@@ -31,15 +33,22 @@ use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion};
use utils::measured_stream::MeasuredReader;
use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use tokio::spawn;
use crate::checker::create_availability_check_data;
use crate::installed_extensions::get_installed_extensions_sync;
use crate::installed_extensions::get_installed_extensions;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
};
use crate::spec_apply::PerDatabasePhase::{
ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
};
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
use crate::sync_sk::{check_if_synced, ping_safekeeper};
use crate::{config, extension_server};
@@ -50,6 +59,10 @@ pub static PG_PID: AtomicU32 = AtomicU32::new(0);
pub struct ComputeNode {
// Url type maintains proper escaping
pub connstr: url::Url,
// We connect to Postgres from many different places, so build configs once
// and reuse them where needed.
pub conn_conf: postgres::config::Config,
pub tokio_conn_conf: tokio_postgres::config::Config,
pub pgdata: String,
pub pgbin: String,
pub pgversion: String,
@@ -224,10 +237,7 @@ fn maybe_cgexec(cmd: &str) -> Command {
}
}
/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
/// that we give to customers
#[instrument(skip_all)]
fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String {
let roles = spec
.cluster
.roles
@@ -296,11 +306,8 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()>
$$;"#,
roles_decl, database_decl,
);
info!("Neon superuser created: {}", inlinify(&query));
client
.simple_query(&query)
.map_err(|e| anyhow::anyhow!(e).context(query))?;
Ok(())
query
}
impl ComputeNode {
@@ -798,10 +805,10 @@ impl ComputeNode {
/// version. In the future, it may upgrade all 3rd-party extensions.
#[instrument(skip_all)]
pub fn post_apply_config(&self) -> Result<()> {
let connstr = self.connstr.clone();
let conf = self.get_conn_conf(Some("compute_ctl:post_apply_config"));
thread::spawn(move || {
let func = || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let mut client = conf.connect(NoTls)?;
handle_neon_extension_upgrade(&mut client)
.context("handle_neon_extension_upgrade")?;
Ok::<_, anyhow::Error>(())
@@ -813,40 +820,51 @@ impl ComputeNode {
Ok(())
}
/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut connstr = self.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "apply_config");
pub fn get_conn_conf(&self, application_name: Option<&str>) -> postgres::Config {
let mut conf = self.conn_conf.clone();
if let Some(application_name) = application_name {
conf.application_name(application_name);
}
conf
}
let mut client = match Client::connect(connstr.as_str(), NoTls) {
pub fn get_tokio_conn_conf(&self, application_name: Option<&str>) -> tokio_postgres::Config {
let mut conf = self.tokio_conn_conf.clone();
if let Some(application_name) = application_name {
conf.application_name(application_name);
}
conf
}
async fn get_maintenance_client(
conf: &tokio_postgres::Config,
) -> Result<tokio_postgres::Client> {
let mut conf = conf.clone();
conf.application_name("compute_ctl:apply_config");
let (client, conn) = match conf.connect(NoTls).await {
// If connection fails, it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD)
| Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {
// connect with zenith_admin if cloud_admin could not authenticate
// Connect with zenith_admin if cloud_admin could not authenticate
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut zenith_admin_conf = postgres::config::Config::from(conf.clone());
zenith_admin_conf.application_name("compute_ctl:apply_config");
zenith_admin_conf.user("zenith_admin");
let mut client =
Client::connect(zenith_admin_connstr.as_str(), NoTls)
zenith_admin_conf.connect(NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
// Disable forwarding so that users don't get a cloud_admin role
// Disable forwarding so that users don't get a cloud_admin role
let mut func = || {
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
@@ -857,58 +875,319 @@ impl ComputeNode {
drop(client);
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
// Reconnect with connstring with expected name
conf.connect(NoTls).await?
}
_ => return Err(e.into()),
},
Ok(client) => client,
Ok((client, conn)) => (client, conn),
};
// Disable DDL forwarding because control plane already knows about these roles/databases.
spawn(async move {
if let Err(e) = conn.await {
error!("maintenance client connection error: {}", e);
}
});
// Disable DDL forwarding because control plane already knows about the roles/databases
// we're about to modify.
client
.simple_query("SET neon.forward_ddl = false")
.await
.context("apply_config SET neon.forward_ddl = false")?;
// Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
handle_roles(spec, &mut client).context("apply_config handle_roles")?;
handle_databases(spec, &mut client).context("apply_config handle_databases")?;
handle_role_deletions(spec, connstr.as_str(), &mut client)
.context("apply_config handle_role_deletions")?;
handle_grants(
spec,
&mut client,
connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)
.context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?;
Ok(client)
}
// 'Close' connection
drop(client);
/// Apply the spec to the running PostgreSQL instance.
/// The caller can decide to run with multiple clients in parallel, or
/// single mode. Either way, the commands executed will be the same, and
/// only commands run in different databases are parallelized.
#[instrument(skip_all)]
pub fn apply_spec_sql(
&self,
spec: Arc<ComputeSpec>,
conf: Arc<tokio_postgres::Config>,
concurrency: usize,
) -> Result<()> {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?;
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("Applying config with max {} concurrency", concurrency);
debug!("Config: {:?}", spec);
rt.block_on(async {
// Proceed with post-startup configuration. Note, that order of operations is important.
let client = Self::get_maintenance_client(&conf).await?;
let spec = spec.clone();
let databases = get_existing_dbs_async(&client).await?;
let roles = get_existing_roles_async(&client)
.await?
.into_iter()
.map(|role| (role.name.clone(), role))
.collect::<HashMap<String, Role>>();
let jwks_roles = Arc::new(
spec.as_ref()
.local_proxy_config
.iter()
.flat_map(|it| &it.jwks)
.flatten()
.flat_map(|setting| &setting.role_names)
.cloned()
.collect::<HashSet<_>>(),
);
let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
roles,
dbs: databases,
}));
for phase in [
CreateSuperUser,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
] {
info!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
phase,
|| async { Ok(&client) },
)
.await?;
}
info!("Applying RunInEachDatabase phase");
let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
let db_processes = spec
.cluster
.databases
.iter()
.map(|db| DB::new(db.clone()))
// include
.chain(once(DB::SystemDB))
.map(|db| {
let spec = spec.clone();
let ctx = ctx.clone();
let jwks_roles = jwks_roles.clone();
let mut conf = conf.as_ref().clone();
let concurrency_token = concurrency_token.clone();
let db = db.clone();
debug!("Applying per-database phases for Database {:?}", &db);
match &db {
DB::SystemDB => {}
DB::UserDB(db) => {
conf.dbname(db.name.as_str());
}
}
let conf = Arc::new(conf);
let fut = Self::apply_spec_sql_db(
spec.clone(),
conf,
ctx.clone(),
jwks_roles.clone(),
concurrency_token.clone(),
db,
);
Ok(spawn(fut))
})
.collect::<Vec<Result<_, anyhow::Error>>>();
for process in db_processes.into_iter() {
let handle = process?;
handle.await??;
}
for phase in vec![
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
] {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
phase,
|| async { Ok(&client) },
)
.await?;
}
Ok::<(), anyhow::Error>(())
})?;
Ok(())
}
/// Apply SQL migrations of the RunInEachDatabase phase.
///
/// May opt to not connect to databases that don't have any scheduled
/// operations. The function is concurrency-controlled with the provided
/// semaphore. The caller has to make sure the semaphore isn't exhausted.
async fn apply_spec_sql_db(
spec: Arc<ComputeSpec>,
conf: Arc<tokio_postgres::Config>,
ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
jwks_roles: Arc<HashSet<String>>,
concurrency_token: Arc<tokio::sync::Semaphore>,
db: DB,
) -> Result<()> {
let _permit = concurrency_token.acquire().await?;
let mut client_conn = None;
for subphase in [
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
] {
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
RunInEachDatabase {
db: db.clone(),
subphase,
},
// Only connect if apply_operation actually wants a connection.
// It's quite possible this database doesn't need any queries,
// so by not connecting we save time and effort connecting to
// that database.
|| async {
if client_conn.is_none() {
let db_client = Self::get_maintenance_client(&conf).await?;
client_conn.replace(db_client);
}
let client = client_conn.as_ref().unwrap();
Ok(client)
},
)
.await?;
}
drop(client_conn);
Ok::<(), anyhow::Error>(())
}
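The per-database fan-out above is bounded by a `tokio::sync::Semaphore`. A minimal standalone sketch of that pattern (hypothetical `jobs`, no real database work; not part of the commit):

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

async fn run_with_limit(concurrency: usize, jobs: Vec<String>) -> anyhow::Result<()> {
    let limiter = Arc::new(Semaphore::new(concurrency));
    let handles: Vec<_> = jobs
        .into_iter()
        .map(|job| {
            let limiter = limiter.clone();
            tokio::spawn(async move {
                // At most `concurrency` tasks hold a permit (and hence would
                // hold a database connection) at any one time.
                let _permit = limiter.acquire().await?;
                tracing::info!("applying per-database operations for {job}");
                Ok::<(), anyhow::Error>(())
            })
        })
        .collect();
    for handle in handles {
        handle.await??;
    }
    Ok(())
}
```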
/// Choose how many concurrent connections to use for applying the spec changes.
pub fn max_service_connections(
&self,
compute_state: &ComputeState,
spec: &ComputeSpec,
) -> usize {
// If the cluster is in Init state we don't have to deal with user connections,
// and can thus use all `max_connections` connection slots. However, that's generally not
// very efficient, so we still limit it to a smaller number.
if compute_state.status == ComputeStatus::Init {
// If the settings contain 'max_connections', use that as template
if let Some(config) = spec.cluster.settings.find("max_connections") {
config.parse::<usize>().ok()
} else {
// Otherwise, try to find the setting in the postgresql_conf string
spec.cluster
.postgresql_conf
.iter()
.flat_map(|conf| conf.split("\n"))
.filter_map(|line| {
if !line.contains("max_connections") {
return None;
}
let (key, value) = line.split_once("=")?;
let key = key
.trim_start_matches(char::is_whitespace)
.trim_end_matches(char::is_whitespace);
let value = value
.trim_start_matches(char::is_whitespace)
.trim_end_matches(char::is_whitespace);
if key != "max_connections" {
return None;
}
value.parse::<usize>().ok()
})
.next()
}
// If max_connections is present, use at most 1/3rd of that.
// When max_connections is lower than 30, try to use at least 10 connections, but
// never more than max_connections.
.map(|limit| match limit {
0..10 => limit,
10..30 => 10,
30.. => limit / 3,
})
// If we didn't find max_connections, default to 10 concurrent connections.
.unwrap_or(10)
} else {
// state == Running
// Because the cluster is already in the Running state, we should assume users are
// already connected to the cluster, and high concurrency could negatively
// impact user connectivity. Therefore, we can limit concurrency to the number of
// reserved superuser connections, which users wouldn't be able to use anyway.
spec.cluster
.settings
.find("superuser_reserved_connections")
.iter()
.filter_map(|val| val.parse::<usize>().ok())
.map(|val| if val > 1 { val - 1 } else { 1 })
.last()
.unwrap_or(3)
}
}
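To make the Init-state heuristic above concrete, here is a hedged worked example of the `max_connections`-derived budget (a standalone helper mirroring the match arms, not the actual method):

```rust
/// Mirror of the Init-state branch above: derive the apply-spec connection
/// budget from the cluster's max_connections setting.
fn init_state_budget(max_connections: usize) -> usize {
    match max_connections {
        0..10 => max_connections,    // very small clusters: use all slots
        10..30 => 10,                // small clusters: cap at 10
        30.. => max_connections / 3, // otherwise use roughly a third
    }
}

fn main() {
    assert_eq!(init_state_budget(5), 5);
    assert_eq!(init_state_budget(20), 10);
    assert_eq!(init_state_budget(100), 33);
}
```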
/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
let conf = self.get_tokio_conn_conf(Some("compute_ctl:apply_config"));
let conf = Arc::new(conf);
let spec = Arc::new(
compute_state
.pspec
.as_ref()
.expect("spec must be set")
.spec
.clone(),
);
let max_concurrent_connections = self.max_service_connections(compute_state, &spec);
// Merge-apply spec & changes to PostgreSQL state.
self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?;
if let Some(ref local_proxy) = &spec.clone().local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "migrations");
let conf = conf.as_ref().clone();
let mut conf = postgres::config::Config::from(conf);
conf.application_name("compute_ctl:migrations");
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let mut client = conf.connect(NoTls)?;
handle_migrations(&mut client).context("apply_config handle_migrations")
});
Ok(())
Ok::<(), anyhow::Error>(())
}
// Wrapped this around `pg_ctl reload`, but right now we don't use
@@ -965,38 +1244,29 @@ impl ComputeNode {
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
config::write_postgres_conf(&postgresql_conf_path, &spec, None)?;
// temporarily reset max_cluster_size in config
// TODO(ololobus): We need concurrency during reconfiguration as well,
// but the DB is already running and used by the user. We can easily exceed the
// `max_connections` limit, and the current code won't handle that.
// let compute_state = self.state.lock().unwrap().clone();
// let max_concurrent_connections = self.max_service_connections(&compute_state, &spec);
let max_concurrent_connections = 1;
// Temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc...
// creating new extensions, roles, etc.
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap();
conf.application_name("apply_config");
let conf = Arc::new(conf);
// 'Close' connection
drop(client);
let spec = Arc::new(spec.clone());
self.apply_spec_sql(spec, conf, max_concurrent_connections)?;
}
Ok(())
})?;
@@ -1119,9 +1389,19 @@ impl ComputeNode {
}
self.post_apply_config()?;
let connstr = self.connstr.clone();
let conf = self.get_conn_conf(None);
thread::spawn(move || {
get_installed_extensions_sync(connstr).context("get_installed_extensions")
let res = get_installed_extensions(conf);
match res {
Ok(extensions) => {
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&extensions)
.expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
}
});
}
@@ -1250,7 +1530,8 @@ impl ComputeNode {
/// Select `pg_stat_statements` data and return it as a stringified JSON
pub async fn collect_insights(&self) -> String {
let mut result_rows: Vec<String> = Vec::new();
let connect_result = tokio_postgres::connect(self.connstr.as_str(), NoTls).await;
let conf = self.get_tokio_conn_conf(Some("compute_ctl:collect_insights"));
let connect_result = conf.connect(NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
@@ -1376,10 +1657,9 @@ LIMIT 100",
privileges: &[Privilege],
role_name: &PgIdent,
) -> Result<()> {
use tokio_postgres::config::Config;
use tokio_postgres::NoTls;
let mut conf = Config::from_str(self.connstr.as_str()).unwrap();
let mut conf = self.get_tokio_conn_conf(Some("compute_ctl:set_role_grants"));
conf.dbname(db_name);
let (db_client, conn) = conf
@@ -1416,10 +1696,9 @@ LIMIT 100",
db_name: &PgIdent,
ext_version: ExtVersion,
) -> Result<ExtVersion> {
use tokio_postgres::config::Config;
use tokio_postgres::NoTls;
let mut conf = Config::from_str(self.connstr.as_str()).unwrap();
let mut conf = self.get_tokio_conn_conf(Some("compute_ctl:install_extension"));
conf.dbname(db_name);
let (db_client, conn) = conf

View File

@@ -116,7 +116,7 @@ pub fn write_postgres_conf(
vartype: "enum".to_owned(),
};
write!(file, "{}", opt.to_pg_setting())?;
writeln!(file, "{}", opt.to_pg_setting())?;
}
}

View File

@@ -103,14 +103,33 @@ fn get_pg_config(argument: &str, pgbin: &str) -> String {
.to_string()
}
pub fn get_pg_version(pgbin: &str) -> String {
pub fn get_pg_version(pgbin: &str) -> PostgresMajorVersion {
// pg_config --version returns a (platform specific) human readable string
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
let human_version = get_pg_config("--version", pgbin);
parse_pg_version(&human_version).to_string()
parse_pg_version(&human_version)
}
fn parse_pg_version(human_version: &str) -> &str {
pub fn get_pg_version_string(pgbin: &str) -> String {
match get_pg_version(pgbin) {
PostgresMajorVersion::V14 => "v14",
PostgresMajorVersion::V15 => "v15",
PostgresMajorVersion::V16 => "v16",
PostgresMajorVersion::V17 => "v17",
}
.to_owned()
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum PostgresMajorVersion {
V14,
V15,
V16,
V17,
}
fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
use PostgresMajorVersion::*;
// Normal releases have version strings like "PostgreSQL 15.4". But there
// are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
// 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
@@ -121,10 +140,10 @@ fn parse_pg_version(human_version: &str) -> &str {
.captures(human_version)
{
Some(captures) if captures.len() == 2 => match &captures["major"] {
"14" => return "v14",
"15" => return "v15",
"16" => return "v16",
"17" => return "v17",
"14" => return V14,
"15" => return V15,
"16" => return V16,
"17" => return V17,
_ => {}
},
_ => {}
@@ -263,24 +282,25 @@ mod tests {
#[test]
fn test_parse_pg_version() {
assert_eq!(parse_pg_version("PostgreSQL 15.4"), "v15");
assert_eq!(parse_pg_version("PostgreSQL 15.14"), "v15");
use super::PostgresMajorVersion::*;
assert_eq!(parse_pg_version("PostgreSQL 15.4"), V15);
assert_eq!(parse_pg_version("PostgreSQL 15.14"), V15);
assert_eq!(
parse_pg_version("PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)"),
"v15"
V15
);
assert_eq!(parse_pg_version("PostgreSQL 14.15"), "v14");
assert_eq!(parse_pg_version("PostgreSQL 14.0"), "v14");
assert_eq!(parse_pg_version("PostgreSQL 14.15"), V14);
assert_eq!(parse_pg_version("PostgreSQL 14.0"), V14);
assert_eq!(
parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
"v14"
V14
);
assert_eq!(parse_pg_version("PostgreSQL 16devel"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16extra"), "v16");
assert_eq!(parse_pg_version("PostgreSQL 16devel"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16extra"), V16);
}
#[test]

View File

@@ -20,6 +20,7 @@ use anyhow::Result;
use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use metrics::proto::MetricFamily;
use metrics::Encoder;
use metrics::TextEncoder;
use tokio::task;
@@ -72,10 +73,22 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
(&Method::GET, "/metrics") => {
debug!("serving /metrics GET request");
let mut buffer = vec![];
let metrics = installed_extensions::collect();
// When we call TextEncoder::encode() below, it will immediately
// return an error if a metric family has no metrics, so we need to
// preemptively filter out metric families with no metrics.
let metrics = installed_extensions::collect()
.into_iter()
.filter(|m| !m.get_metric().is_empty())
.collect::<Vec<MetricFamily>>();
let encoder = TextEncoder::new();
encoder.encode(&metrics, &mut buffer).unwrap();
let mut buffer = vec![];
if let Err(err) = encoder.encode(&metrics, &mut buffer) {
let msg = format!("error handling /metrics request: {err}");
error!(msg);
return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
}
match Response::builder()
.status(StatusCode::OK)
@@ -282,8 +295,12 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
return Response::new(Body::from(msg));
}
let connstr = compute.connstr.clone();
let res = crate::installed_extensions::get_installed_extensions(connstr).await;
let conf = compute.get_conn_conf(None);
let res =
task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
.await
.unwrap();
match res {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(e) => render_json_error(

View File

@@ -2,12 +2,9 @@ use compute_api::responses::{InstalledExtension, InstalledExtensions};
use metrics::proto::MetricFamily;
use std::collections::HashMap;
use std::collections::HashSet;
use tracing::info;
use url::Url;
use anyhow::Result;
use postgres::{Client, NoTls};
use tokio::task;
use metrics::core::Collector;
use metrics::{register_uint_gauge_vec, UIntGaugeVec};
@@ -42,80 +39,58 @@ fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
///
/// Same extension can be installed in multiple databases with different versions,
/// we only keep the highest and lowest version across all databases.
pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions> {
let mut connstr = connstr.clone();
pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<InstalledExtensions> {
conf.application_name("compute_ctl:get_installed_extensions");
let mut client = conf.connect(NoTls)?;
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let databases: Vec<String> = list_dbs(&mut client)?;
let databases: Vec<String> = list_dbs(&mut client)?;
let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
for db in databases.iter() {
connstr.set_path(db);
let mut db_client = Client::connect(connstr.as_str(), NoTls)?;
let extensions: Vec<(String, String)> = db_client
.query(
"SELECT extname, extversion FROM pg_catalog.pg_extension;",
&[],
)?
.iter()
.map(|row| (row.get("extname"), row.get("extversion")))
.collect();
let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
for db in databases.iter() {
conf.dbname(db);
let mut db_client = conf.connect(NoTls)?;
let extensions: Vec<(String, String)> = db_client
.query(
"SELECT extname, extversion FROM pg_catalog.pg_extension;",
&[],
)?
.iter()
.map(|row| (row.get("extname"), row.get("extversion")))
.collect();
for (extname, v) in extensions.iter() {
let version = v.to_string();
for (extname, v) in extensions.iter() {
let version = v.to_string();
// increment the number of databases where the version of extension is installed
INSTALLED_EXTENSIONS
.with_label_values(&[extname, &version])
.inc();
// increment the number of databases where the version of extension is installed
INSTALLED_EXTENSIONS
.with_label_values(&[extname, &version])
.inc();
extensions_map
.entry(extname.to_string())
.and_modify(|e| {
e.versions.insert(version.clone());
// count the number of databases where the extension is installed
e.n_databases += 1;
})
.or_insert(InstalledExtension {
extname: extname.to_string(),
versions: HashSet::from([version.clone()]),
n_databases: 1,
});
}
extensions_map
.entry(extname.to_string())
.and_modify(|e| {
e.versions.insert(version.clone());
// count the number of databases where the extension is installed
e.n_databases += 1;
})
.or_insert(InstalledExtension {
extname: extname.to_string(),
versions: HashSet::from([version.clone()]),
n_databases: 1,
});
}
}
let res = InstalledExtensions {
extensions: extensions_map.values().cloned().collect(),
};
let res = InstalledExtensions {
extensions: extensions_map.into_values().collect(),
};
Ok(res)
})
.await?
}
// Gather info about installed extensions
pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to create runtime");
let result = rt
.block_on(crate::installed_extensions::get_installed_extensions(
connstr,
))
.expect("failed to get installed extensions");
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&result).expect("failed to serialize extensions list")
);
Ok(())
Ok(res)
}
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"installed_extensions",
"compute_installed_extensions",
"Number of databases where the version of extension is installed",
&["extension_name", "version"]
)
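To illustrate the aggregation in `get_installed_extensions` above: the same extension found in several databases collapses into one entry tracking the set of versions and the number of databases. A hedged standalone sketch with made-up data:

```rust
use std::collections::{HashMap, HashSet};

fn main() {
    // (extension name, version) pairs as they might be seen across databases.
    let seen = [
        ("plpgsql", "1.0"),
        ("pg_stat_statements", "1.10"),
        ("pg_stat_statements", "1.9"),
    ];

    let mut extensions: HashMap<&str, (HashSet<&str>, usize)> = HashMap::new();
    for (extname, version) in seen {
        let entry = extensions.entry(extname).or_insert((HashSet::new(), 0));
        entry.0.insert(version); // set of versions across databases
        entry.1 += 1;            // number of databases it appears in
    }

    assert_eq!(extensions["pg_stat_statements"].1, 2);
    assert!(extensions["pg_stat_statements"].0.contains("1.9"));
}
```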

View File

@@ -23,5 +23,6 @@ pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;
mod spec_apply;
pub mod swap;
pub mod sync_sk;

View File

@@ -17,11 +17,8 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// should be handled gracefully.
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let mut connstr = compute.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "compute_activity_monitor");
let connstr = connstr.as_str();
let connstr = compute.connstr.clone();
let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));
// During startup and configuration we connect to every Postgres database,
// but we don't want to count this as some user activity. So wait until
@@ -29,7 +26,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
wait_for_postgres_start(compute);
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let mut client = conf.connect(NoTls);
let mut sleep = false;
let mut prev_active_time: Option<f64> = None;
@@ -57,7 +54,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
info!("connection to Postgres is closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
client = conf.connect(NoTls);
continue;
}
@@ -196,7 +193,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
debug!("could not connect to Postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
client = conf.connect(NoTls);
}
}
}

View File

@@ -6,15 +6,18 @@ use std::io::{BufRead, BufReader};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::Child;
use std::str::FromStr;
use std::thread::JoinHandle;
use std::time::{Duration, Instant};
use anyhow::{bail, Result};
use futures::StreamExt;
use ini::Ini;
use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction};
use postgres::config::Config;
use tokio::io::AsyncBufReadExt;
use tokio::time::timeout;
use tokio_postgres;
use tokio_postgres::NoTls;
use tracing::{debug, error, info, instrument};
@@ -197,27 +200,34 @@ impl Escaping for PgIdent {
}
/// Build a list of existing Postgres roles
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
let postgres_roles = xact
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
.iter()
pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {
let postgres_roles = client
.query_raw::<str, &String, &[String; 0]>(
"SELECT rolname, rolpassword FROM pg_catalog.pg_authid",
&[],
)
.await?
.filter_map(|row| async { row.ok() })
.map(|row| Role {
name: row.get("rolname"),
encrypted_password: row.get("rolpassword"),
options: None,
})
.collect();
.collect()
.await;
Ok(postgres_roles)
}
/// Build a list of existing Postgres databases
pub fn get_existing_dbs(client: &mut Client) -> Result<HashMap<String, Database>> {
pub async fn get_existing_dbs_async(
client: &tokio_postgres::Client,
) -> Result<HashMap<String, Database>> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state. See:
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
let postgres_dbs: Vec<Database> = client
.query(
let rowstream = client
.query_raw::<str, &String, &[String; 0]>(
"SELECT
datname AS name,
datdba::regrole::text AS owner,
@@ -226,8 +236,11 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<HashMap<String, Database>
FROM
pg_catalog.pg_database;",
&[],
)?
.iter()
)
.await?;
let dbs_map = rowstream
.filter_map(|r| async { r.ok() })
.map(|row| Database {
name: row.get("name"),
owner: row.get("owner"),
@@ -235,12 +248,9 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<HashMap<String, Database>
invalid: row.get("invalid"),
options: None,
})
.collect();
let dbs_map = postgres_dbs
.iter()
.map(|db| (db.name.clone(), db.clone()))
.collect::<HashMap<_, _>>();
.collect::<HashMap<_, _>>()
.await;
Ok(dbs_map)
}
@@ -535,3 +545,11 @@ async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Resu
Ok(())
}
/// `postgres::config::Config` handles database names with whitespace
/// and special characters properly.
pub fn postgres_conf_for_db(connstr: &url::Url, dbname: &str) -> Result<Config> {
let mut conf = Config::from_str(connstr.as_str())?;
conf.dbname(dbname);
Ok(conf)
}
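A hedged example of `postgres_conf_for_db` above, using an illustrative connection URL and a database name with special characters (the kind of name that motivates going through `Config` rather than editing the URL path):

```rust
fn main() -> anyhow::Result<()> {
    let connstr = url::Url::parse("postgresql://cloud_admin@localhost:5432/postgres")?;
    // A dbname containing '=' or spaces is awkward to splice into a URL or a
    // key=value connstring, but Config::dbname handles it as an opaque string.
    let conf = postgres_conf_for_db(&connstr, "weird=db name")?;
    assert_eq!(conf.get_dbname(), Some("weird=db name"));
    Ok(())
}
```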

View File

@@ -1,22 +1,17 @@
use std::collections::HashSet;
use anyhow::{anyhow, bail, Result};
use postgres::Client;
use reqwest::StatusCode;
use std::fs::File;
use std::path::Path;
use std::str::FromStr;
use anyhow::{anyhow, bail, Context, Result};
use postgres::config::Config;
use postgres::{Client, NoTls};
use reqwest::StatusCode;
use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
use tracing::{error, info, instrument, warn};
use crate::config;
use crate::logger::inlinify;
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
use compute_api::spec::{ComputeSpec, PgIdent, Role};
use compute_api::spec::ComputeSpec;
// Do control plane request and return response if any. In case of error it
// returns a bool flag indicating whether it makes sense to retry the request
@@ -151,625 +146,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
Ok(())
}
/// Compute could be unexpectedly shut down, for example, during the
/// database dropping. This leaves the database in the invalid state,
/// which prevents new db creation with the same name. This function
/// will clean it up before proceeding with catalog updates. All
/// possible future cleanup operations may go here too.
#[instrument(skip_all)]
pub fn cleanup_instance(client: &mut Client) -> Result<()> {
let existing_dbs = get_existing_dbs(client)?;
for (_, db) in existing_dbs {
if db.invalid {
// After recent commit in Postgres, interrupted DROP DATABASE
// leaves the database in the invalid state. According to the
// commit message, the only option for user is to drop it again.
// See:
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
//
// The Neon Postgres extension is designed so that a db is de-registered
// in the control plane metadata only after it is dropped. So there is
// a chance that the control plane still thinks the db should exist. This means
// that it will be re-created by `handle_databases()`. Yet, that's fine,
// as the user can just repeat the drop (in vanilla Postgres they would need
// to do the same, btw).
let query = format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote());
info!("dropping invalid database {}", db.name);
client.execute(query.as_str(), &[])?;
}
}
Ok(())
}
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
let mut jwks_roles = HashSet::new();
if let Some(local_proxy) = &spec.local_proxy_config {
for jwks_setting in local_proxy.jwks.iter().flatten() {
for role_name in &jwks_setting.role_names {
jwks_roles.insert(role_name.clone());
}
}
}
// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
for r in &existing_roles {
vec.push(format!(
"{}:{}",
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
));
}
info!("postgres roles (total {}): {:?}", vec.len(), vec);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
for op in ops {
match op.action.as_ref() {
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
}
// Renaming role drops its password, since role name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
"rename_role" => {
let new_name = op.new_name.as_ref().unwrap();
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
xact.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres roles info to handle possible roles renaming
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!(
"handling cluster spec roles (total {})",
spec.cluster.roles.len()
);
for role in &spec.cluster.roles {
let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
enum RoleAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_role {
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
{
RoleAction::Update
} else if let Some(pg_pwd) = &r.encrypted_password {
// Check whether password changed or not (trim 'md5' prefix first if any)
//
// This is a backward compatibility hack, which comes from the times when we were using
// md5 for everyone and hashes were stored in the console db without md5 prefix. So when
// role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix,
// but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix.
// Here is the only place so far where we compare hashes, so it seems to be the best candidate
// to place this compatibility layer.
let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") {
stripped
} else {
pg_pwd
};
if pg_pwd != *role.encrypted_password.as_ref().unwrap() {
RoleAction::Update
} else {
RoleAction::None
}
} else {
RoleAction::None
}
} else {
RoleAction::Create
};
match action {
RoleAction::None => {}
RoleAction::Update => {
// This can be run on /every/ role! Not just ones created through the console.
// This means that if you add some funny ALTER here that adds a permission,
// this will get run even on user-created roles! This will result in different
// behavior before and after a spec gets reapplied. The below ALTER as it stands
// now only grants LOGIN and changes the password. Please do not allow this branch
// to do anything silly.
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
RoleAction::Create => {
// This branch only runs when roles are created through the console, so it is
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
// from neon_superuser.
let mut query: String = format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote()
);
if jwks_roles.contains(name.as_str()) {
query = format!("CREATE ROLE {}", name.pg_quote());
}
info!("running role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
}
if span_enabled!(Level::INFO) {
let pwd = if role.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
};
let action_str = match action {
RoleAction::None => "",
RoleAction::Create => " -> create",
RoleAction::Update => " -> update",
};
info!(" - {}:{}{}", name, pwd, action_str);
}
}
xact.commit()?;
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)]
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
if let Some(ops) = &spec.delta_operations {
// First, reassign all dependent objects to db owners.
info!("reassigning dependent objects of to-be-deleted roles");
// Fetch existing roles. We could've exported and used `existing_roles` from
// `handle_roles()`, but we only make this list there before creating new roles.
// Which is probably fine as we never create to-be-deleted roles, but that'd
// just look a bit untidy. Anyway, the entire `pg_roles` should be in shared
// buffers already, so this shouldn't be a big deal.
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
xact.commit()?;
for op in ops {
// Check that role is still present in Postgres, as this could be a
// restart with the same spec after role deletion.
if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
reassign_owned_objects(spec, connstr, &op.name)?;
}
}
// Second, proceed with role deletions.
info!("processing role deletions");
let mut xact = client.transaction()?;
for op in ops {
// We do not check whether the role exists or not;
// Postgres will take care of it for us.
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
xact.commit()?;
}
Ok(())
}
fn reassign_owned_objects_in_one_db(
conf: Config,
role_name: &PgIdent,
db_owner: &PgIdent,
) -> Result<()> {
let mut client = conf.connect(NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.pg_quote(),
db_owner.pg_quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name,
conf.get_dbname().unwrap_or(""),
db_owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
client.simple_query(&drop_query)?;
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
for db in &spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?;
}
}
// Also handle case when there are no databases in the spec.
// In this case we need to reassign objects in the default database.
let conf = Config::from_str(connstr)?;
let db_owner = PgIdent::from_str("cloud_admin")?;
reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?;
Ok(())
}
/// It follows mostly the same logic as `handle_roles()`, except that we
/// do not use an explicit transaction block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
/// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us with idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only if INFO logging is enabled)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
for (dbname, db) in &existing_dbs {
vec.push(format!("{}:{}", dbname, db.owner));
}
info!("postgres databases (total {}): {:?}", vec.len(), vec);
}
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on databases");
for op in ops {
match op.action.as_ref() {
// We do not check whether the DB exists or not;
// Postgres will take care of it for us.
"delete_db" => {
// In Postgres we can't drop a database if it is a template.
// So we need to unset the template flag first, but it could
// be a retry, so we could've already dropped the database.
// Check that database exists first to make it idempotent.
let unset_template_query: String = format!(
"
DO $$
BEGIN
IF EXISTS(
SELECT 1
FROM pg_catalog.pg_database
WHERE datname = {}
)
THEN
ALTER DATABASE {} is_template false;
END IF;
END
$$;",
escape_literal(&op.name),
&op.name.pg_quote()
);
// Use FORCE to drop database even if there are active connections.
// We run this from `cloud_admin`, so it should have enough privileges.
// NB: there could be other db states, which prevent us from dropping
// the database. For example, if db is used by any active subscription
// or replication slot.
// TODO: deal with it once we allow logical replication. Proper fix should
// involve returning an error code to the control plane, so it could
// figure out that this is a non-retryable error, return it to the user
// and fail operation permanently.
let drop_db_query: String = format!(
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
&op.name.pg_quote()
);
warn!("deleting database '{}'", &op.name);
client.execute(unset_template_query.as_str(), &[])?;
client.execute(drop_db_query.as_str(), &[])?;
}
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
if existing_dbs.contains_key(&op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
client.execute(query.as_str(), &[])?;
}
}
_ => {}
}
}
}
// Refresh Postgres databases info to handle possible renames
let existing_dbs = get_existing_dbs(client)?;
info!(
"handling cluster spec databases (total {})",
spec.cluster.databases.len()
);
for db in &spec.cluster.databases {
let name = &db.name;
let pg_db = existing_dbs.get(name);
enum DatabaseAction {
None,
Update,
Create,
}
let action = if let Some(r) = pg_db {
// XXX: the db owner name is returned as a quoted string from Postgres
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
db.owner.pg_quote()
} else {
db.owner.clone()
};
if new_owner != r.owner {
// Update the owner
DatabaseAction::Update
} else {
DatabaseAction::None
}
} else {
DatabaseAction::Create
};
match action {
DatabaseAction::None => {}
DatabaseAction::Update => {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.pg_quote(),
db.owner.pg_quote()
);
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
}
DatabaseAction::Create => {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
query.push_str(&db.to_pg_options());
let _guard = info_span!("executing", query).entered();
client.execute(query.as_str(), &[])?;
let grant_query: String = format!(
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
name.pg_quote()
);
client.execute(grant_query.as_str(), &[])?;
}
};
if span_enabled!(Level::INFO) {
let action_str = match action {
DatabaseAction::None => "",
DatabaseAction::Create => " -> create",
DatabaseAction::Update => " -> update",
};
info!(" - {}:{}{}", db.name, db.owner, action_str);
}
}
Ok(())
}
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(
spec: &ComputeSpec,
client: &mut Client,
connstr: &str,
enable_anon_extension: bool,
) -> Result<()> {
info!("modifying database permissions");
let existing_dbs = get_existing_dbs(client)?;
// Do some per-database access adjustments. Ideally we'd do this at db creation time,
// but CREATE DATABASE isn't transactional, so we cannot create the db and apply
// the grants atomically.
for db in &spec.cluster.databases {
match existing_dbs.get(&db.name) {
Some(pg_db) => {
if pg_db.restrict_conn || pg_db.invalid {
info!(
"skipping grants for db {} (invalid: {}, connections not allowed: {})",
db.name, pg_db.invalid, pg_db.restrict_conn
);
continue;
}
}
None => {
bail!(
"database {} doesn't exist in Postgres after handle_databases()",
db.name
);
}
}
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?;
// This will only change ownership of the schema itself, not the objects
// inside it. Without it, the owner of the `public` schema would be `cloud_admin`
// and the database owner could not do anything with it. The SQL procedure ensures
// that it won't error out if schema `public` doesn't exist.
let alter_query = format!(
"DO $$\n\
DECLARE\n\
schema_owner TEXT;\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
SELECT nspowner::regrole::text\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
INTO schema_owner;\n\
\n\
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
THEN\n\
ALTER SCHEMA public OWNER TO {};\n\
END IF;\n\
END IF;\n\
END\n\
$$;",
db.owner.pg_quote()
);
db_client.simple_query(&alter_query)?;
// Explicitly grant CREATE ON SCHEMA public to the web_access user.
// This is needed because, since Postgres 15, this privilege is removed by default.
// TODO: web_access hasn't been created for almost a year. It could be that we still
// have active users of year-old projects, but hopefully not, so check it and
// remove this code if possible. The worst thing that could happen is that
// a user won't be able to use the public schema in NEW databases created in a
// very OLD project.
//
// Also, alter default permissions so that relations created by extensions can be
// used by neon_superuser without permission issues.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
) AND\n\
current_setting('server_version_num')::int/10000 >= 15\n\
THEN\n\
IF EXISTS(\n\
SELECT rolname\n\
FROM pg_catalog.pg_roles\n\
WHERE rolname = 'web_access'\n\
)\n\
THEN\n\
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\
END IF;\n\
END\n\
$$;"
.to_string();
info!(
"grant query for db {} : {}",
&db.name,
inlinify(&grant_query)
);
db_client.simple_query(&grant_query)?;
// it is important to run this after all grants
if enable_anon_extension {
handle_extension_anon(spec, &db.owner, &mut db_client, false)
.context("handle_grants handle_extension_anon")?;
}
}
Ok(())
}
/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
// Create extension only if this compute really needs it
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
info!("creating system extensions with query: {}", query);
client.simple_query(query)?;
}
}
Ok(())
}
/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database
#[instrument(skip_all)]
pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
info!("handle extension neon");
let mut query = "CREATE SCHEMA IF NOT EXISTS neon";
client.simple_query(query)?;
query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon";
info!("create neon extension with query: {}", query);
client.simple_query(query)?;
query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'";
client.simple_query(query)?;
query = "ALTER EXTENSION neon SET SCHEMA neon";
info!("alter neon extension schema with query: {}", query);
client.simple_query(query)?;
// This will be a no-op if the extension is already up to date,
// which may happen in two cases:
// - the extension was just installed (and is therefore at the latest version)
// - the extension was installed earlier and has already been updated
let query = "ALTER EXTENSION neon UPDATE";
info!("update neon extension version with query: {}", query);
if let Err(e) = client.simple_query(query) {
error!(
"failed to upgrade neon extension during `handle_extension_neon`: {}",
e
);
}
Ok(())
}
#[instrument(skip_all)]
pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
info!("handle neon extension upgrade");


@@ -0,0 +1,680 @@
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter};
use std::future::Future;
use std::iter::empty;
use std::iter::once;
use std::sync::Arc;
use crate::compute::construct_superuser_query;
use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
use anyhow::{bail, Result};
use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
use futures::future::join_all;
use tokio::sync::RwLock;
use tokio_postgres::Client;
use tracing::{debug, info_span, Instrument};
#[derive(Clone)]
pub enum DB {
SystemDB,
UserDB(Database),
}
impl DB {
pub fn new(db: Database) -> DB {
Self::UserDB(db)
}
pub fn is_owned_by(&self, role: &PgIdent) -> bool {
match self {
DB::SystemDB => false,
DB::UserDB(db) => &db.owner == role,
}
}
}
impl Debug for DB {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
DB::SystemDB => f.debug_tuple("SystemDB").finish(),
DB::UserDB(db) => f.debug_tuple("UserDB").field(&db.name).finish(),
}
}
}
#[derive(Copy, Clone, Debug)]
pub enum PerDatabasePhase {
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
}
#[derive(Clone, Debug)]
pub enum ApplySpecPhase {
CreateSuperUser,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
}
pub struct Operation {
pub query: String,
pub comment: Option<String>,
}
pub struct MutableApplyContext {
pub roles: HashMap<String, Role>,
pub dbs: HashMap<String, Database>,
}
/// Apply the operations that belong to the given spec apply phase.
///
/// Commands within a single phase are executed in order of Iterator yield.
/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
/// indicated by its `db` field, and can share a single client for all changes
/// to that database.
///
/// Notes:
/// - Commands are pipelined, and thus may cause incomplete apply if one
/// command of many fails.
/// - Failing commands will fail the phase's apply step once the return value
/// is processed.
/// - No timeouts have (yet) been implemented.
/// - The caller is responsible for limiting and/or applying concurrency.
pub async fn apply_operations<'a, Fut, F>(
spec: Arc<ComputeSpec>,
ctx: Arc<RwLock<MutableApplyContext>>,
jwks_roles: Arc<HashSet<String>>,
apply_spec_phase: ApplySpecPhase,
client: F,
) -> Result<()>
where
F: FnOnce() -> Fut,
Fut: Future<Output = Result<&'a Client>>,
{
debug!("Starting phase {:?}", &apply_spec_phase);
let span = info_span!("db_apply_changes", phase=?apply_spec_phase);
let span2 = span.clone();
async move {
debug!("Processing phase {:?}", &apply_spec_phase);
let ctx = ctx;
let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
.await?
.peekable();
// Return (and by doing so, skip requesting the PostgreSQL client) if
// we don't have any operations scheduled.
if ops.peek().is_none() {
return Ok(());
}
let client = client().await?;
debug!("Applying phase {:?}", &apply_spec_phase);
let active_queries = ops
.map(|op| {
let Operation { comment, query } = op;
let inspan = match comment {
None => span.clone(),
Some(comment) => info_span!("phase {}: {}", comment),
};
async {
let query = query;
let res = client.simple_query(&query).await;
debug!(
"{} {}",
if res.is_ok() {
"successfully executed"
} else {
"failed to execute"
},
query
);
res
}
.instrument(inspan)
})
.collect::<Vec<_>>();
drop(ctx);
for it in join_all(active_queries).await {
drop(it?);
}
debug!("Completed phase {:?}", &apply_spec_phase);
Ok(())
}
.instrument(span2)
.await
}
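A hedged usage sketch of `apply_operations` (not part of the diff): the wrapper function name is hypothetical and it assumes the imports at the top of this file. It shows how a caller can pass the client lazily, so no connection is borrowed for phases that yield no operations:

```rust
async fn apply_neon_extension_phase(
    spec: Arc<ComputeSpec>,
    ctx: Arc<RwLock<MutableApplyContext>>,
    jwks_roles: Arc<HashSet<String>>,
    client: &Client,
) -> Result<()> {
    // The closure is only awaited if the phase yields at least one operation,
    // mirroring the `ops.peek()` early return inside `apply_operations`.
    apply_operations(
        spec,
        ctx,
        jwks_roles,
        ApplySpecPhase::HandleNeonExtension,
        || async move { Ok(client) },
    )
    .await
}
```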
/// Create a stream of operations to be executed for that phase of applying
/// changes.
///
/// In the future we may generate a single stream of changes and then
/// sort/merge/batch execution, but for now this is a nice way to improve
/// batching behaviour of the commands.
async fn get_operations<'a>(
spec: &'a ComputeSpec,
ctx: &'a RwLock<MutableApplyContext>,
jwks_roles: &'a HashSet<String>,
apply_spec_phase: &'a ApplySpecPhase,
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
match apply_spec_phase {
ApplySpecPhase::CreateSuperUser => {
let query = construct_superuser_query(spec);
Ok(Box::new(once(Operation {
query,
comment: None,
})))
}
ApplySpecPhase::DropInvalidDatabases => {
let mut ctx = ctx.write().await;
let databases = &mut ctx.dbs;
let keys: Vec<_> = databases
.iter()
.filter(|(_, db)| db.invalid)
.map(|(dbname, _)| dbname.clone())
.collect();
// After a recent commit in Postgres, an interrupted DROP DATABASE
// leaves the database in an invalid state. According to the
// commit message, the only option for the user is to drop it again.
// See:
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
//
// On the Neon side, the db is de-registered in the control plane
// metadata only after it is dropped. So there is a chance that the
// control plane still thinks that the db should exist. This means
// that it will be re-created by the `CreateAndAlterDatabases` phase. This
// is fine, as the user can just drop the database again (in vanilla
// Postgres they would need to do the same).
let operations = keys
.into_iter()
.filter_map(move |dbname| ctx.dbs.remove(&dbname))
.map(|db| Operation {
query: format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()),
comment: Some(format!("Dropping invalid database {}", db.name)),
});
Ok(Box::new(operations))
}
ApplySpecPhase::RenameRoles => {
let mut ctx = ctx.write().await;
let operations = spec
.delta_operations
.iter()
.flatten()
.filter(|op| op.action == "rename_role")
.filter_map(move |op| {
let roles = &mut ctx.roles;
if !roles.contains_key(op.name.as_str()) {
None
} else {
let new_name = op.new_name.as_ref().unwrap();
let mut role = roles.remove(op.name.as_str()).unwrap();
role.name = new_name.clone();
role.encrypted_password = None;
roles.insert(role.name.clone(), role);
Some(Operation {
query: format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
),
comment: Some(format!("renaming role '{}' to '{}'", op.name, new_name)),
})
}
});
Ok(Box::new(operations))
}
ApplySpecPhase::CreateAndAlterRoles => {
let mut ctx = ctx.write().await;
let operations = spec.cluster.roles
.iter()
.filter_map(move |role| {
let roles = &mut ctx.roles;
let db_role = roles.get(&role.name);
match db_role {
Some(db_role) => {
if db_role.encrypted_password != role.encrypted_password {
// This can be run on /every/ role! Not just ones created through the console.
// This means that if you add some funny ALTER here that adds a permission,
// this will get run even on user-created roles! This will result in different
// behavior before and after a spec gets reapplied. The below ALTER as it stands
// now only grants LOGIN and changes the password. Please do not allow this branch
// to do anything silly.
Some(Operation {
query: format!(
"ALTER ROLE {} {}",
role.name.pg_quote(),
role.to_pg_options(),
),
comment: None,
})
} else {
None
}
}
None => {
let query = if !jwks_roles.contains(role.name.as_str()) {
format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
role.name.pg_quote(),
role.to_pg_options(),
)
} else {
format!(
"CREATE ROLE {} {}",
role.name.pg_quote(),
role.to_pg_options(),
)
};
Some(Operation {
query,
comment: Some(format!("creating role {}", role.name)),
})
}
}
});
Ok(Box::new(operations))
}
ApplySpecPhase::RenameAndDeleteDatabases => {
let mut ctx = ctx.write().await;
let operations = spec
.delta_operations
.iter()
.flatten()
.filter_map(move |op| {
let databases = &mut ctx.dbs;
match op.action.as_str() {
// We do not check whether the DB exists or not,
// Postgres will take care of it for us
"delete_db" => {
// In Postgres we can't drop a database if it is a template.
// So we need to unset the template flag first, but it could
// be a retry, so we could've already dropped the database.
// Check that database exists first to make it idempotent.
let unset_template_query: String = format!(
include_str!("sql/unset_template_for_drop_dbs.sql"),
datname_str = escape_literal(&op.name),
datname = &op.name.pg_quote()
);
// Use FORCE to drop database even if there are active connections.
// We run this from `cloud_admin`, so it should have enough privileges.
// NB: there could be other db states, which prevent us from dropping
// the database. For example, if db is used by any active subscription
// or replication slot.
// TODO: deal with it once we allow logical replication. Proper fix should
// involve returning an error code to the control plane, so it could
// figure out that this is a non-retryable error, return it to the user
// and fail operation permanently.
let drop_db_query: String = format!(
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
&op.name.pg_quote()
);
databases.remove(&op.name);
Some(vec![
Operation {
query: unset_template_query,
comment: Some(format!(
"optionally clearing template flags for DB {}",
op.name,
)),
},
Operation {
query: drop_db_query,
comment: Some(format!("deleting database {}", op.name,)),
},
])
}
"rename_db" => {
if let Some(mut db) = databases.remove(&op.name) {
// update state of known databases
let new_name = op.new_name.as_ref().unwrap();
db.name = new_name.clone();
databases.insert(db.name.clone(), db);
Some(vec![Operation {
query: format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote(),
),
comment: Some(format!(
"renaming database '{}' to '{}'",
op.name, new_name
)),
}])
} else {
None
}
}
_ => None,
}
})
.flatten();
Ok(Box::new(operations))
}
ApplySpecPhase::CreateAndAlterDatabases => {
let mut ctx = ctx.write().await;
let operations = spec
.cluster
.databases
.iter()
.filter_map(move |db| {
let databases = &mut ctx.dbs;
if let Some(edb) = databases.get_mut(&db.name) {
let change_owner = if edb.owner.starts_with('"') {
db.owner.pg_quote() != edb.owner
} else {
db.owner != edb.owner
};
edb.owner = db.owner.clone();
if change_owner {
Some(vec![Operation {
query: format!(
"ALTER DATABASE {} OWNER TO {}",
db.name.pg_quote(),
db.owner.pg_quote()
),
comment: Some(format!(
"changing database owner of database {} to {}",
db.name, db.owner
)),
}])
} else {
None
}
} else {
databases.insert(db.name.clone(), db.clone());
Some(vec![
Operation {
query: format!(
"CREATE DATABASE {} {}",
db.name.pg_quote(),
db.to_pg_options(),
),
comment: None,
},
Operation {
query: format!(
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
db.name.pg_quote()
),
comment: None,
},
])
}
})
.flatten();
Ok(Box::new(operations))
}
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
match subphase {
PerDatabasePhase::DeleteDBRoleReferences => {
let ctx = ctx.read().await;
let operations =
spec.delta_operations
.iter()
.flatten()
.filter(|op| op.action == "delete_role")
.filter_map(move |op| {
if db.is_owned_by(&op.name) {
return None;
}
if !ctx.roles.contains_key(&op.name) {
return None;
}
let quoted = op.name.pg_quote();
let new_owner = match &db {
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
DB::UserDB(db) => db.owner.pg_quote(),
};
Some(vec![
// This will reassign all dependent objects to the db owner
Operation {
query: format!(
"REASSIGN OWNED BY {} TO {}",
quoted, new_owner,
),
comment: None,
},
// This now will only drop privileges of the role
Operation {
query: format!("DROP OWNED BY {}", quoted),
comment: None,
},
])
})
.flatten();
Ok(Box::new(operations))
}
PerDatabasePhase::ChangeSchemaPerms => {
let ctx = ctx.read().await;
let databases = &ctx.dbs;
let db = match &db {
// ignore schema permissions on the system database
DB::SystemDB => return Ok(Box::new(empty())),
DB::UserDB(db) => db,
};
if databases.get(&db.name).is_none() {
bail!("database {} doesn't exist in PostgreSQL", db.name);
}
let edb = databases.get(&db.name).unwrap();
if edb.restrict_conn || edb.invalid {
return Ok(Box::new(empty()));
}
let operations = vec![
Operation {
query: format!(
include_str!("sql/set_public_schema_owner.sql"),
db_owner = db.owner.pg_quote()
),
comment: None,
},
Operation {
query: String::from(include_str!("sql/default_grants.sql")),
comment: None,
},
]
.into_iter();
Ok(Box::new(operations))
}
PerDatabasePhase::HandleAnonExtension => {
// Only install Anon into user databases
let db = match &db {
DB::SystemDB => return Ok(Box::new(empty())),
DB::UserDB(db) => db,
};
// Never install Anon when it's not enabled as feature
if !spec.features.contains(&ComputeFeature::AnonExtension) {
return Ok(Box::new(empty()));
}
// Only install Anon when it's added in preload libraries
let opt_libs = spec.cluster.settings.find("shared_preload_libraries");
let libs = match opt_libs {
Some(libs) => libs,
None => return Ok(Box::new(empty())),
};
if !libs.contains("anon") {
return Ok(Box::new(empty()));
}
let db_owner = db.owner.pg_quote();
let operations = vec![
// Create anon extension if this compute needs it
// Users cannot create it themselves, because superuser is required.
Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"),
comment: Some(String::from("creating anon extension")),
},
// Initialize anon extension
// This also requires superuser privileges, so users cannot do it themselves.
Operation {
query: String::from("SELECT anon.init()"),
comment: Some(String::from("initializing anon extension data")),
},
Operation {
query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner),
comment: Some(String::from(
"granting anon extension schema permissions",
)),
},
Operation {
query: format!(
"GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}",
db_owner
),
comment: Some(String::from(
"granting anon extension schema functions permissions",
)),
},
// We need this, because some functions are defined as SECURITY DEFINER.
// In Postgres, SECURITY DEFINER functions are executed with the privileges
// of the owner.
// In the anon extension this is needed to access some GUCs, which are only
// accessible to the superuser. But we've patched Postgres to allow db_owner
// to access them as well, so we need to change the owner of these functions
// to db_owner.
Operation {
query: format!(
include_str!("sql/anon_ext_fn_reassign.sql"),
db_owner = db_owner,
),
comment: Some(String::from(
"change anon extension functions owner to database_owner",
)),
},
Operation {
query: format!(
"GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}",
db_owner,
),
comment: Some(String::from(
"granting anon extension tables permissions",
)),
},
Operation {
query: format!(
"GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}",
db_owner,
),
comment: Some(String::from(
"granting anon extension sequences permissions",
)),
},
]
.into_iter();
Ok(Box::new(operations))
}
}
}
// Interestingly, we only install pg_stat_statements in the main database,
// even when it's preloaded.
ApplySpecPhase::HandleOtherExtensions => {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
return Ok(Box::new(once(Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"),
comment: Some(String::from("create system extensions")),
})));
}
}
Ok(Box::new(empty()))
}
ApplySpecPhase::HandleNeonExtension => {
let operations = vec![
Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from("init: add schema for extension")),
},
Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
comment: Some(String::from(
"init: install the extension if not already installed",
)),
},
Operation {
query: String::from(
"UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'",
),
comment: Some(String::from("compat/fix: make neon relocatable")),
},
Operation {
query: String::from("ALTER EXTENSION neon SET SCHEMA neon"),
comment: Some(String::from("compat/fix: alter neon extension schema")),
},
Operation {
query: String::from("ALTER EXTENSION neon UPDATE"),
comment: Some(String::from("compat/update: update neon extension version")),
},
]
.into_iter();
Ok(Box::new(operations))
}
ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation {
query: String::from(include_str!("sql/add_availabilitycheck_tables.sql")),
comment: None,
}))),
ApplySpecPhase::DropRoles => {
let operations = spec
.delta_operations
.iter()
.flatten()
.filter(|op| op.action == "delete_role")
.map(|op| Operation {
query: format!("DROP ROLE IF EXISTS {}", op.name.pg_quote()),
comment: None,
});
Ok(Box::new(operations))
}
}
}


@@ -0,0 +1,18 @@
DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM pg_catalog.pg_tables
WHERE tablename = 'health_check'
)
THEN
CREATE TABLE health_check (
id serial primary key,
updated_at timestamptz default now()
);
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();
END IF;
END
$$


@@ -0,0 +1,12 @@
DO $$
DECLARE
query varchar;
BEGIN
FOR query IN SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};'
FROM pg_proc p
JOIN pg_namespace nsp ON p.pronamespace = nsp.oid
WHERE nsp.nspname = 'anon' LOOP
EXECUTE query;
END LOOP;
END
$$;


@@ -0,0 +1,30 @@
DO
$$
BEGIN
IF EXISTS(
SELECT nspname
FROM pg_catalog.pg_namespace
WHERE nspname = 'public'
) AND
current_setting('server_version_num')::int / 10000 >= 15
THEN
IF EXISTS(
SELECT rolname
FROM pg_catalog.pg_roles
WHERE rolname = 'web_access'
)
THEN
GRANT CREATE ON SCHEMA public TO web_access;
END IF;
END IF;
IF EXISTS(
SELECT nspname
FROM pg_catalog.pg_namespace
WHERE nspname = 'public'
)
THEN
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
END IF;
END
$$;


@@ -0,0 +1,23 @@
DO
$$
DECLARE
schema_owner TEXT;
BEGIN
IF EXISTS(
SELECT nspname
FROM pg_catalog.pg_namespace
WHERE nspname = 'public'
)
THEN
SELECT nspowner::regrole::text
FROM pg_catalog.pg_namespace
WHERE nspname = 'public'
INTO schema_owner;
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'
THEN
ALTER SCHEMA public OWNER TO {db_owner};
END IF;
END IF;
END
$$;


@@ -0,0 +1,12 @@
DO $$
BEGIN
IF EXISTS(
SELECT 1
FROM pg_catalog.pg_database
WHERE datname = {datname_str}
)
THEN
ALTER DATABASE {datname} is_template false;
END IF;
END
$$;


@@ -1153,6 +1153,7 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
timeline_info.timeline_id
);
}
// TODO: rename to import-basebackup-plus-wal
TimelineCmd::Import(args) => {
let tenant_id = get_tenant_id(args.tenant_id, env)?;
let timeline_id = args.timeline_id;


@@ -415,6 +415,11 @@ impl PageServerNode {
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'timeline_offloading' as bool")?,
wal_receiver_protocol_override: settings
.remove("wal_receiver_protocol_override")
.map(serde_json::from_str)
.transpose()
.context("parse `wal_receiver_protocol_override` from json")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")


@@ -33,10 +33,10 @@ reason = "the marvin attack only affects private key decryption, not public key
[licenses]
allow = [
"Apache-2.0",
"Artistic-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"CC0-1.0",
"CDDL-1.0",
"ISC",
"MIT",
"MPL-2.0",
@@ -66,7 +66,7 @@ registries = []
# More documentation about the 'bans' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
[bans]
multiple-versions = "warn"
multiple-versions = "allow"
wildcards = "allow"
highlight = "all"
workspace-default-features = "allow"


@@ -113,21 +113,21 @@ so manual installation of dependencies is not recommended.
A single virtual environment with all dependencies is described in the single `Pipfile`.
### Prerequisites
- Install Python 3.9 (the minimal supported version) or greater.
- Install Python 3.11 (the minimal supported version) or greater.
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
- If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
- If you have some trouble with other version you can resolve it by installing Python 3.11 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
```bash
# In Ubuntu
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt update
sudo apt install python3.9
sudo apt install python3.11
```
- Install `poetry`
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
- Install dependencies via `./scripts/pysync`.
- Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
so if you have a different version, some linting tools can yield different results locally vs in CI.
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.11`.
This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.
Run `poetry shell` to activate the virtual environment.


@@ -2,14 +2,28 @@
// This module has heavy inspiration from the prometheus crate's `process_collector.rs`.
use once_cell::sync::Lazy;
use prometheus::Gauge;
use crate::UIntGauge;
pub struct Collector {
descs: Vec<prometheus::core::Desc>,
vmlck: crate::UIntGauge,
cpu_seconds_highres: Gauge,
}
const NMETRICS: usize = 1;
const NMETRICS: usize = 2;
static CLK_TCK_F64: Lazy<f64> = Lazy::new(|| {
let long = unsafe { libc::sysconf(libc::_SC_CLK_TCK) };
if long == -1 {
panic!("sysconf(_SC_CLK_TCK) failed");
}
let convertible_to_f64: i32 =
i32::try_from(long).expect("sysconf(_SC_CLK_TCK) is larger than i32");
convertible_to_f64 as f64
});
impl prometheus::core::Collector for Collector {
fn desc(&self) -> Vec<&prometheus::core::Desc> {
@@ -27,6 +41,12 @@ impl prometheus::core::Collector for Collector {
mfs.extend(self.vmlck.collect())
}
}
if let Ok(stat) = myself.stat() {
let cpu_seconds = stat.utime + stat.stime;
self.cpu_seconds_highres
.set(cpu_seconds as f64 / *CLK_TCK_F64);
mfs.extend(self.cpu_seconds_highres.collect());
}
mfs
}
}
@@ -43,7 +63,23 @@ impl Collector {
.cloned(),
);
Self { descs, vmlck }
let cpu_seconds_highres = Gauge::new(
"libmetrics_process_cpu_seconds_highres",
"Total user and system CPU time spent in seconds.\
Sub-second resolution, hence better than `process_cpu_seconds_total`.",
)
.unwrap();
descs.extend(
prometheus::core::Collector::desc(&cpu_seconds_highres)
.into_iter()
.cloned(),
);
Self {
descs,
vmlck,
cpu_seconds_highres,
}
}
}
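As a worked example of the tick-to-seconds conversion above (a sketch, assuming the usual Linux `_SC_CLK_TCK` of 100): `utime = 250` and `stime = 130` ticks amount to `(250 + 130) / 100 = 3.8` CPU-seconds.

```rust
// Sketch of the conversion the collector above performs.
// `clk_tck` is the value of sysconf(_SC_CLK_TCK), typically 100 on Linux.
fn cpu_seconds(utime_ticks: u64, stime_ticks: u64, clk_tck: f64) -> f64 {
    (utime_ticks + stime_ticks) as f64 / clk_tck
}

#[test]
fn cpu_seconds_example() {
    assert_eq!(cpu_seconds(250, 130, 100.0), 3.8);
}
```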


@@ -33,6 +33,7 @@ remote_storage.workspace = true
postgres_backend.workspace = true
nix = {workspace = true, optional = true}
reqwest.workspace = true
rand.workspace = true
[dev-dependencies]
bincode.workspace = true


@@ -18,7 +18,7 @@ use std::{
str::FromStr,
time::Duration,
};
use utils::logging::LogFormat;
use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol};
use crate::models::ImageCompressionAlgorithm;
use crate::models::LsnLease;
@@ -97,6 +97,15 @@ pub struct ConfigToml {
pub control_plane_api: Option<reqwest::Url>,
pub control_plane_api_token: Option<String>,
pub control_plane_emergency_mode: bool,
/// Unstable feature: subject to change or removal without notice.
/// See <https://github.com/neondatabase/neon/pull/9218>.
pub import_pgdata_upcall_api: Option<reqwest::Url>,
/// Unstable feature: subject to change or removal without notice.
/// See <https://github.com/neondatabase/neon/pull/9218>.
pub import_pgdata_upcall_api_token: Option<String>,
/// Unstable feature: subject to change or removal without notice.
/// See <https://github.com/neondatabase/neon/pull/9218>.
pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
pub heatmap_upload_concurrency: usize,
pub secondary_download_concurrency: usize,
pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
@@ -109,6 +118,8 @@ pub struct ConfigToml {
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
#[serde(skip_serializing_if = "Option::is_none")]
pub no_sync: Option<bool>,
pub wal_receiver_protocol: PostgresClientProtocol,
pub page_service_pipelining: PageServicePipeliningConfig,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -125,6 +136,28 @@ pub struct DiskUsageEvictionTaskConfig {
pub eviction_order: EvictionOrder,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
#[serde(deny_unknown_fields)]
pub enum PageServicePipeliningConfig {
Serial,
Pipelined(PageServicePipeliningConfigPipelined),
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PageServicePipeliningConfigPipelined {
/// Causes runtime errors if larger than max get_vectored batch size.
pub max_batch_size: NonZeroUsize,
pub execution: PageServiceProtocolPipelinedExecutionStrategy,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum PageServiceProtocolPipelinedExecutionStrategy {
ConcurrentFutures,
Tasks,
}
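A hedged sketch (not part of the diff) of what the internally tagged, kebab-case representation of `PageServicePipeliningConfig` looks like when parsed from TOML; it assumes the `toml` crate is available in the test context:

```rust
#[test]
fn pipelining_config_toml_example() -> anyhow::Result<()> {
    // Serial mode needs no further fields.
    let _serial: PageServicePipeliningConfig = toml::from_str(r#"mode = "serial""#)?;

    // Pipelined mode carries the batch size and execution strategy (kebab-cased).
    let pipelined: PageServicePipeliningConfig = toml::from_str(
        r#"
        mode = "pipelined"
        max_batch_size = 32
        execution = "concurrent-futures"
        "#,
    )?;
    assert!(matches!(pipelined, PageServicePipeliningConfig::Pipelined(_)));
    Ok(())
}
```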
pub mod statvfs {
pub mod mock {
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -266,6 +299,8 @@ pub struct TenantConfigToml {
/// Enable auto-offloading of timelines.
/// (either this flag or the pageserver-global one need to be set)
pub timeline_offloading: bool,
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
}
pub mod defaults {
@@ -317,6 +352,9 @@ pub mod defaults {
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
utils::postgres_client::PostgresClientProtocol::Vanilla;
}
impl Default for ConfigToml {
@@ -382,6 +420,10 @@ impl Default for ConfigToml {
control_plane_api_token: (None),
control_plane_emergency_mode: (false),
import_pgdata_upcall_api: (None),
import_pgdata_upcall_api_token: (None),
import_pgdata_aws_endpoint_url: (None),
heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
@@ -399,6 +441,8 @@ impl Default for ConfigToml {
virtual_file_io_mode: None,
tenant_config: TenantConfigToml::default(),
no_sync: None,
wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
page_service_pipelining: PageServicePipeliningConfig::Serial,
}
}
}
@@ -486,6 +530,7 @@ impl Default for TenantConfigToml {
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
timeline_offloading: false,
wal_receiver_protocol_override: None,
}
}
}


@@ -24,7 +24,7 @@ pub struct Key {
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
/// a struct of fields.
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)]
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct CompactKey(i128);
/// The storage key size.
@@ -229,6 +229,18 @@ impl Key {
}
}
impl CompactKey {
pub fn raw(&self) -> i128 {
self.0
}
}
impl From<i128> for CompactKey {
fn from(value: i128) -> Self {
Self(value)
}
}
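A small sketch (not part of the diff) showing that `CompactKey` round-trips through its raw `i128` representation; the value is arbitrary:

```rust
#[test]
fn compact_key_i128_roundtrip() {
    let raw = 0x1234_5678_9abc_def0_i128;
    let compact = CompactKey::from(raw);
    assert_eq!(compact.raw(), raw);
    // CompactKey does not derive Debug here, so compare with a plain assert.
    assert!(CompactKey::from(compact.raw()) == compact);
}
```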
impl fmt::Display for Key {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(


@@ -48,7 +48,7 @@ pub struct ShardedRange<'a> {
// Calculate the size of a range within the blocks of the same relation, or spanning only the
// top page in the previous relation's space.
fn contiguous_range_len(range: &Range<Key>) -> u32 {
pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
debug_assert!(is_contiguous_range(range));
if range.start.field6 == 0xffffffff {
range.end.field6 + 1
@@ -67,7 +67,7 @@ fn contiguous_range_len(range: &Range<Key>) -> u32 {
/// This matters, because:
/// - Within such ranges, keys are used contiguously. Outside such ranges it is sparse.
/// - Within such ranges, we may calculate distances using simple subtraction of field6.
fn is_contiguous_range(range: &Range<Key>) -> bool {
pub fn is_contiguous_range(range: &Range<Key>) -> bool {
range.start.field1 == range.end.field1
&& range.start.field2 == range.end.field2
&& range.start.field3 == range.end.field3


@@ -2,6 +2,8 @@ pub mod detach_ancestor;
pub mod partitioning;
pub mod utilization;
#[cfg(feature = "testing")]
use camino::Utf8PathBuf;
pub use utilization::PageserverUtilization;
use std::{
@@ -21,6 +23,7 @@ use utils::{
completion,
id::{NodeId, TenantId, TimelineId},
lsn::Lsn,
postgres_client::PostgresClientProtocol,
serde_system_time,
};
@@ -227,6 +230,9 @@ pub enum TimelineCreateRequestMode {
// we continue to accept it by having it here.
pg_version: Option<u32>,
},
ImportPgdata {
import_pgdata: TimelineCreateRequestModeImportPgdata,
},
// NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
// (serde picks the first matching enum variant, in declaration order).
Bootstrap {
@@ -236,6 +242,42 @@ pub enum TimelineCreateRequestMode {
},
}
#[derive(Serialize, Deserialize, Clone)]
pub struct TimelineCreateRequestModeImportPgdata {
pub location: ImportPgdataLocation,
pub idempotency_key: ImportPgdataIdempotencyKey,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum ImportPgdataLocation {
#[cfg(feature = "testing")]
LocalFs { path: Utf8PathBuf },
AwsS3 {
region: String,
bucket: String,
/// A better name for this would be `prefix`; changing requires coordination with cplane.
/// See <https://github.com/neondatabase/cloud/issues/20646>.
key: String,
},
}
#[derive(Serialize, Deserialize, Clone)]
#[serde(transparent)]
pub struct ImportPgdataIdempotencyKey(pub String);
impl ImportPgdataIdempotencyKey {
pub fn random() -> Self {
use rand::{distributions::Alphanumeric, Rng};
Self(
rand::thread_rng()
.sample_iter(&Alphanumeric)
.take(20)
.map(char::from)
.collect(),
)
}
}
#[derive(Serialize, Deserialize, Clone)]
pub struct LsnLeaseRequest {
pub lsn: Lsn,
@@ -311,6 +353,7 @@ pub struct TenantConfig {
pub lsn_lease_length: Option<String>,
pub lsn_lease_length_for_ts: Option<String>,
pub timeline_offloading: Option<bool>,
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
}
/// The policy for the aux file storage.


@@ -41,6 +41,11 @@ pub enum NeonWalRecord {
file_path: String,
content: Option<Bytes>,
},
// Truncate visibility map page
TruncateVisibilityMap {
trunc_byte: usize,
trunc_offs: usize,
},
/// A testing record for unit testing purposes. It supports append data to an existing image, or clear it.
#[cfg(feature = "testing")]


@@ -24,7 +24,7 @@ use postgres_ffi::Oid;
// FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
// Then we could replace the custom Ord and PartialOrd implementations below with
// deriving them. This will require changes in walredoproc.c.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
pub struct RelTag {
pub forknum: u8,
pub spcnode: Oid,


@@ -716,6 +716,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
Ok(())
}
// The protocol looks like this:
// FeMessage::Query("pagestream_v2{FeMessage::CopyData(PagestreamFeMessage::GetPage(..))}")
async fn process_message(
&mut self,
handler: &mut impl Handler<IO>,
@@ -831,7 +834,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
use CopyStreamHandlerEnd::*;
let expected_end = match &end {
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true,
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,
CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
if is_expected_io_error(io_error) =>
{
@@ -871,6 +874,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
// message from server' when it receives ErrorResponse (anything but
// CopyData/CopyDone) back.
CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),
// When cancelled, send no response: we must not risk blocking on sending that response
Cancelled => None,
_ => None,
};
if let Some((err, errcode)) = err_to_send_and_errcode {
@@ -1048,6 +1054,8 @@ pub enum CopyStreamHandlerEnd {
/// The connection was lost
#[error("connection error: {0}")]
Disconnected(#[from] ConnectionError),
#[error("Shutdown")]
Cancelled,
/// Some other error
#[error(transparent)]
Other(#[from] anyhow::Error),


@@ -243,8 +243,11 @@ const FSM_LEAF_NODES_PER_PAGE: usize = FSM_NODES_PER_PAGE - FSM_NON_LEAF_NODES_P
pub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;
/* From visibilitymap.c */
pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
(BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA) as u32 * (8 / 2); // MAPSIZE * (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
pub const VM_MAPSIZE: usize = BLCKSZ as usize - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA;
pub const VM_BITS_PER_HEAPBLOCK: usize = 2;
pub const VM_HEAPBLOCKS_PER_BYTE: usize = 8 / VM_BITS_PER_HEAPBLOCK;
pub const VM_HEAPBLOCKS_PER_PAGE: usize = VM_MAPSIZE * VM_HEAPBLOCKS_PER_BYTE;
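A hedged arithmetic check of the constants above, assuming the usual BLCKSZ of 8192 and a MAXALIGN'd 24-byte page header: the map area is 8192 - 24 = 8168 bytes, and at 2 bits per heap block each visibility-map page covers 8168 * 4 = 32672 heap blocks.

```rust
// Compile-time sanity check of the arithmetic described above (illustrative values).
const _: () = assert!((8192 - 24) * (8 / 2) == 32672);
```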
/* From origin.c */
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;


@@ -16,7 +16,7 @@ use utils::bin_ser::DeserializeError;
use utils::lsn::Lsn;
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlMultiXactCreate {
pub mid: MultiXactId,
/* new MultiXact's ID */
@@ -46,7 +46,7 @@ impl XlMultiXactCreate {
}
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlMultiXactTruncate {
pub oldest_multi_db: Oid,
/* to-be-truncated range of multixact offsets */
@@ -72,7 +72,7 @@ impl XlMultiXactTruncate {
}
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlRelmapUpdate {
pub dbid: Oid, /* database ID, or 0 for shared map */
pub tsid: Oid, /* database's tablespace, or pg_global */
@@ -90,7 +90,7 @@ impl XlRelmapUpdate {
}
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlReploriginDrop {
pub node_id: RepOriginId,
}
@@ -104,7 +104,7 @@ impl XlReploriginDrop {
}
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlReploriginSet {
pub remote_lsn: Lsn,
pub node_id: RepOriginId,
@@ -120,7 +120,7 @@ impl XlReploriginSet {
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct RelFileNode {
pub spcnode: Oid, /* tablespace */
pub dbnode: Oid, /* database */
@@ -911,7 +911,7 @@ impl XlSmgrCreate {
}
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlSmgrTruncate {
pub blkno: BlockNumber,
pub rnode: RelFileNode,
@@ -984,7 +984,7 @@ impl XlDropDatabase {
/// xl_xact_parsed_abort structs in PostgreSQL, but we use the same
/// struct for commits and aborts.
///
#[derive(Debug)]
#[derive(Debug, Serialize, Deserialize)]
pub struct XlXactParsedRecord {
pub xid: TransactionId,
pub info: u8,


@@ -0,0 +1,12 @@
[package]
name = "postgres_initdb"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
tokio.workspace = true
camino.workspace = true
thiserror.workspace = true
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -0,0 +1,103 @@
//! The canonical way we run `initdb` in Neon.
//!
//! initdb has implicit defaults that are dependent on the environment, e.g., locales & collations.
//!
//! This module's job is to eliminate the environment-dependence as much as possible.
use std::fmt;
use camino::Utf8Path;
pub struct RunInitdbArgs<'a> {
pub superuser: &'a str,
pub locale: &'a str,
pub initdb_bin: &'a Utf8Path,
pub pg_version: u32,
pub library_search_path: &'a Utf8Path,
pub pgdata: &'a Utf8Path,
}
#[derive(thiserror::Error, Debug)]
pub enum Error {
Spawn(std::io::Error),
Failed {
status: std::process::ExitStatus,
stderr: Vec<u8>,
},
WaitOutput(std::io::Error),
Other(anyhow::Error),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::Spawn(e) => write!(f, "Error spawning command: {:?}", e),
Error::Failed { status, stderr } => write!(
f,
"Command failed with status {:?}: {}",
status,
String::from_utf8_lossy(stderr)
),
Error::WaitOutput(e) => write!(f, "Error waiting for command output: {:?}", e),
Error::Other(e) => write!(f, "Error: {:?}", e),
}
}
}
pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {
let RunInitdbArgs {
superuser,
locale,
initdb_bin: initdb_bin_path,
pg_version,
library_search_path,
pgdata,
} = args;
let mut initdb_command = tokio::process::Command::new(initdb_bin_path);
initdb_command
.args(["--pgdata", pgdata.as_ref()])
.args(["--username", superuser])
.args(["--encoding", "utf8"])
.args(["--locale", locale])
.arg("--no-instructions")
.arg("--no-sync")
.env_clear()
.env("LD_LIBRARY_PATH", library_search_path)
.env("DYLD_LIBRARY_PATH", library_search_path)
.stdin(std::process::Stdio::null())
// stdout invocation produces the same output every time, we don't need it
.stdout(std::process::Stdio::null())
// we would be interested in the stderr output, if there was any
.stderr(std::process::Stdio::piped());
// Before version 15, only the libc provider was available.
if pg_version > 14 {
// Version 17 brought with it a builtin locale provider which only provides
// C and C.UTF-8. While being safer for collation purposes since it is
// guaranteed to be consistent throughout a major release, it is also more
// performant.
let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
initdb_command.args(["--locale-provider", locale_provider]);
}
let initdb_proc = initdb_command.spawn().map_err(Error::Spawn)?;
// Ideally we'd select here with the cancellation token, but the problem is that
// we can't safely terminate initdb: it launches processes of its own, and killing
// initdb doesn't kill them. After we return from this function, we want the target
// directory to be able to be cleaned up.
// See https://github.com/neondatabase/neon/issues/6385
let initdb_output = initdb_proc
.wait_with_output()
.await
.map_err(Error::WaitOutput)?;
if !initdb_output.status.success() {
return Err(Error::Failed {
status: initdb_output.status,
stderr: initdb_output.stderr,
});
}
Ok(())
}
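A hedged usage sketch of `do_run_initdb` (not part of the diff); the paths, superuser name, and version are illustrative, and the crate is assumed to be referenced as `postgres_initdb`:

```rust
async fn initdb_example() -> Result<(), postgres_initdb::Error> {
    postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
        superuser: "cloud_admin", // illustrative
        locale: "C.UTF-8",        // illustrative
        initdb_bin: camino::Utf8Path::new("/usr/local/pgsql/bin/initdb"),
        pg_version: 16,
        library_search_path: camino::Utf8Path::new("/usr/local/pgsql/lib"),
        pgdata: camino::Utf8Path::new("/var/tmp/pgdata-example"),
    })
    .await
}
```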


@@ -185,7 +185,7 @@ pub struct CancelKeyData {
impl fmt::Display for CancelKeyData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let hi = (self.backend_pid as u64) << 32;
let lo = self.cancel_key as u64;
let lo = (self.cancel_key as u64) & 0xffffffff;
let id = hi | lo;
// This format is more compact and might work better for logs.
@@ -562,6 +562,9 @@ pub enum BeMessage<'a> {
options: &'a [&'a str],
},
KeepAlive(WalSndKeepAlive),
/// Batch of interpreted, shard filtered WAL records,
/// ready for the pageserver to ingest
InterpretedWalRecords(InterpretedWalRecordsBody<'a>),
}
/// Common shorthands.
@@ -672,6 +675,22 @@ pub struct WalSndKeepAlive {
pub request_reply: bool,
}
/// Batch of interpreted WAL records used in the interpreted
/// safekeeper to pageserver protocol.
///
/// Note that the pageserver uses the RawInterpretedWalRecordsBody
/// counterpart of this from the neondatabase/rust-postgres repo.
/// If you're changing this struct, you likely need to change its
/// twin as well.
#[derive(Debug)]
pub struct InterpretedWalRecordsBody<'a> {
/// End of raw WAL in [`Self::data`]
pub streaming_lsn: u64,
/// Current end of WAL on the server
pub commit_lsn: u64,
pub data: &'a [u8],
}
pub static HELLO_WORLD_ROW: BeMessage = BeMessage::DataRow(&[Some(b"hello world")]);
// single text column
@@ -996,6 +1015,19 @@ impl BeMessage<'_> {
Ok(())
})?
}
BeMessage::InterpretedWalRecords(rec) => {
// We use the COPY_DATA_TAG for our custom message
// since this tag is interpreted as raw bytes.
buf.put_u8(b'd');
write_body(buf, |buf| {
buf.put_u8(b'0'); // matches INTERPRETED_WAL_RECORD_TAG in the postgres-protocol dependency
buf.put_u64(rec.streaming_lsn);
buf.put_u64(rec.commit_lsn);
buf.put_slice(rec.data);
});
}
}
Ok(())
}
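For clarity, a hedged sketch of the receiving side of the framing written above (after the outer CopyData `'d'` framing has been removed); this is not the pageserver's actual decoder, which lives in the rust-postgres fork:

```rust
use bytes::Buf;

// Returns (streaming_lsn, commit_lsn, raw record payload), or None if the body
// is too short or carries a different sub-tag.
fn parse_interpreted_wal_records(mut body: &[u8]) -> Option<(u64, u64, &[u8])> {
    if body.len() < 1 + 8 + 8 || body[0] != b'0' {
        return None;
    }
    body.advance(1); // sub-tag, matches INTERPRETED_WAL_RECORD_TAG
    let streaming_lsn = body.get_u64(); // big-endian, like put_u64 above
    let commit_lsn = body.get_u64();
    Some((streaming_lsn, commit_lsn, body))
}
```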
@@ -1046,4 +1078,13 @@ mod tests {
let data = [0, 0, 0, 7, 0, 0, 0, 0];
FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();
}
#[test]
fn cancel_key_data() {
let key = CancelKeyData {
backend_pid: -1817212860,
cancel_key: -1183897012,
};
assert_eq!(format!("{key}"), "CancelKeyData(93af8844b96f2a4c)");
}
}

libs/proxy/README.md

@@ -0,0 +1,6 @@
This directory contains libraries that are specific to proxy.
Currently, it contains a significant fork/refactoring of rust-postgres that no longer reflects the API
of the original library. Since the changes were so significant, it made sense to promote it to its own set of libraries.
Proxy needs unique access to the protocol, which explains why such heavy modifications were necessary.


@@ -0,0 +1,21 @@
[package]
name = "postgres-protocol2"
version = "0.1.0"
edition = "2018"
license = "MIT/Apache-2.0"
[dependencies]
base64 = "0.20"
byteorder.workspace = true
bytes.workspace = true
fallible-iterator.workspace = true
hmac.workspace = true
md-5 = "0.10"
memchr = "2.0"
rand.workspace = true
sha2.workspace = true
stringprep = "0.1"
tokio = { workspace = true, features = ["rt"] }
[dev-dependencies]
tokio = { workspace = true, features = ["full"] }


@@ -0,0 +1,37 @@
//! Authentication protocol support.
use md5::{Digest, Md5};
pub mod sasl;
/// Hashes authentication information in a way suitable for use in response
/// to an `AuthenticationMd5Password` message.
///
/// The resulting string should be sent back to the database in a
/// `PasswordMessage` message.
#[inline]
pub fn md5_hash(username: &[u8], password: &[u8], salt: [u8; 4]) -> String {
let mut md5 = Md5::new();
md5.update(password);
md5.update(username);
let output = md5.finalize_reset();
md5.update(format!("{:x}", output));
md5.update(salt);
format!("md5{:x}", md5.finalize())
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn md5() {
let username = b"md5_user";
let password = b"password";
let salt = [0x2a, 0x3d, 0x8f, 0xe0];
assert_eq!(
md5_hash(username, password, salt),
"md562af4dd09bbb41884907a838a3233294"
);
}
}


@@ -0,0 +1,516 @@
//! SASL-based authentication support.
use hmac::{Hmac, Mac};
use rand::{self, Rng};
use sha2::digest::FixedOutput;
use sha2::{Digest, Sha256};
use std::fmt::Write;
use std::io;
use std::iter;
use std::mem;
use std::str;
use tokio::task::yield_now;
const NONCE_LENGTH: usize = 24;
/// The identifier of the SCRAM-SHA-256 SASL authentication mechanism.
pub const SCRAM_SHA_256: &str = "SCRAM-SHA-256";
/// The identifier of the SCRAM-SHA-256-PLUS SASL authentication mechanism.
pub const SCRAM_SHA_256_PLUS: &str = "SCRAM-SHA-256-PLUS";
// since postgres passwords are not required to exclude saslprep-prohibited
// characters or even be valid UTF8, we run saslprep if possible and otherwise
// return the raw password.
fn normalize(pass: &[u8]) -> Vec<u8> {
let pass = match str::from_utf8(pass) {
Ok(pass) => pass,
Err(_) => return pass.to_vec(),
};
match stringprep::saslprep(pass) {
Ok(pass) => pass.into_owned().into_bytes(),
Err(_) => pass.as_bytes().to_vec(),
}
}
pub(crate) async fn hi(str: &[u8], salt: &[u8], iterations: u32) -> [u8; 32] {
let mut hmac =
Hmac::<Sha256>::new_from_slice(str).expect("HMAC is able to accept all key sizes");
hmac.update(salt);
hmac.update(&[0, 0, 0, 1]);
let mut prev = hmac.finalize().into_bytes();
let mut hi = prev;
for i in 1..iterations {
let mut hmac = Hmac::<Sha256>::new_from_slice(str).expect("already checked above");
hmac.update(&prev);
prev = hmac.finalize().into_bytes();
for (hi, prev) in hi.iter_mut().zip(prev) {
*hi ^= prev;
}
// yield every ~250us
// hopefully reduces tail latencies
if i % 1024 == 0 {
yield_now().await
}
}
hi.into()
}
enum ChannelBindingInner {
Unrequested,
Unsupported,
TlsServerEndPoint(Vec<u8>),
}
/// The channel binding configuration for a SCRAM authentication exchange.
pub struct ChannelBinding(ChannelBindingInner);
impl ChannelBinding {
/// The server did not request channel binding.
pub fn unrequested() -> ChannelBinding {
ChannelBinding(ChannelBindingInner::Unrequested)
}
/// The server requested channel binding but the client is unable to provide it.
pub fn unsupported() -> ChannelBinding {
ChannelBinding(ChannelBindingInner::Unsupported)
}
/// The server requested channel binding and the client will use the `tls-server-end-point`
/// method.
pub fn tls_server_end_point(signature: Vec<u8>) -> ChannelBinding {
ChannelBinding(ChannelBindingInner::TlsServerEndPoint(signature))
}
fn gs2_header(&self) -> &'static str {
match self.0 {
ChannelBindingInner::Unrequested => "y,,",
ChannelBindingInner::Unsupported => "n,,",
ChannelBindingInner::TlsServerEndPoint(_) => "p=tls-server-end-point,,",
}
}
fn cbind_data(&self) -> &[u8] {
match self.0 {
ChannelBindingInner::Unrequested | ChannelBindingInner::Unsupported => &[],
ChannelBindingInner::TlsServerEndPoint(ref buf) => buf,
}
}
}
/// A pair of keys for the SCRAM-SHA-256 mechanism.
/// See <https://datatracker.ietf.org/doc/html/rfc5802#section-3> for details.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ScramKeys<const N: usize> {
/// Used by server to authenticate client.
pub client_key: [u8; N],
/// Used by client to verify server's signature.
pub server_key: [u8; N],
}
/// Password or keys which were derived from it.
enum Credentials<const N: usize> {
/// A regular password as a vector of bytes.
Password(Vec<u8>),
/// A precomputed pair of keys.
Keys(Box<ScramKeys<N>>),
}
enum State {
Update {
nonce: String,
password: Credentials<32>,
channel_binding: ChannelBinding,
},
Finish {
server_key: [u8; 32],
auth_message: String,
},
Done,
}
/// A type which handles the client side of the SCRAM-SHA-256/SCRAM-SHA-256-PLUS authentication
/// process.
///
/// During the authentication process, if the backend sends an `AuthenticationSASL` message which
/// includes `SCRAM-SHA-256` as an authentication mechanism, this type can be used.
///
/// After a `ScramSha256` is constructed, the buffer returned by the `message()` method should be
/// sent to the backend in a `SASLInitialResponse` message along with the mechanism name.
///
/// The server will reply with an `AuthenticationSASLContinue` message. Its contents should be
/// passed to the `update()` method, after which the buffer returned by the `message()` method
/// should be sent to the backend in a `SASLResponse` message.
///
/// The server will reply with an `AuthenticationSASLFinal` message. Its contents should be passed
/// to the `finish()` method, after which the authentication process is complete.
pub struct ScramSha256 {
message: String,
state: State,
}
fn nonce() -> String {
// rand 0.5's ThreadRng is cryptographically secure
let mut rng = rand::thread_rng();
(0..NONCE_LENGTH)
.map(|_| {
let mut v = rng.gen_range(0x21u8..0x7e);
if v == 0x2c {
v = 0x7e
}
v as char
})
.collect()
}
impl ScramSha256 {
/// Constructs a new instance which will use the provided password for authentication.
pub fn new(password: &[u8], channel_binding: ChannelBinding) -> ScramSha256 {
let password = Credentials::Password(normalize(password));
ScramSha256::new_inner(password, channel_binding, nonce())
}
/// Constructs a new instance which will use the provided key pair for authentication.
pub fn new_with_keys(keys: ScramKeys<32>, channel_binding: ChannelBinding) -> ScramSha256 {
let password = Credentials::Keys(keys.into());
ScramSha256::new_inner(password, channel_binding, nonce())
}
fn new_inner(
password: Credentials<32>,
channel_binding: ChannelBinding,
nonce: String,
) -> ScramSha256 {
ScramSha256 {
message: format!("{}n=,r={}", channel_binding.gs2_header(), nonce),
state: State::Update {
nonce,
password,
channel_binding,
},
}
}
/// Returns the message which should be sent to the backend in an `SASLResponse` message.
pub fn message(&self) -> &[u8] {
if let State::Done = self.state {
panic!("invalid SCRAM state");
}
self.message.as_bytes()
}
/// Updates the state machine with the response from the backend.
///
/// This should be called when an `AuthenticationSASLContinue` message is received.
pub async fn update(&mut self, message: &[u8]) -> io::Result<()> {
let (client_nonce, password, channel_binding) =
match mem::replace(&mut self.state, State::Done) {
State::Update {
nonce,
password,
channel_binding,
} => (nonce, password, channel_binding),
_ => return Err(io::Error::new(io::ErrorKind::Other, "invalid SCRAM state")),
};
let message =
str::from_utf8(message).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let parsed = Parser::new(message).server_first_message()?;
if !parsed.nonce.starts_with(&client_nonce) {
return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid nonce"));
}
let (client_key, server_key) = match password {
Credentials::Password(password) => {
let salt = match base64::decode(parsed.salt) {
Ok(salt) => salt,
Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
};
let salted_password = hi(&password, &salt, parsed.iteration_count).await;
let make_key = |name| {
let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)
.expect("HMAC is able to accept all key sizes");
hmac.update(name);
let mut key = [0u8; 32];
key.copy_from_slice(hmac.finalize().into_bytes().as_slice());
key
};
(make_key(b"Client Key"), make_key(b"Server Key"))
}
Credentials::Keys(keys) => (keys.client_key, keys.server_key),
};
let mut hash = Sha256::default();
hash.update(client_key);
let stored_key = hash.finalize_fixed();
let mut cbind_input = vec![];
cbind_input.extend(channel_binding.gs2_header().as_bytes());
cbind_input.extend(channel_binding.cbind_data());
let cbind_input = base64::encode(&cbind_input);
self.message.clear();
write!(&mut self.message, "c={},r={}", cbind_input, parsed.nonce).unwrap();
let auth_message = format!("n=,r={},{},{}", client_nonce, message, self.message);
let mut hmac = Hmac::<Sha256>::new_from_slice(&stored_key)
.expect("HMAC is able to accept all key sizes");
hmac.update(auth_message.as_bytes());
let client_signature = hmac.finalize().into_bytes();
let mut client_proof = client_key;
for (proof, signature) in client_proof.iter_mut().zip(client_signature) {
*proof ^= signature;
}
write!(&mut self.message, ",p={}", base64::encode(client_proof)).unwrap();
self.state = State::Finish {
server_key,
auth_message,
};
Ok(())
}
/// Finalizes the authentication process.
///
/// This should be called when the backend sends an `AuthenticationSASLFinal` message.
/// Authentication has only succeeded if this method returns `Ok(())`.
pub fn finish(&mut self, message: &[u8]) -> io::Result<()> {
let (server_key, auth_message) = match mem::replace(&mut self.state, State::Done) {
State::Finish {
server_key,
auth_message,
} => (server_key, auth_message),
_ => return Err(io::Error::new(io::ErrorKind::Other, "invalid SCRAM state")),
};
let message =
str::from_utf8(message).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
let parsed = Parser::new(message).server_final_message()?;
let verifier = match parsed {
ServerFinalMessage::Error(e) => {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("SCRAM error: {}", e),
));
}
ServerFinalMessage::Verifier(verifier) => verifier,
};
let verifier = match base64::decode(verifier) {
Ok(verifier) => verifier,
Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidInput, e)),
};
let mut hmac = Hmac::<Sha256>::new_from_slice(&server_key)
.expect("HMAC is able to accept all key sizes");
hmac.update(auth_message.as_bytes());
hmac.verify_slice(&verifier)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "SCRAM verification error"))
}
}
struct Parser<'a> {
s: &'a str,
it: iter::Peekable<str::CharIndices<'a>>,
}
impl<'a> Parser<'a> {
fn new(s: &'a str) -> Parser<'a> {
Parser {
s,
it: s.char_indices().peekable(),
}
}
fn eat(&mut self, target: char) -> io::Result<()> {
match self.it.next() {
Some((_, c)) if c == target => Ok(()),
Some((i, c)) => {
let m = format!(
"unexpected character at byte {}: expected `{}` but got `{}",
i, target, c
);
Err(io::Error::new(io::ErrorKind::InvalidInput, m))
}
None => Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"unexpected EOF",
)),
}
}
fn take_while<F>(&mut self, f: F) -> io::Result<&'a str>
where
F: Fn(char) -> bool,
{
let start = match self.it.peek() {
Some(&(i, _)) => i,
None => return Ok(""),
};
loop {
match self.it.peek() {
Some(&(_, c)) if f(c) => {
self.it.next();
}
Some(&(i, _)) => return Ok(&self.s[start..i]),
None => return Ok(&self.s[start..]),
}
}
}
fn printable(&mut self) -> io::Result<&'a str> {
self.take_while(|c| matches!(c, '\x21'..='\x2b' | '\x2d'..='\x7e'))
}
fn nonce(&mut self) -> io::Result<&'a str> {
self.eat('r')?;
self.eat('=')?;
self.printable()
}
fn base64(&mut self) -> io::Result<&'a str> {
self.take_while(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '/' | '+' | '='))
}
fn salt(&mut self) -> io::Result<&'a str> {
self.eat('s')?;
self.eat('=')?;
self.base64()
}
fn posit_number(&mut self) -> io::Result<u32> {
let n = self.take_while(|c| c.is_ascii_digit())?;
n.parse()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
}
fn iteration_count(&mut self) -> io::Result<u32> {
self.eat('i')?;
self.eat('=')?;
self.posit_number()
}
fn eof(&mut self) -> io::Result<()> {
match self.it.peek() {
Some(&(i, _)) => Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("unexpected trailing data at byte {}", i),
)),
None => Ok(()),
}
}
fn server_first_message(&mut self) -> io::Result<ServerFirstMessage<'a>> {
let nonce = self.nonce()?;
self.eat(',')?;
let salt = self.salt()?;
self.eat(',')?;
let iteration_count = self.iteration_count()?;
self.eof()?;
Ok(ServerFirstMessage {
nonce,
salt,
iteration_count,
})
}
fn value(&mut self) -> io::Result<&'a str> {
self.take_while(|c| matches!(c, '\0' | '=' | ','))
}
fn server_error(&mut self) -> io::Result<Option<&'a str>> {
match self.it.peek() {
Some(&(_, 'e')) => {}
_ => return Ok(None),
}
self.eat('e')?;
self.eat('=')?;
self.value().map(Some)
}
fn verifier(&mut self) -> io::Result<&'a str> {
self.eat('v')?;
self.eat('=')?;
self.base64()
}
fn server_final_message(&mut self) -> io::Result<ServerFinalMessage<'a>> {
let message = match self.server_error()? {
Some(error) => ServerFinalMessage::Error(error),
None => ServerFinalMessage::Verifier(self.verifier()?),
};
self.eof()?;
Ok(message)
}
}
struct ServerFirstMessage<'a> {
nonce: &'a str,
salt: &'a str,
iteration_count: u32,
}
enum ServerFinalMessage<'a> {
Error(&'a str),
Verifier(&'a str),
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn parse_server_first_message() {
let message = "r=fyko+d2lbbFgONRv9qkxdawL3rfcNHYJY1ZVvWVs7j,s=QSXCR+Q6sek8bf92,i=4096";
let message = Parser::new(message).server_first_message().unwrap();
assert_eq!(message.nonce, "fyko+d2lbbFgONRv9qkxdawL3rfcNHYJY1ZVvWVs7j");
assert_eq!(message.salt, "QSXCR+Q6sek8bf92");
assert_eq!(message.iteration_count, 4096);
}
// recorded auth exchange from psql
#[tokio::test]
async fn exchange() {
let password = "foobar";
let nonce = "9IZ2O01zb9IgiIZ1WJ/zgpJB";
let client_first = "n,,n=,r=9IZ2O01zb9IgiIZ1WJ/zgpJB";
let server_first =
"r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,s=fs3IXBy7U7+IvVjZ,i\
=4096";
let client_final =
"c=biws,r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,p=AmNKosjJzS3\
1NTlQYNs5BTeQjdHdk7lOflDo5re2an8=";
let server_final = "v=U+ppxD5XUKtradnv8e2MkeupiA8FU87Sg8CXzXHDAzw=";
let mut scram = ScramSha256::new_inner(
Credentials::Password(normalize(password.as_bytes())),
ChannelBinding::unsupported(),
nonce.to_string(),
);
assert_eq!(str::from_utf8(scram.message()).unwrap(), client_first);
scram.update(server_first.as_bytes()).await.unwrap();
assert_eq!(str::from_utf8(scram.message()).unwrap(), client_final);
scram.finish(server_final.as_bytes()).unwrap();
}
}
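// A minimal sketch of how a caller might drive `ScramSha256`, assuming the
// transport layer is handled elsewhere: send `message()` in a
// `SASLInitialResponse`, feed the `AuthenticationSASLContinue` payload to
// `update()`, send `message()` again in a `SASLResponse`, and finally pass
// the `AuthenticationSASLFinal` payload to `finish()`.
#[allow(dead_code)]
async fn scram_exchange_sketch(
    password: &[u8],
    server_first: &[u8],
    server_final: &[u8],
) -> io::Result<(Vec<u8>, Vec<u8>)> {
    let mut scram = ScramSha256::new(password, ChannelBinding::unsupported());
    // Goes into the SASLInitialResponse message together with SCRAM_SHA_256.
    let client_first = scram.message().to_vec();
    // `server_first` is the payload of AuthenticationSASLContinue.
    scram.update(server_first).await?;
    // Goes into the SASLResponse message.
    let client_final = scram.message().to_vec();
    // `server_final` is the payload of AuthenticationSASLFinal; Ok(()) means
    // the server's signature verified.
    scram.finish(server_final)?;
    Ok((client_first, client_final))
}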


@@ -0,0 +1,93 @@
//! Provides functions for escaping literals and identifiers for use
//! in SQL queries.
//!
//! Prefer parameterized queries where possible. Do not escape
//! parameters in a parameterized query.
#[cfg(test)]
mod test;
/// Escape a literal and surround result with single quotes. Not
/// recommended in most cases.
///
/// If input contains backslashes, result will be of the form `
/// E'...'` so it is safe to use regardless of the setting of
/// standard_conforming_strings.
pub fn escape_literal(input: &str) -> String {
escape_internal(input, false)
}
/// Escape an identifier and surround result with double quotes.
pub fn escape_identifier(input: &str) -> String {
escape_internal(input, true)
}
// Translation of PostgreSQL libpq's PQescapeInternal(). Does not
// require a connection because the input string is known to be valid
// UTF-8.
//
// Escape arbitrary strings. If as_ident is true, we escape the
// result as an identifier; if false, as a literal. The result is
// returned in a newly allocated String. Unlike the C original, this
// version cannot fail with an encoding violation, so no error
// reporting path is needed.
fn escape_internal(input: &str, as_ident: bool) -> String {
let mut num_backslashes = 0;
let mut num_quotes = 0;
let quote_char = if as_ident { '"' } else { '\'' };
// Scan the string for characters that must be escaped.
for ch in input.chars() {
if ch == quote_char {
num_quotes += 1;
} else if ch == '\\' {
num_backslashes += 1;
}
}
// Allocate output String.
let mut result_size = input.len() + num_quotes + 3; // two quotes, plus a NUL
if !as_ident && num_backslashes > 0 {
result_size += num_backslashes + 2;
}
let mut output = String::with_capacity(result_size);
// If we are escaping a literal that contains backslashes, we use
// the escape string syntax so that the result is correct under
// either value of standard_conforming_strings. We also emit a
// leading space in this case, to guard against the possibility
// that the result might be interpolated immediately following an
// identifier.
if !as_ident && num_backslashes > 0 {
output.push(' ');
output.push('E');
}
// Opening quote.
output.push(quote_char);
// Use fast path if possible.
//
// We've already verified that the input string is well-formed in
// the current encoding. If it contains no quotes and, in the
// case of literal-escaping, no backslashes, then we can just copy
// it directly to the output buffer, adding the necessary quotes.
//
// If not, we must rescan the input and process each character
// individually.
if num_quotes == 0 && (num_backslashes == 0 || as_ident) {
output.push_str(input);
} else {
for ch in input.chars() {
if ch == quote_char || (!as_ident && ch == '\\') {
output.push(ch);
}
output.push(ch);
}
}
output.push(quote_char);
output
}


@@ -0,0 +1,17 @@
use crate::escape::{escape_identifier, escape_literal};
#[test]
fn test_escape_identifier() {
assert_eq!(escape_identifier("foo"), String::from("\"foo\""));
assert_eq!(escape_identifier("f\\oo"), String::from("\"f\\oo\""));
assert_eq!(escape_identifier("f'oo"), String::from("\"f'oo\""));
assert_eq!(escape_identifier("f\"oo"), String::from("\"f\"\"oo\""));
}
#[test]
fn test_escape_literal() {
assert_eq!(escape_literal("foo"), String::from("'foo'"));
assert_eq!(escape_literal("f\\oo"), String::from(" E'f\\\\oo'"));
assert_eq!(escape_literal("f'oo"), String::from("'f''oo'"));
assert_eq!(escape_literal("f\"oo"), String::from("'f\"oo'"));
}


@@ -0,0 +1,78 @@
//! Low level Postgres protocol APIs.
//!
//! This crate implements the low level components of Postgres's communication
//! protocol, including message and value serialization and deserialization.
//! It is designed to be used as a building block by higher level APIs such as
//! `rust-postgres`, and should not typically be used directly.
//!
//! # Note
//!
//! This library assumes that the `client_encoding` backend parameter has been
//! set to `UTF8`. It will most likely not behave properly if that is not the case.
#![doc(html_root_url = "https://docs.rs/postgres-protocol/0.6")]
#![warn(missing_docs, rust_2018_idioms, clippy::all)]
use byteorder::{BigEndian, ByteOrder};
use bytes::{BufMut, BytesMut};
use std::io;
pub mod authentication;
pub mod escape;
pub mod message;
pub mod password;
pub mod types;
/// A Postgres OID.
pub type Oid = u32;
/// A Postgres Log Sequence Number (LSN).
pub type Lsn = u64;
/// An enum indicating if a value is `NULL` or not.
pub enum IsNull {
/// The value is `NULL`.
Yes,
/// The value is not `NULL`.
No,
}
fn write_nullable<F, E>(serializer: F, buf: &mut BytesMut) -> Result<(), E>
where
F: FnOnce(&mut BytesMut) -> Result<IsNull, E>,
E: From<io::Error>,
{
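    // Reserve a 4-byte length placeholder, run the serializer, then patch the
    // placeholder with the serialized length (or -1 to indicate NULL).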
let base = buf.len();
buf.put_i32(0);
let size = match serializer(buf)? {
IsNull::No => i32::from_usize(buf.len() - base - 4)?,
IsNull::Yes => -1,
};
BigEndian::write_i32(&mut buf[base..], size);
Ok(())
}
trait FromUsize: Sized {
fn from_usize(x: usize) -> Result<Self, io::Error>;
}
macro_rules! from_usize {
($t:ty) => {
impl FromUsize for $t {
#[inline]
fn from_usize(x: usize) -> io::Result<$t> {
if x > <$t>::MAX as usize {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
"value too large to transmit",
))
} else {
Ok(x as $t)
}
}
}
};
}
from_usize!(i16);
from_usize!(i32);


@@ -0,0 +1,766 @@
#![allow(missing_docs)]
use byteorder::{BigEndian, ByteOrder, ReadBytesExt};
use bytes::{Bytes, BytesMut};
use fallible_iterator::FallibleIterator;
use memchr::memchr;
use std::cmp;
use std::io::{self, Read};
use std::ops::Range;
use std::str;
use crate::Oid;
// top-level message tags
const PARSE_COMPLETE_TAG: u8 = b'1';
const BIND_COMPLETE_TAG: u8 = b'2';
const CLOSE_COMPLETE_TAG: u8 = b'3';
pub const NOTIFICATION_RESPONSE_TAG: u8 = b'A';
const COPY_DONE_TAG: u8 = b'c';
const COMMAND_COMPLETE_TAG: u8 = b'C';
const COPY_DATA_TAG: u8 = b'd';
const DATA_ROW_TAG: u8 = b'D';
const ERROR_RESPONSE_TAG: u8 = b'E';
const COPY_IN_RESPONSE_TAG: u8 = b'G';
const COPY_OUT_RESPONSE_TAG: u8 = b'H';
const COPY_BOTH_RESPONSE_TAG: u8 = b'W';
const EMPTY_QUERY_RESPONSE_TAG: u8 = b'I';
const BACKEND_KEY_DATA_TAG: u8 = b'K';
pub const NO_DATA_TAG: u8 = b'n';
pub const NOTICE_RESPONSE_TAG: u8 = b'N';
const AUTHENTICATION_TAG: u8 = b'R';
const PORTAL_SUSPENDED_TAG: u8 = b's';
pub const PARAMETER_STATUS_TAG: u8 = b'S';
const PARAMETER_DESCRIPTION_TAG: u8 = b't';
const ROW_DESCRIPTION_TAG: u8 = b'T';
pub const READY_FOR_QUERY_TAG: u8 = b'Z';
#[derive(Debug, Copy, Clone)]
pub struct Header {
tag: u8,
len: i32,
}
#[allow(clippy::len_without_is_empty)]
impl Header {
#[inline]
pub fn parse(buf: &[u8]) -> io::Result<Option<Header>> {
if buf.len() < 5 {
return Ok(None);
}
let tag = buf[0];
let len = BigEndian::read_i32(&buf[1..]);
if len < 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"invalid message length: header length < 4",
));
}
Ok(Some(Header { tag, len }))
}
#[inline]
pub fn tag(self) -> u8 {
self.tag
}
#[inline]
pub fn len(self) -> i32 {
self.len
}
}
/// An enum representing Postgres backend messages.
#[non_exhaustive]
pub enum Message {
AuthenticationCleartextPassword,
AuthenticationGss,
AuthenticationKerberosV5,
AuthenticationMd5Password(AuthenticationMd5PasswordBody),
AuthenticationOk,
AuthenticationScmCredential,
AuthenticationSspi,
AuthenticationGssContinue,
AuthenticationSasl(AuthenticationSaslBody),
AuthenticationSaslContinue(AuthenticationSaslContinueBody),
AuthenticationSaslFinal(AuthenticationSaslFinalBody),
BackendKeyData(BackendKeyDataBody),
BindComplete,
CloseComplete,
CommandComplete(CommandCompleteBody),
CopyData,
CopyDone,
CopyInResponse,
CopyOutResponse,
CopyBothResponse,
DataRow(DataRowBody),
EmptyQueryResponse,
ErrorResponse(ErrorResponseBody),
NoData,
NoticeResponse(NoticeResponseBody),
NotificationResponse(NotificationResponseBody),
ParameterDescription(ParameterDescriptionBody),
ParameterStatus(ParameterStatusBody),
ParseComplete,
PortalSuspended,
ReadyForQuery(ReadyForQueryBody),
RowDescription(RowDescriptionBody),
}
impl Message {
#[inline]
pub fn parse(buf: &mut BytesMut) -> io::Result<Option<Message>> {
if buf.len() < 5 {
let to_read = 5 - buf.len();
buf.reserve(to_read);
return Ok(None);
}
let tag = buf[0];
let len = (&buf[1..5]).read_u32::<BigEndian>().unwrap();
if len < 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: parsing u32",
));
}
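        // The length field counts itself but not the tag byte, so the whole
        // frame occupies `len + 1` bytes of the buffer.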
let total_len = len as usize + 1;
if buf.len() < total_len {
let to_read = total_len - buf.len();
buf.reserve(to_read);
return Ok(None);
}
let mut buf = Buffer {
bytes: buf.split_to(total_len).freeze(),
idx: 5,
};
let message = match tag {
PARSE_COMPLETE_TAG => Message::ParseComplete,
BIND_COMPLETE_TAG => Message::BindComplete,
CLOSE_COMPLETE_TAG => Message::CloseComplete,
NOTIFICATION_RESPONSE_TAG => {
let process_id = buf.read_i32::<BigEndian>()?;
let channel = buf.read_cstr()?;
let message = buf.read_cstr()?;
Message::NotificationResponse(NotificationResponseBody {
process_id,
channel,
message,
})
}
COPY_DONE_TAG => Message::CopyDone,
COMMAND_COMPLETE_TAG => {
let tag = buf.read_cstr()?;
Message::CommandComplete(CommandCompleteBody { tag })
}
COPY_DATA_TAG => Message::CopyData,
DATA_ROW_TAG => {
let len = buf.read_u16::<BigEndian>()?;
let storage = buf.read_all();
Message::DataRow(DataRowBody { storage, len })
}
ERROR_RESPONSE_TAG => {
let storage = buf.read_all();
Message::ErrorResponse(ErrorResponseBody { storage })
}
COPY_IN_RESPONSE_TAG => Message::CopyInResponse,
COPY_OUT_RESPONSE_TAG => Message::CopyOutResponse,
COPY_BOTH_RESPONSE_TAG => Message::CopyBothResponse,
EMPTY_QUERY_RESPONSE_TAG => Message::EmptyQueryResponse,
BACKEND_KEY_DATA_TAG => {
let process_id = buf.read_i32::<BigEndian>()?;
let secret_key = buf.read_i32::<BigEndian>()?;
Message::BackendKeyData(BackendKeyDataBody {
process_id,
secret_key,
})
}
NO_DATA_TAG => Message::NoData,
NOTICE_RESPONSE_TAG => {
let storage = buf.read_all();
Message::NoticeResponse(NoticeResponseBody { storage })
}
AUTHENTICATION_TAG => match buf.read_i32::<BigEndian>()? {
0 => Message::AuthenticationOk,
2 => Message::AuthenticationKerberosV5,
3 => Message::AuthenticationCleartextPassword,
5 => {
let mut salt = [0; 4];
buf.read_exact(&mut salt)?;
Message::AuthenticationMd5Password(AuthenticationMd5PasswordBody { salt })
}
6 => Message::AuthenticationScmCredential,
7 => Message::AuthenticationGss,
8 => Message::AuthenticationGssContinue,
9 => Message::AuthenticationSspi,
10 => {
let storage = buf.read_all();
Message::AuthenticationSasl(AuthenticationSaslBody(storage))
}
11 => {
let storage = buf.read_all();
Message::AuthenticationSaslContinue(AuthenticationSaslContinueBody(storage))
}
12 => {
let storage = buf.read_all();
Message::AuthenticationSaslFinal(AuthenticationSaslFinalBody(storage))
}
tag => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("unknown authentication tag `{}`", tag),
));
}
},
PORTAL_SUSPENDED_TAG => Message::PortalSuspended,
PARAMETER_STATUS_TAG => {
let name = buf.read_cstr()?;
let value = buf.read_cstr()?;
Message::ParameterStatus(ParameterStatusBody { name, value })
}
PARAMETER_DESCRIPTION_TAG => {
let len = buf.read_u16::<BigEndian>()?;
let storage = buf.read_all();
Message::ParameterDescription(ParameterDescriptionBody { storage, len })
}
ROW_DESCRIPTION_TAG => {
let len = buf.read_u16::<BigEndian>()?;
let storage = buf.read_all();
Message::RowDescription(RowDescriptionBody { storage, len })
}
READY_FOR_QUERY_TAG => {
let status = buf.read_u8()?;
Message::ReadyForQuery(ReadyForQueryBody { status })
}
tag => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("unknown message tag `{}`", tag),
));
}
};
if !buf.is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: expected buffer to be empty",
));
}
Ok(Some(message))
}
}
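// A minimal sketch of a decode loop built on `Message::parse`, assuming the
// caller reads from some `std::io::Read` transport. `Ok(None)` from `parse`
// means the buffer does not yet hold a complete frame (capacity for the
// missing bytes has already been reserved), so the caller keeps appending
// bytes and retrying.
#[allow(dead_code)]
fn next_message(stream: &mut impl Read, buf: &mut BytesMut) -> io::Result<Message> {
    loop {
        if let Some(message) = Message::parse(buf)? {
            return Ok(message);
        }
        // Not enough buffered data yet; pull more bytes from the stream.
        let mut chunk = [0u8; 4096];
        let n = stream.read(&mut chunk)?;
        if n == 0 {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "unexpected EOF",
            ));
        }
        buf.extend_from_slice(&chunk[..n]);
    }
}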
struct Buffer {
bytes: Bytes,
idx: usize,
}
impl Buffer {
#[inline]
fn slice(&self) -> &[u8] {
&self.bytes[self.idx..]
}
#[inline]
fn is_empty(&self) -> bool {
self.slice().is_empty()
}
#[inline]
fn read_cstr(&mut self) -> io::Result<Bytes> {
match memchr(0, self.slice()) {
Some(pos) => {
let start = self.idx;
let end = start + pos;
let cstr = self.bytes.slice(start..end);
self.idx = end + 1;
Ok(cstr)
}
None => Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"unexpected EOF",
)),
}
}
#[inline]
fn read_all(&mut self) -> Bytes {
let buf = self.bytes.slice(self.idx..);
self.idx = self.bytes.len();
buf
}
}
impl Read for Buffer {
#[inline]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let len = {
let slice = self.slice();
let len = cmp::min(slice.len(), buf.len());
buf[..len].copy_from_slice(&slice[..len]);
len
};
self.idx += len;
Ok(len)
}
}
pub struct AuthenticationMd5PasswordBody {
salt: [u8; 4],
}
impl AuthenticationMd5PasswordBody {
#[inline]
pub fn salt(&self) -> [u8; 4] {
self.salt
}
}
pub struct AuthenticationSaslBody(Bytes);
impl AuthenticationSaslBody {
#[inline]
pub fn mechanisms(&self) -> SaslMechanisms<'_> {
SaslMechanisms(&self.0)
}
}
pub struct SaslMechanisms<'a>(&'a [u8]);
impl<'a> FallibleIterator for SaslMechanisms<'a> {
type Item = &'a str;
type Error = io::Error;
#[inline]
fn next(&mut self) -> io::Result<Option<&'a str>> {
let value_end = find_null(self.0, 0)?;
if value_end == 0 {
if self.0.len() != 1 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"invalid message length: expected to be at end of iterator for sasl",
));
}
Ok(None)
} else {
let value = get_str(&self.0[..value_end])?;
self.0 = &self.0[value_end + 1..];
Ok(Some(value))
}
}
}
pub struct AuthenticationSaslContinueBody(Bytes);
impl AuthenticationSaslContinueBody {
#[inline]
pub fn data(&self) -> &[u8] {
&self.0
}
}
pub struct AuthenticationSaslFinalBody(Bytes);
impl AuthenticationSaslFinalBody {
#[inline]
pub fn data(&self) -> &[u8] {
&self.0
}
}
pub struct BackendKeyDataBody {
process_id: i32,
secret_key: i32,
}
impl BackendKeyDataBody {
#[inline]
pub fn process_id(&self) -> i32 {
self.process_id
}
#[inline]
pub fn secret_key(&self) -> i32 {
self.secret_key
}
}
pub struct CommandCompleteBody {
tag: Bytes,
}
impl CommandCompleteBody {
#[inline]
pub fn tag(&self) -> io::Result<&str> {
get_str(&self.tag)
}
}
#[derive(Debug)]
pub struct DataRowBody {
storage: Bytes,
len: u16,
}
impl DataRowBody {
#[inline]
pub fn ranges(&self) -> DataRowRanges<'_> {
DataRowRanges {
buf: &self.storage,
len: self.storage.len(),
remaining: self.len,
}
}
#[inline]
pub fn buffer(&self) -> &[u8] {
&self.storage
}
}
pub struct DataRowRanges<'a> {
buf: &'a [u8],
len: usize,
remaining: u16,
}
impl FallibleIterator for DataRowRanges<'_> {
type Item = Option<Range<usize>>;
type Error = io::Error;
#[inline]
fn next(&mut self) -> io::Result<Option<Option<Range<usize>>>> {
if self.remaining == 0 {
if self.buf.is_empty() {
return Ok(None);
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: datarowrange is not empty",
));
}
}
self.remaining -= 1;
let len = self.buf.read_i32::<BigEndian>()?;
if len < 0 {
Ok(Some(None))
} else {
let len = len as usize;
if self.buf.len() < len {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"unexpected EOF",
));
}
let base = self.len - self.buf.len();
self.buf = &self.buf[len..];
Ok(Some(Some(base..base + len)))
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.remaining as usize;
(len, Some(len))
}
}
pub struct ErrorResponseBody {
storage: Bytes,
}
impl ErrorResponseBody {
#[inline]
pub fn fields(&self) -> ErrorFields<'_> {
ErrorFields { buf: &self.storage }
}
}
pub struct ErrorFields<'a> {
buf: &'a [u8],
}
impl<'a> FallibleIterator for ErrorFields<'a> {
type Item = ErrorField<'a>;
type Error = io::Error;
#[inline]
fn next(&mut self) -> io::Result<Option<ErrorField<'a>>> {
let type_ = self.buf.read_u8()?;
if type_ == 0 {
if self.buf.is_empty() {
return Ok(None);
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: error fields is not drained",
));
}
}
let value_end = find_null(self.buf, 0)?;
let value = get_str(&self.buf[..value_end])?;
self.buf = &self.buf[value_end + 1..];
Ok(Some(ErrorField { type_, value }))
}
}
pub struct ErrorField<'a> {
type_: u8,
value: &'a str,
}
impl ErrorField<'_> {
#[inline]
pub fn type_(&self) -> u8 {
self.type_
}
#[inline]
pub fn value(&self) -> &str {
self.value
}
}
pub struct NoticeResponseBody {
storage: Bytes,
}
impl NoticeResponseBody {
#[inline]
pub fn fields(&self) -> ErrorFields<'_> {
ErrorFields { buf: &self.storage }
}
}
pub struct NotificationResponseBody {
process_id: i32,
channel: Bytes,
message: Bytes,
}
impl NotificationResponseBody {
#[inline]
pub fn process_id(&self) -> i32 {
self.process_id
}
#[inline]
pub fn channel(&self) -> io::Result<&str> {
get_str(&self.channel)
}
#[inline]
pub fn message(&self) -> io::Result<&str> {
get_str(&self.message)
}
}
pub struct ParameterDescriptionBody {
storage: Bytes,
len: u16,
}
impl ParameterDescriptionBody {
#[inline]
pub fn parameters(&self) -> Parameters<'_> {
Parameters {
buf: &self.storage,
remaining: self.len,
}
}
}
pub struct Parameters<'a> {
buf: &'a [u8],
remaining: u16,
}
impl FallibleIterator for Parameters<'_> {
type Item = Oid;
type Error = io::Error;
#[inline]
fn next(&mut self) -> io::Result<Option<Oid>> {
if self.remaining == 0 {
if self.buf.is_empty() {
return Ok(None);
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: parameters is not drained",
));
}
}
self.remaining -= 1;
self.buf.read_u32::<BigEndian>().map(Some)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.remaining as usize;
(len, Some(len))
}
}
pub struct ParameterStatusBody {
name: Bytes,
value: Bytes,
}
impl ParameterStatusBody {
#[inline]
pub fn name(&self) -> io::Result<&str> {
get_str(&self.name)
}
#[inline]
pub fn value(&self) -> io::Result<&str> {
get_str(&self.value)
}
}
pub struct ReadyForQueryBody {
status: u8,
}
impl ReadyForQueryBody {
#[inline]
pub fn status(&self) -> u8 {
self.status
}
}
pub struct RowDescriptionBody {
storage: Bytes,
len: u16,
}
impl RowDescriptionBody {
#[inline]
pub fn fields(&self) -> Fields<'_> {
Fields {
buf: &self.storage,
remaining: self.len,
}
}
}
pub struct Fields<'a> {
buf: &'a [u8],
remaining: u16,
}
impl<'a> FallibleIterator for Fields<'a> {
type Item = Field<'a>;
type Error = io::Error;
#[inline]
fn next(&mut self) -> io::Result<Option<Field<'a>>> {
if self.remaining == 0 {
if self.buf.is_empty() {
return Ok(None);
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid message length: field is not drained",
));
}
}
self.remaining -= 1;
let name_end = find_null(self.buf, 0)?;
let name = get_str(&self.buf[..name_end])?;
self.buf = &self.buf[name_end + 1..];
let table_oid = self.buf.read_u32::<BigEndian>()?;
let column_id = self.buf.read_i16::<BigEndian>()?;
let type_oid = self.buf.read_u32::<BigEndian>()?;
let type_size = self.buf.read_i16::<BigEndian>()?;
let type_modifier = self.buf.read_i32::<BigEndian>()?;
let format = self.buf.read_i16::<BigEndian>()?;
Ok(Some(Field {
name,
table_oid,
column_id,
type_oid,
type_size,
type_modifier,
format,
}))
}
}
pub struct Field<'a> {
name: &'a str,
table_oid: Oid,
column_id: i16,
type_oid: Oid,
type_size: i16,
type_modifier: i32,
format: i16,
}
impl<'a> Field<'a> {
#[inline]
pub fn name(&self) -> &'a str {
self.name
}
#[inline]
pub fn table_oid(&self) -> Oid {
self.table_oid
}
#[inline]
pub fn column_id(&self) -> i16 {
self.column_id
}
#[inline]
pub fn type_oid(&self) -> Oid {
self.type_oid
}
#[inline]
pub fn type_size(&self) -> i16 {
self.type_size
}
#[inline]
pub fn type_modifier(&self) -> i32 {
self.type_modifier
}
#[inline]
pub fn format(&self) -> i16 {
self.format
}
}
#[inline]
fn find_null(buf: &[u8], start: usize) -> io::Result<usize> {
match memchr(0, &buf[start..]) {
Some(pos) => Ok(pos + start),
None => Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"unexpected EOF",
)),
}
}
#[inline]
fn get_str(buf: &[u8]) -> io::Result<&str> {
str::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))
}


@@ -0,0 +1,297 @@
//! Frontend message serialization.
#![allow(missing_docs)]
use byteorder::{BigEndian, ByteOrder};
use bytes::{Buf, BufMut, BytesMut};
use std::convert::TryFrom;
use std::error::Error;
use std::io;
use std::marker;
use crate::{write_nullable, FromUsize, IsNull, Oid};
#[inline]
fn write_body<F, E>(buf: &mut BytesMut, f: F) -> Result<(), E>
where
F: FnOnce(&mut BytesMut) -> Result<(), E>,
E: From<io::Error>,
{
let base = buf.len();
buf.extend_from_slice(&[0; 4]);
f(buf)?;
let size = i32::from_usize(buf.len() - base)?;
BigEndian::write_i32(&mut buf[base..], size);
Ok(())
}
pub enum BindError {
Conversion(Box<dyn Error + marker::Sync + Send>),
Serialization(io::Error),
}
impl From<Box<dyn Error + marker::Sync + Send>> for BindError {
#[inline]
fn from(e: Box<dyn Error + marker::Sync + Send>) -> BindError {
BindError::Conversion(e)
}
}
impl From<io::Error> for BindError {
#[inline]
fn from(e: io::Error) -> BindError {
BindError::Serialization(e)
}
}
#[inline]
pub fn bind<I, J, F, T, K>(
portal: &str,
statement: &str,
formats: I,
values: J,
mut serializer: F,
result_formats: K,
buf: &mut BytesMut,
) -> Result<(), BindError>
where
I: IntoIterator<Item = i16>,
J: IntoIterator<Item = T>,
F: FnMut(T, &mut BytesMut) -> Result<IsNull, Box<dyn Error + marker::Sync + Send>>,
K: IntoIterator<Item = i16>,
{
buf.put_u8(b'B');
write_body(buf, |buf| {
write_cstr(portal.as_bytes(), buf)?;
write_cstr(statement.as_bytes(), buf)?;
write_counted(
formats,
|f, buf| {
buf.put_i16(f);
Ok::<_, io::Error>(())
},
buf,
)?;
write_counted(
values,
|v, buf| write_nullable(|buf| serializer(v, buf), buf),
buf,
)?;
write_counted(
result_formats,
|f, buf| {
buf.put_i16(f);
Ok::<_, io::Error>(())
},
buf,
)?;
Ok(())
})
}
#[inline]
fn write_counted<I, T, F, E>(items: I, mut serializer: F, buf: &mut BytesMut) -> Result<(), E>
where
I: IntoIterator<Item = T>,
F: FnMut(T, &mut BytesMut) -> Result<(), E>,
E: From<io::Error>,
{
let base = buf.len();
buf.extend_from_slice(&[0; 2]);
let mut count = 0;
for item in items {
serializer(item, buf)?;
count += 1;
}
let count = i16::from_usize(count)?;
BigEndian::write_i16(&mut buf[base..], count);
Ok(())
}
#[inline]
pub fn cancel_request(process_id: i32, secret_key: i32, buf: &mut BytesMut) {
write_body(buf, |buf| {
buf.put_i32(80_877_102);
buf.put_i32(process_id);
buf.put_i32(secret_key);
Ok::<_, io::Error>(())
})
.unwrap();
}
#[inline]
pub fn close(variant: u8, name: &str, buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'C');
write_body(buf, |buf| {
buf.put_u8(variant);
write_cstr(name.as_bytes(), buf)
})
}
pub struct CopyData<T> {
buf: T,
len: i32,
}
impl<T> CopyData<T>
where
T: Buf,
{
pub fn new(buf: T) -> io::Result<CopyData<T>> {
let len = buf
.remaining()
.checked_add(4)
.and_then(|l| i32::try_from(l).ok())
.ok_or_else(|| {
io::Error::new(io::ErrorKind::InvalidInput, "message length overflow")
})?;
Ok(CopyData { buf, len })
}
pub fn write(self, out: &mut BytesMut) {
out.put_u8(b'd');
out.put_i32(self.len);
out.put(self.buf);
}
}
#[inline]
pub fn copy_done(buf: &mut BytesMut) {
buf.put_u8(b'c');
write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
}
#[inline]
pub fn copy_fail(message: &str, buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'f');
write_body(buf, |buf| write_cstr(message.as_bytes(), buf))
}
#[inline]
pub fn describe(variant: u8, name: &str, buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'D');
write_body(buf, |buf| {
buf.put_u8(variant);
write_cstr(name.as_bytes(), buf)
})
}
#[inline]
pub fn execute(portal: &str, max_rows: i32, buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'E');
write_body(buf, |buf| {
write_cstr(portal.as_bytes(), buf)?;
buf.put_i32(max_rows);
Ok(())
})
}
#[inline]
pub fn parse<I>(name: &str, query: &str, param_types: I, buf: &mut BytesMut) -> io::Result<()>
where
I: IntoIterator<Item = Oid>,
{
buf.put_u8(b'P');
write_body(buf, |buf| {
write_cstr(name.as_bytes(), buf)?;
write_cstr(query.as_bytes(), buf)?;
write_counted(
param_types,
|t, buf| {
buf.put_u32(t);
Ok::<_, io::Error>(())
},
buf,
)?;
Ok(())
})
}
#[inline]
pub fn password_message(password: &[u8], buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'p');
write_body(buf, |buf| write_cstr(password, buf))
}
#[inline]
pub fn query(query: &str, buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'Q');
write_body(buf, |buf| write_cstr(query.as_bytes(), buf))
}
#[inline]
pub fn sasl_initial_response(mechanism: &str, data: &[u8], buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'p');
write_body(buf, |buf| {
write_cstr(mechanism.as_bytes(), buf)?;
let len = i32::from_usize(data.len())?;
buf.put_i32(len);
buf.put_slice(data);
Ok(())
})
}
#[inline]
pub fn sasl_response(data: &[u8], buf: &mut BytesMut) -> io::Result<()> {
buf.put_u8(b'p');
write_body(buf, |buf| {
buf.put_slice(data);
Ok(())
})
}
#[inline]
pub fn ssl_request(buf: &mut BytesMut) {
write_body(buf, |buf| {
buf.put_i32(80_877_103);
Ok::<_, io::Error>(())
})
.unwrap();
}
#[inline]
pub fn startup_message<'a, I>(parameters: I, buf: &mut BytesMut) -> io::Result<()>
where
I: IntoIterator<Item = (&'a str, &'a str)>,
{
write_body(buf, |buf| {
        // Postgres protocol version 3.0 (196608), written in big-endian
buf.put_i32(0x00_03_00_00);
for (key, value) in parameters {
write_cstr(key.as_bytes(), buf)?;
write_cstr(value.as_bytes(), buf)?;
}
buf.put_u8(0);
Ok(())
})
}
#[inline]
pub fn sync(buf: &mut BytesMut) {
buf.put_u8(b'S');
write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
}
#[inline]
pub fn terminate(buf: &mut BytesMut) {
buf.put_u8(b'X');
write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
}
#[inline]
fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> {
if s.contains(&0) {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"string contains embedded null",
));
}
buf.put_slice(s);
buf.put_u8(0);
Ok(())
}
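// A minimal sketch of queueing one extended-query round trip into a single
// buffer, assuming the unnamed statement and portal; the SQL text and the
// single text-format parameter are arbitrary examples.
#[allow(dead_code)]
fn queue_extended_query_sketch(buf: &mut BytesMut) -> Result<(), BindError> {
    // Parse the unnamed statement; no parameter OIDs, so the server infers them.
    parse("", "SELECT $1::text", None::<Oid>, buf)?;
    // Bind it to the unnamed portal with one text-format value and request
    // binary result columns.
    bind(
        "",
        "",
        Some(0i16),
        Some("hello"),
        |value, out| {
            out.put_slice(value.as_bytes());
            Ok(IsNull::No)
        },
        Some(1i16),
        buf,
    )?;
    // Execute with no row limit, then Sync to delimit the implicit transaction.
    execute("", 0, buf)?;
    sync(buf);
    Ok(())
}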


@@ -0,0 +1,8 @@
//! Postgres message protocol support.
//!
//! See [Postgres's documentation][docs] for more information on message flow.
//!
//! [docs]: https://www.postgresql.org/docs/9.5/static/protocol-flow.html
pub mod backend;
pub mod frontend;


@@ -0,0 +1,107 @@
//! Functions to encrypt a password in the client.
//!
//! This is intended to be used by client applications that wish to
//! send commands like `ALTER USER joe PASSWORD 'pwd'`. The password
//! need not be sent in cleartext if it is encrypted on the client
//! side. This is good because it ensures the cleartext password won't
//! end up in logs, pg_stat displays, etc.
use crate::authentication::sasl;
use hmac::{Hmac, Mac};
use md5::Md5;
use rand::RngCore;
use sha2::digest::FixedOutput;
use sha2::{Digest, Sha256};
#[cfg(test)]
mod test;
const SCRAM_DEFAULT_ITERATIONS: u32 = 4096;
const SCRAM_DEFAULT_SALT_LEN: usize = 16;
/// Hash password using SCRAM-SHA-256 with a randomly-generated
/// salt.
///
/// The client may assume the returned string doesn't contain any
/// special characters that would require escaping in an SQL command.
pub async fn scram_sha_256(password: &[u8]) -> String {
let mut salt: [u8; SCRAM_DEFAULT_SALT_LEN] = [0; SCRAM_DEFAULT_SALT_LEN];
let mut rng = rand::thread_rng();
rng.fill_bytes(&mut salt);
scram_sha_256_salt(password, salt).await
}
// Internal implementation of scram_sha_256 with a caller-provided
// salt. This is useful for testing.
pub(crate) async fn scram_sha_256_salt(
password: &[u8],
salt: [u8; SCRAM_DEFAULT_SALT_LEN],
) -> String {
// Prepare the password, per [RFC
// 4013](https://tools.ietf.org/html/rfc4013), if possible.
//
// Postgres treats passwords as byte strings (without embedded NUL
// bytes), but SASL expects passwords to be valid UTF-8.
//
// Follow the behavior of libpq's PQencryptPasswordConn(), and
// also the backend. If the password is not valid UTF-8, or if it
// contains prohibited characters (such as non-ASCII whitespace),
// just skip the SASLprep step and use the original byte
// sequence.
let prepared: Vec<u8> = match std::str::from_utf8(password) {
Ok(password_str) => {
match stringprep::saslprep(password_str) {
Ok(p) => p.into_owned().into_bytes(),
// contains invalid characters; skip saslprep
Err(_) => Vec::from(password),
}
}
// not valid UTF-8; skip saslprep
Err(_) => Vec::from(password),
};
// salt password
let salted_password = sasl::hi(&prepared, &salt, SCRAM_DEFAULT_ITERATIONS).await;
// client key
let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)
.expect("HMAC is able to accept all key sizes");
hmac.update(b"Client Key");
let client_key = hmac.finalize().into_bytes();
// stored key
let mut hash = Sha256::default();
hash.update(client_key.as_slice());
let stored_key = hash.finalize_fixed();
// server key
let mut hmac = Hmac::<Sha256>::new_from_slice(&salted_password)
.expect("HMAC is able to accept all key sizes");
hmac.update(b"Server Key");
let server_key = hmac.finalize().into_bytes();
format!(
"SCRAM-SHA-256${}:{}${}:{}",
SCRAM_DEFAULT_ITERATIONS,
base64::encode(salt),
base64::encode(stored_key),
base64::encode(server_key)
)
}
/// **Not recommended, as MD5 is not considered to be secure.**
///
/// Hash password using MD5 with the username as the salt.
///
/// The client may assume the returned string doesn't contain any
/// special characters that would require escaping.
pub fn md5(password: &[u8], username: &str) -> String {
// salt password with username
let mut salted_password = Vec::from(password);
salted_password.extend_from_slice(username.as_bytes());
let mut hash = Md5::new();
hash.update(&salted_password);
let digest = hash.finalize();
format!("md5{:x}", digest)
}
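// A minimal sketch of the intended use, assuming the caller builds the SQL
// string itself: the verifier returned by `scram_sha_256` contains no
// characters that need escaping, so it can be interpolated directly. The
// role name here is just an example and would still need proper identifier
// quoting in real use.
#[allow(dead_code)]
async fn alter_user_sql_sketch(role: &str, password: &[u8]) -> String {
    let verifier = scram_sha_256(password).await;
    format!("ALTER USER {} PASSWORD '{}'", role, verifier)
}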


@@ -0,0 +1,19 @@
use crate::password;
#[tokio::test]
async fn test_encrypt_scram_sha_256() {
// Specify the salt to make the test deterministic. Any bytes will do.
let salt: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
assert_eq!(
password::scram_sha_256_salt(b"secret", salt).await,
"SCRAM-SHA-256$4096:AQIDBAUGBwgJCgsMDQ4PEA==$8rrDg00OqaiWXJ7p+sCgHEIaBSHY89ZJl3mfIsf32oY=:05L1f+yZbiN8O0AnO40Og85NNRhvzTS57naKRWCcsIA="
);
}
#[test]
fn test_encrypt_md5() {
assert_eq!(
password::md5(b"secret", "foo"),
"md54ab2c5d00339c4b2a4e921d2dc4edec7"
);
}


@@ -0,0 +1,294 @@
//! Conversions to and from Postgres's binary format for various types.
use byteorder::{BigEndian, ReadBytesExt};
use bytes::{BufMut, BytesMut};
use fallible_iterator::FallibleIterator;
use std::boxed::Box as StdBox;
use std::error::Error;
use std::str;
use crate::Oid;
#[cfg(test)]
mod test;
/// Serializes a `TEXT`, `VARCHAR`, `CHAR(n)`, `NAME`, or `CITEXT` value.
#[inline]
pub fn text_to_sql(v: &str, buf: &mut BytesMut) {
buf.put_slice(v.as_bytes());
}
/// Deserializes a `TEXT`, `VARCHAR`, `CHAR(n)`, `NAME`, or `CITEXT` value.
#[inline]
pub fn text_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {
Ok(str::from_utf8(buf)?)
}
/// Deserializes a `"char"` value.
#[inline]
pub fn char_from_sql(mut buf: &[u8]) -> Result<i8, StdBox<dyn Error + Sync + Send>> {
let v = buf.read_i8()?;
if !buf.is_empty() {
return Err("invalid buffer size".into());
}
Ok(v)
}
/// Serializes an `OID` value.
#[inline]
pub fn oid_to_sql(v: Oid, buf: &mut BytesMut) {
buf.put_u32(v);
}
/// Deserializes an `OID` value.
#[inline]
pub fn oid_from_sql(mut buf: &[u8]) -> Result<Oid, StdBox<dyn Error + Sync + Send>> {
let v = buf.read_u32::<BigEndian>()?;
if !buf.is_empty() {
return Err("invalid buffer size".into());
}
Ok(v)
}
/// A fallible iterator over `HSTORE` entries.
pub struct HstoreEntries<'a> {
remaining: i32,
buf: &'a [u8],
}
impl<'a> FallibleIterator for HstoreEntries<'a> {
type Item = (&'a str, Option<&'a str>);
type Error = StdBox<dyn Error + Sync + Send>;
#[inline]
#[allow(clippy::type_complexity)]
fn next(
&mut self,
) -> Result<Option<(&'a str, Option<&'a str>)>, StdBox<dyn Error + Sync + Send>> {
if self.remaining == 0 {
if !self.buf.is_empty() {
return Err("invalid buffer size".into());
}
return Ok(None);
}
self.remaining -= 1;
let key_len = self.buf.read_i32::<BigEndian>()?;
if key_len < 0 {
return Err("invalid key length".into());
}
let (key, buf) = self.buf.split_at(key_len as usize);
let key = str::from_utf8(key)?;
self.buf = buf;
let value_len = self.buf.read_i32::<BigEndian>()?;
let value = if value_len < 0 {
None
} else {
let (value, buf) = self.buf.split_at(value_len as usize);
let value = str::from_utf8(value)?;
self.buf = buf;
Some(value)
};
Ok(Some((key, value)))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.remaining as usize;
(len, Some(len))
}
}
/// Deserializes an array value.
#[inline]
pub fn array_from_sql(mut buf: &[u8]) -> Result<Array<'_>, StdBox<dyn Error + Sync + Send>> {
let dimensions = buf.read_i32::<BigEndian>()?;
if dimensions < 0 {
return Err("invalid dimension count".into());
}
let mut r = buf;
let mut elements = 1i32;
for _ in 0..dimensions {
let len = r.read_i32::<BigEndian>()?;
if len < 0 {
return Err("invalid dimension size".into());
}
let _lower_bound = r.read_i32::<BigEndian>()?;
elements = match elements.checked_mul(len) {
Some(elements) => elements,
None => return Err("too many array elements".into()),
};
}
if dimensions == 0 {
elements = 0;
}
Ok(Array {
dimensions,
elements,
buf,
})
}
/// A Postgres array.
pub struct Array<'a> {
dimensions: i32,
elements: i32,
buf: &'a [u8],
}
impl<'a> Array<'a> {
/// Returns an iterator over the dimensions of the array.
#[inline]
pub fn dimensions(&self) -> ArrayDimensions<'a> {
ArrayDimensions(&self.buf[..self.dimensions as usize * 8])
}
/// Returns an iterator over the values of the array.
#[inline]
pub fn values(&self) -> ArrayValues<'a> {
ArrayValues {
remaining: self.elements,
buf: &self.buf[self.dimensions as usize * 8..],
}
}
}
/// An iterator over the dimensions of an array.
pub struct ArrayDimensions<'a>(&'a [u8]);
impl FallibleIterator for ArrayDimensions<'_> {
type Item = ArrayDimension;
type Error = StdBox<dyn Error + Sync + Send>;
#[inline]
fn next(&mut self) -> Result<Option<ArrayDimension>, StdBox<dyn Error + Sync + Send>> {
if self.0.is_empty() {
return Ok(None);
}
let len = self.0.read_i32::<BigEndian>()?;
let lower_bound = self.0.read_i32::<BigEndian>()?;
Ok(Some(ArrayDimension { len, lower_bound }))
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.0.len() / 8;
(len, Some(len))
}
}
/// Information about a dimension of an array.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ArrayDimension {
/// The length of this dimension.
pub len: i32,
/// The base value used to index into this dimension.
pub lower_bound: i32,
}
/// An iterator over the values of an array, in row-major order.
pub struct ArrayValues<'a> {
remaining: i32,
buf: &'a [u8],
}
impl<'a> FallibleIterator for ArrayValues<'a> {
type Item = Option<&'a [u8]>;
type Error = StdBox<dyn Error + Sync + Send>;
#[inline]
fn next(&mut self) -> Result<Option<Option<&'a [u8]>>, StdBox<dyn Error + Sync + Send>> {
if self.remaining == 0 {
if !self.buf.is_empty() {
return Err("invalid message length: arrayvalue not drained".into());
}
return Ok(None);
}
self.remaining -= 1;
let len = self.buf.read_i32::<BigEndian>()?;
let val = if len < 0 {
None
} else {
if self.buf.len() < len as usize {
return Err("invalid value length".into());
}
let (val, buf) = self.buf.split_at(len as usize);
self.buf = buf;
Some(val)
};
Ok(Some(val))
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.remaining as usize;
(len, Some(len))
}
}
/// Serializes a Postgres ltree string
#[inline]
pub fn ltree_to_sql(v: &str, buf: &mut BytesMut) {
// A version number is prepended to an ltree string per spec
buf.put_u8(1);
    // Append the ltree string itself
buf.put_slice(v.as_bytes());
}
/// Deserialize a Postgres ltree string
#[inline]
pub fn ltree_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {
match buf {
// Remove the version number from the front of the ltree per spec
[1u8, rest @ ..] => Ok(str::from_utf8(rest)?),
_ => Err("ltree version 1 only supported".into()),
}
}
/// Serializes a Postgres lquery string
#[inline]
pub fn lquery_to_sql(v: &str, buf: &mut BytesMut) {
// A version number is prepended to an lquery string per spec
buf.put_u8(1);
// Append the rest of the query
buf.put_slice(v.as_bytes());
}
/// Deserialize a Postgres lquery string
#[inline]
pub fn lquery_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {
match buf {
// Remove the version number from the front of the lquery per spec
[1u8, rest @ ..] => Ok(str::from_utf8(rest)?),
_ => Err("lquery version 1 only supported".into()),
}
}
/// Serializes a Postgres ltxtquery string
#[inline]
pub fn ltxtquery_to_sql(v: &str, buf: &mut BytesMut) {
// A version number is prepended to an ltxtquery string per spec
buf.put_u8(1);
// Append the rest of the query
buf.put_slice(v.as_bytes());
}
/// Deserialize a Postgres ltxtquery string
#[inline]
pub fn ltxtquery_from_sql(buf: &[u8]) -> Result<&str, StdBox<dyn Error + Sync + Send>> {
match buf {
// Remove the version number from the front of the ltxtquery per spec
[1u8, rest @ ..] => Ok(str::from_utf8(rest)?),
_ => Err("ltxtquery version 1 only supported".into()),
}
}
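// A minimal sketch of consuming a binary array value with the fallible
// iterators above, assuming `raw` holds the bytes of a one-dimensional text
// array column.
#[allow(dead_code)]
fn collect_text_array(raw: &[u8]) -> Result<Vec<Option<String>>, StdBox<dyn Error + Sync + Send>> {
    let array = array_from_sql(raw)?;
    // Only single-dimension arrays are handled in this sketch.
    let mut dimensions = array.dimensions();
    if dimensions.next()?.is_none() || dimensions.next()?.is_some() {
        return Err("expected exactly one dimension".into());
    }
    // Values arrive in row-major order; `None` marks a SQL NULL element.
    let mut out = Vec::new();
    let mut values = array.values();
    while let Some(value) = values.next()? {
        out.push(match value {
            Some(bytes) => Some(text_from_sql(bytes)?.to_string()),
            None => None,
        });
    }
    Ok(out)
}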


@@ -0,0 +1,87 @@
use bytes::{Buf, BytesMut};
use super::*;
#[test]
fn ltree_sql() {
let mut query = vec![1u8];
query.extend_from_slice("A.B.C".as_bytes());
let mut buf = BytesMut::new();
ltree_to_sql("A.B.C", &mut buf);
assert_eq!(query.as_slice(), buf.chunk());
}
#[test]
fn ltree_str() {
let mut query = vec![1u8];
query.extend_from_slice("A.B.C".as_bytes());
assert!(ltree_from_sql(query.as_slice()).is_ok())
}
#[test]
fn ltree_wrong_version() {
let mut query = vec![2u8];
query.extend_from_slice("A.B.C".as_bytes());
assert!(ltree_from_sql(query.as_slice()).is_err())
}
#[test]
fn lquery_sql() {
let mut query = vec![1u8];
query.extend_from_slice("A.B.C".as_bytes());
let mut buf = BytesMut::new();
lquery_to_sql("A.B.C", &mut buf);
assert_eq!(query.as_slice(), buf.chunk());
}
#[test]
fn lquery_str() {
let mut query = vec![1u8];
query.extend_from_slice("A.B.C".as_bytes());
assert!(lquery_from_sql(query.as_slice()).is_ok())
}
#[test]
fn lquery_wrong_version() {
let mut query = vec![2u8];
query.extend_from_slice("A.B.C".as_bytes());
assert!(lquery_from_sql(query.as_slice()).is_err())
}
#[test]
fn ltxtquery_sql() {
let mut query = vec![1u8];
query.extend_from_slice("a & b*".as_bytes());
let mut buf = BytesMut::new();
ltree_to_sql("a & b*", &mut buf);
assert_eq!(query.as_slice(), buf.chunk());
}
#[test]
fn ltxtquery_str() {
let mut query = vec![1u8];
query.extend_from_slice("a & b*".as_bytes());
assert!(ltree_from_sql(query.as_slice()).is_ok())
}
#[test]
fn ltxtquery_wrong_version() {
let mut query = vec![2u8];
query.extend_from_slice("a & b*".as_bytes());
assert!(ltree_from_sql(query.as_slice()).is_err())
}


@@ -0,0 +1,10 @@
[package]
name = "postgres-types2"
version = "0.1.0"
edition = "2018"
license = "MIT/Apache-2.0"
[dependencies]
bytes.workspace = true
fallible-iterator.workspace = true
postgres-protocol2 = { path = "../postgres-protocol2" }


@@ -0,0 +1,477 @@
//! Conversions to and from Postgres types.
//!
//! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it
//! unless you want to define your own `ToSql` or `FromSql` implementations.
#![doc(html_root_url = "https://docs.rs/postgres-types/0.2")]
#![warn(clippy::all, rust_2018_idioms, missing_docs)]
use fallible_iterator::FallibleIterator;
use postgres_protocol2::types;
use std::any::type_name;
use std::error::Error;
use std::fmt;
use std::sync::Arc;
use crate::type_gen::{Inner, Other};
#[doc(inline)]
pub use postgres_protocol2::Oid;
use bytes::BytesMut;
/// Generates a simple implementation of `ToSql::accepts` which accepts the
/// types passed to it.
macro_rules! accepts {
($($expected:ident),+) => (
fn accepts(ty: &$crate::Type) -> bool {
matches!(*ty, $($crate::Type::$expected)|+)
}
)
}
/// Generates an implementation of `ToSql::to_sql_checked`.
///
/// All `ToSql` implementations should use this macro.
macro_rules! to_sql_checked {
() => {
fn to_sql_checked(
&self,
ty: &$crate::Type,
out: &mut $crate::private::BytesMut,
) -> ::std::result::Result<
$crate::IsNull,
Box<dyn ::std::error::Error + ::std::marker::Sync + ::std::marker::Send>,
> {
$crate::__to_sql_checked(self, ty, out)
}
};
}
// WARNING: this function is not considered part of this crate's public API.
// It is subject to change at any time.
#[doc(hidden)]
pub fn __to_sql_checked<T>(
v: &T,
ty: &Type,
out: &mut BytesMut,
) -> Result<IsNull, Box<dyn Error + Sync + Send>>
where
T: ToSql,
{
if !T::accepts(ty) {
return Err(Box::new(WrongType::new::<T>(ty.clone())));
}
v.to_sql(ty, out)
}
// mod pg_lsn;
#[doc(hidden)]
pub mod private;
// mod special;
mod type_gen;
/// A Postgres type.
#[derive(PartialEq, Eq, Clone, Hash)]
pub struct Type(Inner);
impl fmt::Debug for Type {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, fmt)
}
}
impl fmt::Display for Type {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.schema() {
"public" | "pg_catalog" => {}
schema => write!(fmt, "{}.", schema)?,
}
fmt.write_str(self.name())
}
}
impl Type {
/// Creates a new `Type`.
pub fn new(name: String, oid: Oid, kind: Kind, schema: String) -> Type {
Type(Inner::Other(Arc::new(Other {
name,
oid,
kind,
schema,
})))
}
/// Returns the `Type` corresponding to the provided `Oid` if it
/// corresponds to a built-in type.
pub fn from_oid(oid: Oid) -> Option<Type> {
Inner::from_oid(oid).map(Type)
}
/// Returns the OID of the `Type`.
pub fn oid(&self) -> Oid {
self.0.oid()
}
/// Returns the kind of this type.
pub fn kind(&self) -> &Kind {
self.0.kind()
}
/// Returns the schema of this type.
pub fn schema(&self) -> &str {
match self.0 {
Inner::Other(ref u) => &u.schema,
_ => "pg_catalog",
}
}
/// Returns the name of this type.
pub fn name(&self) -> &str {
self.0.name()
}
}
/// Represents the kind of a Postgres type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Kind {
/// A simple type like `VARCHAR` or `INTEGER`.
Simple,
/// An enumerated type along with its variants.
Enum(Vec<String>),
/// A pseudo-type.
Pseudo,
/// An array type along with the type of its elements.
Array(Type),
/// A range type along with the type of its elements.
Range(Type),
/// A multirange type along with the type of its elements.
Multirange(Type),
/// A domain type along with its underlying type.
Domain(Type),
/// A composite type along with information about its fields.
Composite(Vec<Field>),
}
/// Information about a field of a composite type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Field {
name: String,
type_: Type,
}
impl Field {
/// Creates a new `Field`.
pub fn new(name: String, type_: Type) -> Field {
Field { name, type_ }
}
/// Returns the name of the field.
pub fn name(&self) -> &str {
&self.name
}
/// Returns the type of the field.
pub fn type_(&self) -> &Type {
&self.type_
}
}
/// An error indicating that a `NULL` Postgres value was passed to a `FromSql`
/// implementation that does not support `NULL` values.
#[derive(Debug, Clone, Copy)]
pub struct WasNull;
impl fmt::Display for WasNull {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.write_str("a Postgres value was `NULL`")
}
}
impl Error for WasNull {}
/// An error indicating that a conversion was attempted between incompatible
/// Rust and Postgres types.
#[derive(Debug)]
pub struct WrongType {
postgres: Type,
rust: &'static str,
}
impl fmt::Display for WrongType {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
fmt,
"cannot convert between the Rust type `{}` and the Postgres type `{}`",
self.rust, self.postgres,
)
}
}
impl Error for WrongType {}
impl WrongType {
/// Creates a new `WrongType` error.
pub fn new<T>(ty: Type) -> WrongType {
WrongType {
postgres: ty,
rust: type_name::<T>(),
}
}
}
/// An error indicating that an `as_text` conversion was attempted on a
/// binary-format result.
#[derive(Debug)]
pub struct WrongFormat {}
impl Error for WrongFormat {}
impl fmt::Display for WrongFormat {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
fmt,
"cannot read column as text while it is in binary format"
)
}
}
/// A trait for types that can be created from a Postgres value.
pub trait FromSql<'a>: Sized {
/// Creates a new value of this type from a buffer of data of the specified
/// Postgres `Type` in its binary format.
///
/// The caller of this method is responsible for ensuring that this type
/// is compatible with the Postgres `Type`.
fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Self, Box<dyn Error + Sync + Send>>;
/// Creates a new value of this type from a `NULL` SQL value.
///
/// The caller of this method is responsible for ensuring that this type
/// is compatible with the Postgres `Type`.
///
/// The default implementation returns `Err(Box::new(WasNull))`.
#[allow(unused_variables)]
fn from_sql_null(ty: &Type) -> Result<Self, Box<dyn Error + Sync + Send>> {
Err(Box::new(WasNull))
}
/// A convenience function that delegates to `from_sql` and `from_sql_null` depending on the
/// value of `raw`.
fn from_sql_nullable(
ty: &Type,
raw: Option<&'a [u8]>,
) -> Result<Self, Box<dyn Error + Sync + Send>> {
match raw {
Some(raw) => Self::from_sql(ty, raw),
None => Self::from_sql_null(ty),
}
}
/// Determines if a value of this type can be created from the specified
/// Postgres `Type`.
fn accepts(ty: &Type) -> bool;
}
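// A minimal sketch of a hand-written impl, assuming a hypothetical `CiString`
// newtype that maps the `citext` extension type onto an owned string.
#[allow(dead_code)]
struct CiString(String);

impl<'a> FromSql<'a> for CiString {
    fn from_sql(_ty: &Type, raw: &'a [u8]) -> Result<CiString, Box<dyn Error + Sync + Send>> {
        Ok(CiString(types::text_from_sql(raw)?.to_string()))
    }
    fn accepts(ty: &Type) -> bool {
        // Only the `citext` extension type is accepted in this example.
        ty.name() == "citext"
    }
}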
/// A trait for types which can be created from a Postgres value without borrowing any data.
///
/// This is primarily useful for trait bounds on functions.
pub trait FromSqlOwned: for<'a> FromSql<'a> {}
impl<T> FromSqlOwned for T where T: for<'a> FromSql<'a> {}
impl<'a, T: FromSql<'a>> FromSql<'a> for Option<T> {
fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Option<T>, Box<dyn Error + Sync + Send>> {
<T as FromSql>::from_sql(ty, raw).map(Some)
}
fn from_sql_null(_: &Type) -> Result<Option<T>, Box<dyn Error + Sync + Send>> {
Ok(None)
}
fn accepts(ty: &Type) -> bool {
<T as FromSql>::accepts(ty)
}
}
impl<'a, T: FromSql<'a>> FromSql<'a> for Vec<T> {
fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<Vec<T>, Box<dyn Error + Sync + Send>> {
let member_type = match *ty.kind() {
Kind::Array(ref member) => member,
_ => panic!("expected array type"),
};
let array = types::array_from_sql(raw)?;
if array.dimensions().count()? > 1 {
return Err("array contains too many dimensions".into());
}
array
.values()
.map(|v| T::from_sql_nullable(member_type, v))
.collect()
}
fn accepts(ty: &Type) -> bool {
match *ty.kind() {
Kind::Array(ref inner) => T::accepts(inner),
_ => false,
}
}
}
impl<'a> FromSql<'a> for String {
fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<String, Box<dyn Error + Sync + Send>> {
<&str as FromSql>::from_sql(ty, raw).map(ToString::to_string)
}
fn accepts(ty: &Type) -> bool {
<&str as FromSql>::accepts(ty)
}
}
impl<'a> FromSql<'a> for &'a str {
fn from_sql(ty: &Type, raw: &'a [u8]) -> Result<&'a str, Box<dyn Error + Sync + Send>> {
match *ty {
ref ty if ty.name() == "ltree" => types::ltree_from_sql(raw),
ref ty if ty.name() == "lquery" => types::lquery_from_sql(raw),
ref ty if ty.name() == "ltxtquery" => types::ltxtquery_from_sql(raw),
_ => types::text_from_sql(raw),
}
}
fn accepts(ty: &Type) -> bool {
match *ty {
Type::VARCHAR | Type::TEXT | Type::BPCHAR | Type::NAME | Type::UNKNOWN => true,
ref ty
if (ty.name() == "citext"
|| ty.name() == "ltree"
|| ty.name() == "lquery"
|| ty.name() == "ltxtquery") =>
{
true
}
_ => false,
}
}
}
macro_rules! simple_from {
($t:ty, $f:ident, $($expected:ident),+) => {
impl<'a> FromSql<'a> for $t {
fn from_sql(_: &Type, raw: &'a [u8]) -> Result<$t, Box<dyn Error + Sync + Send>> {
types::$f(raw)
}
accepts!($($expected),+);
}
}
}
simple_from!(i8, char_from_sql, CHAR);
simple_from!(u32, oid_from_sql, OID);
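// For reference (sketch, not part of the diff): `simple_from!(u32, oid_from_sql, OID)`
// expands to roughly
//
//     impl<'a> FromSql<'a> for u32 {
//         fn from_sql(_: &Type, raw: &'a [u8]) -> Result<u32, Box<dyn Error + Sync + Send>> {
//             types::oid_from_sql(raw)
//         }
//         fn accepts(ty: &Type) -> bool {
//             matches!(*ty, Type::OID)
//         }
//     }
//
// where the `accepts` body shown is what the `accepts!` macro is assumed to generate.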
/// An enum representing the nullability of a Postgres value.
pub enum IsNull {
/// The value is NULL.
Yes,
/// The value is not NULL.
No,
}
/// A trait for types that can be converted into Postgres values.
pub trait ToSql: fmt::Debug {
/// Converts the value of `self` into the binary format of the specified
/// Postgres `Type`, appending it to `out`.
///
/// The caller of this method is responsible for ensuring that this type
/// is compatible with the Postgres `Type`.
///
/// The return value indicates if this value should be represented as
/// `NULL`. If this is the case, implementations **must not** write
/// anything to `out`.
fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>>
where
Self: Sized;
/// Determines if a value of this type can be converted to the specified
/// Postgres `Type`.
fn accepts(ty: &Type) -> bool
where
Self: Sized;
/// An adaptor method used internally by Rust-Postgres.
///
/// *All* implementations of this method should be generated by the
/// `to_sql_checked!()` macro.
fn to_sql_checked(
&self,
ty: &Type,
out: &mut BytesMut,
) -> Result<IsNull, Box<dyn Error + Sync + Send>>;
/// Specify the encode format
fn encode_format(&self, _ty: &Type) -> Format {
Format::Binary
}
}
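// Illustrative sketch, not part of the diff: `ToSql` for the hypothetical `Label` newtype
// from the `FromSql` sketch above, delegating to the `&str` implementation further down in
// this file. As the trait docs require, the checked adaptor comes from `to_sql_checked!()`.
impl ToSql for Label {
    fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>> {
        // Encode the inner string exactly as a plain `&str` parameter would be encoded.
        <&str as ToSql>::to_sql(&self.0.as_str(), ty, out)
    }
    fn accepts(ty: &Type) -> bool {
        <&str as ToSql>::accepts(ty)
    }
    to_sql_checked!();
}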
/// Supported Postgres message format types
///
/// Using Text format in a message assumes a Postgres `SERVER_ENCODING` of `UTF8`
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Format {
/// Text format (UTF-8)
Text,
/// Compact, typed binary format
Binary,
}
impl ToSql for &str {
fn to_sql(&self, ty: &Type, w: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>> {
match *ty {
ref ty if ty.name() == "ltree" => types::ltree_to_sql(self, w),
ref ty if ty.name() == "lquery" => types::lquery_to_sql(self, w),
ref ty if ty.name() == "ltxtquery" => types::ltxtquery_to_sql(self, w),
_ => types::text_to_sql(self, w),
}
Ok(IsNull::No)
}
fn accepts(ty: &Type) -> bool {
match *ty {
Type::VARCHAR | Type::TEXT | Type::BPCHAR | Type::NAME | Type::UNKNOWN => true,
ref ty
if (ty.name() == "citext"
|| ty.name() == "ltree"
|| ty.name() == "lquery"
|| ty.name() == "ltxtquery") =>
{
true
}
_ => false,
}
}
to_sql_checked!();
}
macro_rules! simple_to {
($t:ty, $f:ident, $($expected:ident),+) => {
impl ToSql for $t {
fn to_sql(&self,
_: &Type,
w: &mut BytesMut)
-> Result<IsNull, Box<dyn Error + Sync + Send>> {
types::$f(*self, w);
Ok(IsNull::No)
}
accepts!($($expected),+);
to_sql_checked!();
}
}
}
simple_to!(u32, oid_to_sql, OID);


@@ -0,0 +1,34 @@
use crate::{FromSql, Type};
pub use bytes::BytesMut;
use std::error::Error;
pub fn read_be_i32(buf: &mut &[u8]) -> Result<i32, Box<dyn Error + Sync + Send>> {
if buf.len() < 4 {
return Err("invalid buffer size".into());
}
let mut bytes = [0; 4];
bytes.copy_from_slice(&buf[..4]);
*buf = &buf[4..];
Ok(i32::from_be_bytes(bytes))
}
pub fn read_value<'a, T>(
type_: &Type,
buf: &mut &'a [u8],
) -> Result<T, Box<dyn Error + Sync + Send>>
where
T: FromSql<'a>,
{
let len = read_be_i32(buf)?;
let value = if len < 0 {
None
} else {
if len as usize > buf.len() {
return Err("invalid buffer size".into());
}
let (head, tail) = buf.split_at(len as usize);
*buf = tail;
Some(head)
};
T::from_sql_nullable(type_, value)
}
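// Illustrative sketch, not part of the diff: decoding a length-prefixed TEXT value with the
// helpers above. A length of -1 denotes SQL NULL, which `from_sql_nullable` routes through
// `from_sql_null` (yielding `Ok(None)` here).
pub fn example_read_text(mut buf: &[u8]) -> Result<Option<String>, Box<dyn Error + Sync + Send>> {
    // e.g. b"\x00\x00\x00\x05hello" decodes to `Some("hello".to_string())`
    read_value::<Option<String>>(&Type::TEXT, &mut buf)
}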

File diff suppressed because it is too large


@@ -0,0 +1,21 @@
[package]
name = "tokio-postgres2"
version = "0.1.0"
edition = "2018"
license = "MIT/Apache-2.0"
[dependencies]
async-trait.workspace = true
bytes.workspace = true
byteorder.workspace = true
fallible-iterator.workspace = true
futures-util = { workspace = true, features = ["sink"] }
log = "0.4"
parking_lot.workspace = true
percent-encoding = "2.0"
pin-project-lite.workspace = true
phf = "0.11"
postgres-protocol2 = { path = "../postgres-protocol2" }
postgres-types2 = { path = "../postgres-types2" }
tokio = { workspace = true, features = ["io-util", "time", "net"] }
tokio-util = { workspace = true, features = ["codec"] }


@@ -0,0 +1,40 @@
use tokio::net::TcpStream;
use crate::client::SocketConfig;
use crate::config::{Host, SslMode};
use crate::tls::MakeTlsConnect;
use crate::{cancel_query_raw, connect_socket, Error};
use std::io;
pub(crate) async fn cancel_query<T>(
config: Option<SocketConfig>,
ssl_mode: SslMode,
mut tls: T,
process_id: i32,
secret_key: i32,
) -> Result<(), Error>
where
T: MakeTlsConnect<TcpStream>,
{
let config = match config {
Some(config) => config,
None => {
return Err(Error::connect(io::Error::new(
io::ErrorKind::InvalidInput,
"unknown host",
)))
}
};
let hostname = match &config.host {
Host::Tcp(host) => &**host,
};
let tls = tls
.make_tls_connect(hostname)
.map_err(|e| Error::tls(e.into()))?;
let socket =
connect_socket::connect_socket(&config.host, config.port, config.connect_timeout).await?;
cancel_query_raw::cancel_query_raw(socket, ssl_mode, tls, process_id, secret_key).await
}


@@ -0,0 +1,29 @@
use crate::config::SslMode;
use crate::tls::TlsConnect;
use crate::{connect_tls, Error};
use bytes::BytesMut;
use postgres_protocol2::message::frontend;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
pub async fn cancel_query_raw<S, T>(
stream: S,
mode: SslMode,
tls: T,
process_id: i32,
secret_key: i32,
) -> Result<(), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: TlsConnect<S>,
{
let mut stream = connect_tls::connect_tls(stream, mode, tls).await?;
let mut buf = BytesMut::new();
frontend::cancel_request(process_id, secret_key, &mut buf);
stream.write_all(&buf).await.map_err(Error::io)?;
stream.flush().await.map_err(Error::io)?;
stream.shutdown().await.map_err(Error::io)?;
Ok(())
}


@@ -0,0 +1,62 @@
use crate::config::SslMode;
use crate::tls::TlsConnect;
use crate::{cancel_query, client::SocketConfig, tls::MakeTlsConnect};
use crate::{cancel_query_raw, Error};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::net::TcpStream;
/// The capability to request cancellation of in-progress queries on a
/// connection.
#[derive(Clone)]
pub struct CancelToken {
pub(crate) socket_config: Option<SocketConfig>,
pub(crate) ssl_mode: SslMode,
pub(crate) process_id: i32,
pub(crate) secret_key: i32,
}
impl CancelToken {
/// Attempts to cancel the in-progress query on the connection associated
/// with this `CancelToken`.
///
/// The server provides no information about whether a cancellation attempt was successful or not. An error will
/// only be returned if the client was unable to connect to the database.
///
/// Cancellation is inherently racy. There is no guarantee that the
/// cancellation request will reach the server before the query terminates
/// normally, or that the connection associated with this token is still
/// active.
pub async fn cancel_query<T>(&self, tls: T) -> Result<(), Error>
where
T: MakeTlsConnect<TcpStream>,
{
cancel_query::cancel_query(
self.socket_config.clone(),
self.ssl_mode,
tls,
self.process_id,
self.secret_key,
)
.await
}
/// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new
/// connection itself.
pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: TlsConnect<S>,
{
cancel_query_raw::cancel_query_raw(
stream,
self.ssl_mode,
tls,
self.process_id,
self.secret_key,
)
.await
}
}
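// Illustrative usage sketch, not part of the diff: issue a best-effort cancellation with any
// `MakeTlsConnect<TcpStream>` implementation (for example a no-TLS connector, if this crate
// keeps one comparable to upstream tokio-postgres' `NoTls`).
pub async fn request_cancel<T>(token: &CancelToken, tls: T) -> Result<(), Error>
where
    T: MakeTlsConnect<TcpStream>,
{
    // `Ok` only means the cancel request was delivered; the server never reports whether a
    // query was actually cancelled, and losing the race with normal completion is expected.
    token.cancel_query(tls).await
}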


@@ -0,0 +1,439 @@
use crate::codec::{BackendMessages, FrontendMessage};
use crate::config::Host;
use crate::config::SslMode;
use crate::connection::{Request, RequestMessages};
use crate::query::RowStream;
use crate::simple_query::SimpleQueryStream;
use crate::types::{Oid, ToSql, Type};
use crate::{
prepare, query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
SimpleQueryMessage, Statement, ToStatement, Transaction, TransactionBuilder,
};
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{future, ready, TryStreamExt};
use parking_lot::Mutex;
use postgres_protocol2::message::{backend::Message, frontend};
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use std::task::{Context, Poll};
use tokio::sync::mpsc;
use std::time::Duration;
pub struct Responses {
receiver: mpsc::Receiver<BackendMessages>,
cur: BackendMessages,
}
impl Responses {
pub fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll<Result<Message, Error>> {
loop {
match self.cur.next().map_err(Error::parse)? {
Some(Message::ErrorResponse(body)) => return Poll::Ready(Err(Error::db(body))),
Some(message) => return Poll::Ready(Ok(message)),
None => {}
}
match ready!(self.receiver.poll_recv(cx)) {
Some(messages) => self.cur = messages,
None => return Poll::Ready(Err(Error::closed())),
}
}
}
pub async fn next(&mut self) -> Result<Message, Error> {
future::poll_fn(|cx| self.poll_next(cx)).await
}
}
/// A cache of type info and prepared statements for fetching type info
/// (corresponding to the queries in the [prepare] module).
#[derive(Default)]
struct CachedTypeInfo {
/// A statement for basic information for a type from its
/// OID. Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_QUERY) (or its
/// fallback).
typeinfo: Option<Statement>,
/// A statement for getting information for a composite type from its OID.
/// Corresponds to [TYPEINFO_COMPOSITE_QUERY](prepare::TYPEINFO_COMPOSITE_QUERY).
typeinfo_composite: Option<Statement>,
/// A statement for getting information for an enum type from its OID.
/// Corresponds to [TYPEINFO_ENUM_QUERY](prepare::TYPEINFO_ENUM_QUERY) (or
/// its fallback).
typeinfo_enum: Option<Statement>,
/// Cache of types already looked up.
types: HashMap<Oid, Type>,
}
pub struct InnerClient {
sender: mpsc::UnboundedSender<Request>,
cached_typeinfo: Mutex<CachedTypeInfo>,
/// A buffer to use when writing out postgres commands.
buffer: Mutex<BytesMut>,
}
impl InnerClient {
pub fn send(&self, messages: RequestMessages) -> Result<Responses, Error> {
let (sender, receiver) = mpsc::channel(1);
let request = Request { messages, sender };
self.sender.send(request).map_err(|_| Error::closed())?;
Ok(Responses {
receiver,
cur: BackendMessages::empty(),
})
}
pub fn typeinfo(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo.clone()
}
pub fn set_typeinfo(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo = Some(statement.clone());
}
pub fn typeinfo_composite(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo_composite.clone()
}
pub fn set_typeinfo_composite(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo_composite = Some(statement.clone());
}
pub fn typeinfo_enum(&self) -> Option<Statement> {
self.cached_typeinfo.lock().typeinfo_enum.clone()
}
pub fn set_typeinfo_enum(&self, statement: &Statement) {
self.cached_typeinfo.lock().typeinfo_enum = Some(statement.clone());
}
pub fn type_(&self, oid: Oid) -> Option<Type> {
self.cached_typeinfo.lock().types.get(&oid).cloned()
}
pub fn set_type(&self, oid: Oid, type_: &Type) {
self.cached_typeinfo.lock().types.insert(oid, type_.clone());
}
/// Call the given function with a buffer to be used when writing out
/// postgres commands.
pub fn with_buf<F, R>(&self, f: F) -> R
where
F: FnOnce(&mut BytesMut) -> R,
{
let mut buffer = self.buffer.lock();
let r = f(&mut buffer);
buffer.clear();
r
}
}
#[derive(Clone)]
pub(crate) struct SocketConfig {
pub host: Host,
pub port: u16,
pub connect_timeout: Option<Duration>,
// pub keepalive: Option<KeepaliveConfig>,
}
/// An asynchronous PostgreSQL client.
///
/// The client is one half of what is returned when a connection is established. Users interact with the database
/// through this client object.
pub struct Client {
inner: Arc<InnerClient>,
socket_config: Option<SocketConfig>,
ssl_mode: SslMode,
process_id: i32,
secret_key: i32,
}
impl Client {
pub(crate) fn new(
sender: mpsc::UnboundedSender<Request>,
ssl_mode: SslMode,
process_id: i32,
secret_key: i32,
) -> Client {
Client {
inner: Arc::new(InnerClient {
sender,
cached_typeinfo: Default::default(),
buffer: Default::default(),
}),
socket_config: None,
ssl_mode,
process_id,
secret_key,
}
}
/// Returns process_id.
pub fn get_process_id(&self) -> i32 {
self.process_id
}
pub(crate) fn inner(&self) -> &Arc<InnerClient> {
&self.inner
}
pub(crate) fn set_socket_config(&mut self, socket_config: SocketConfig) {
self.socket_config = Some(socket_config);
}
/// Creates a new prepared statement.
///
/// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
/// which are set when executed. Prepared statements can only be used with the connection that created them.
pub async fn prepare(&self, query: &str) -> Result<Statement, Error> {
self.prepare_typed(query, &[]).await
}
/// Like `prepare`, but allows the types of query parameters to be explicitly specified.
///
/// The list of types may be smaller than the number of parameters - the types of the remaining parameters will be
/// inferred. For example, `client.prepare_typed(query, &[])` is equivalent to `client.prepare(query)`.
pub async fn prepare_typed(
&self,
query: &str,
parameter_types: &[Type],
) -> Result<Statement, Error> {
prepare::prepare(&self.inner, query, parameter_types).await
}
/// Executes a statement, returning a vector of the resulting rows.
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
pub async fn query<T>(
&self,
statement: &T,
params: &[&(dyn ToSql + Sync)],
) -> Result<Vec<Row>, Error>
where
T: ?Sized + ToStatement,
{
self.query_raw(statement, slice_iter(params))
.await?
.try_collect()
.await
}
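// Illustrative usage sketch, not part of the diff: a parameterized query against a
// hypothetical `users(name text)` table. `&str` implements `ToSql`, so it can be bound
// directly to `$1`.
pub async fn find_users(client: &Client, name: &str) -> Result<Vec<Row>, Error> {
    // The statement is prepared on the fly; call `prepare` up front if it is reused often.
    client
        .query("SELECT name FROM users WHERE name = $1", &[&name])
        .await
}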
/// The maximally flexible version of [`query`].
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
///
/// [`query`]: #method.query
pub async fn query_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<RowStream, Error>
where
T: ?Sized + ToStatement,
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
let statement = statement.__convert().into_statement(self).await?;
query::query(&self.inner, statement, params).await
}
/// Executes a statement, passing the parameters directly to the Postgres backend as text and
/// letting it sort out the typing itself, which saves a round trip.
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = Option<S>>,
I::IntoIter: ExactSizeIterator,
{
query::query_txt(&self.inner, statement, params).await
}
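// Illustrative usage sketch, not part of the diff: the same lookup as above, but with the
// parameter shipped as text so the server infers its type and no client-side `ToSql`
// implementation is involved.
pub async fn find_users_txt(client: &Client, name: &str) -> Result<Vec<Row>, Error> {
    client
        .query_raw_txt("SELECT name FROM users WHERE name = $1", vec![Some(name)])
        .await?
        .try_collect()
        .await
}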
/// Executes a statement, returning the number of rows modified.
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// If the statement does not modify any rows (e.g. `SELECT`), 0 is returned.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
pub async fn execute<T>(
&self,
statement: &T,
params: &[&(dyn ToSql + Sync)],
) -> Result<u64, Error>
where
T: ?Sized + ToStatement,
{
self.execute_raw(statement, slice_iter(params)).await
}
/// The maximally flexible version of [`execute`].
///
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
/// provided, 1-indexed.
///
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
/// with the `prepare` method.
///
/// # Panics
///
/// Panics if the number of parameters provided does not match the number expected.
///
/// [`execute`]: #method.execute
pub async fn execute_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<u64, Error>
where
T: ?Sized + ToStatement,
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
let statement = statement.__convert().into_statement(self).await?;
query::execute(self.inner(), statement, params).await
}
/// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
///
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
/// point. The simple query protocol returns the values in rows as strings rather than in their binary encodings,
/// so the associated row type doesn't work with the `FromSql` trait. Rather than simply returning a list of
/// rows, this method returns a list of enum values, each of which indicates either the completion of one of the
/// commands or a row of data. This preserves the framing between the separate statements in the request.
///
/// # Warning
///
/// Prepared statements should be used for any query which contains user-specified data, as they provide the
/// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
/// them to this method!
pub async fn simple_query(&self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
self.simple_query_raw(query).await?.try_collect().await
}
pub(crate) async fn simple_query_raw(&self, query: &str) -> Result<SimpleQueryStream, Error> {
simple_query::simple_query(self.inner(), query).await
}
/// Executes a sequence of SQL statements using the simple query protocol.
///
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
/// point. This is intended for use when, for example, initializing a database schema.
///
/// # Warning
///
/// Prepared statements should be used for any query which contains user-specified data, as they provide the
/// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
/// them to this method!
pub async fn batch_execute(&self, query: &str) -> Result<ReadyForQueryStatus, Error> {
simple_query::batch_execute(self.inner(), query).await
}
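// Illustrative usage sketch, not part of the diff: initializing a schema over the simple
// query protocol. Per the warning above, no user-supplied data may be spliced into the string.
pub async fn init_schema(client: &Client) -> Result<(), Error> {
    client
        .batch_execute(
            "CREATE TABLE IF NOT EXISTS users (name text); \
             CREATE INDEX IF NOT EXISTS users_name_idx ON users (name);",
        )
        .await?;
    Ok(())
}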
/// Begins a new database transaction.
///
/// The transaction will roll back by default - use the `commit` method to commit it.
pub async fn transaction(&mut self) -> Result<Transaction<'_>, Error> {
struct RollbackIfNotDone<'me> {
client: &'me Client,
done: bool,
}
impl Drop for RollbackIfNotDone<'_> {
fn drop(&mut self) {
if self.done {
return;
}
let buf = self.client.inner().with_buf(|buf| {
frontend::query("ROLLBACK", buf).unwrap();
buf.split().freeze()
});
let _ = self
.client
.inner()
.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
}
}
// This guard is needed because the `Future` created by this method can be dropped after
// the `RequestMessages` are synchronously sent to the `Connection` by
// `batch_execute()`, but before `Responses` is asynchronously polled to
// completion. In that case the `Transaction` would never be created and thus
// never rolled back.
{
let mut cleaner = RollbackIfNotDone {
client: self,
done: false,
};
self.batch_execute("BEGIN").await?;
cleaner.done = true;
}
Ok(Transaction::new(self))
}
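// Illustrative usage sketch, not part of the diff. It assumes the vendored `Transaction`
// keeps upstream tokio-postgres' `execute` and `commit` methods; if the guard is dropped
// before `commit`, the transaction rolls back as documented above.
pub async fn rename_user(client: &mut Client, from: &str, to: &str) -> Result<(), Error> {
    let transaction = client.transaction().await?;
    transaction
        .execute("UPDATE users SET name = $1 WHERE name = $2", &[&to, &from])
        .await?;
    transaction.commit().await?;
    Ok(())
}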
/// Returns a builder for a transaction with custom settings.
///
/// Unlike the `transaction` method, the builder can be used to control the transaction's isolation level and other
/// attributes.
pub fn build_transaction(&mut self) -> TransactionBuilder<'_> {
TransactionBuilder::new(self)
}
/// Constructs a cancellation token that can later be used to request cancellation of a query running on the
/// connection associated with this client.
pub fn cancel_token(&self) -> CancelToken {
CancelToken {
socket_config: self.socket_config.clone(),
ssl_mode: self.ssl_mode,
process_id: self.process_id,
secret_key: self.secret_key,
}
}
/// Query for type information
pub async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
crate::prepare::get_type(&self.inner, oid).await
}
/// Determines if the connection to the server has already closed.
///
/// In that case, all future queries will fail.
pub fn is_closed(&self) -> bool {
self.inner.sender.is_closed()
}
}
impl fmt::Debug for Client {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Client").finish()
}
}


@@ -0,0 +1,109 @@
use bytes::{Buf, Bytes, BytesMut};
use fallible_iterator::FallibleIterator;
use postgres_protocol2::message::backend;
use postgres_protocol2::message::frontend::CopyData;
use std::io;
use tokio_util::codec::{Decoder, Encoder};
pub enum FrontendMessage {
Raw(Bytes),
CopyData(CopyData<Box<dyn Buf + Send>>),
}
pub enum BackendMessage {
Normal {
messages: BackendMessages,
request_complete: bool,
},
Async(backend::Message),
}
pub struct BackendMessages(BytesMut);
impl BackendMessages {
pub fn empty() -> BackendMessages {
BackendMessages(BytesMut::new())
}
}
impl FallibleIterator for BackendMessages {
type Item = backend::Message;
type Error = io::Error;
fn next(&mut self) -> io::Result<Option<backend::Message>> {
backend::Message::parse(&mut self.0)
}
}
pub struct PostgresCodec {
pub max_message_size: Option<usize>,
}
impl Encoder<FrontendMessage> for PostgresCodec {
type Error = io::Error;
fn encode(&mut self, item: FrontendMessage, dst: &mut BytesMut) -> io::Result<()> {
match item {
FrontendMessage::Raw(buf) => dst.extend_from_slice(&buf),
FrontendMessage::CopyData(data) => data.write(dst),
}
Ok(())
}
}
impl Decoder for PostgresCodec {
type Item = BackendMessage;
type Error = io::Error;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<BackendMessage>, io::Error> {
let mut idx = 0;
let mut request_complete = false;
while let Some(header) = backend::Header::parse(&src[idx..])? {
let len = header.len() as usize + 1;
if src[idx..].len() < len {
break;
}
if let Some(max) = self.max_message_size {
if len > max {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"message too large",
));
}
}
match header.tag() {
backend::NOTICE_RESPONSE_TAG
| backend::NOTIFICATION_RESPONSE_TAG
| backend::PARAMETER_STATUS_TAG => {
if idx == 0 {
let message = backend::Message::parse(src)?.unwrap();
return Ok(Some(BackendMessage::Async(message)));
} else {
break;
}
}
_ => {}
}
idx += len;
if header.tag() == backend::READY_FOR_QUERY_TAG {
request_complete = true;
break;
}
}
if idx == 0 {
Ok(None)
} else {
Ok(Some(BackendMessage::Normal {
messages: BackendMessages(src.split_to(idx)),
request_complete,
}))
}
}
}


@@ -0,0 +1,897 @@
//! Connection configuration.
use crate::connect::connect;
use crate::connect_raw::connect_raw;
use crate::tls::MakeTlsConnect;
use crate::tls::TlsConnect;
use crate::{Client, Connection, Error};
use std::borrow::Cow;
use std::str;
use std::str::FromStr;
use std::time::Duration;
use std::{error, fmt, iter, mem};
use tokio::io::{AsyncRead, AsyncWrite};
pub use postgres_protocol2::authentication::sasl::ScramKeys;
use tokio::net::TcpStream;
/// Properties required of a session.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum TargetSessionAttrs {
/// No special properties are required.
Any,
/// The session must allow writes.
ReadWrite,
}
/// TLS configuration.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum SslMode {
/// Do not use TLS.
Disable,
/// Attempt to connect with TLS but allow sessions without.
Prefer,
/// Require the use of TLS.
Require,
}
/// Channel binding configuration.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ChannelBinding {
/// Do not use channel binding.
Disable,
/// Attempt to use channel binding but allow sessions without.
Prefer,
/// Require the use of channel binding.
Require,
}
/// Replication mode configuration.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ReplicationMode {
/// Physical replication.
Physical,
/// Logical replication.
Logical,
}
/// A host specification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Host {
/// A TCP hostname.
Tcp(String),
}
/// Precomputed keys which may override password during auth.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AuthKeys {
/// A `ClientKey` & `ServerKey` pair for `SCRAM-SHA-256`.
ScramSha256(ScramKeys<32>),
}
/// Connection configuration.
///
/// Configuration can be parsed from libpq-style connection strings. These strings come in two formats:
///
/// # Key-Value
///
/// This format consists of space-separated key-value pairs. Values which are either the empty string or contain
/// whitespace should be wrapped in `'`. `'` and `\` characters should be backslash-escaped.
///
/// ## Keys
///
/// * `user` - The username to authenticate with. Required.
/// * `password` - The password to authenticate with.
/// * `dbname` - The name of the database to connect to. Defaults to the username.
/// * `options` - Command line options used to configure the server.
/// * `application_name` - Sets the `application_name` parameter on the server.
/// * `sslmode` - Controls usage of TLS. If set to `disable`, TLS will not be used. If set to `prefer`, TLS will be used
/// if available, but not used otherwise. If set to `require`, TLS will be forced to be used. Defaults to `prefer`.
/// * `host` - The host to connect to. This crate only supports TCP hosts, so the value is always treated as a
/// hostname. Multiple hosts can be specified, separated by commas. Each host will be tried in turn when connecting.
/// Required if connecting with the `connect` method.
/// * `port` - The port to connect to. Multiple ports can be specified, separated by commas. The number of ports must be
/// either 1, in which case it will be used for all hosts, or the same as the number of hosts. Defaults to 5432 if
/// omitted or the empty string.
/// * `connect_timeout` - The time limit in seconds applied to each socket-level connection attempt. Note that hostnames
/// can resolve to multiple IP addresses, and this limit is applied to each address. Defaults to no timeout.
/// * `target_session_attrs` - Specifies requirements of the session. If set to `read-write`, the client will check that
/// the `transaction_read_only` session parameter is `off`. This can be used to connect to the primary server
/// in a database cluster as opposed to the secondary read-only mirrors. Defaults to `any`.
/// * `channel_binding` - Controls usage of channel binding in the authentication process. If set to `disable`, channel
/// binding will not be used. If set to `prefer`, channel binding will be used if available, but not used otherwise.
/// If set to `require`, the authentication process will fail if channel binding is not used. Defaults to `prefer`.
///
/// ## Examples
///
/// ```not_rust
/// host=localhost user=postgres connect_timeout=10
/// ```
///
/// ```not_rust
/// host=/var/lib/postgresql,localhost port=1234 user=postgres password='password with spaces'
/// ```
///
/// ```not_rust
/// host=host1,host2,host3 port=1234,,5678 user=postgres target_session_attrs=read-write
/// ```
///
/// # Url
///
/// This format resembles a URL with a scheme of either `postgres://` or `postgresql://`. All components are optional,
/// and the format accepts query parameters for all of the key-value pairs described in the section above. Multiple
/// host/port pairs can be comma-separated. Unix socket paths in the host section of the URL should be percent-encoded,
/// as the path component of the URL specifies the database name.
///
/// ## Examples
///
/// ```not_rust
/// postgresql://user@localhost
/// ```
///
/// ```not_rust
/// postgresql://user:password@%2Fvar%2Flib%2Fpostgresql/mydb?connect_timeout=10
/// ```
///
/// ```not_rust
/// postgresql://user@host1:1234,host2,host3:5678?target_session_attrs=read-write
/// ```
///
/// ```not_rust
/// postgresql:///mydb?user=user&host=/var/lib/postgresql
/// ```
#[derive(Clone, PartialEq, Eq)]
pub struct Config {
pub(crate) user: Option<String>,
pub(crate) password: Option<Vec<u8>>,
pub(crate) auth_keys: Option<Box<AuthKeys>>,
pub(crate) dbname: Option<String>,
pub(crate) options: Option<String>,
pub(crate) application_name: Option<String>,
pub(crate) ssl_mode: SslMode,
pub(crate) host: Vec<Host>,
pub(crate) port: Vec<u16>,
pub(crate) connect_timeout: Option<Duration>,
pub(crate) target_session_attrs: TargetSessionAttrs,
pub(crate) channel_binding: ChannelBinding,
pub(crate) replication_mode: Option<ReplicationMode>,
pub(crate) max_backend_message_size: Option<usize>,
}
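// Illustrative sketch, not part of the diff: the builder methods below and the libpq-style
// string format documented above produce identical configurations (`neondb` is just a
// placeholder database name).
pub fn example_config() -> Result<Config, Error> {
    let mut built = Config::new();
    built.host("localhost").port(5432).user("postgres").dbname("neondb");
    let parsed: Config = "host=localhost port=5432 user=postgres dbname=neondb".parse()?;
    // `Config` derives `PartialEq`, so both construction paths can be compared directly.
    assert_eq!(built, parsed);
    Ok(parsed)
}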
impl Default for Config {
fn default() -> Config {
Config::new()
}
}
impl Config {
/// Creates a new configuration.
pub fn new() -> Config {
Config {
user: None,
password: None,
auth_keys: None,
dbname: None,
options: None,
application_name: None,
ssl_mode: SslMode::Prefer,
host: vec![],
port: vec![],
connect_timeout: None,
target_session_attrs: TargetSessionAttrs::Any,
channel_binding: ChannelBinding::Prefer,
replication_mode: None,
max_backend_message_size: None,
}
}
/// Sets the user to authenticate with.
///
/// Required.
pub fn user(&mut self, user: &str) -> &mut Config {
self.user = Some(user.to_string());
self
}
/// Gets the user to authenticate with, if one has been configured with
/// the `user` method.
pub fn get_user(&self) -> Option<&str> {
self.user.as_deref()
}
/// Sets the password to authenticate with.
pub fn password<T>(&mut self, password: T) -> &mut Config
where
T: AsRef<[u8]>,
{
self.password = Some(password.as_ref().to_vec());
self
}
/// Gets the password to authenticate with, if one has been configured with
/// the `password` method.
pub fn get_password(&self) -> Option<&[u8]> {
self.password.as_deref()
}
/// Sets precomputed protocol-specific keys to authenticate with.
/// When set, this option will override `password`.
/// See [`AuthKeys`] for more information.
pub fn auth_keys(&mut self, keys: AuthKeys) -> &mut Config {
self.auth_keys = Some(Box::new(keys));
self
}
/// Gets the precomputed protocol-specific keys used to authenticate with,
/// if they have been configured with the `auth_keys` method.
pub fn get_auth_keys(&self) -> Option<AuthKeys> {
self.auth_keys.as_deref().copied()
}
/// Sets the name of the database to connect to.
///
/// Defaults to the user.
pub fn dbname(&mut self, dbname: &str) -> &mut Config {
self.dbname = Some(dbname.to_string());
self
}
/// Gets the name of the database to connect to, if one has been configured
/// with the `dbname` method.
pub fn get_dbname(&self) -> Option<&str> {
self.dbname.as_deref()
}
/// Sets command line options used to configure the server.
pub fn options(&mut self, options: &str) -> &mut Config {
self.options = Some(options.to_string());
self
}
/// Gets the command line options used to configure the server, if the
/// options have been set with the `options` method.
pub fn get_options(&self) -> Option<&str> {
self.options.as_deref()
}
/// Sets the value of the `application_name` runtime parameter.
pub fn application_name(&mut self, application_name: &str) -> &mut Config {
self.application_name = Some(application_name.to_string());
self
}
/// Gets the value of the `application_name` runtime parameter, if it has
/// been set with the `application_name` method.
pub fn get_application_name(&self) -> Option<&str> {
self.application_name.as_deref()
}
/// Sets the SSL configuration.
///
/// Defaults to `prefer`.
pub fn ssl_mode(&mut self, ssl_mode: SslMode) -> &mut Config {
self.ssl_mode = ssl_mode;
self
}
/// Gets the SSL configuration.
pub fn get_ssl_mode(&self) -> SslMode {
self.ssl_mode
}
/// Adds a host to the configuration.
///
/// Multiple hosts can be specified by calling this method multiple times, and each will be tried in order.
pub fn host(&mut self, host: &str) -> &mut Config {
self.host.push(Host::Tcp(host.to_string()));
self
}
/// Gets the hosts that have been added to the configuration with `host`.
pub fn get_hosts(&self) -> &[Host] {
&self.host
}
/// Adds a port to the configuration.
///
/// Multiple ports can be specified by calling this method multiple times. There must either be no ports, in which
/// case the default of 5432 is used, a single port, in which case it is used for all hosts, or the same number of
/// ports as hosts.
pub fn port(&mut self, port: u16) -> &mut Config {
self.port.push(port);
self
}
/// Gets the ports that have been added to the configuration with `port`.
pub fn get_ports(&self) -> &[u16] {
&self.port
}
/// Sets the timeout applied to socket-level connection attempts.
///
/// Note that hostnames can resolve to multiple IP addresses, and this timeout will apply to each address of each
/// host separately. Defaults to no limit.
pub fn connect_timeout(&mut self, connect_timeout: Duration) -> &mut Config {
self.connect_timeout = Some(connect_timeout);
self
}
/// Gets the connection timeout, if one has been set with the
/// `connect_timeout` method.
pub fn get_connect_timeout(&self) -> Option<&Duration> {
self.connect_timeout.as_ref()
}
/// Sets the requirements of the session.
///
/// This can be used to connect to the primary server in a clustered database rather than one of the read-only
/// secondary servers. Defaults to `Any`.
pub fn target_session_attrs(
&mut self,
target_session_attrs: TargetSessionAttrs,
) -> &mut Config {
self.target_session_attrs = target_session_attrs;
self
}
/// Gets the requirements of the session.
pub fn get_target_session_attrs(&self) -> TargetSessionAttrs {
self.target_session_attrs
}
/// Sets the channel binding behavior.
///
/// Defaults to `prefer`.
pub fn channel_binding(&mut self, channel_binding: ChannelBinding) -> &mut Config {
self.channel_binding = channel_binding;
self
}
/// Gets the channel binding behavior.
pub fn get_channel_binding(&self) -> ChannelBinding {
self.channel_binding
}
/// Set replication mode.
pub fn replication_mode(&mut self, replication_mode: ReplicationMode) -> &mut Config {
self.replication_mode = Some(replication_mode);
self
}
/// Get replication mode.
pub fn get_replication_mode(&self) -> Option<ReplicationMode> {
self.replication_mode
}
/// Set limit for backend messages size.
pub fn max_backend_message_size(&mut self, max_backend_message_size: usize) -> &mut Config {
self.max_backend_message_size = Some(max_backend_message_size);
self
}
/// Get limit for backend messages size.
pub fn get_max_backend_message_size(&self) -> Option<usize> {
self.max_backend_message_size
}
fn param(&mut self, key: &str, value: &str) -> Result<(), Error> {
match key {
"user" => {
self.user(value);
}
"password" => {
self.password(value);
}
"dbname" => {
self.dbname(value);
}
"options" => {
self.options(value);
}
"application_name" => {
self.application_name(value);
}
"sslmode" => {
let mode = match value {
"disable" => SslMode::Disable,
"prefer" => SslMode::Prefer,
"require" => SslMode::Require,
_ => return Err(Error::config_parse(Box::new(InvalidValue("sslmode")))),
};
self.ssl_mode(mode);
}
"host" => {
for host in value.split(',') {
self.host(host);
}
}
"port" => {
for port in value.split(',') {
let port = if port.is_empty() {
5432
} else {
port.parse()
.map_err(|_| Error::config_parse(Box::new(InvalidValue("port"))))?
};
self.port(port);
}
}
"connect_timeout" => {
let timeout = value
.parse::<i64>()
.map_err(|_| Error::config_parse(Box::new(InvalidValue("connect_timeout"))))?;
if timeout > 0 {
self.connect_timeout(Duration::from_secs(timeout as u64));
}
}
"target_session_attrs" => {
let target_session_attrs = match value {
"any" => TargetSessionAttrs::Any,
"read-write" => TargetSessionAttrs::ReadWrite,
_ => {
return Err(Error::config_parse(Box::new(InvalidValue(
"target_session_attrs",
))));
}
};
self.target_session_attrs(target_session_attrs);
}
"channel_binding" => {
let channel_binding = match value {
"disable" => ChannelBinding::Disable,
"prefer" => ChannelBinding::Prefer,
"require" => ChannelBinding::Require,
_ => {
return Err(Error::config_parse(Box::new(InvalidValue(
"channel_binding",
))))
}
};
self.channel_binding(channel_binding);
}
"max_backend_message_size" => {
let limit = value.parse::<usize>().map_err(|_| {
Error::config_parse(Box::new(InvalidValue("max_backend_message_size")))
})?;
if limit > 0 {
self.max_backend_message_size(limit);
}
}
key => {
return Err(Error::config_parse(Box::new(UnknownOption(
key.to_string(),
))));
}
}
Ok(())
}
/// Opens a connection to a PostgreSQL database.
pub async fn connect<T>(
&self,
tls: T,
) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>
where
T: MakeTlsConnect<TcpStream>,
{
connect(tls, self).await
}
/// Connects to a PostgreSQL database over an arbitrary stream.
///
/// All of the settings other than `user`, `password`, `dbname`, `options`, and `application_name` are ignored.
pub async fn connect_raw<S, T>(
&self,
stream: S,
tls: T,
) -> Result<(Client, Connection<S, T::Stream>), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: TlsConnect<S>,
{
connect_raw(stream, tls, self).await
}
}
impl FromStr for Config {
type Err = Error;
fn from_str(s: &str) -> Result<Config, Error> {
match UrlParser::parse(s)? {
Some(config) => Ok(config),
None => Parser::parse(s),
}
}
}
// Omit password from debug output
impl fmt::Debug for Config {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
struct Redaction {}
impl fmt::Debug for Redaction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "_")
}
}
f.debug_struct("Config")
.field("user", &self.user)
.field("password", &self.password.as_ref().map(|_| Redaction {}))
.field("dbname", &self.dbname)
.field("options", &self.options)
.field("application_name", &self.application_name)
.field("ssl_mode", &self.ssl_mode)
.field("host", &self.host)
.field("port", &self.port)
.field("connect_timeout", &self.connect_timeout)
.field("target_session_attrs", &self.target_session_attrs)
.field("channel_binding", &self.channel_binding)
.field("replication", &self.replication_mode)
.finish()
}
}
#[derive(Debug)]
struct UnknownOption(String);
impl fmt::Display for UnknownOption {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(fmt, "unknown option `{}`", self.0)
}
}
impl error::Error for UnknownOption {}
#[derive(Debug)]
struct InvalidValue(&'static str);
impl fmt::Display for InvalidValue {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(fmt, "invalid value for option `{}`", self.0)
}
}
impl error::Error for InvalidValue {}
struct Parser<'a> {
s: &'a str,
it: iter::Peekable<str::CharIndices<'a>>,
}
impl<'a> Parser<'a> {
fn parse(s: &'a str) -> Result<Config, Error> {
let mut parser = Parser {
s,
it: s.char_indices().peekable(),
};
let mut config = Config::new();
while let Some((key, value)) = parser.parameter()? {
config.param(key, &value)?;
}
Ok(config)
}
fn skip_ws(&mut self) {
self.take_while(char::is_whitespace);
}
fn take_while<F>(&mut self, f: F) -> &'a str
where
F: Fn(char) -> bool,
{
let start = match self.it.peek() {
Some(&(i, _)) => i,
None => return "",
};
loop {
match self.it.peek() {
Some(&(_, c)) if f(c) => {
self.it.next();
}
Some(&(i, _)) => return &self.s[start..i],
None => return &self.s[start..],
}
}
}
fn eat(&mut self, target: char) -> Result<(), Error> {
match self.it.next() {
Some((_, c)) if c == target => Ok(()),
Some((i, c)) => {
let m = format!(
"unexpected character at byte {}: expected `{}` but got `{}`",
i, target, c
);
Err(Error::config_parse(m.into()))
}
None => Err(Error::config_parse("unexpected EOF".into())),
}
}
fn eat_if(&mut self, target: char) -> bool {
match self.it.peek() {
Some(&(_, c)) if c == target => {
self.it.next();
true
}
_ => false,
}
}
fn keyword(&mut self) -> Option<&'a str> {
let s = self.take_while(|c| match c {
c if c.is_whitespace() => false,
'=' => false,
_ => true,
});
if s.is_empty() {
None
} else {
Some(s)
}
}
fn value(&mut self) -> Result<String, Error> {
let value = if self.eat_if('\'') {
let value = self.quoted_value()?;
self.eat('\'')?;
value
} else {
self.simple_value()?
};
Ok(value)
}
fn simple_value(&mut self) -> Result<String, Error> {
let mut value = String::new();
while let Some(&(_, c)) = self.it.peek() {
if c.is_whitespace() {
break;
}
self.it.next();
if c == '\\' {
if let Some((_, c2)) = self.it.next() {
value.push(c2);
}
} else {
value.push(c);
}
}
if value.is_empty() {
return Err(Error::config_parse("unexpected EOF".into()));
}
Ok(value)
}
fn quoted_value(&mut self) -> Result<String, Error> {
let mut value = String::new();
while let Some(&(_, c)) = self.it.peek() {
if c == '\'' {
return Ok(value);
}
self.it.next();
if c == '\\' {
if let Some((_, c2)) = self.it.next() {
value.push(c2);
}
} else {
value.push(c);
}
}
Err(Error::config_parse(
"unterminated quoted connection parameter value".into(),
))
}
fn parameter(&mut self) -> Result<Option<(&'a str, String)>, Error> {
self.skip_ws();
let keyword = match self.keyword() {
Some(keyword) => keyword,
None => return Ok(None),
};
self.skip_ws();
self.eat('=')?;
self.skip_ws();
let value = self.value()?;
Ok(Some((keyword, value)))
}
}
// This is a pretty sloppy "URL" parser, but it matches the behavior of libpq, where things really aren't very strict
struct UrlParser<'a> {
s: &'a str,
config: Config,
}
impl<'a> UrlParser<'a> {
fn parse(s: &'a str) -> Result<Option<Config>, Error> {
let s = match Self::remove_url_prefix(s) {
Some(s) => s,
None => return Ok(None),
};
let mut parser = UrlParser {
s,
config: Config::new(),
};
parser.parse_credentials()?;
parser.parse_host()?;
parser.parse_path()?;
parser.parse_params()?;
Ok(Some(parser.config))
}
fn remove_url_prefix(s: &str) -> Option<&str> {
for prefix in &["postgres://", "postgresql://"] {
if let Some(stripped) = s.strip_prefix(prefix) {
return Some(stripped);
}
}
None
}
fn take_until(&mut self, end: &[char]) -> Option<&'a str> {
match self.s.find(end) {
Some(pos) => {
let (head, tail) = self.s.split_at(pos);
self.s = tail;
Some(head)
}
None => None,
}
}
fn take_all(&mut self) -> &'a str {
mem::take(&mut self.s)
}
fn eat_byte(&mut self) {
self.s = &self.s[1..];
}
fn parse_credentials(&mut self) -> Result<(), Error> {
let creds = match self.take_until(&['@']) {
Some(creds) => creds,
None => return Ok(()),
};
self.eat_byte();
let mut it = creds.splitn(2, ':');
let user = self.decode(it.next().unwrap())?;
self.config.user(&user);
if let Some(password) = it.next() {
let password = Cow::from(percent_encoding::percent_decode(password.as_bytes()));
self.config.password(password);
}
Ok(())
}
fn parse_host(&mut self) -> Result<(), Error> {
let host = match self.take_until(&['/', '?']) {
Some(host) => host,
None => self.take_all(),
};
if host.is_empty() {
return Ok(());
}
for chunk in host.split(',') {
let (host, port) = if chunk.starts_with('[') {
let idx = match chunk.find(']') {
Some(idx) => idx,
None => return Err(Error::config_parse(InvalidValue("host").into())),
};
let host = &chunk[1..idx];
let remaining = &chunk[idx + 1..];
let port = if let Some(port) = remaining.strip_prefix(':') {
Some(port)
} else if remaining.is_empty() {
None
} else {
return Err(Error::config_parse(InvalidValue("host").into()));
};
(host, port)
} else {
let mut it = chunk.splitn(2, ':');
(it.next().unwrap(), it.next())
};
self.host_param(host)?;
let port = self.decode(port.unwrap_or("5432"))?;
self.config.param("port", &port)?;
}
Ok(())
}
fn parse_path(&mut self) -> Result<(), Error> {
if !self.s.starts_with('/') {
return Ok(());
}
self.eat_byte();
let dbname = match self.take_until(&['?']) {
Some(dbname) => dbname,
None => self.take_all(),
};
if !dbname.is_empty() {
self.config.dbname(&self.decode(dbname)?);
}
Ok(())
}
fn parse_params(&mut self) -> Result<(), Error> {
if !self.s.starts_with('?') {
return Ok(());
}
self.eat_byte();
while !self.s.is_empty() {
let key = match self.take_until(&['=']) {
Some(key) => self.decode(key)?,
None => return Err(Error::config_parse("unterminated parameter".into())),
};
self.eat_byte();
let value = match self.take_until(&['&']) {
Some(value) => {
self.eat_byte();
value
}
None => self.take_all(),
};
if key == "host" {
self.host_param(value)?;
} else {
let value = self.decode(value)?;
self.config.param(&key, &value)?;
}
}
Ok(())
}
fn host_param(&mut self, s: &str) -> Result<(), Error> {
let s = self.decode(s)?;
self.config.param("host", &s)
}
fn decode(&self, s: &'a str) -> Result<Cow<'a, str>, Error> {
percent_encoding::percent_decode(s.as_bytes())
.decode_utf8()
.map_err(|e| Error::config_parse(e.into()))
}
}


@@ -0,0 +1,112 @@
use crate::client::SocketConfig;
use crate::config::{Host, TargetSessionAttrs};
use crate::connect_raw::connect_raw;
use crate::connect_socket::connect_socket;
use crate::tls::{MakeTlsConnect, TlsConnect};
use crate::{Client, Config, Connection, Error, SimpleQueryMessage};
use futures_util::{future, pin_mut, Future, FutureExt, Stream};
use std::io;
use std::task::Poll;
use tokio::net::TcpStream;
pub async fn connect<T>(
mut tls: T,
config: &Config,
) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>
where
T: MakeTlsConnect<TcpStream>,
{
if config.host.is_empty() {
return Err(Error::config("host missing".into()));
}
if config.port.len() > 1 && config.port.len() != config.host.len() {
return Err(Error::config("invalid number of ports".into()));
}
let mut error = None;
for (i, host) in config.host.iter().enumerate() {
let port = config
.port
.get(i)
.or_else(|| config.port.first())
.copied()
.unwrap_or(5432);
let hostname = match host {
Host::Tcp(host) => host.as_str(),
};
let tls = tls
.make_tls_connect(hostname)
.map_err(|e| Error::tls(e.into()))?;
match connect_once(host, port, tls, config).await {
Ok((client, connection)) => return Ok((client, connection)),
Err(e) => error = Some(e),
}
}
Err(error.unwrap())
}
async fn connect_once<T>(
host: &Host,
port: u16,
tls: T,
config: &Config,
) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>
where
T: TlsConnect<TcpStream>,
{
let socket = connect_socket(host, port, config.connect_timeout).await?;
let (mut client, mut connection) = connect_raw(socket, tls, config).await?;
if let TargetSessionAttrs::ReadWrite = config.target_session_attrs {
let rows = client.simple_query_raw("SHOW transaction_read_only");
pin_mut!(rows);
let rows = future::poll_fn(|cx| {
if connection.poll_unpin(cx)?.is_ready() {
return Poll::Ready(Err(Error::closed()));
}
rows.as_mut().poll(cx)
})
.await?;
pin_mut!(rows);
loop {
let next = future::poll_fn(|cx| {
if connection.poll_unpin(cx)?.is_ready() {
return Poll::Ready(Some(Err(Error::closed())));
}
rows.as_mut().poll_next(cx)
});
match next.await.transpose()? {
Some(SimpleQueryMessage::Row(row)) => {
if row.try_get(0)? == Some("on") {
return Err(Error::connect(io::Error::new(
io::ErrorKind::PermissionDenied,
"database does not allow writes",
)));
} else {
break;
}
}
Some(_) => {}
None => return Err(Error::unexpected_message()),
}
}
}
client.set_socket_config(SocketConfig {
host: host.clone(),
port,
connect_timeout: config.connect_timeout,
});
Ok((client, connection))
}


@@ -0,0 +1,359 @@
use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec};
use crate::config::{self, AuthKeys, Config, ReplicationMode};
use crate::connect_tls::connect_tls;
use crate::maybe_tls_stream::MaybeTlsStream;
use crate::tls::{TlsConnect, TlsStream};
use crate::{Client, Connection, Error};
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{ready, Sink, SinkExt, Stream, TryStreamExt};
use postgres_protocol2::authentication;
use postgres_protocol2::authentication::sasl;
use postgres_protocol2::authentication::sasl::ScramSha256;
use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message};
use postgres_protocol2::message::frontend;
use std::collections::{HashMap, VecDeque};
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::sync::mpsc;
use tokio_util::codec::Framed;
pub struct StartupStream<S, T> {
inner: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
buf: BackendMessages,
delayed: VecDeque<BackendMessage>,
}
impl<S, T> Sink<FrontendMessage> for StartupStream<S, T>
where
S: AsyncRead + AsyncWrite + Unpin,
T: AsyncRead + AsyncWrite + Unpin,
{
type Error = io::Error;
fn poll_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
Pin::new(&mut self.inner).poll_ready(cx)
}
fn start_send(mut self: Pin<&mut Self>, item: FrontendMessage) -> io::Result<()> {
Pin::new(&mut self.inner).start_send(item)
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
Pin::new(&mut self.inner).poll_flush(cx)
}
fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
Pin::new(&mut self.inner).poll_close(cx)
}
}
impl<S, T> Stream for StartupStream<S, T>
where
S: AsyncRead + AsyncWrite + Unpin,
T: AsyncRead + AsyncWrite + Unpin,
{
type Item = io::Result<Message>;
fn poll_next(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<io::Result<Message>>> {
loop {
match self.buf.next() {
Ok(Some(message)) => return Poll::Ready(Some(Ok(message))),
Ok(None) => {}
Err(e) => return Poll::Ready(Some(Err(e))),
}
match ready!(Pin::new(&mut self.inner).poll_next(cx)) {
Some(Ok(BackendMessage::Normal { messages, .. })) => self.buf = messages,
Some(Ok(BackendMessage::Async(message))) => return Poll::Ready(Some(Ok(message))),
Some(Err(e)) => return Poll::Ready(Some(Err(e))),
None => return Poll::Ready(None),
}
}
}
}
pub async fn connect_raw<S, T>(
stream: S,
tls: T,
config: &Config,
) -> Result<(Client, Connection<S, T::Stream>), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: TlsConnect<S>,
{
let stream = connect_tls(stream, config.ssl_mode, tls).await?;
let mut stream = StartupStream {
inner: Framed::new(
stream,
PostgresCodec {
max_message_size: config.max_backend_message_size,
},
),
buf: BackendMessages::empty(),
delayed: VecDeque::new(),
};
startup(&mut stream, config).await?;
authenticate(&mut stream, config).await?;
let (process_id, secret_key, parameters) = read_info(&mut stream).await?;
let (sender, receiver) = mpsc::unbounded_channel();
let client = Client::new(sender, config.ssl_mode, process_id, secret_key);
let connection = Connection::new(stream.inner, stream.delayed, parameters, receiver);
Ok((client, connection))
}
async fn startup<S, T>(stream: &mut StartupStream<S, T>, config: &Config) -> Result<(), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: AsyncRead + AsyncWrite + Unpin,
{
let mut params = vec![("client_encoding", "UTF8")];
if let Some(user) = &config.user {
params.push(("user", &**user));
}
if let Some(dbname) = &config.dbname {
params.push(("database", &**dbname));
}
if let Some(options) = &config.options {
params.push(("options", &**options));
}
if let Some(application_name) = &config.application_name {
params.push(("application_name", &**application_name));
}
if let Some(replication_mode) = &config.replication_mode {
match replication_mode {
ReplicationMode::Physical => params.push(("replication", "true")),
ReplicationMode::Logical => params.push(("replication", "database")),
}
}
let mut buf = BytesMut::new();
frontend::startup_message(params, &mut buf).map_err(Error::encode)?;
stream
.send(FrontendMessage::Raw(buf.freeze()))
.await
.map_err(Error::io)
}
async fn authenticate<S, T>(stream: &mut StartupStream<S, T>, config: &Config) -> Result<(), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: TlsStream + Unpin,
{
match stream.try_next().await.map_err(Error::io)? {
Some(Message::AuthenticationOk) => {
can_skip_channel_binding(config)?;
return Ok(());
}
Some(Message::AuthenticationCleartextPassword) => {
can_skip_channel_binding(config)?;
let pass = config
.password
.as_ref()
.ok_or_else(|| Error::config("password missing".into()))?;
authenticate_password(stream, pass).await?;
}
Some(Message::AuthenticationMd5Password(body)) => {
can_skip_channel_binding(config)?;
let user = config
.user
.as_ref()
.ok_or_else(|| Error::config("user missing".into()))?;
let pass = config
.password
.as_ref()
.ok_or_else(|| Error::config("password missing".into()))?;
let output = authentication::md5_hash(user.as_bytes(), pass, body.salt());
authenticate_password(stream, output.as_bytes()).await?;
}
Some(Message::AuthenticationSasl(body)) => {
authenticate_sasl(stream, body, config).await?;
}
Some(Message::AuthenticationKerberosV5)
| Some(Message::AuthenticationScmCredential)
| Some(Message::AuthenticationGss)
| Some(Message::AuthenticationSspi) => {
return Err(Error::authentication(
"unsupported authentication method".into(),
))
}
Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
Some(_) => return Err(Error::unexpected_message()),
None => return Err(Error::closed()),
}
match stream.try_next().await.map_err(Error::io)? {
Some(Message::AuthenticationOk) => Ok(()),
Some(Message::ErrorResponse(body)) => Err(Error::db(body)),
Some(_) => Err(Error::unexpected_message()),
None => Err(Error::closed()),
}
}
fn can_skip_channel_binding(config: &Config) -> Result<(), Error> {
match config.channel_binding {
config::ChannelBinding::Disable | config::ChannelBinding::Prefer => Ok(()),
config::ChannelBinding::Require => Err(Error::authentication(
"server did not use channel binding".into(),
)),
}
}
async fn authenticate_password<S, T>(
stream: &mut StartupStream<S, T>,
password: &[u8],
) -> Result<(), Error>
where
S: AsyncRead + AsyncWrite + Unpin,
T: AsyncRead + AsyncWrite + Unpin,
{
let mut buf = BytesMut::new();
frontend::password_message(password, &mut buf).map_err(Error::encode)?;
stream
.send(FrontendMessage::Raw(buf.freeze()))
.await
.map_err(Error::io)
}

async fn authenticate_sasl<S, T>(
    stream: &mut StartupStream<S, T>,
    body: AuthenticationSaslBody,
    config: &Config,
) -> Result<(), Error>
where
    S: AsyncRead + AsyncWrite + Unpin,
    T: TlsStream + Unpin,
{
    let mut has_scram = false;
    let mut has_scram_plus = false;
    let mut mechanisms = body.mechanisms();
    while let Some(mechanism) = mechanisms.next().map_err(Error::parse)? {
        match mechanism {
            sasl::SCRAM_SHA_256 => has_scram = true,
            sasl::SCRAM_SHA_256_PLUS => has_scram_plus = true,
            _ => {}
        }
    }

    let channel_binding = stream
        .inner
        .get_ref()
        .channel_binding()
        .tls_server_end_point
        .filter(|_| config.channel_binding != config::ChannelBinding::Disable)
        .map(sasl::ChannelBinding::tls_server_end_point);

    let (channel_binding, mechanism) = if has_scram_plus {
        match channel_binding {
            Some(channel_binding) => (channel_binding, sasl::SCRAM_SHA_256_PLUS),
            None => (sasl::ChannelBinding::unsupported(), sasl::SCRAM_SHA_256),
        }
    } else if has_scram {
        match channel_binding {
            Some(_) => (sasl::ChannelBinding::unrequested(), sasl::SCRAM_SHA_256),
            None => (sasl::ChannelBinding::unsupported(), sasl::SCRAM_SHA_256),
        }
    } else {
        return Err(Error::authentication("unsupported SASL mechanism".into()));
    };

    if mechanism != sasl::SCRAM_SHA_256_PLUS {
        can_skip_channel_binding(config)?;
    }

    let mut scram = if let Some(AuthKeys::ScramSha256(keys)) = config.get_auth_keys() {
        ScramSha256::new_with_keys(keys, channel_binding)
    } else if let Some(password) = config.get_password() {
        ScramSha256::new(password, channel_binding)
    } else {
        return Err(Error::config("password or auth keys missing".into()));
    };

    let mut buf = BytesMut::new();
    frontend::sasl_initial_response(mechanism, scram.message(), &mut buf).map_err(Error::encode)?;
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)?;

    let body = match stream.try_next().await.map_err(Error::io)? {
        Some(Message::AuthenticationSaslContinue(body)) => body,
        Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
        Some(_) => return Err(Error::unexpected_message()),
        None => return Err(Error::closed()),
    };

    scram
        .update(body.data())
        .await
        .map_err(|e| Error::authentication(e.into()))?;

    let mut buf = BytesMut::new();
    frontend::sasl_response(scram.message(), &mut buf).map_err(Error::encode)?;
    stream
        .send(FrontendMessage::Raw(buf.freeze()))
        .await
        .map_err(Error::io)?;

    let body = match stream.try_next().await.map_err(Error::io)? {
        Some(Message::AuthenticationSaslFinal(body)) => body,
        Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
        Some(_) => return Err(Error::unexpected_message()),
        None => return Err(Error::closed()),
    };

    scram
        .finish(body.data())
        .map_err(|e| Error::authentication(e.into()))?;

    Ok(())
}
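
For orientation, the mechanism selection in authenticate_sasl above reduces to a small rule: use SCRAM-SHA-256-PLUS only when the server advertises it and the TLS layer exposed a tls-server-end-point binding (with channel binding not disabled in the config); otherwise fall back to SCRAM-SHA-256 if offered, and fail if neither is. A minimal, self-contained sketch of that rule; pick_mechanism and its string return values are illustrative only and not part of this crate's API:

// Illustrative sketch mirroring the branch structure of authenticate_sasl.
fn pick_mechanism(has_scram: bool, has_scram_plus: bool, has_tls_binding: bool) -> Option<&'static str> {
    if has_scram_plus && has_tls_binding {
        Some("SCRAM-SHA-256-PLUS")
    } else if has_scram_plus || has_scram {
        // The server did not offer -PLUS, or no usable channel-binding data exists.
        Some("SCRAM-SHA-256")
    } else {
        // authenticate_sasl returns an "unsupported SASL mechanism" error here.
        None
    }
}

fn main() {
    assert_eq!(pick_mechanism(true, true, true), Some("SCRAM-SHA-256-PLUS"));
    assert_eq!(pick_mechanism(true, true, false), Some("SCRAM-SHA-256"));
    assert_eq!(pick_mechanism(false, false, true), None);
}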

async fn read_info<S, T>(
    stream: &mut StartupStream<S, T>,
) -> Result<(i32, i32, HashMap<String, String>), Error>
where
    S: AsyncRead + AsyncWrite + Unpin,
    T: AsyncRead + AsyncWrite + Unpin,
{
    let mut process_id = 0;
    let mut secret_key = 0;
    let mut parameters = HashMap::new();

    loop {
        match stream.try_next().await.map_err(Error::io)? {
            Some(Message::BackendKeyData(body)) => {
                process_id = body.process_id();
                secret_key = body.secret_key();
            }
            Some(Message::ParameterStatus(body)) => {
                parameters.insert(
                    body.name().map_err(Error::parse)?.to_string(),
                    body.value().map_err(Error::parse)?.to_string(),
                );
            }
            Some(msg @ Message::NoticeResponse(_)) => {
                stream.delayed.push_back(BackendMessage::Async(msg))
            }
            Some(Message::ReadyForQuery(_)) => return Ok((process_id, secret_key, parameters)),
            Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
            Some(_) => return Err(Error::unexpected_message()),
            None => return Err(Error::closed()),
        }
    }
}

@@ -0,0 +1,65 @@
use crate::config::Host;
use crate::Error;
use std::future::Future;
use std::io;
use std::time::Duration;
use tokio::net::{self, TcpStream};
use tokio::time;

pub(crate) async fn connect_socket(
    host: &Host,
    port: u16,
    connect_timeout: Option<Duration>,
) -> Result<TcpStream, Error> {
    match host {
        Host::Tcp(host) => {
            let addrs = net::lookup_host((&**host, port))
                .await
                .map_err(Error::connect)?;

            let mut last_err = None;

            for addr in addrs {
                let stream =
                    match connect_with_timeout(TcpStream::connect(addr), connect_timeout).await {
                        Ok(stream) => stream,
                        Err(e) => {
                            last_err = Some(e);
                            continue;
                        }
                    };

                stream.set_nodelay(true).map_err(Error::connect)?;

                return Ok(stream);
            }

            Err(last_err.unwrap_or_else(|| {
                Error::connect(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "could not resolve any addresses",
                ))
            }))
        }
    }
}

async fn connect_with_timeout<F, T>(connect: F, timeout: Option<Duration>) -> Result<T, Error>
where
    F: Future<Output = io::Result<T>>,
{
    match timeout {
        Some(timeout) => match time::timeout(timeout, connect).await {
            Ok(Ok(socket)) => Ok(socket),
            Ok(Err(e)) => Err(Error::connect(e)),
            Err(_) => Err(Error::connect(io::Error::new(
                io::ErrorKind::TimedOut,
                "connection timed out",
            ))),
        },
        None => match connect.await {
            Ok(socket) => Ok(socket),
            Err(e) => Err(Error::connect(e)),
        },
    }
}
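
For context, a hedged sketch of how code inside this crate might call connect_socket with a timeout; the host name, port, and 5-second timeout below are illustrative assumptions, not values taken from this diff:

// Illustrative sketch only: assumes the crate-internal connect_socket, Host,
// and Error items defined above are in scope.
use std::time::Duration;

async fn open_raw_socket() -> Result<tokio::net::TcpStream, Error> {
    // Hypothetical host and port; lookup_host may yield several addresses and
    // connect_socket returns the first one that accepts a TCP connection.
    let host = Host::Tcp("db.example.invalid".to_string());
    let stream = connect_socket(&host, 5432, Some(Duration::from_secs(5))).await?;
    // TCP_NODELAY has already been set on the returned stream.
    Ok(stream)
}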

Some files were not shown because too many files have changed in this diff.