Compare commits

..

171 Commits

Author SHA1 Message Date
Christian Schwarz
c4f03a225c latency recorder sketches 2025-01-23 08:51:24 +01:00
Christian Schwarz
214ce815bc sketch cancellation 2025-01-22 19:19:42 +01:00
Christian Schwarz
f5b2eee23d prototype IoConcurrency propagation 2025-01-22 19:13:34 +01:00
Christian Schwarz
4b2a91cb5a sketch propagation through request context 2025-01-22 19:13:34 +01:00
Christian Schwarz
a6660a2883 without request context 2025-01-22 18:32:02 +01:00
Christian Schwarz
b0b9206908 Merge remote-tracking branch 'origin/main' into vlad/read-path-concurrent-io
Conflicts:
	pageserver/src/tenant/timeline.rs
	test_runner/fixtures/neon_fixtures.py
2025-01-22 14:31:12 +01:00
Christian Schwarz
4298e77f7a run unit tests in both modes 2025-01-22 14:30:01 +01:00
Christian Schwarz
dbb88cc59e test_get_vectored: don't parametrize inside the test, instead, use spawn_for_test like we do in all the other tests
This is a remnant from the early times of this PR.
2025-01-22 12:55:16 +01:00
Christian Schwarz
c5af3c576e the previous patch didn't cover test_version_mismatch; this one is far more universal 2025-01-22 01:03:34 +01:00
Christian Schwarz
3526d9aad3 pass forward compatibility 2025-01-22 00:40:19 +01:00
Christian Schwarz
a501095c5a fixup(commit b2dbc47b31 initial logical size calculation wasn't polled to completion; fix that, to make tests pass)
(requires previous commit)
2025-01-21 23:22:31 +01:00
Christian Schwarz
728052bd2e pausable_failpoint: add ability to provide a cancel flag, similar to what we have for sleep 2025-01-21 23:16:53 +01:00
Christian Schwarz
361210f8dc actually enable concurrent IO (and batching!) by default in test suite (still need to figure out how to not make compat test break)
https://neondb.slack.com/archives/C059ZC138NR/p1737490501941309
2025-01-21 21:27:21 +01:00
Christian Schwarz
925dd17fb8 Revert "debug why CI tests don't run with sidecar-task"
This reverts commit a528b325ee.
2025-01-21 20:51:48 +01:00
Christian Schwarz
fe615520dd remove the timing histograms for traversal and walredo, since their meaning and utility is dubious with concurrent IO; https://github.com/neondatabase/neon/pull/9353#discussion_r1924181713
The issue is that get_vectored_reconstruct_data latency means something
very different now with concurrent IO than what it did before, because
all the time we spend on the data blocks is no longer part of the
get_vectored_reconstruct_data().await wall clock time

GET_RECONSTRUCT_DATA_TIME : all the 3 dashboards that use it  are in my /personal/christian folder. I guess I'm free to break them 😄
https://github.com/search?q=repo%3Aneondatabase%2Fgrafana-dashboard-export%20pageserver_getpage_get_reconstruct_data_seconds&type=code

RECONSTRUCT_TIME
Used in a couple of dashboards I think nobody uses
- Timeline Inspector
- Sharding WAL streaming
- Pageserver
- walredo time throaway

Vlad agrees with removing them for now.
Maybe in the future we'll add some back

pageserver_getpage_get_reconstruct_data_seconds -> pageserver_getpage_io_plan_seconds
pageserver_getpage_reconstruct_data_seconds -> pageserver_getpage_io_execute_seconds
2025-01-21 20:32:15 +01:00
Christian Schwarz
d6cdf1b13f debug assertion on correct record order: https://github.com/neondatabase/neon/pull/9353#discussion_r1923801121 2025-01-21 20:21:35 +01:00
Christian Schwarz
b2dbc47b31 initial logical size calculation wasn't polled to completion; fix that, to make tests pass
(see prev commit for stack trace)

CI test failures

https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9353/12892883355/index.html#suites/a1c2be32556270764423c495fad75d47/92cacda354b63fd7/
2025-01-21 20:09:39 +01:00
Christian Schwarz
a69dcadba4 test failure
christian@neon-hetzner-dev-christian:[~/src/neon-work-1]: NEON_PAGESERVER_USE_ONE_RUNTIME=current_thread DEFAULT_PG_VERSION=14 BUILD_TYPE=release poetry run pytest -k 'test_ancestor_detach_branched_from[release-pg14-False-True-after]'

2025-01-21T18:42:38.794431Z  WARN initial_size_calculation{tenant_id=cb106e50ddedc20995b0b1bb065ebcd9 shard_id=0000 timeline_id=e362ff10e7c4e116baee457de5c766d9}:logical_size_calculation_task: dropping ValuesReconstructState while some IOs have not been completed num_active_ios=1 sidecar_task_id=None backtrace=   0: <pageserver::tenant::storage_layer::ValuesReconstructState as core::ops::drop::Drop>::drop
             at /home/christian/src/neon-work-1/pageserver/src/tenant/storage_layer.rs:553:24
   1: core::ptr::drop_in_place<pageserver::tenant::storage_layer::ValuesReconstructState>
             at /home/christian/.rustup/toolchains/1.84.0-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:521:1
   2: core::ptr::drop_in_place<pageserver::tenant::timeline::Timeline::get::{{closure}}>
             at /home/christian/src/neon-work-1/pageserver/src/tenant/timeline.rs:1042:5
   3: core::ptr::drop_in_place<pageserver::pgdatadir_mapping::<impl pageserver::tenant::timeline::Timeline>::get_current_logical_size_non_incremental::{{closure}}>
             at /home/christian/src/neon-work-1/pageserver/src/pgdatadir_mapping.rs:1001:67
   4: core::ptr::drop_in_place<pageserver::tenant::timeline::Timeline::calculate_logical_size::{{closure}}>
             at /home/christian/src/neon-work-1/pageserver/src/tenant/timeline.rs:3100:18
   5: core::ptr::drop_in_place<pageserver::tenant::timeline::Timeline::logical_size_calculation_task::{{closure}}::{{closure}}::{{closure}}>
             at /home/christian/src/neon-work-1/pageserver/src/tenant/timeline.rs:3050:22
   6: core::ptr::drop_in_place<pageserver::tenant::timeline::Timeline::logical_size_calculation_task::{{closure}}::{{closure}}>
             at /home/christian/src/neon-work-1/pageserver/src/tenant/timeline.rs:3060:5
2025-01-21 19:43:25 +01:00
Christian Schwarz
c4e0ba38b8 drain spawned IOs if traversal fails; https://github.com/neondatabase/neon/pull/9353/files#r1923589380 2025-01-21 19:33:13 +01:00
Christian Schwarz
14e4fcdb2a delta_layer: remove ignore_key_with_err optimization; https://github.com/neondatabase/neon/pull/9353#discussion_r1920573294
it would cause an assertion failure because we wouldn't be consuming all IOs
2025-01-21 19:11:25 +01:00
Christian Schwarz
1862fdf9e2 clean up doc comment 2025-01-21 19:01:22 +01:00
Christian Schwarz
24e0a3f941 undo the WIP benchmarks, will clean those up and commit in a future PR 2025-01-21 19:00:20 +01:00
Christian Schwarz
a528b325ee debug why CI tests don't run with sidecar-task 2025-01-21 18:53:44 +01:00
Christian Schwarz
a3c756334b lift noisereporting to ValuesReconstructData::drop, it's actually better there
For all high-rooted long-lived IoConcurrency's, IoConcurrency::drop will
never run.

What we actually care about is that we leave no dangling IOs after
get_vectored_impl, which lives much shorter than a high-rooted
IoConcurrency.

However, lifetime of `ValuesReconstructData` is generally == lifetime of
get_vectored_impl.
2025-01-21 18:52:15 +01:00
Christian Schwarz
93625344eb refactor: wrap the oneshot's into a properly named abstraction (OnDiskValueIo, etc) 2025-01-21 17:45:36 +01:00
Christian Schwarz
7ca9112ec1 fix noise 2025-01-21 17:08:03 +01:00
Christian Schwarz
4e72b22b41 make noise from IoConcurrency::drop instead of the task, for more context 2025-01-21 14:51:54 +01:00
Christian Schwarz
4014c390e2 initial logical size calculation can also reasonable use the sidecar because it's concurrency-limited 2025-01-21 12:50:33 +01:00
Christian Schwarz
bca4263eb8 inspect_image_layer can also have an IoConcurrency root, it's tests only 2025-01-21 12:40:22 +01:00
Christian Schwarz
a958febd7a reference issue that will remote hard-coded sequential() 2025-01-21 12:36:23 +01:00
Christian Schwarz
fc27da43ff one more test can do without it 2025-01-21 12:25:30 +01:00
Christian Schwarz
cf2f0c27aa IoConcurrency roots for scan() an tests 2025-01-21 12:21:46 +01:00
Christian Schwarz
f54c5d5596 turns out create_image_layers is easy 2025-01-21 10:47:49 +01:00
Christian Schwarz
ce5452d2e5 followup 0a37164c29: also rename IoConcurrency::serial() 2025-01-21 00:47:37 +01:00
Christian Schwarz
af6c9ffac7 Ok, I now understand why it deadlocked in mode=sidecar-task
The reason is that even in mode=`sidecar-task`, there
are a bunch of places that are serial. Those places obviously deadlock.
2025-01-21 00:41:45 +01:00
Christian Schwarz
081ff26519 fixup 40ab9c2c5e: deadlock
Reproduced by

test_runner/regress/test_branching.py::test_branching_with_pgbench[debug-pg16-flat-1-10]'

It kinda makes sense that this deadlocks in `sequential` mode.

However, it also deadlocks in `sidecar-task` mode.
I don't understand why.
2025-01-20 23:46:56 +01:00
Christian Schwarz
0a37164c29 replace env var with config variable; add test suite fixture env var to override default 2025-01-20 23:46:56 +01:00
Christian Schwarz
0eff09e35f Merge remote-tracking branch 'origin/main' into vlad/read-path-concurrent-io 2025-01-20 19:47:03 +01:00
Christian Schwarz
cdad6b2de5 we don't need the CancellationToken, the ios_rx.recv() will fail at the same time 2025-01-20 19:13:40 +01:00
Christian Schwarz
82d20b52b3 make noise when dropping an IoConcurrency with unfinished requests 2025-01-20 19:12:00 +01:00
Christian Schwarz
3b1328423e basebackup: fetch all SLRUs of one basebackup using the same IoConcurrency 2025-01-20 16:58:14 +01:00
Christian Schwarz
2eb235e923 doc string explaining why we're deadlock free right now and why it's so brittle 2025-01-17 18:33:34 +01:00
Christian Schwarz
40ab9c2c5e we can avoid adding the Arc<Mutex<>> around EphemeralLayer if we instead extend the lifetime of the InMemoryLayer for the spawned IO future; plus it's semantically more similar to what we now do for Delta and Image layers 2025-01-17 18:16:17 +01:00
Christian Schwarz
c43400389f delta & image layer spawned IOs: keep layer resident until IO is done 2025-01-17 18:00:13 +01:00
Christian Schwarz
65932512c1 run tests with futures-unordered 2025-01-16 20:03:01 +01:00
Christian Schwarz
1866f261e0 make mypy pass 2025-01-16 20:01:42 +01:00
Christian Schwarz
7c662b771a Merge branch 'problame/hung-shutdown/fix' into vlad/read-path-concurrent-io 2025-01-16 19:22:38 +01:00
Christian Schwarz
8f40bd4eb3 there is no Error Fe message -,- 2025-01-16 19:21:44 +01:00
Christian Schwarz
d2f8342080 Merge branch 'problame/hung-shutdown/fix' into vlad/read-path-concurrent-io 2025-01-16 18:16:36 +01:00
Christian Schwarz
92e4dd7ffa script: template NEON_REPO_DIR 2025-01-16 18:14:34 +01:00
Christian Schwarz
0c3ab9c494 move test message tag to 99 and represent Fe message tag as enum, like we do for Be message 2025-01-16 18:07:56 +01:00
Christian Schwarz
c19a16792a address nit ; https://github.com/neondatabase/neon/pull/10386#discussion_r1918782034 2025-01-16 17:54:14 +01:00
Christian Schwarz
cf75eb7d86 Revert "hacky experiment: what if we had more walredo procs => doesn't move the needle on throughput"
This reverts commit 9fffe6e60d.
2025-01-16 16:46:49 +01:00
Christian Schwarz
6ededa17e2 Revert "experiment: buffered socket with 128k buffer size; not super needle-moving"
This reverts commit 7e13e5fc4a.
2025-01-16 16:42:10 +01:00
Christian Schwarz
7e13e5fc4a experiment: buffered socket with 128k buffer size; not super needle-moving 2025-01-16 16:42:01 +01:00
Christian Schwarz
45358bcb65 in the deepl_layers_with_delta script, make the stack height an argument 2025-01-16 16:41:15 +01:00
Christian Schwarz
9fffe6e60d hacky experiment: what if we had more walredo procs => doesn't move the needle on throughput 2025-01-16 13:58:23 +01:00
Christian Schwarz
2ff0a4ae82 extract the l0stack generator into a reusable python module 2025-01-16 13:24:34 +01:00
Christian Schwarz
66c0df8109 doc comment on BatchedFeMessage explaining WeakHandle; https://github.com/neondatabase/neon/pull/10386#discussion_r1916968951 2025-01-15 21:50:00 +01:00
Christian Schwarz
9fe77c527f inline get_impl; https://github.com/neondatabase/neon/pull/10386#discussion_r1916939623 2025-01-15 21:47:39 +01:00
Christian Schwarz
7fb4595c7e fix: WeakHandle was holding on to the Timeline allocation
This made test_timeline_deletion_with_files_stuck_in_upload_queue fail
because the RemoteTimelineClient was being kept alive.

The fix is to stop keeping the timeline alive from WeakHandle.
2025-01-15 21:46:37 +01:00
Christian Schwarz
350dc251df test case demonstrates the issue: we hod Timeline object alive
--- STDERR:              pageserver tenant::timeline::handle::tests::test_weak_handles ---
thread 'tenant::timeline::handle::tests::test_weak_handles' panicked at pageserver/src/tenant/timeline/handle.rs:1131:9:
assertion `left == right` failed
  left: 3
 right: 2
2025-01-15 21:46:30 +01:00
Christian Schwarz
5b77a6d3ce address clippy 2025-01-15 19:38:21 +01:00
Christian Schwarz
8c5005ff59 rename IoConcurrency::{todo=>serial} and remove deprecation warning 2025-01-15 19:38:05 +01:00
Christian Schwarz
f8218ac5fc Revert "investigation: add log_if_slow => shows that the io_futures are slow"
This reverts commit e81fa7137e.
2025-01-15 19:34:37 +01:00
Christian Schwarz
40470c66cd remove opportunistic poll, it seems slightly beneficial for perf
esp before I remembered to configure pipelining, the unpipelined
configuration achieved ~10% higher tput.

In any way, makes sense to not do the opportunisitc polling because
it registers the wrong waker.
2025-01-15 19:34:05 +01:00
Christian Schwarz
9b9479881a extend script with instructions to configure batching 2025-01-15 19:30:15 +01:00
Christian Schwarz
af11b201bd now the issue is no longer reproducible, maybe it was the barriers? 2025-01-15 19:10:45 +01:00
Christian Schwarz
8fafff37c5 remove the whole barriers business 2025-01-15 19:00:00 +01:00
Christian Schwarz
e81fa7137e investigation: add log_if_slow => shows that the io_futures are slow 2025-01-15 18:56:07 +01:00
Christian Schwarz
e60738f029 it's reproducible before the merge, so, continuing to investigate and fix here 2025-01-15 18:43:01 +01:00
Christian Schwarz
f75b07a160 I find that if I ever go beyond queue-depth=4, something in the pageserver locks up. 2025-01-15 18:31:40 +01:00
Christian Schwarz
a5524fcf4d add comment to use queue-depthed pagebench to the script 2025-01-15 18:31:29 +01:00
Christian Schwarz
351da2349e Merge branch 'problame/hung-shutdown/fix' into vlad/read-path-concurrent-io 2025-01-15 17:09:02 +01:00
Christian Schwarz
c545d227b9 review doc comment 2025-01-15 16:24:39 +01:00
Christian Schwarz
a4fc6a92c9 fix cargo doc 2025-01-15 16:10:04 +01:00
Christian Schwarz
2205736262 doc comment & one fixup 2025-01-15 14:27:08 +01:00
Christian Schwarz
5f9ddbae2f Merge branch 'problame/hung-shutdown/demo-hypothesis' into problame/hung-shutdown/fix 2025-01-15 00:25:11 +01:00
Christian Schwarz
173f18832c fixup 2025-01-15 00:24:59 +01:00
Christian Schwarz
23bd5833e1 Merge branch 'problame/hung-shutdown/demo-hypothesis' into problame/hung-shutdown/fix 2025-01-15 00:21:54 +01:00
Christian Schwarz
dedd524d7e refinements 2025-01-15 00:21:28 +01:00
Christian Schwarz
0340f00228 post-merge fix the handling of the new pagestream Test message, so that the regression test now passes
non-package-mode-py3.10christian@neon-hetzner-dev-christian:[~/src/neon]: BUILD_TYPE=debug DEFAULT_PG_VERSION=16 poetry run pytest ./test_runner/regress/test_page_service_batching_regressions.py --timeout=0 --pdb
2025-01-14 23:56:35 +01:00
Christian Schwarz
366ff9ffcc Merge branch 'problame/hung-shutdown/demo-hypothesis' into problame/hung-shutdown/fix 2025-01-14 23:51:53 +01:00
Christian Schwarz
a8f9b564be fix cd pageserver && cargo clippy --features testing build 2025-01-14 23:50:22 +01:00
Christian Schwarz
5450e54dab bump ci 2025-01-14 22:47:16 +01:00
Christian Schwarz
53b05c4ba0 cleanups to make CI pass (well, fail because the bug isn't fixed yet) 2025-01-14 22:45:09 +01:00
Christian Schwarz
1f7d173235 Merge remote-tracking branch 'origin/main' into problame/hung-shutdown/demo-hypothesis 2025-01-14 22:33:20 +01:00
Christian Schwarz
8454e19a0f address warnings and such 2025-01-14 22:28:08 +01:00
Christian Schwarz
45e08d0aa5 it repros 2025-01-14 22:16:27 +01:00
Christian Schwarz
9a02bc0cfd try to repro root cause hypothesis for https://github.com/neondatabase/neon/issues/10309
This approach here doesn't work because it slows down all the responses.

the workload() thread gets stuck in auth, prob with zero pipeline depth

  0x00007fa28fe48e63 in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
  (gdb) bt
  #0  0x00007fa28fe48e63 in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
  #1  0x0000561a285bf44e in WaitEventSetWaitBlock (set=0x561a292723e8, cur_timeout=9999, occurred_events=0x7ffd1f11c970, nevents=1)
      at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/ipc/latch.c:1535
  #2  0x0000561a285bf338 in WaitEventSetWait (set=0x561a292723e8, timeout=9999, occurred_events=0x7ffd1f11c970, nevents=1, wait_event_info=117440512)
      at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/ipc/latch.c:1481
  #3  0x00007fa2904a7345 in call_PQgetCopyData (shard_no=0, buffer=0x7ffd1f11cad0) at /home/christian/src/neon//pgxn/neon/libpagestore.c:703
  #4  0x00007fa2904a7aec in pageserver_receive (shard_no=0) at /home/christian/src/neon//pgxn/neon/libpagestore.c:899
  #5  0x00007fa2904af471 in prefetch_read (slot=0x561a292863b0) at /home/christian/src/neon//pgxn/neon/pagestore_smgr.c:644
  #6  0x00007fa2904af26b in prefetch_wait_for (ring_index=0) at /home/christian/src/neon//pgxn/neon/pagestore_smgr.c:596
  #7  0x00007fa2904b489d in neon_read_at_lsnv (rinfo=..., forkNum=MAIN_FORKNUM, base_blockno=0, request_lsns=0x7ffd1f11cd60, buffers=0x7ffd1f11cd30, nblocks=1, mask=0x0)
      at /home/christian/src/neon//pgxn/neon/pagestore_smgr.c:3024
  #8  0x00007fa2904b4f34 in neon_read_at_lsn (rinfo=..., forkNum=MAIN_FORKNUM, blkno=0, request_lsns=..., buffer=0x7fa28b969000) at /home/christian/src/neon//pgxn/neon/pagestore_smgr.c:3104
  #9  0x00007fa2904b515d in neon_read (reln=0x561a292ef448, forkNum=MAIN_FORKNUM, blkno=0, buffer=0x7fa28b969000) at /home/christian/src/neon//pgxn/neon/pagestore_smgr.c:3146
  #10 0x0000561a285f1ed5 in smgrread (reln=0x561a292ef448, forknum=MAIN_FORKNUM, blocknum=0, buffer=0x7fa28b969000) at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/smgr/smgr.c:567
  #11 0x0000561a285a528a in ReadBuffer_common (smgr=0x561a292ef448, relpersistence=112 'p', forkNum=MAIN_FORKNUM, blockNum=0, mode=RBM_NORMAL, strategy=0x0, hit=0x7ffd1f11cf1b)
      at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/buffer/bufmgr.c:1134
  #12 0x0000561a285a47e3 in ReadBufferExtended (reln=0x7fa28ce1c888, forkNum=MAIN_FORKNUM, blockNum=0, mode=RBM_NORMAL, strategy=0x0)
      at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/buffer/bufmgr.c:781
  #13 0x0000561a285a46ad in ReadBuffer (reln=0x7fa28ce1c888, blockNum=0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/storage/buffer/bufmgr.c:715
  #14 0x0000561a2811d511 in _bt_getbuf (rel=0x7fa28ce1c888, blkno=0, access=1) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/nbtree/nbtpage.c:852
  #15 0x0000561a2811d1b2 in _bt_metaversion (rel=0x7fa28ce1c888, heapkeyspace=0x7ffd1f11d9f0, allequalimage=0x7ffd1f11d9f1) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/nbtree/nbtpage.c:747
  #16 0x0000561a28126220 in _bt_first (scan=0x561a292d0348, dir=ForwardScanDirection) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/nbtree/nbtsearch.c:1465
  #17 0x0000561a28121a07 in btgettuple (scan=0x561a292d0348, dir=ForwardScanDirection) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/nbtree/nbtree.c:246
  #18 0x0000561a28111afa in index_getnext_tid (scan=0x561a292d0348, direction=ForwardScanDirection) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/index/indexam.c:583
  #19 0x0000561a28111d14 in index_getnext_slot (scan=0x561a292d0348, direction=ForwardScanDirection, slot=0x561a292d01a8) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/index/indexam.c:675
  #20 0x0000561a2810fbcc in systable_getnext (sysscan=0x561a292d0158) at /home/christian/src/neon//vendor/postgres-v16/src/backend/access/index/genam.c:512
  #21 0x0000561a287a1ee1 in SearchCatCacheMiss (cache=0x561a292a0f80, nkeys=1, hashValue=3028463561, hashIndex=1, v1=94670359561576, v2=0, v3=0, v4=0)
      at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/cache/catcache.c:1440
  #22 0x0000561a287a1d8a in SearchCatCacheInternal (cache=0x561a292a0f80, nkeys=1, v1=94670359561576, v2=0, v3=0, v4=0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/cache/catcache.c:1360
  #23 0x0000561a287a1a4f in SearchCatCache (cache=0x561a292a0f80, v1=94670359561576, v2=0, v3=0, v4=0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/cache/catcache.c:1214
  #24 0x0000561a287be060 in SearchSysCache (cacheId=10, key1=94670359561576, key2=0, key3=0, key4=0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/cache/syscache.c:817
  #25 0x0000561a287be66f in GetSysCacheOid (cacheId=10, oidcol=1, key1=94670359561576, key2=0, key3=0, key4=0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/cache/syscache.c:1055
  #26 0x0000561a286319a5 in get_role_oid (rolname=0x561a29270568 "cloud_admin", missing_ok=true) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/adt/acl.c:5251
  #27 0x0000561a283d42ca in check_hba (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/libpq/hba.c:2493
  #28 0x0000561a283d5537 in hba_getauthmethod (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/libpq/hba.c:3067
  #29 0x0000561a283c6fd7 in ClientAuthentication (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/libpq/auth.c:395
  #30 0x0000561a287dc943 in PerformAuthentication (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/init/postinit.c:247
  #31 0x0000561a287dd9cd in InitPostgres (in_dbname=0x561a29270588 "postgres", dboid=0, username=0x561a29270568 "cloud_admin", useroid=0, load_session_libraries=true, override_allow_connections=false,
      out_dbname=0x0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/utils/init/postinit.c:929
  #32 0x0000561a285fa10b in PostgresMain (dbname=0x561a29270588 "postgres", username=0x561a29270568 "cloud_admin") at /home/christian/src/neon//vendor/postgres-v16/src/backend/tcop/postgres.c:4293
  #33 0x0000561a28524ce4 in BackendRun (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/postmaster/postmaster.c:4465
  #34 0x0000561a285245da in BackendStartup (port=0x561a29268de0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/postmaster/postmaster.c:4193
  #35 0x0000561a285209c4 in ServerLoop () at /home/christian/src/neon//vendor/postgres-v16/src/backend/postmaster/postmaster.c:1782
  #36 0x0000561a2852030f in PostmasterMain (argc=3, argv=0x561a291c5fc0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/postmaster/postmaster.c:1466
  #37 0x0000561a283dd987 in main (argc=3, argv=0x561a291c5fc0) at /home/christian/src/neon//vendor/postgres-v16/src/backend/main/main.c:238
2025-01-14 20:42:01 +01:00
Christian Schwarz
9e1cd986d7 address "why mut" nits; https://github.com/neondatabase/neon/pull/10353#discussion_r1913685991 https://github.com/neondatabase/neon/pull/10353#discussion_r1913683065 https://github.com/neondatabase/neon/pull/10353#discussion_r1913683392 2025-01-14 15:30:33 +01:00
Christian Schwarz
47544dcc0b simplify SmgrOpTimerState variant names, and add some doc comments; https://github.com/neondatabase/neon/pull/10353#discussion_r1913676569 and https://github.com/neondatabase/neon/pull/10353#discussion_r1913676824 2025-01-14 15:28:09 +01:00
Christian Schwarz
4e094d9638 rearrange code & inline HandleInner::shutdown() to minimize the diff 2025-01-14 15:12:46 +01:00
Christian Schwarz
c8bee86586 in some early WIP commit we had removed the loop{} inside get(); re-establish it one level down 2025-01-14 15:12:03 +01:00
Christian Schwarz
768a867dcf doc comment fix 2025-01-14 14:54:15 +01:00
Christian Schwarz
3b65465e10 turns out with the switch to sync Mutex there's no reason for upgrade() to be async either 2025-01-14 14:53:41 +01:00
Christian Schwarz
e4ea706424 turns out PerTimelineState::shutdown() doesn't need to be async 2025-01-14 14:48:03 +01:00
Christian Schwarz
7034f54a9e remove the earlier-commented-out assertions on arc reference counts, they were too whiteboxy to begin with 2025-01-14 14:45:25 +01:00
Christian Schwarz
d68c5ddf7e avoid the tokio::sync::Mutex by wrapping the GateGuard into an Arc 2025-01-14 14:23:28 +01:00
Christian Schwarz
b95365b45d Revert "experiment: what if we make Handle !Send so it can't be held across await points"
This reverts commit b44070d0c7.
2025-01-14 13:32:12 +01:00
Christian Schwarz
b44070d0c7 experiment: what if we make Handle !Send so it can't be held across await points
Result: the whole point of having a Handle at hand is to be holding a GateGuard
while performing a Timeline operation. Dont' do it.
2025-01-14 13:30:48 +01:00
Christian Schwarz
6b22acba9b avoid cloning the Arc<Timeline> on every handle upgrade/downgade, by wrapping it in yet another Arc 2025-01-14 12:28:37 +01:00
Christian Schwarz
22058d17d1 it turns out PerTimelineState need not store a Types::Timeline at all 2025-01-14 12:22:00 +01:00
Christian Schwarz
a8d096b72c Revert "WIP experiment: avoid upgrading"
This reverts commit f6eb6fff9f.
2025-01-14 12:18:02 +01:00
Christian Schwarz
f6eb6fff9f WIP experiment: avoid upgrading 2025-01-14 12:14:31 +01:00
Christian Schwarz
c868ceded0 WeakHandle should store weak ref to the GateGuard 2025-01-14 12:08:53 +01:00
Christian Schwarz
e82aa9419e convert handles to named structs 2025-01-14 12:02:25 +01:00
Christian Schwarz
bfe9efefb8 test tenant::timeline::handle::tests::test_timeline_shutdown hangs 2025-01-14 11:56:08 +01:00
Christian Schwarz
62f63275b2 fix test_connection_handler_exit 2025-01-14 11:55:45 +01:00
Christian Schwarz
5762fdbf68 test failure at: 5b45f03aa2/pageserver/src/tenant/timeline/handle.rs (L958) 2025-01-14 11:39:46 +01:00
Christian Schwarz
5b45f03aa2 tests: comment out the strong_count / weak_count assertions, 2025-01-14 11:39:19 +01:00
Christian Schwarz
3fefa5b415 fix warnings 2025-01-14 11:31:58 +01:00
Christian Schwarz
6007a94f91 it compiles 2025-01-14 11:30:43 +01:00
Christian Schwarz
9e03dda0c3 handle downgrade during batching 2025-01-13 15:49:45 +01:00
Christian Schwarz
8a0a0d06a8 renames 2025-01-13 14:38:19 +01:00
Christian Schwarz
dda31b9cb6 adjust shutdown 2025-01-13 14:38:19 +01:00
Christian Schwarz
9591d8789c WIP 2025-01-13 14:27:32 +01:00
Christian Schwarz
ee851d1127 Merge remote-tracking branch 'origin/main' into problame/throttle-before-batching 2025-01-10 20:38:08 +01:00
Christian Schwarz
d6a2b62cfb grand refactor of SmgrOpTimer states 2025-01-10 20:29:44 +01:00
Christian Schwarz
ad5120197c self-review 2025-01-10 20:28:20 +01:00
Christian Schwarz
4d496a29c2 following up to the last commit, the observation points that we use to calculate the various latency metrics are different, adjust for that 2025-01-10 20:26:09 +01:00
Christian Schwarz
8793e28ccb throttling cancel-sensitivity 2025-01-10 20:25:24 +01:00
Christian Schwarz
aa8da1e621 move throttle into pagestream_read_message 2025-01-10 15:35:07 +01:00
Christian Schwarz
c36cf79ab5 page_service client: address todo / unused warning 2025-01-08 18:00:38 +01:00
Christian Schwarz
f6f947e4ec Revert "debug cruft, likely will revert but this proved useful, esp log_if_slow"
This reverts commit 0fd67b27e5.

Bunch of conflicts.
2025-01-07 12:26:04 +01:00
Christian Schwarz
987829e5c2 fix build failures on Linux 2025-01-07 12:13:37 +01:00
Christian Schwarz
c73e8c34c8 Merge remote-tracking branch 'origin/main' into vlad/read-path-concurrent-io 2025-01-07 12:06:52 +01:00
Christian Schwarz
4637389882 remov the "parallel" mode, as we won't ever enable this in practice and benchmarks have shown very limited upside over futures-unordered 2024-12-21 20:44:13 +01:00
Christian Schwarz
d844ed098a fix some minor warnings 2024-12-21 20:37:21 +01:00
Christian Schwarz
d776ee66d7 avoid Arc<Gate> by having clonable GateGuard 2024-12-21 20:33:04 +01:00
Christian Schwarz
dc58846f0c with the new approach we don't even need the no-slots patch of tokio-epoll-uring 2024-12-21 20:33:04 +01:00
Christian Schwarz
3396574d21 cargo fmt 2024-12-21 20:32:53 +01:00
Christian Schwarz
97a01bdcb4 pagebench: option for queue depth 2024-12-17 21:01:22 +01:00
Christian Schwarz
29fecda704 avoid killing walredo 2024-12-17 21:01:22 +01:00
Christian Schwarz
dc1d53f7be name delta stack height variable 2024-12-17 21:01:22 +01:00
Christian Schwarz
8e6c01ddea WIP fix: one task per connections to drive all the IO futures 2024-12-17 21:01:22 +01:00
Christian Schwarz
e209fd8d77 diagnosis: with the debug cruft from previous commit, we see "thread_local_system" hang forever
The reason is likely that one spawned IO futures kicks off
thread_local_system launch, then returns Pending.
Another IO future observes the once cell already locked and waits
for the first future to finish.
But that never happens.

It's a sort of priority inversion.
2024-12-17 18:48:32 +01:00
Christian Schwarz
0fd67b27e5 debug cruft, likely will revert but this proved useful, esp log_if_slow 2024-12-17 17:32:06 +01:00
Christian Schwarz
d962b44c20 fix the script (it stopped making changes beyond 6); this now creates 18-tall delta stacks for each page 2024-12-13 12:43:40 +01:00
Christian Schwarz
31fec1fb4b add the script that I used to generate the delta stack
non-package-mode-py3.10christian@neon-hetzner-dev-christian:[~/src/neon/test_runner]: poetry run python3 deep_layers_with_delta.py
2024-12-12 20:25:16 +01:00
Christian Schwarz
87755bf80e concurrent-futures: poll before pushing into FuturesUnordered (this will do io_uring submission in most cases) 2024-12-12 19:37:23 +01:00
Christian Schwarz
1f7f947119 results
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 47653
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.417 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.588 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.670 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 0.991 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 1.896 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.time: 20.1797
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_cpu_seconds_total: 15.1300
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1: 2209
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.4: 9683
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.8: 16795
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.16: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.32: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.64: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.128: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.256: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.512: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1024: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.9223372036854775807: 50022
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 46773
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.425 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.632 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.837 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 1.234 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 2.347 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.time: 20.2244
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_cpu_seconds_total: 15.9300
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1: 2072
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.4: 9362
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.8: 16558
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.16: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.32: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.64: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.128: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.256: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.512: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1024: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.9223372036854775807: 49119
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 47861
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.416 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.609 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.713 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 1.135 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 1.940 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.time: 20.1969
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_cpu_seconds_total: 15.7000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1: 2282
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.4: 9597
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.8: 16926
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.16: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.32: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.64: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.128: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.256: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.512: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.1024: 50189
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.counters.pageserver_layers_visited_per_vectored_read_global_buckets.9223372036854775807: 50189

============================================================================================================================================================================================= 3 passed, 236 deselected in 101.20s (0:01:41) ==============================================================================================================================================================================================
2024-12-12 14:51:03 +01:00
Christian Schwarz
8b477ce4ee super hacky way to get layer visit buckets 2024-12-12 14:50:47 +01:00
Christian Schwarz
02d0d89069 run bench on hetzner box
christian@neon-hetzner-dev-christian:[~/src/neon]: NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS=true DEFAULT_PG_VERSION=16 BUILD_TYPE=release poetry run pytest --alluredir ~/tmp/alluredir --clean-alluredir test_runner/performance -k 'test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant' --maxfail=1

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 46336
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.429 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.607 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.705 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 1.138 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-serial-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 2.059 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 48772
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.408 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.592 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.701 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 1.139 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-parallel-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 2.231 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_tenants: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pgbench_scale: 136
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.duration: 20 s
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.n_clients: 1
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.config: 0
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.page_cache_size: 134217728 byte
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.pageserver_config_override.max_file_descriptors: 500000
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.request_count: 47102
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_mean: 0.422 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p95: 0.605 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99: 0.705 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.9: 1.124 ms
test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant[release-pg16-direct-futures-unordered-1-1-136-20].pageserver_max_throughput_getpage_at_latest_lsn.latency_percentiles.p99.99: 2.029 ms

============================================================================================================================================================================================= 3 passed, 236 deselected in 102.19s (0:01:42) ==============================================================================================================================================================================================
2024-12-12 14:18:51 +01:00
Christian Schwarz
309edebb90 repurpose test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant to measure latency improvement in unbatchable-pagestream but parallelizable workload (multiple layers visited) 2024-12-12 14:18:42 +01:00
Christian Schwarz
80aebce3d6 benc results on my box
christian@neon-hetzner-dev-christian:[~/src/neon]: DEFAULT_PG_VERSION=16 BUILD_TYPE=release poetry run pytest --alluredir ~/tmp/alluredir --clean-alluredir test_runner/performance/pageserver/test_page_service_batching.py -k 'test_throughput' --maxfail=1

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.time: 1.0999
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 1.0450
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.time: 1.2530
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 1.3367
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].counters.time: 1.0713
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 1.0550
test_throughput[release-pg16-50-pipelining_config2-5-futures-unordered-direct-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2825
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.5882
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.1176
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.5882
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3012
test_throughput[release-pg16-50-pipelining_config3-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4734
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.3162
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.8000
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.5333
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.8000
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3227
test_throughput[release-pg16-50-pipelining_config4-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4442
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2842
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.7647
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.1176
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.7647
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3135
test_throughput[release-pg16-50-pipelining_config5-5-futures-unordered-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4740

==================================================================================================================================================================================================== 6 passed, 9 deselected in 55.08s ====================================================================================================================================================================================================
2024-12-12 13:57:24 +01:00
Christian Schwarz
71b6aa2ab7 implement futuresunordered mode 2024-12-12 13:55:35 +01:00
Christian Schwarz
e051e916b6 results on my hetzner box
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.time: 1.0955
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 1.0250
test_throughput[release-pg16-50-pipelining_config0-5-serial-direct-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.time: 1.1962
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 1.2700
test_throughput[release-pg16-50-pipelining_config1-5-parallel-direct-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2611
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.5000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.0556
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.5000
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2850
test_throughput[release-pg16-50-pipelining_config2-5-serial-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4775
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.3033
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.6875
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.0625
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.6875
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3075
test_throughput[release-pg16-50-pipelining_config3-5-parallel-direct-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4777

==================================================================================================================================================================================================== 4 passed, 6 deselected in 38.06s ====================================================================================================================================================================================================
2024-12-12 13:37:13 +01:00
Christian Schwarz
7f55a32edb parametrization over direct io mode (only direct io for now) 2024-12-12 13:36:05 +01:00
Christian Schwarz
3779370f08 results on my hetzner box
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Benchmark results ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].counters.time: 0.7328
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 0.8850
test_throughput[release-pg16-50-pipelining_config0-5-serial-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].counters.time: 0.7545
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_sum: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].counters.pageserver_batch_size_histo_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].counters.compute_getpage_count: 6,403.0000
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].counters.pageserver_cpu_seconds_total: 0.9667
test_throughput[release-pg16-50-pipelining_config1-5-parallel-100-128-batchable {'mode': 'serial'}].perfmetric.batching_factor: 1.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.1824
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.1111
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 297.8889
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.1111
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.2196
test_throughput[release-pg16-50-pipelining_config2-5-serial-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4883
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].tablesize_mib: 50.0000 MiB
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].effective_io_concurrency: 100.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].readhead_buffer_size: 128.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].config: 0.0000
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.time: 0.2743
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_sum: 6,401.6667
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_batch_size_histo_count: 298.4444
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.compute_getpage_count: 6,401.6667
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].counters.pageserver_cpu_seconds_total: 0.3350
test_throughput[release-pg16-50-pipelining_config3-5-parallel-100-128-batchable {'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].perfmetric.batching_factor: 21.4501
test_latency[release-pg16-pipelining_config0-serial-{'mode': 'serial'}].latency_mean: 0.145 ms
test_latency[release-pg16-pipelining_config0-serial-{'mode': 'serial'}].latency_percentiles.p95: 0.178 ms
test_latency[release-pg16-pipelining_config0-serial-{'mode': 'serial'}].latency_percentiles.p99: 0.199 ms
test_latency[release-pg16-pipelining_config0-serial-{'mode': 'serial'}].latency_percentiles.p99.9: 0.265 ms
test_latency[release-pg16-pipelining_config0-serial-{'mode': 'serial'}].latency_percentiles.p99.99: 0.366 ms
test_latency[release-pg16-pipelining_config1-parallel-{'mode': 'serial'}].latency_mean: 0.168 ms
test_latency[release-pg16-pipelining_config1-parallel-{'mode': 'serial'}].latency_percentiles.p95: 0.201 ms
test_latency[release-pg16-pipelining_config1-parallel-{'mode': 'serial'}].latency_percentiles.p99: 0.224 ms
test_latency[release-pg16-pipelining_config1-parallel-{'mode': 'serial'}].latency_percentiles.p99.9: 0.317 ms
test_latency[release-pg16-pipelining_config1-parallel-{'mode': 'serial'}].latency_percentiles.p99.99: 0.416 ms
test_latency[release-pg16-pipelining_config2-serial-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.149 ms
test_latency[release-pg16-pipelining_config2-serial-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.184 ms
test_latency[release-pg16-pipelining_config2-serial-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.205 ms
test_latency[release-pg16-pipelining_config2-serial-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.289 ms
test_latency[release-pg16-pipelining_config2-serial-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.359 ms
test_latency[release-pg16-pipelining_config3-parallel-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.180 ms
test_latency[release-pg16-pipelining_config3-parallel-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.219 ms
test_latency[release-pg16-pipelining_config3-parallel-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.244 ms
test_latency[release-pg16-pipelining_config3-parallel-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.341 ms
test_latency[release-pg16-pipelining_config3-parallel-{'max_batch_size': 1, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.522 ms
test_latency[release-pg16-pipelining_config4-serial-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.154 ms
test_latency[release-pg16-pipelining_config4-serial-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.189 ms
test_latency[release-pg16-pipelining_config4-serial-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.211 ms
test_latency[release-pg16-pipelining_config4-serial-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.268 ms
test_latency[release-pg16-pipelining_config4-serial-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.307 ms
test_latency[release-pg16-pipelining_config5-parallel-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_mean: 0.167 ms
test_latency[release-pg16-pipelining_config5-parallel-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p95: 0.211 ms
test_latency[release-pg16-pipelining_config5-parallel-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99: 0.238 ms
test_latency[release-pg16-pipelining_config5-parallel-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.9: 0.338 ms
test_latency[release-pg16-pipelining_config5-parallel-{'max_batch_size': 32, 'execution': 'concurrent-futures', 'mode': 'pipelined'}].latency_percentiles.p99.99: 0.407 ms

===================================================================================================================================================================================================== 10 passed in 120.65s (0:02:00) =====================================================================================================================================================================================================
2024-12-12 13:32:12 +01:00
Christian Schwarz
659da352a2 DO NOT MERGE trim down benchmark to what's relevant 2024-12-12 13:31:55 +01:00
Christian Schwarz
0e8ac43450 hacky parametrization of relevant benchmarks 2024-12-12 13:28:00 +01:00
Christian Schwarz
410283c9b1 serial mode without tasks 2024-12-12 12:26:15 +01:00
Christian Schwarz
4dc7434d0e Merge remote-tracking branch 'origin/main' into vlad/read-path-concurrent-io 2024-12-10 13:22:03 +01:00
Vlad Lazar
c72e87f98d Merge branch 'main' into vlad/read-path-concurrent-io
Fix merge conflict with https://github.com/neondatabase/neon/pull/9631.
2024-11-11 11:29:38 +01:00
Vlad Lazar
dba696885d review: do some testing of parallel IO 2024-11-04 14:42:49 +01:00
Vlad Lazar
657526a5be review: make import scope tighter 2024-11-04 14:24:30 +01:00
Vlad Lazar
2508c64646 review: comment on end state of io concurrency 2024-11-04 14:14:21 +01:00
Vlad Lazar
34a43b0789 review: improve doc comment for planner 2024-11-04 14:12:44 +01:00
Vlad Lazar
9f95864c8a review: remove matches! usage 2024-11-04 14:08:26 +01:00
Vlad Lazar
c446e3d3ab chore: fixup comment 2024-11-04 14:07:20 +01:00
Vlad Lazar
6599034410 chore: clippy warn 2024-11-04 14:07:20 +01:00
Vlad Lazar
eafae92795 pageserver: make io serial by default
One can configure this via the NEON_PAGESERVER_VALUE_RECONSTRUCT_IO_CONCURRENCY
env var. A config is possible as well, but it's more work and this is
enough for experimentation.
2024-11-04 14:07:20 +01:00
Vlad Lazar
1cbbc35957 pageserver: fixup some tests using low level read apis 2024-11-04 14:07:20 +01:00
Vlad Lazar
cc57b34771 pageserver: reinstate inspect_image_layers 2024-11-04 14:07:20 +01:00
Vlad Lazar
6da0b5fac1 pagserver: further simplify read path error handling 2024-11-04 14:07:20 +01:00
Vlad Lazar
11990ef2fe pageserver: use correct will_init for streaming planner
`BlobMeta::will_init` is not actually used on these code paths,
but let's be kind to future ourselves and make sure it's correct.
2024-11-04 14:07:20 +01:00
Vlad Lazar
0a840fbdbd pageserver: purge read path caching support
We now only store indices in the page cache.
This commit removes any caching support from the read path.
2024-11-04 14:07:20 +01:00
Vlad Lazar
0ad736e2a6 pageserver: clean up comms between pending IOs and awaiter
Previously, each pending IO sent a stupid buffer which was just what it
read from the layer file for the key. This made the awaiter code
confusing because on disk images in layer files don't keep the enum wrapper,
but the ones in delta layers do.

This commit introduces a type to make this a bit easier and cleans up
the IO awaiting code a bit. We also avoid some rather silly serialize,
deserialize dance.
2024-11-04 14:07:20 +01:00
Vlad Lazar
bde1694cb9 pageserver: remove unused on_key_error method
It's obvious the method is unused, but let's break down error handling
of the read path. Before this patch set, all IO was done sequentially
for a given read. If one IO failed, then the error would stop the
processing of the read path.

Now that we are doing IO concurrently when serving a read request
it's not trivial to implement the same error handling approach.
As of this commit, one IO failure does not stop any other IO requests.
When awaiting for the IOs to complete, we stop waiting on the first
failure, but we do not signal any other pending IOs to complete and
they will just fail silently.

Long term, we need a better approach for this. Two broad ideas:
1. Introduce some synchronization between pending IO tasks such
that new IOs are not issued after the first failure
2. Cancel any pending IOs when the first error is discovered
2024-11-04 14:07:20 +01:00
Vlad Lazar
e3aa85d722 pagserver: do concurrent IO on the read path
Previously, the read path would wait for all IO in one layer visit to
complete before visiting the next layer (if a subsequent visit is
required). IO within one layer visit was also sequential.

With this patch we gain the ability to issue IO concurrently within one
layer visit **and** to move on to the next layer without waiting for IOs
from the previous visit to complete.

This is a slightly cleaned up version of the work done at the Lisbon
hackathon.
2024-11-04 14:07:17 +01:00
35 changed files with 283 additions and 999 deletions

View File

@@ -892,14 +892,14 @@ jobs:
run: |
for repo in neondatabase 369495373322.dkr.ecr.eu-central-1.amazonaws.com; do
docker buildx imagetools create -t $repo/neon:latest \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}
$repo/neon:${{ needs.tag.outputs.build-tag }}
for version in ${VERSIONS}; do
docker buildx imagetools create -t $repo/compute-node-${version}:latest \
neondatabase/compute-node-${version}:${{ needs.tag.outputs.build-tag }}
$repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }}
docker buildx imagetools create -t $repo/vm-compute-node-${version}:latest \
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
$repo/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
done
done
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \

View File

@@ -3,9 +3,8 @@ name: Create Release Branch
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * THU' # Proxy release
- cron: '0 6 * * FRI' # Storage release
- cron: '0 7 * * FRI' # Compute release
- cron: '0 6 * * THU' # Proxy release
workflow_dispatch:
inputs:
create-storage-release-branch:
@@ -56,7 +55,7 @@ jobs:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
create-compute-release-branch:
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
if: inputs.create-compute-release-branch
permissions:
contents: write

View File

@@ -67,9 +67,6 @@ RUN cd postgres && \
# Enable some of contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \

View File

@@ -41,14 +41,14 @@ use crate::local_proxy;
use crate::pg_helpers::*;
use crate::spec::*;
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
RunInEachDatabase,
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
};
use crate::spec_apply::PerDatabasePhase;
use crate::spec_apply::PerDatabasePhase::{
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
HandleAnonExtension,
};
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
use crate::sync_sk::{check_if_synced, ping_safekeeper};
@@ -340,15 +340,6 @@ impl ComputeNode {
self.state.lock().unwrap().status
}
pub fn get_timeline_id(&self) -> Option<TimelineId> {
self.state
.lock()
.unwrap()
.pspec
.as_ref()
.map(|s| s.timeline_id)
}
// Remove `pgdata` directory and create it again with right permissions.
fn create_pgdata(&self) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
@@ -938,48 +929,6 @@ impl ComputeNode {
.map(|role| (role.name.clone(), role))
.collect::<HashMap<String, Role>>();
// Check if we need to drop subscriptions before starting the endpoint.
//
// It is important to do this operation exactly once when endpoint starts on a new branch.
// Otherwise, we may drop not inherited, but newly created subscriptions.
//
// We cannot rely only on spec.drop_subscriptions_before_start flag,
// because if for some reason compute restarts inside VM,
// it will start again with the same spec and flag value.
//
// To handle this, we save the fact of the operation in the database
// in the neon.drop_subscriptions_done table.
// If the table does not exist, we assume that the operation was never performed, so we must do it.
// If table exists, we check if the operation was performed on the current timelilne.
//
let mut drop_subscriptions_done = false;
if spec.drop_subscriptions_before_start {
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
drop_subscriptions_done = match
client.simple_query(&query).await {
Ok(result) => {
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
},
Err(e) =>
{
match e.code() {
Some(&SqlState::UNDEFINED_TABLE) => false,
_ => {
// We don't expect any other error here, except for the schema/table not existing
error!("Error checking if drop subscription operation was already performed: {}", e);
return Err(e.into());
}
}
}
}
};
let jwks_roles = Arc::new(
spec.as_ref()
.local_proxy_config
@@ -1047,7 +996,7 @@ impl ComputeNode {
jwks_roles.clone(),
concurrency_token.clone(),
db,
[DropLogicalSubscriptions].to_vec(),
[DropSubscriptionsForDeletedDatabases].to_vec(),
);
Ok(spawn(fut))
@@ -1075,7 +1024,6 @@ impl ComputeNode {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
] {
info!("Applying phase {:?}", &phase);
apply_operations(
@@ -1116,17 +1064,6 @@ impl ComputeNode {
}
let conf = Arc::new(conf);
let mut phases = vec![
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
];
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
phases.push(DropLogicalSubscriptions);
}
let fut = Self::apply_spec_sql_db(
spec.clone(),
conf,
@@ -1134,7 +1071,12 @@ impl ComputeNode {
jwks_roles.clone(),
concurrency_token.clone(),
db,
phases,
[
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
]
.to_vec(),
);
Ok(spawn(fut))
@@ -1146,20 +1088,12 @@ impl ComputeNode {
handle.await??;
}
let mut phases = vec![
for phase in vec![
HandleOtherExtensions,
HandleNeonExtension, // This step depends on CreateSchemaNeon
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
];
// This step depends on CreateSchemaNeon
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
phases.push(FinalizeDropLogicalSubscriptions);
}
for phase in phases {
] {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
@@ -1529,14 +1463,6 @@ impl ComputeNode {
Ok(())
},
)?;
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
if config::line_in_file(
&postgresql_conf_path,
"neon.disable_logical_replication_subscribers=false",
)? {
info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false");
}
self.pg_reload_conf()?;
}
self.post_apply_config()?;

View File

@@ -129,13 +129,6 @@ pub fn write_postgres_conf(
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
if spec.drop_subscriptions_before_start {
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
} else {
// be explicit about the default value
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
}
// This is essential to keep this line at the end of the file,
// because it is intended to override any settings above.
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;

View File

@@ -47,7 +47,7 @@ pub enum PerDatabasePhase {
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
DropLogicalSubscriptions,
DropSubscriptionsForDeletedDatabases,
}
#[derive(Clone, Debug)]
@@ -58,13 +58,11 @@ pub enum ApplySpecPhase {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
FinalizeDropLogicalSubscriptions,
}
pub struct Operation {
@@ -333,7 +331,7 @@ async fn get_operations<'a>(
// NB: there could be other db states, which prevent us from dropping
// the database. For example, if db is used by any active subscription
// or replication slot.
// Such cases are handled in the DropLogicalSubscriptions
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
// phase. We do all the cleanup before actually dropping the database.
let drop_db_query: String = format!(
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
@@ -444,19 +442,13 @@ async fn get_operations<'a>(
Ok(Box::new(operations))
}
ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from(
"create schema for neon extension and utils tables",
)),
}))),
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
match subphase {
PerDatabasePhase::DropLogicalSubscriptions => {
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
match &db {
DB::UserDB(db) => {
let drop_subscription_query: String = format!(
include_str!("sql/drop_subscriptions.sql"),
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
datname_str = escape_literal(&db.name),
);
@@ -674,6 +666,10 @@ async fn get_operations<'a>(
}
ApplySpecPhase::HandleNeonExtension => {
let operations = vec![
Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from("init: add schema for extension")),
},
Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
comment: Some(String::from(
@@ -716,9 +712,5 @@ async fn get_operations<'a>(
Ok(Box::new(operations))
}
ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {
query: String::from(include_str!("sql/finalize_drop_subscriptions.sql")),
comment: None,
}))),
}
}

View File

@@ -1,21 +0,0 @@
DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM pg_catalog.pg_tables
WHERE tablename = 'drop_subscriptions_done'
AND schemaname = 'neon'
)
THEN
CREATE TABLE neon.drop_subscriptions_done
(id serial primary key, timeline_id text);
END IF;
-- preserve the timeline_id of the last drop_subscriptions run
-- to ensure that the cleanup of a timeline is executed only once.
-- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
INSERT INTO neon.drop_subscriptions_done VALUES (1, current_setting('neon.timeline_id'))
ON CONFLICT (id) DO UPDATE
SET timeline_id = current_setting('neon.timeline_id');
END
$$

View File

@@ -1357,7 +1357,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.pg_version,
mode,
!args.update_catalog,
false,
)?;
}
EndpointCmd::Start(args) => {

View File

@@ -76,7 +76,6 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
features: Vec<ComputeFeature>,
}
@@ -144,7 +143,6 @@ impl ComputeControlPlane {
pg_version: u32,
mode: ComputeMode,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -164,7 +162,6 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
});
@@ -180,7 +177,6 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
})?,
)?;
@@ -244,7 +240,6 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
@@ -296,7 +291,6 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
features: conf.features,
})
}
@@ -631,7 +625,6 @@ impl Endpoint {
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
reconfigure_concurrency: 1,
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -185,8 +185,6 @@ services:
neon-test-extensions:
profiles: ["test-extensions"]
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
environment:
- PGPASSWORD=cloud_admin
entrypoint:
- "/bin/bash"
- "-c"

View File

@@ -7,10 +7,7 @@ LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
for d in ${LIST}
do
[ -d "${d}" ] || continue
if ! psql -w -c "select 1" >/dev/null; then
FAILED="${d} ${FAILED}"
break
fi
psql -c "select 1" >/dev/null || break
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
done
[ -z "${FAILED}" ] && exit 0

View File

@@ -0,0 +1,35 @@
// If we just want to draw a pie chart of where latency goes,
// then we can project the `if we want to draw a jaeger trace.rs` into these structures.
// - fold `Vec<...>` into a sum
// - fold `start_time` and `end_time` into deltas
struct SmgrLatencyRecorder {
parse_request: u64,
downstairs: Arc<Mutex<Downstairs>>,
flush_response: u64,
}
struct Downstairs {
wait_for_execution: u64, // batching happens here
execution: Arc<Mutex<Execution>>,
}
struct Execution {
traverse_and_submit: Plan,
wait_for_io_completions_and_walredo: WaitForIoCompletionsAndWalredo,
}
struct Plan {
visit_layer: VisitLayer,
// implict remainder: time in the fringe code and traversing timeline ancestor graph
}
struct VisitLayer {
index_lookup: u64,
submit_io: u64,
}
struct WaitForIoCompletionsAndWalredo {
wait_for_io_completions: u64,
walredo: u64,
}

View File

@@ -0,0 +1,69 @@
// If we want to draw a Jaeger trace
struct SmgrLatencyRecorder {
start_time: Instant,
parse_request: ParseRequest,
downstairs: Arc<Mutex<Downstairs>>,
flush_response: FlushResponse,
end_time: Instant,
}
struct ParseRequest {
start_time: Instant,
end_time: Instant,
}
struct FlushResponse {
start_time: Instant,
end_time: Instant,
}
struct Downstairs {
start_time: Instant,
wait_for_execution: WaitForExecution, // batching happens here
execution: Arc<Mutex<Execution>>,
end_time: Instant,
}
enum WaitForExecution {
start_time: Instant,
end_time: Instant,
}
struct Execution {
start_time: Instant,
traverse_and_submit: Plan,
wait_for_io_completions_and_walredo: Vec<Arc<Mutex<WaitForIoCompletionsAndWalredo>>>,
end_time: Instant,
}
struct Plan {
start_time: Instant,
visit_layer: Vec<VisitLayer>,
// implict remainder: time in the fringe code and traversing timeline ancestor graph
end_time: Instant,
}
struct VisitLayer {
start_time: Instant,
index_lookup: IndexLookup,
submit_io: SubmitIo,
end_time: Instant,
}
struct IndexLookup {
start_time: Instant,
end_time: Instant,
}
struct SubmitIo {
start_time: Instant,
end_time: Instant,
}
struct WaitForIoCompletionsAndWalredo {
start_time: Instant,
wait_for_io_completions: u64,
walredo: u64,
end_time: Instant,
}

View File

@@ -138,13 +138,6 @@ pub struct ComputeSpec {
/// enough spare connections for reconfiguration process to succeed.
#[serde(default = "default_reconfigure_concurrency")]
pub reconfigure_concurrency: usize,
/// If set to true, the compute_ctl will drop all subscriptions before starting the
/// compute. This is needed when we start an endpoint on a branch, so that child
/// would not compete with parent branch subscriptions
/// over the same replication content from publisher.
#[serde(default)] // Default false
pub drop_subscriptions_before_start: bool,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.

View File

@@ -92,203 +92,94 @@
use crate::task_mgr::TaskKind;
// The main structure of this module, see module-level comment.
#[derive(Debug)]
#[derive(Debug, Default)]
pub struct RequestContext {
task_kind: TaskKind,
download_behavior: DownloadBehavior,
access_stats_behavior: AccessStatsBehavior,
page_content_kind: PageContentKind,
latency_recording: Option<latency_recording::LatencyRecording>,
io_concurrency: Option<io_concurrency_propagation::IoConcurrencyPropagation>,
cancel: Option<cancellation::Cancellation>,
}
/// The kind of access to the page cache.
#[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
pub enum PageContentKind {
Unknown,
DeltaLayerSummary,
DeltaLayerBtreeNode,
DeltaLayerValue,
ImageLayerSummary,
ImageLayerBtreeNode,
ImageLayerValue,
InMemoryLayer,
}
/// Desired behavior if the operation requires an on-demand download
/// to proceed.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DownloadBehavior {
/// Download the layer file. It can take a while.
Download,
/// Download the layer file, but print a warning to the log. This should be used
/// in code where the layer file is expected to already exist locally.
Warn,
/// Return a PageReconstructError::NeedsDownload error
Error,
}
/// Whether this request should update access times used in LRU eviction
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum AccessStatsBehavior {
/// Update access times: this request's access to data should be taken
/// as a hint that the accessed layer is likely to be accessed again
Update,
/// Do not update access times: this request is accessing the layer
/// but does not want to indicate that the layer should be retained in cache,
/// perhaps because the requestor is a compaction routine that will soon cover
/// this layer with another.
Skip,
}
pub struct RequestContextBuilder {
inner: RequestContext,
}
impl RequestContextBuilder {
/// A new builder with default settings
pub fn new(task_kind: TaskKind) -> Self {
Self {
inner: RequestContext {
task_kind,
download_behavior: DownloadBehavior::Download,
access_stats_behavior: AccessStatsBehavior::Update,
page_content_kind: PageContentKind::Unknown,
},
}
}
pub fn extend(original: &RequestContext) -> Self {
Self {
// This is like a Copy, but avoid implementing Copy because ordinary users of
// RequestContext should always move or ref it.
inner: RequestContext {
task_kind: original.task_kind,
download_behavior: original.download_behavior,
access_stats_behavior: original.access_stats_behavior,
page_content_kind: original.page_content_kind,
},
}
}
/// Configure the DownloadBehavior of the context: whether to
/// download missing layers, and/or warn on the download.
pub fn download_behavior(mut self, b: DownloadBehavior) -> Self {
self.inner.download_behavior = b;
self
}
/// Configure the AccessStatsBehavior of the context: whether layer
/// accesses should update the access time of the layer.
pub(crate) fn access_stats_behavior(mut self, b: AccessStatsBehavior) -> Self {
self.inner.access_stats_behavior = b;
self
}
pub(crate) fn page_content_kind(mut self, k: PageContentKind) -> Self {
self.inner.page_content_kind = k;
self
}
pub fn build(self) -> RequestContext {
self.inner
}
trait Propagatable: Default {
fn propagate(&self, child: &mut Self);
}
impl RequestContext {
/// Create a new RequestContext that has no parent.
///
/// The function is called `new` because, once we add children
/// to it using `detached_child` or `attached_child`, the context
/// form a tree (not implemented yet since cancellation will be
/// the first feature that requires a tree).
///
/// # Future: Cancellation
///
/// The only reason why a context like this one can be canceled is
/// because someone explicitly canceled it.
/// It has no parent, so it cannot inherit cancellation from there.
pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
RequestContextBuilder::new(task_kind)
.download_behavior(download_behavior)
.build()
fn root() -> Self {
Self::default()
}
/// Create a detached child context for a task that may outlive `self`.
///
/// Use this when spawning new background activity that should complete
/// even if the current request is canceled.
///
/// # Future: Cancellation
///
/// Cancellation of `self` will not propagate to the child context returned
/// by this method.
///
/// # Future: Structured Concurrency
///
/// We could add the Future as a parameter to this function, spawn it as a task,
/// and pass to the new task the child context as an argument.
/// That would be an ergonomic improvement.
///
/// We could make new calls to this function fail if `self` is already canceled.
pub fn detached_child(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
self.child_impl(task_kind, download_behavior)
}
/// Create a child of context `self` for a task that shall not outlive `self`.
///
/// Use this when fanning-out work to other async tasks.
///
/// # Future: Cancellation
///
/// Cancelling a context will propagate to its attached children.
///
/// # Future: Structured Concurrency
///
/// We could add the Future as a parameter to this function, spawn it as a task,
/// and track its `JoinHandle` inside the `RequestContext`.
///
/// We could then provide another method to allow waiting for all child tasks
/// to finish.
///
/// We could make new calls to this function fail if `self` is already canceled.
/// Alternatively, we could allow the creation but not spawn the task.
/// The method to wait for child tasks would return an error, indicating
/// that the child task was not started because the context was canceled.
pub fn attached_child(&self) -> Self {
self.child_impl(self.task_kind(), self.download_behavior())
}
/// Use this function when you should be creating a child context using
/// [`attached_child`] or [`detached_child`], but your caller doesn't provide
/// a context and you are unwilling to change all callers to provide one.
///
/// Before we add cancellation, we should get rid of this method.
///
/// [`attached_child`]: Self::attached_child
/// [`detached_child`]: Self::detached_child
pub fn todo_child(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
Self::new(task_kind, download_behavior)
}
fn child_impl(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
Self::new(task_kind, download_behavior)
}
pub fn task_kind(&self) -> TaskKind {
self.task_kind
}
pub fn download_behavior(&self) -> DownloadBehavior {
self.download_behavior
}
pub(crate) fn access_stats_behavior(&self) -> AccessStatsBehavior {
self.access_stats_behavior
}
pub(crate) fn page_content_kind(&self) -> PageContentKind {
self.page_content_kind
fn child(&self) -> RequestContext {
let mut child = RequestContext::default();
let Self {
latency_recording,
} = self;
if let Some(latency_recording) = latency_recording {
child.latency_recording = Some(latency_recording.child());
}
}
}
mod latency_recording {
struct LatencyRecording {
inner: Arc<Mutex<Inner>>,
}
impl LatencyRecording {
fn new() -> Self {
Self {
current: Mutex::new(HashMap::new()),
}
}
fn on_request_recv(&self, now: Instant) -> {
let mut inner = self.inner.lock().unwrap();
inner.current.insert(now, now);
}
}
impl Propagatable for LatencyRecording {
fn propagate(&self, other: &mut Self) {
let mut inner = self.inner.lock().unwrap();
let other_inner = other.get_mut();
for (k, v) in other_inner.current.iter() {
inner.current.insert(*k, *v);
}
}
}
}
mod io_concurrency_propagation {
struct IoConcurrencyPropagation {
inner: IoConcurrency,
}
impl Propagatable for IoConcurrencyPropagation {
fn propagate(&self, other: &mut Self) {
other.inner = self.inner.clone();
}
}
}
mod cancellation {
struct Cancellation {
sources: Vec<CancellationToken>,
}
impl Propagatable for Cancellation {
fn propagate(&self, other: &mut Self) {
other.sources.extend(self.sources.iter().map(|tok| tok.child_toke()));
}
}
impl Cancellation {
async fn cancelled(&self) {
// TODO: this one is quite inefficient, it allocates
// But it's clear something better can be built within this architecture.
futures::future::select_all(self.sources.iter().map(|tok| tok.cancelled()))
}
}
}

View File

@@ -728,6 +728,12 @@ impl PageServerHandler {
msg = pgb.read_message() => { msg }
};
let ctx = ctx.enrich(
SmgrOpLatencyRecorder {
start_time: Instant::now(),
parse_request: ParseRequest::unset(),
});
let received_at = Instant::now();
let copy_data_bytes = match msg? {
@@ -748,9 +754,15 @@ impl PageServerHandler {
fail::fail_point!("ps::handle-pagerequest-message");
{
let parse_request_recorder= ctx.get::<SmgrOpLatencyRecorder>().parse_request;
// parse request
let neon_fe_msg =
PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?;
PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version, &parse_request_recorder)?;
}
// TODO: turn in to async closure once available to avoid repeating received_at
async fn record_op_start_and_throttle(
@@ -918,6 +930,11 @@ impl PageServerHandler {
shard: shard.downgrade(),
effective_request_lsn,
pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
downstairs: {
let downstairs = Arc::new(Mutex::new(Downstairs::init()));
*(&mut ctx.get::<SmgrOpLatencyRecorder>().downstairs) = Arc::clone(&downstairs);
downstairs
},
}
}
#[cfg(feature = "testing")]
@@ -963,12 +980,14 @@ impl PageServerHandler {
shard: accum_shard,
pages: ref mut accum_pages,
effective_request_lsn: accum_lsn,
recorder: accum_recorder,
}),
BatchedFeMessage::GetPage {
span: _,
shard: this_shard,
pages: this_pages,
effective_request_lsn: this_lsn,
recorder: this_recorder,
},
) if (|| {
assert_eq!(this_pages.len(), 1);
@@ -994,6 +1013,7 @@ impl PageServerHandler {
{
// ok to batch
accum_pages.extend(this_pages);
*this_recorder = accum_recorder;
Ok(())
}
#[cfg(feature = "testing")]
@@ -1589,6 +1609,10 @@ impl PageServerHandler {
async move {
let _cancel_batcher = cancel_batcher.drop_guard();
loop {
let ctx = ctx.attached_child();
let child = ctx.attached_child();
let maybe_batch = batch_rx.recv().await;
let batch = match maybe_batch {
Ok(batch) => batch,
@@ -1603,6 +1627,8 @@ impl PageServerHandler {
return Err(e);
}
};
batch.downstairs.wait_for_execution.end_time = Instant::now();
ctx.latency_recording.enrich(batch.downstairs);
self.pagesteam_handle_batched_message(
pgb_writer,
batch,

View File

@@ -436,14 +436,12 @@ impl KeyHistoryRetention {
if dry_run {
return true;
}
let layer_generation;
{
let guard = tline.layers.read().await;
if !guard.contains_key(key) {
return false;
}
layer_generation = guard.get_from_key(key).metadata().generation;
let guard = tline.layers.read().await;
if !guard.contains_key(key) {
return false;
}
let layer_generation = guard.get_from_key(key).metadata().generation;
drop(guard);
if layer_generation == tline.generation {
info!(
key=%key,
@@ -2140,11 +2138,6 @@ impl Timeline {
self.get_gc_compaction_watermark()
};
if compact_below_lsn == Lsn::INVALID {
tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction");
return Ok(vec![]);
}
// Split compaction job to about 4GB each
const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024;
let sub_compaction_max_job_size_mb =
@@ -2219,12 +2212,6 @@ impl Timeline {
} else {
end
};
let end = if ranges_num == idx + 1 {
// extend the compaction range to the end of the key range if it's the last partition
end.max(job.compact_key_range.end)
} else {
end
};
info!(
"splitting compaction job: {}..{}, estimated_size={}",
start, end, total_size
@@ -2345,11 +2332,6 @@ impl Timeline {
// each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
// the real cutoff.
let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX {
if real_gc_cutoff == Lsn::INVALID {
// If the gc_cutoff is not generated yet, we should not compact anything.
tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction");
return Ok(());
}
real_gc_cutoff
} else {
compact_lsn_range.end
@@ -2881,7 +2863,7 @@ impl Timeline {
"produced {} delta layers and {} image layers, {} layers are kept",
produced_delta_layers_len,
produced_image_layers_len,
keep_layers.len()
layer_selection.len()
);
// Step 3: Place back to the layer map.
@@ -2927,28 +2909,8 @@ impl Timeline {
// be batched into `schedule_compaction_update`.
let disk_consistent_lsn = self.disk_consistent_lsn.load();
self.schedule_uploads(disk_consistent_lsn, None)?;
// If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead
// of `compact_from`.
let compact_from = {
let mut compact_from = Vec::new();
let mut compact_to_set = HashMap::new();
for layer in &compact_to {
compact_to_set.insert(layer.layer_desc().key(), layer);
}
for layer in &layer_selection {
if let Some(to) = compact_to_set.get(&layer.layer_desc().key()) {
tracing::info!(
"skipping delete {} because found same layer key at different generation {}",
layer, to
);
} else {
compact_from.push(layer.clone());
}
}
compact_from
};
self.remote_client
.schedule_compaction_update(&compact_from, &compact_to)?;
.schedule_compaction_update(&layer_selection, &compact_to)?;
drop(gc_lock);

View File

@@ -337,45 +337,16 @@ impl OpenLayerManager {
compact_to: &[ResidentLayer],
metrics: &TimelineMetrics,
) {
// gc-compaction could contain layer rewrites. We need to delete the old layers and insert the new ones.
// Match the old layers with the new layers
let mut add_layers = HashMap::new();
let mut rewrite_layers = HashMap::new();
let mut drop_layers = HashMap::new();
for layer in compact_from {
drop_layers.insert(layer.layer_desc().key(), layer.clone());
}
for layer in compact_to {
if let Some(old_layer) = drop_layers.remove(&layer.layer_desc().key()) {
rewrite_layers.insert(layer.layer_desc().key(), (old_layer.clone(), layer.clone()));
} else {
add_layers.insert(layer.layer_desc().key(), layer.clone());
}
}
let add_layers = add_layers.values().cloned().collect::<Vec<_>>();
let drop_layers = drop_layers.values().cloned().collect::<Vec<_>>();
let rewrite_layers = rewrite_layers.values().cloned().collect::<Vec<_>>();
self.rewrite_layers_inner(&rewrite_layers, &drop_layers, &add_layers, metrics);
// We can simply reuse compact l0 logic. Use a different function name to indicate a different type of layer map modification.
self.finish_compact_l0(compact_from, compact_to, metrics)
}
/// Called post-compaction when some previous generation image layers were trimmed.
pub fn rewrite_layers(
pub(crate) fn rewrite_layers(
&mut self,
rewrite_layers: &[(Layer, ResidentLayer)],
drop_layers: &[Layer],
metrics: &TimelineMetrics,
) {
self.rewrite_layers_inner(rewrite_layers, drop_layers, &[], metrics);
}
fn rewrite_layers_inner(
&mut self,
rewrite_layers: &[(Layer, ResidentLayer)],
drop_layers: &[Layer],
add_layers: &[ResidentLayer],
metrics: &TimelineMetrics,
) {
let mut updates = self.layer_map.batch_update();
for (old_layer, new_layer) in rewrite_layers {
@@ -411,10 +382,6 @@ impl OpenLayerManager {
for l in drop_layers {
Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);
}
for l in add_layers {
Self::insert_historic_layer(l.as_ref().clone(), &mut updates, &mut self.layer_fmgr);
metrics.record_new_file_metrics(l.layer_desc().file_size);
}
updates.flush();
}

View File

@@ -19,7 +19,6 @@
#include "access/xlogrecovery.h"
#endif
#include "replication/logical.h"
#include "replication/logicallauncher.h"
#include "replication/slot.h"
#include "replication/walsender.h"
#include "storage/proc.h"
@@ -435,15 +434,6 @@ _PG_init(void)
restore_running_xacts_callback = RestoreRunningXactsFromClog;
DefineCustomBoolVariable(
"neon.disable_logical_replication_subscribers",
"Disables incomming logical replication",
NULL,
&disable_logical_replication_subscribers,
false,
PGC_SIGHUP,
0,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"neon.allow_replica_misconfig",

View File

@@ -4,7 +4,7 @@
//! united.
use reqwest::{IntoUrl, Method, StatusCode};
use safekeeper_api::models::{TimelineCreateRequest, TimelineStatus};
use safekeeper_api::models::TimelineStatus;
use std::error::Error as _;
use utils::{
http::error::HttpErrorBody,
@@ -76,28 +76,6 @@ impl Client {
}
}
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<TimelineStatus> {
let uri = format!(
"{}/v1/tenant/{}/timeline/{}",
self.mgmt_api_endpoint, req.tenant_id, req.timeline_id
);
let resp = self.post(&uri, req).await?;
resp.json().await.map_err(Error::ReceiveBody)
}
pub async fn delete_timeline(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Result<TimelineStatus> {
let uri = format!(
"{}/v1/tenant/{}/timeline/{}",
self.mgmt_api_endpoint, tenant_id, timeline_id
);
let resp = self.request(Method::DELETE, &uri, ()).await?;
resp.json().await.map_err(Error::ReceiveBody)
}
pub async fn timeline_status(
&self,
tenant_id: TenantId,
@@ -129,14 +107,6 @@ impl Client {
self.get(&uri).await
}
async fn post<B: serde::Serialize, U: IntoUrl>(
&self,
uri: U,
body: B,
) -> Result<reqwest::Response> {
self.request(Method::POST, uri, body).await
}
async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
self.request(Method::GET, uri, ()).await
}

View File

@@ -84,12 +84,6 @@ const parseReportJson = async ({ reportJsonUrl, fetch }) => {
} else {
arch = "unknown"
}
let lfcState = ""
if (test.parameters.includes("'with-lfc'")) {
lfcState = "with-lfc"
} else {
lfcState = "without-lfc"
}
// Removing build type and PostgreSQL version from the test name to make it shorter
const testName = test.name.replace(new RegExp(`${buildType}-pg${pgVersion}-?`), "").replace("[]", "")
@@ -97,7 +91,6 @@ const parseReportJson = async ({ reportJsonUrl, fetch }) => {
test.pgVersion = pgVersion
test.buildType = buildType
test.arch = arch
test.lfcState = lfcState
if (test.status === "passed") {
passedTests[pgVersion][testName].push(test)
@@ -164,7 +157,7 @@ const reportSummary = async (params) => {
const links = []
for (const test of tests) {
const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}`
links.push(`[${test.buildType}-${test.arch}-${test.lfcState}](${allureLink})`)
links.push(`[${test.buildType}-${test.arch}](${allureLink})`)
}
summary += `- \`${testName}\`: ${links.join(", ")}\n`
}
@@ -195,7 +188,7 @@ const reportSummary = async (params) => {
const links = []
for (const test of tests) {
const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}/retries`
links.push(`[${test.buildType}-${test.arch}-${test.lfcState}](${allureLink})`)
links.push(`[${test.buildType}-${test.arch}](${allureLink})`)
}
summary += `- \`${testName}\`: ${links.join(", ")}\n`
}

View File

@@ -134,7 +134,7 @@ def ingest_test_result(
if p["name"].startswith("__")
}
arch = parameters.get("arch", "UNKNOWN").strip("'")
lfc = parameters.get("lfc", "without-lfc").strip("'") == "with-lfc"
lfc = parameters.get("lfc", "False") == "True"
build_type, pg_version, unparametrized_name = parse_test_name(test["name"])
labels = {label["name"]: label["value"] for label in test["labels"]}

View File

@@ -523,7 +523,6 @@ class NeonLocalCli(AbstractNeonCli):
remote_ext_config: str | None = None,
pageserver_id: int | None = None,
allow_multiple: bool = False,
create_test_user: bool = False,
basebackup_request_tries: int | None = None,
env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
@@ -545,8 +544,6 @@ class NeonLocalCli(AbstractNeonCli):
args.extend(["--pageserver-id", str(pageserver_id)])
if allow_multiple:
args.extend(["--allow-multiple"])
if create_test_user:
args.extend(["--create-test-user"])
res = self.raw_cli(args, extra_env_vars)
res.check_returncode()

View File

@@ -3918,7 +3918,6 @@ class Endpoint(PgProtocol, LogUtils):
pageserver_id: int | None = None,
safekeepers: list[int] | None = None,
allow_multiple: bool = False,
create_test_user: bool = False,
basebackup_request_tries: int | None = None,
env: dict[str, str] | None = None,
) -> Self:
@@ -3940,7 +3939,6 @@ class Endpoint(PgProtocol, LogUtils):
remote_ext_config=remote_ext_config,
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
create_test_user=create_test_user,
basebackup_request_tries=basebackup_request_tries,
env=env,
)

View File

@@ -121,8 +121,6 @@ def pytest_runtest_makereport(*args, **kwargs):
}.get(os.uname().machine, "UNKNOWN")
arch = os.getenv("RUNNER_ARCH", uname_m)
allure.dynamic.parameter("__arch", arch)
allure.dynamic.parameter(
"__lfc", "with-lfc" if os.getenv("USE_LFC") != "false" else "without-lfc"
)
allure.dynamic.parameter("__lfc", os.getenv("USE_LFC") != "false")
yield

View File

@@ -1,8 +1,6 @@
from __future__ import annotations
import json
import math
import random
import time
from enum import StrEnum
@@ -130,6 +128,11 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
}
env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
env.pageserver.allowed_errors.append(
r".*failed to acquire partition lock during gc-compaction.*"
)
env.pageserver.allowed_errors.append(r".*repartition() called concurrently.*")
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
@@ -144,14 +147,11 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
log.info("Writing initial data ...")
workload.write_rows(row_count, env.pageserver.id)
ps_http.timeline_gc(
tenant_id, timeline_id, None
) # Force refresh gc info to have gc_cutoff generated
child_workloads: list[Workload] = []
for i in range(1, churn_rounds + 1):
log.info(f"Running churn round {i}/{churn_rounds} ...")
if i % 10 == 0:
log.info(f"Running churn round {i}/{churn_rounds} ...")
if i % 10 == 5 and with_branches == "with_branches":
branch_name = f"child-{i}"
branch_timeline_id = env.create_branch(branch_name)
@@ -172,241 +172,12 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
"sub_compaction_max_job_size_mb": 16,
},
)
# do not wait for upload so that we can see if gc_compaction works well with data being ingested
workload.churn_rows(row_count, env.pageserver.id, upload=False)
time.sleep(1)
workload.validate(env.pageserver.id)
def compaction_finished():
queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))
assert queue_depth == 0
wait_until(compaction_finished, timeout=60)
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
env.pageserver.assert_log_contains(
"scheduled_compact_timeline.*picked .* layers for compaction"
)
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
for child_workload in child_workloads:
log.info(f"Validating at branch {child_workload.branch_name}")
child_workload.validate(env.pageserver.id)
# Run a legacy compaction+gc to ensure gc-compaction can coexist with legacy compaction.
ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)
ps_http.timeline_gc(tenant_id, timeline_id, None)
@pytest.mark.parametrize(
"compaction_mode",
["before_restart", "after_restart"],
)
def test_pageserver_gc_compaction_idempotent(
neon_env_builder: NeonEnvBuilder, compaction_mode: str
):
"""
Do gc-compaction twice without writing any new data and see if anything breaks.
We run this test in two modes:
- before_restart: run two gc-compactions before pageserver restart
- after_restart: run one gc-compaction before and one after pageserver restart
"""
SMOKE_CONF = {
# Run both gc and gc-compaction.
"gc_period": "5s",
"compaction_period": "5s",
# No PiTR interval and small GC horizon
"pitr_interval": "0s",
"gc_horizon": 1024,
"lsn_lease_length": "0s",
}
env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# Only in testing mode: the warning is expected because we rewrite a layer file of different generations.
# We could potentially patch the sanity-check code to not emit the warning in the future.
env.pageserver.allowed_errors.append(".*was unlinked but was not dangling.*")
row_count = 10000
ps_http = env.pageserver.http_client()
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageserver.id)
workload.write_rows(row_count, env.pageserver.id)
child_workloads: list[Workload] = []
def compaction_finished():
queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))
assert queue_depth == 0
workload.churn_rows(row_count, env.pageserver.id)
env.create_branch("child_branch") # so that we have a retain_lsn
workload.churn_rows(row_count, env.pageserver.id)
# compact 3 times if mode is before_restart
n_compactions = 3 if compaction_mode == "before_restart" else 1
for _ in range(n_compactions):
# Force refresh gc info to have gc_cutoff generated
ps_http.timeline_gc(tenant_id, timeline_id, None)
ps_http.timeline_compact(
tenant_id,
timeline_id,
enhanced_gc_bottom_most_compaction=True,
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
wait_until(compaction_finished, timeout=60)
if compaction_mode == "after_restart":
env.pageserver.restart(True)
ps_http.timeline_gc(
tenant_id, timeline_id, None
) # Force refresh gc info to have gc_cutoff generated
for _ in range(3):
ps_http.timeline_compact(
tenant_id,
timeline_id,
enhanced_gc_bottom_most_compaction=True,
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
wait_until(compaction_finished, timeout=60)
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
env.pageserver.assert_log_contains(
"scheduled_compact_timeline.*picked .* layers for compaction"
)
# ensure we hit the duplicated layer key warning at least once: we did two compactions consecutively,
# and the second one should have hit the duplicated layer key warning.
if compaction_mode == "before_restart":
env.pageserver.assert_log_contains("duplicated layer key in the same generation")
else:
env.pageserver.assert_log_contains("same layer key at different generation")
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
for child_workload in child_workloads:
log.info(f"Validating at branch {child_workload.branch_name}")
child_workload.validate(env.pageserver.id)
# Run a legacy compaction+gc to ensure gc-compaction can coexist with legacy compaction.
ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)
ps_http.timeline_gc(tenant_id, timeline_id, None)
@skip_in_debug_build("only run with release build")
def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
"""
Force interrupt a gc-compaction and see if anything breaks.
"""
SMOKE_CONF = {
# Run both gc and gc-compaction.
"gc_period": "5s",
"compaction_period": "5s",
# No PiTR interval and small GC horizon
"pitr_interval": "0s",
"gc_horizon": "1024",
"lsn_lease_length": "0s",
}
env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# Only in testing mode: the warning is expected because we rewrite a layer file of different generations.
# We could potentially patch the sanity-check code to not emit the warning in the future.
env.pageserver.allowed_errors.append(".*was unlinked but was not dangling.*")
row_count = 10000
churn_rounds = 20
ps_http = env.pageserver.http_client()
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageserver.id)
log.info("Writing initial data ...")
workload.write_rows(row_count, env.pageserver.id)
def compaction_finished():
queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))
assert queue_depth == 0
expected_compaction_time_seconds = 5.0
ps_http.timeline_gc(
tenant_id, timeline_id, None
) # Force refresh gc info to have gc_cutoff generated
for i in range(1, churn_rounds + 1):
log.info(f"Running churn round {i}/{churn_rounds} ...")
workload.churn_rows(row_count, env.pageserver.id)
ps_http.timeline_compact(
tenant_id,
timeline_id,
enhanced_gc_bottom_most_compaction=True,
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
# sleep random seconds between 0 and max(compaction_time); if the result is 0, wait until the compaction is complete
# This would hopefully trigger the restart at different periods of the compaction:
# - while we are doing the compaction
# - while we finished the compaction but not yet uploaded the metadata
# - after we uploaded the metadata
time_to_sleep = random.randint(0, max(5, math.ceil(expected_compaction_time_seconds)))
if time_to_sleep == 0 or i == 1:
start = time.time()
wait_until(compaction_finished, timeout=60)
end = time.time()
expected_compaction_time_seconds = end - start
log.info(
f"expected_compaction_time_seconds updated to {expected_compaction_time_seconds} seconds"
)
else:
time.sleep(time_to_sleep)
env.pageserver.restart(True)
ps_http.timeline_gc(
tenant_id, timeline_id, None
) # Force refresh gc info to have gc_cutoff generated
ps_http.timeline_compact(
tenant_id,
timeline_id,
enhanced_gc_bottom_most_compaction=True,
body={
"scheduled": True,
"sub_compaction": True,
"compact_key_range": {
"start": "000000000000000000000000000000000000",
"end": "030000000000000000000000000000000000",
},
"sub_compaction_max_job_size_mb": 16,
},
)
workload.validate(env.pageserver.id)
def compaction_finished():
queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id))
assert queue_depth == 0
wait_until(compaction_finished, timeout=60)
@@ -417,6 +188,9 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)
for child_workload in child_workloads:
log.info(f"Validating at branch {child_workload.branch_name}")
child_workload.validate(env.pageserver.id)
# Run a legacy compaction+gc to ensure gc-compaction can coexist with legacy compaction.
ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True)
@@ -463,9 +237,7 @@ def test_sharding_compaction(
"pitr_interval": "0s",
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"gc_horizon": f"{128 * 1024}",
"compaction_period": "0s",
"lsn_lease_length": "0s",
# create image layers eagerly: we want to exercise image layer creation in this test.
"image_creation_threshold": "1",
"image_layer_creation_check_threshold": 0,
@@ -540,8 +312,6 @@ def test_sharding_compaction(
for shard in env.storage_controller.locate(tenant_id):
pageserver = env.get_pageserver(shard["node_id"])
tenant_shard_id = shard["shard_id"]
# Force refresh gc info to have gc_cutoff generated
pageserver.http_client().timeline_gc(tenant_shard_id, timeline_id, None)
pageserver.http_client().timeline_compact(
tenant_shard_id,
timeline_id,

View File

@@ -271,14 +271,8 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
ps.http_client().timeline_compact(
shard, timeline_id, force_image_layer_creation=True, wait_until_uploaded=True
)
# Add some WAL so that we don't gc at the latest remote consistent lsn
workload.churn_rows(10)
# Now gc the old stuff away
for shard in shards:
ps = env.get_tenant_pageserver(shard)
assert ps is not None
# Add some WAL so that we don't gc at the latest remote consistent lsn
workload.churn_rows(1)
ps.http_client().timeline_gc(shard, timeline_id, 0)
# We will use a min_age_secs=1 threshold for deletion, let it pass

View File

@@ -1,242 +0,0 @@
from __future__ import annotations
import time
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, logical_replication_sync
from fixtures.utils import query_scalar, wait_until
# This test checks that branching of timeline with logical subscriptions
# does not affect logical replication for parent.
# Endpoint on a new branch will drop all existing subscriptions at the start,
# so it will not receive any changes.
# If needed, user can create new subscriptions on the child branch.
def test_subscriber_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
env.create_branch("publisher")
pub = env.endpoints.create("publisher")
pub.respec(
skip_pg_catalog_updates=False,
create_test_user=True,
)
pub.start(create_test_user=True)
env.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
# Pass create_test_user flag to get properly filled spec.users and spec.databases fields.
#
# This test checks the per-database operations that happen at compute start
# and these operations are applied to the databases that are present in the spec.
sub.respec(
skip_pg_catalog_updates=False,
create_test_user=True,
)
sub.start(create_test_user=True)
pub.wait_for_migrations()
sub.wait_for_migrations()
n_records = 1000
def check_that_changes_propagated():
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == n_records
def insert_data(pub, start):
with pub.cursor(dbname="neondb", user="test", password="pubtestpwd") as pcur:
for i in range(start, start + n_records):
pcur.execute("INSERT into t values (%s,random()*100000)", (i,))
# create_test_user creates a user without password
# but psycopg2 execute() requires a password
with sub.cursor() as scur:
scur.execute("ALTER USER test WITH PASSWORD 'testpwd'")
with pub.cursor() as pcur:
# Create a test user to avoid using superuser
pcur.execute("ALTER USER test WITH PASSWORD 'pubtestpwd'")
# If we don't do this, creating the subscription will fail
pub.edit_hba(["host all test 0.0.0.0/0 md5"])
with pub.cursor(dbname="neondb", user="test", password="pubtestpwd") as pcur:
pcur.execute("CREATE TABLE t (pk integer primary key, sk integer)")
pcur.execute("CREATE PUBLICATION pub FOR TABLE t")
with sub.cursor(dbname="neondb", user="test", password="testpwd") as scur:
scur.execute("CREATE TABLE t (pk integer primary key, sk integer)")
pub_conn = (
f"host=localhost port={pub.pg_port} dbname=neondb user=test password=pubtestpwd"
)
query = f"CREATE SUBSCRIPTION sub CONNECTION '{pub_conn}' PUBLICATION pub"
scur.execute(query)
time.sleep(2) # let initial table sync complete
insert_data(pub, 0)
with sub.cursor(dbname="neondb", user="test", password="testpwd") as scur:
wait_until(check_that_changes_propagated)
latest_end_lsn = query_scalar(
scur, "select latest_end_lsn from pg_catalog.pg_stat_subscription; "
)
last_insert_lsn = query_scalar(scur, "select pg_current_wal_insert_lsn();")
log.info(f"latest_end_lsn = {latest_end_lsn}")
log.info(f"last_insert_lsn = {last_insert_lsn}")
# stop the parent subscriber so that it doesn't interfere with the test
sub.stop()
# 1. good scenario:
# create subscriber_child_1
# it will not get changes from publisher, because drop_subscriptions_before_start is set to True
sub_child_1_timeline_id = env.create_branch(
"subscriber_child_1",
ancestor_branch_name="subscriber",
ancestor_start_lsn=last_insert_lsn,
)
sub_child_1 = env.endpoints.create("subscriber_child_1")
# Pass drop_subscriptions_before_start flag
sub_child_1.respec(
skip_pg_catalog_updates=False,
create_test_user=True,
drop_subscriptions_before_start=True,
)
sub_child_1.start(create_test_user=True)
# ensure that subscriber_child_1 sees all the data
with sub_child_1.cursor(dbname="neondb", user="test", password="testpwd") as scur:
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == n_records
# ensure that there are no subscriptions in this database
scur.execute("SELECT 1 FROM pg_catalog.pg_subscription WHERE subname = 'sub'")
assert len(scur.fetchall()) == 0
# ensure that drop_subscriptions_done happened on this timeline
with sub_child_1.cursor() as scur_postgres:
scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done")
res = scur_postgres.fetchall()
assert len(res) == 1
assert str(sub_child_1_timeline_id) == res[0][0]
old_n_records = n_records
# insert more data on publisher
insert_data(pub, n_records)
n_records += n_records
pcur.execute("SELECT count(*) FROM t")
res = pcur.fetchall()
assert res[0][0] == n_records
# ensure that subscriber_child_1 doesn't see the new data
with sub_child_1.cursor(dbname="neondb", user="test", password="testpwd") as scur:
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == old_n_records
# reenable logical replication on subscriber_child_1
# using new publication
# ensure that new publication works as expected
with sub_child_1.cursor(dbname="neondb", user="test", password="testpwd") as scur:
scur.execute("TRUNCATE t")
# create new subscription
# with new pub name
pcur.execute("CREATE PUBLICATION pub_new FOR TABLE t")
query = f"CREATE SUBSCRIPTION sub_new CONNECTION '{pub_conn}' PUBLICATION pub_new"
scur.execute(query)
wait_until(check_that_changes_propagated)
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == n_records
# ensure that new publication works as expected after compute restart
# first restart with drop_subscriptions_before_start=True
# to emulate the case when compute restarts within the VM with stale spec
sub_child_1.stop()
sub_child_1.respec(
skip_pg_catalog_updates=False,
create_test_user=True,
drop_subscriptions_before_start=True,
)
sub_child_1.start(create_test_user=True)
with sub_child_1.cursor(dbname="neondb", user="test", password="testpwd") as scur:
# ensure that even though the flag is set, we didn't drop new subscription
scur.execute("SELECT 1 FROM pg_catalog.pg_subscription WHERE subname = 'sub_new'")
assert len(scur.fetchall()) == 1
# ensure that drop_subscriptions_done happened on this timeline
with sub_child_1.cursor() as scur_postgres:
scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done")
res = scur_postgres.fetchall()
assert len(res) == 1
assert str(sub_child_1_timeline_id) == res[0][0]
sub_child_1.stop()
sub_child_1.respec(
skip_pg_catalog_updates=False,
create_test_user=True,
drop_subscriptions_before_start=False,
)
sub_child_1.start(create_test_user=True)
# insert more data on publisher
insert_data(pub, n_records)
n_records += n_records
with sub_child_1.cursor(dbname="neondb", user="test", password="testpwd") as scur:
# ensure that there is a subscriptions in this database
scur.execute("SELECT 1 FROM pg_catalog.pg_subscription WHERE subname = 'sub_new'")
assert len(scur.fetchall()) == 1
wait_until(check_that_changes_propagated)
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == n_records
# ensure that drop_subscriptions_done happened on this timeline
with sub_child_1.cursor() as scur_postgres:
scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done")
res = scur_postgres.fetchall()
assert len(res) == 1
assert str(sub_child_1_timeline_id) == res[0][0]
# wake the sub and ensure that it catches up with the new data
sub.start(create_test_user=True)
with sub.cursor(dbname="neondb", user="test", password="testpwd") as scur:
logical_replication_sync(sub, pub)
wait_until(check_that_changes_propagated)
scur.execute("SELECT count(*) FROM t")
res = scur.fetchall()
assert res[0][0] == n_records
# test that we can create a branch of a branch
sub_child_2_timeline_id = env.create_branch(
"subscriber_child_2",
ancestor_branch_name="subscriber_child_1",
)
sub_child_2 = env.endpoints.create("subscriber_child_2")
# Pass drop_subscriptions_before_start flag
sub_child_2.respec(
skip_pg_catalog_updates=False,
drop_subscriptions_before_start=True,
)
sub_child_2.start(create_test_user=True)
# ensure that subscriber_child_2 does not inherit subscription from child_1
with sub_child_2.cursor(dbname="neondb", user="test", password="testpwd") as scur:
# ensure that there are no subscriptions in this database
scur.execute("SELECT count(*) FROM pg_catalog.pg_subscription")
res = scur.fetchall()
assert res[0][0] == 0
# ensure that drop_subscriptions_done happened on this timeline
with sub_child_2.cursor() as scur_postgres:
scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done")
res = scur_postgres.fetchall()
assert len(res) == 1
assert str(sub_child_2_timeline_id) == res[0][0]

View File

@@ -607,7 +607,7 @@ def test_timeline_ancestor_detach_idempotent_success(
if shards_after > 1:
# FIXME: should this be in the neon_env_builder.init_start?
env.storage_controller.reconcile_until_idle(timeout_secs=120)
env.storage_controller.reconcile_until_idle()
client = env.storage_controller.pageserver_api()
else:
client = env.pageserver.http_client()
@@ -636,7 +636,7 @@ def test_timeline_ancestor_detach_idempotent_success(
# Do a shard split
# This is a reproducer for https://github.com/neondatabase/neon/issues/9667
env.storage_controller.tenant_shard_split(env.initial_tenant, shards_after)
env.storage_controller.reconcile_until_idle(timeout_secs=120)
env.storage_controller.reconcile_until_idle()
first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch)
assert set(first_reparenting_response) == {reparented1, reparented2}

View File

@@ -1,18 +1,18 @@
{
"v17": [
"17.2",
"4276717f6e91023e504de355f4f21d4824074de8"
"a8dd6e779dde907778006adb436b557ad652fb97"
],
"v16": [
"16.6",
"061d56377961ba56998e41b7d5d5e975919ad301"
"d674efd776f59d78e8fa1535bd2f95c3e6984fca"
],
"v15": [
"15.10",
"935292e883298187f112db6e9c7f765037ddcf64"
"dd0b28d6fbad39e227f3b77296fcca879af8b3a9"
],
"v14": [
"14.15",
"5f3b3afdd7c24b4a0fd63ecb3288fab472fcc633"
"46082f20884f087a2d974b33ac65d63af26142bd"
]
}