diff --git a/Cargo.lock b/Cargo.lock index 4c464c62b8..cbc54e84f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2491,6 +2491,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + [[package]] name = "gettid" version = "0.1.3" @@ -5259,6 +5271,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.7.3" @@ -5283,6 +5301,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -5303,6 +5331,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", +] + [[package]] name = "rand_core" version = "0.5.1" @@ -5321,6 +5359,15 @@ dependencies = [ "getrandom 0.2.11", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.2", +] + [[package]] name = "rand_distr" version = "0.4.3" @@ -7197,11 +7244,11 @@ dependencies = [ [[package]] name = "tokio-epoll-uring" version = "0.1.0" -source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497" dependencies = [ "futures", "nix 0.26.4", "once_cell", + "rand 0.9.1", "scopeguard", "thiserror 1.0.69", "tokio", @@ -7808,7 +7855,6 @@ dependencies = [ [[package]] name = "uring-common" version = "0.1.0" -source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497" dependencies = [ "bytes", "io-uring", @@ -8050,6 +8096,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasite" version = "0.1.0" @@ -8407,6 +8462,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "workspace_hack" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 1c203af9e0..68e6c04448 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,7 +187,8 @@ thiserror = "1.0" tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tokio = { version = "1.43.1", features = ["macros"] } -tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } +#tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } +tokio-epoll-uring = { path = "../tokio-epoll-uring/tokio-epoll-uring" } tokio-io-timeout = "1.2.0" tokio-postgres-rustls = "0.12.0" tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]} diff --git a/libs/utils/src/signals.rs b/libs/utils/src/signals.rs index 426bb65916..1c1e097127 100644 --- a/libs/utils/src/signals.rs +++ b/libs/utils/src/signals.rs @@ -46,6 +46,7 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) { let mut sigint = signal(SignalKind::interrupt()).unwrap(); let mut sigterm = signal(SignalKind::terminate()).unwrap(); let mut sigquit = signal(SignalKind::quit()).unwrap(); + let mut sigusr1 = signal(SignalKind::user_defined1()).unwrap(); loop { let signal = tokio::select! { @@ -55,13 +56,17 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) { } _ = sigint.recv() => "SIGINT", _ = sigterm.recv() => "SIGTERM", + _ = sigusr1.recv() => { + info!("Got signal SIGUSR1"); + continue; + } }; if !token.is_cancelled() { - info!("Got signal {signal}. Terminating gracefully in fast shutdown mode."); + info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Terminating gracefully in fast shutdown mode."); token.cancel(); } else { - info!("Got signal {signal}. Already shutting down."); + info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Already shutting down."); } } } diff --git a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs index ad17405b64..169f97e099 100644 --- a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs +++ b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs @@ -76,12 +76,13 @@ pub async fn thread_local_system() -> Handle { ) .await; let per_system_metrics = metrics::THREAD_LOCAL_METRICS_STORAGE.register_system(inner.thread_local_state_id); - let res = System::launch_with_metrics(per_system_metrics) + info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "launching system"); + let res = System::launch_with_metrics(per_system_metrics, usize::try_from(inner.thread_local_state_id).unwrap()) // this might move us to another executor thread => loop outside the get_or_try_init, not inside it .await; match res { Ok(system) => { - info!("successfully launched system"); + info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "successfully launched system"); metrics::THREAD_LOCAL_LAUNCH_SUCCESSES.inc(); Ok(system) } diff --git a/test_runner/regress/test_pageserver_catchup.py b/test_runner/regress/test_pageserver_catchup.py index f3d7cd5bdb..a05315c912 100644 --- a/test_runner/regress/test_pageserver_catchup.py +++ b/test_runner/regress/test_pageserver_catchup.py @@ -2,6 +2,8 @@ from __future__ import annotations from typing import TYPE_CHECKING +import pytest + if TYPE_CHECKING: from fixtures.neon_fixtures import NeonEnvBuilder @@ -10,6 +12,7 @@ if TYPE_CHECKING: # while initial compute node is down and pageserver is lagging behind safekeepers. # Ensure that basebackup after restart of all components is correct # and new compute node contains all data. +@pytest.mark.repeat(1000) def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start()