This commit is contained in:
Christian Schwarz
2025-05-02 13:45:59 +02:00
parent 1b789e8d7c
commit f2392e11e8
5 changed files with 81 additions and 7 deletions

68
Cargo.lock generated
View File

@@ -2491,6 +2491,18 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
]
[[package]]
name = "gettid"
version = "0.1.3"
@@ -5259,6 +5271,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
[[package]]
name = "rand"
version = "0.7.3"
@@ -5283,6 +5301,16 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.3",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
@@ -5303,6 +5331,16 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core 0.9.3",
]
[[package]]
name = "rand_core"
version = "0.5.1"
@@ -5321,6 +5359,15 @@ dependencies = [
"getrandom 0.2.11",
]
[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom 0.3.2",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
@@ -7197,11 +7244,11 @@ dependencies = [
[[package]]
name = "tokio-epoll-uring"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
dependencies = [
"futures",
"nix 0.26.4",
"once_cell",
"rand 0.9.1",
"scopeguard",
"thiserror 1.0.69",
"tokio",
@@ -7808,7 +7855,6 @@ dependencies = [
[[package]]
name = "uring-common"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
dependencies = [
"bytes",
"io-uring",
@@ -8050,6 +8096,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasite"
version = "0.1.0"
@@ -8407,6 +8462,15 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags 2.8.0",
]
[[package]]
name = "workspace_hack"
version = "0.1.0"

View File

@@ -187,7 +187,8 @@ thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.43.1", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
#tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-epoll-uring = { path = "../tokio-epoll-uring/tokio-epoll-uring" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}

View File

@@ -46,6 +46,7 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) {
let mut sigint = signal(SignalKind::interrupt()).unwrap();
let mut sigterm = signal(SignalKind::terminate()).unwrap();
let mut sigquit = signal(SignalKind::quit()).unwrap();
let mut sigusr1 = signal(SignalKind::user_defined1()).unwrap();
loop {
let signal = tokio::select! {
@@ -55,13 +56,17 @@ pub async fn signal_handler(token: tokio_util::sync::CancellationToken) {
}
_ = sigint.recv() => "SIGINT",
_ = sigterm.recv() => "SIGTERM",
_ = sigusr1.recv() => {
info!("Got signal SIGUSR1");
continue;
}
};
if !token.is_cancelled() {
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode.");
info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Terminating gracefully in fast shutdown mode.");
token.cancel();
} else {
info!("Got signal {signal}. Already shutting down.");
info!(thread_id=?std::thread::current().id(), "Got signal {signal}. Already shutting down.");
}
}
}

View File

@@ -76,12 +76,13 @@ pub async fn thread_local_system() -> Handle {
)
.await;
let per_system_metrics = metrics::THREAD_LOCAL_METRICS_STORAGE.register_system(inner.thread_local_state_id);
let res = System::launch_with_metrics(per_system_metrics)
info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "launching system");
let res = System::launch_with_metrics(per_system_metrics, usize::try_from(inner.thread_local_state_id).unwrap())
// this might move us to another executor thread => loop outside the get_or_try_init, not inside it
.await;
match res {
Ok(system) => {
info!("successfully launched system");
info!(thread_id=?std::thread::current().id(), thread_name=?std::thread::current().name(), "successfully launched system");
metrics::THREAD_LOCAL_LAUNCH_SUCCESSES.inc();
Ok(system)
}

View File

@@ -2,6 +2,8 @@ from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder
@@ -10,6 +12,7 @@ if TYPE_CHECKING:
# while initial compute node is down and pageserver is lagging behind safekeepers.
# Ensure that basebackup after restart of all components is correct
# and new compute node contains all data.
@pytest.mark.repeat(1000)
def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()