diff --git a/Cargo.lock b/Cargo.lock index 5e7fce3e8d..373a30cca2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,17 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "ahash" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +dependencies = [ + "getrandom 0.2.9", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.3" @@ -185,6 +196,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "asn1-rs" version = "0.5.2" @@ -386,6 +403,17 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -474,7 +502,7 @@ dependencies = [ "http", "percent-encoding", "tracing", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -848,7 +876,7 @@ dependencies = [ "log", "paste", "pin-project", - "quick-xml", + "quick-xml 0.30.0", "rand 0.8.5", "reqwest", "rustc_version 0.4.0", @@ -856,7 +884,7 @@ dependencies = [ "serde_json", "time 0.3.21", "url", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -877,7 +905,7 @@ dependencies = [ "time 0.3.21", "tz-rs", "url", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -899,7 +927,7 @@ dependencies = [ "sha2 0.10.6", "time 0.3.21", "url", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -919,7 +947,7 @@ dependencies = [ "serde_json", "time 0.3.21", "url", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -1067,6 +1095,12 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +[[package]] +name = "bytemuck" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" + [[package]] name = "byteorder" version = "1.4.3" @@ -1452,6 +1486,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "cpp_demangle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f" +dependencies = [ + "cfg-if", +] + [[package]] name = "cpufeatures" version = "0.2.9" @@ -1678,6 +1721,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" +[[package]] +name = "debugid" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6ee87af31d84ef885378aebca32be3d682b0e0dc119d5b4860a2c5bb5046730" +dependencies = [ + "uuid 0.8.2", +] + [[package]] name = "debugid" version = "0.8.0" @@ -1685,7 +1737,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" dependencies = [ "serde", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -1885,6 +1937,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -2196,7 +2260,7 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash", + "ahash 0.8.3", ] [[package]] @@ -2232,6 +2296,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.3" @@ -2508,6 +2581,24 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" +[[package]] +name = "inferno" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3886428c6400486522cf44b8626e7b94ad794c14390290f2a274dcf728a58f" +dependencies = [ + "ahash 0.7.7", + "atty", + "indexmap", + "itoa", + "lazy_static", + "log", + "num-format", + "quick-xml 0.22.0", + "rgb", + "str_stack", +] + [[package]] name = "inotify" version = "0.9.6" @@ -2559,7 +2650,7 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.3", "libc", "windows-sys 0.48.0", ] @@ -2576,7 +2667,7 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.3", "io-lifetimes", "rustix 0.37.25", "windows-sys 0.48.0", @@ -2759,6 +2850,24 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "memoffset" version = "0.7.1" @@ -2864,6 +2973,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nix" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c" +dependencies = [ + "bitflags", + "cc", + "cfg-if", + "libc", + "memoffset 0.6.5", +] + [[package]] name = "nix" version = "0.25.1" @@ -2938,6 +3060,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -2963,7 +3095,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.3", "libc", ] @@ -3256,6 +3388,7 @@ dependencies = [ "postgres_backend", "postgres_connection", "postgres_ffi", + "pprof", "pq_proto", "rand 0.8.5", "regex", @@ -3663,6 +3796,25 @@ dependencies = [ "workspace_hack", ] +[[package]] +name = "pprof" +version = "0.6.1" +source = "git+https://github.com/neondatabase/pprof-rs.git?branch=wallclock-profiling#4e011a87d22fb4d21d15cc38bce81ff1c75e4bc9" +dependencies = [ + "backtrace", + "cfg-if", + "findshlibs", + "inferno", + "lazy_static", + "libc", + "log", + "nix 0.23.2", + "parking_lot 0.11.2", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -3869,12 +4021,21 @@ dependencies = [ "tracing-utils", "url", "utils", - "uuid", + "uuid 1.3.3", "webpki-roots 0.25.2", "workspace_hack", "x509-parser", ] +[[package]] +name = "quick-xml" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b" +dependencies = [ + "memchr", +] + [[package]] name = "quick-xml" version = "0.30.0" @@ -4205,6 +4366,15 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "rgb" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -4667,7 +4837,7 @@ version = "0.31.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd" dependencies = [ - "debugid", + "debugid 0.8.0", "getrandom 0.2.9", "hex", "serde", @@ -4675,7 +4845,7 @@ dependencies = [ "thiserror", "time 0.3.21", "url", - "uuid", + "uuid 1.3.3", ] [[package]] @@ -5050,6 +5220,12 @@ dependencies = [ "workspace_hack", ] +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "stringprep" version = "0.1.2" @@ -5097,6 +5273,29 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" +[[package]] +name = "symbolic-common" +version = "8.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540" +dependencies = [ + "debugid 0.7.3", + "memmap2", + "stable_deref_trait", + "uuid 0.8.2", +] + +[[package]] +name = "symbolic-demangle" +version = "8.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4564ca7b4e6eb14105aa8bbbce26e080f6b5d9c4373e67167ab31f7b86443750" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" @@ -5998,10 +6197,16 @@ dependencies = [ "tracing-error", "tracing-subscriber", "url", - "uuid", + "uuid 1.3.3", "workspace_hack", ] +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" + [[package]] name = "uuid" version = "1.3.3" @@ -6527,7 +6732,7 @@ dependencies = [ "tracing-core", "tungstenite", "url", - "uuid", + "uuid 1.3.3", ] [[package]] diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 3eb01003df..ab6783f918 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -9,6 +9,7 @@ default = [] # Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro, # which adds some runtime cost to run tests on outage conditions testing = ["fail/failpoints"] +profiling = ["pprof"] [dependencies] anyhow.workspace = true @@ -82,6 +83,7 @@ enum-map.workspace = true enumset.workspace = true strum.workspace = true strum_macros.workspace = true +pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true } [dev-dependencies] criterion.workspace = true diff --git a/pageserver/flamegraph.svg b/pageserver/flamegraph.svg new file mode 100644 index 0000000000..2fc62747c2 --- /dev/null +++ b/pageserver/flamegraph.svg @@ -0,0 +1,414 @@ +Flame Graph Reset ZoomSearch core::cmp::impls::<impl core::cmp::PartialEq<&B> for &A>::eq (15 samples, 1.94%)core..alloc::vec::partial_eq::<impl core::cmp::PartialEq<alloc::vec::Vec<U,A2>> for alloc::vec::Vec<T,A1>>::eq (9 samples, 1.16%)al..core::slice::cmp::<impl core::cmp::PartialEq<[B]> for [A]>::eq (9 samples, 1.16%)co..<[A] as core::slice::cmp::SlicePartialEq<B>>::equal (9 samples, 1.16%)<[..core::ptr::drop_in_place<alloc::vec::Vec<u8>> (6 samples, 0.78%)core::ptr::drop_in_place<alloc::raw_vec::RawVec<u8>> (6 samples, 0.78%)<alloc::raw_vec::RawVec<T,A> as core::ops::drop::Drop>::drop (6 samples, 0.78%)<alloc::alloc::Global as core::alloc::Allocator>::deallocate (6 samples, 0.78%)alloc::alloc::dealloc (6 samples, 0.78%)__libc_free (6 samples, 0.78%)core::ptr::drop_in_place<pageserver::tenant::blob_io::<impl pageserver::tenant::block_io::BlockCursor>::read_blob_into_buf::{{closure}}> (1 samples, 0.13%)alloc::vec::Vec<T,A>::extend_from_slice (7 samples, 0.91%)a..<alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<&T,core::slice::iter::Iter<T>>>::spec_extend (7 samples, 0.91%)<..alloc::vec::Vec<T,A>::append_elements (7 samples, 0.91%)a..core::intrinsics::copy_nonoverlapping (7 samples, 0.91%)c..alloc::raw_vec::finish_grow (13 samples, 1.68%)all..__libc_malloc (9 samples, 1.16%)__..alloc::vec::Vec<T,A>::reserve (14 samples, 1.81%)allo..alloc::raw_vec::RawVec<T,A>::reserve (14 samples, 1.81%)allo..alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle (14 samples, 1.81%)allo..alloc::raw_vec::RawVec<T,A>::grow_amortized (14 samples, 1.81%)allo..core::alloc::layout::Layout::array (1 samples, 0.13%)core::alloc::layout::Layout::array::inner (1 samples, 0.13%)core::mem::drop (6 samples, 0.78%)core::ptr::drop_in_place<alloc::sync::Weak<pageserver::page_cache::PinnedSlotsPermit,&alloc::alloc::Global>> (6 samples, 0.78%)<alloc::sync::Weak<T,A> as core::ops::drop::Drop>::drop (6 samples, 0.78%)core::sync::atomic::AtomicUsize::fetch_sub (6 samples, 0.78%)core::sync::atomic::atomic_sub (6 samples, 0.78%)<tokio::sync::semaphore::OwnedSemaphorePermit as core::ops::drop::Drop>::drop (14 samples, 1.81%)<tok..core::ptr::drop_in_place<alloc::sync::Arc<tokio::sync::semaphore::Semaphore>> (8 samples, 1.03%)c..<alloc::sync::Arc<T,A> as core::ops::drop::Drop>::drop (8 samples, 1.03%)<..core::sync::atomic::AtomicUsize::fetch_sub (8 samples, 1.03%)c..core::sync::atomic::atomic_sub (8 samples, 1.03%)c..core::ptr::drop_in_place<alloc::sync::Arc<pageserver::page_cache::PinnedSlotsPermit>> (40 samples, 5.17%)core::ptr::drop_..<alloc::sync::Arc<T,A> as core::ops::drop::Drop>::drop (40 samples, 5.17%)<alloc::sync::Ar..alloc::sync::Arc<T,A>::drop_slow (40 samples, 5.17%)alloc::sync::Arc..core::ptr::drop_in_place<pageserver::page_cache::PinnedSlotsPermit> (34 samples, 4.40%)core::ptr::dr..core::ptr::drop_in_place<tokio::sync::semaphore::OwnedSemaphorePermit> (34 samples, 4.40%)core::ptr::dr..tokio::sync::batch_semaphore::Semaphore::add_permits_locked (11 samples, 1.42%)tok..tokio::sync::batch_semaphore::Semaphore::add_permits_locked (7 samples, 0.91%)t..core::ptr::drop_in_place<pageserver::tenant::block_io::BlockLease> (54 samples, 6.99%)core::ptr::drop_in_pla..core::ptr::drop_in_place<pageserver::page_cache::PageReadGuard> (54 samples, 6.99%)core::ptr::drop_in_pla..core::ptr::drop_in_place<tokio::sync::rwlock::read_guard::RwLockReadGuard<pageserver::page_cache::SlotInner>> (14 samples, 1.81%)core..<tokio::sync::rwlock::read_guard::RwLockReadGuard<T> as core::ops::drop::Drop>::drop (14 samples, 1.81%)<tok..tokio::sync::batch_semaphore::Semaphore::release (7 samples, 0.91%)t..core::ptr::drop_in_place<pageserver::tenant::block_io::BlockReaderRef::read_blk::{{closure}}> (2 samples, 0.26%)<core::result::Result<T,E> as core::ops::try_trait::Try>::branch (19 samples, 2.46%)<core:..core::ptr::drop_in_place<pageserver::page_cache::PageCache::lock_for_read::{{closure}}> (2 samples, 0.26%)<once_cell::sync::Lazy<T,F> as core::ops::deref::Deref>::deref (1 samples, 0.13%)once_cell::sync::Lazy<T,F>::force (1 samples, 0.13%)once_cell::sync::OnceCell<T>::get_or_init (1 samples, 0.13%)once_cell::sync::OnceCell<T>::get_or_try_init (1 samples, 0.13%)once_cell::imp::OnceCell<T>::initialize (1 samples, 0.13%)once_cell::imp::initialize_or_wait (1 samples, 0.13%)once_cell::imp::OnceCell<T>::initialize::{{closure}} (1 samples, 0.13%)once_cell::sync::OnceCell<T>::get_or_init::{{closure}} (1 samples, 0.13%)once_cell::sync::Lazy<T,F>::force::{{closure}} (1 samples, 0.13%)core::ops::function::FnOnce::call_once (1 samples, 0.13%)core::ops::function::FnOnce::call_once (1 samples, 0.13%)pageserver::metrics::PAGE_CACHE::{{closure}} (1 samples, 0.13%)core::array::from_fn (1 samples, 0.13%)core::array::try_from_fn (1 samples, 0.13%)core::array::try_from_fn_erased (1 samples, 0.13%)core::ops::try_trait::NeverShortCircuit<T>::wrap_mut_1::{{closure}} (1 samples, 0.13%)pageserver::metrics::PAGE_CACHE::{{closure}}::{{closure}} (1 samples, 0.13%)core::array::from_fn (1 samples, 0.13%)core::array::try_from_fn (1 samples, 0.13%)core::array::try_from_fn_erased (1 samples, 0.13%)core::ops::try_trait::NeverShortCircuit<T>::wrap_mut_1::{{closure}} (1 samples, 0.13%)pageserver::metrics::PAGE_CACHE::{{closure}}::{{closure}}::{{closure}} (1 samples, 0.13%)prometheus::vec::MetricVec<T>::get_metric_with_label_values (1 samples, 0.13%)prometheus::vec::MetricVecCore<T>::get_metric_with_label_values (1 samples, 0.13%)prometheus::vec::MetricVecCore<T>::hash_label_values (1 samples, 0.13%)<fnv::FnvHasher as core::hash::Hasher>::write (1 samples, 0.13%)<F as core::future::into_future::IntoFuture>::into_future (17 samples, 2.20%)<F as..<tokio::time::timeout::Timeout<T> as core::future::future::Future>::poll::{{closure}} (5 samples, 0.65%)<tokio::time::sleep::Sleep as core::future::future::Future>::poll (5 samples, 0.65%)tokio::util::wake::wake_by_ref_arc_raw (5 samples, 0.65%)mio::sys::unix::waker::eventfd::Waker::wake (5 samples, 0.65%)<&std::fs::File as std::io::Write>::write (5 samples, 0.65%)std::sys::unix::fs::File::write (5 samples, 0.65%)std::sys::unix::fd::FileDesc::write (5 samples, 0.65%)__write (5 samples, 0.65%)tokio::runtime::coop::has_budget_remaining (20 samples, 2.59%)tokio::..tokio::runtime::context::budget (20 samples, 2.59%)tokio::..std::thread::local::LocalKey<T>::try_with (20 samples, 2.59%)std::th..tokio::runtime::context::budget::{{closure}} (20 samples, 2.59%)tokio::..tokio::runtime::coop::has_budget_remaining::{{closure}} (20 samples, 2.59%)tokio::..tokio::runtime::coop::Budget::has_remaining (1 samples, 0.13%)<tokio::sync::batch_semaphore::Acquire as core::future::future::Future>::poll (17 samples, 2.20%)<toki..tokio::util::wake::wake_by_ref_arc_raw (8 samples, 1.03%)t..mio::sys::unix::waker::eventfd::Waker::wake (7 samples, 0.91%)m..<&std::fs::File as std::io::Write>::write (7 samples, 0.91%)<..std::sys::unix::fs::File::write (7 samples, 0.91%)s..std::sys::unix::fd::FileDesc::write (7 samples, 0.91%)s..__write (7 samples, 0.91%)_..<tokio::time::timeout::Timeout<T> as core::future::future::Future>::poll (63 samples, 8.15%)<tokio::time::timeout::Tim..tokio::sync::semaphore::Semaphore::acquire_owned::{{closure}} (27 samples, 3.49%)tokio::syn..tokio::sync::batch_semaphore::Semaphore::acquire (10 samples, 1.29%)to..<prometheus::histogram::HistogramTimer as core::ops::drop::Drop>::drop (1 samples, 0.13%)<std::sys::unix::time::Timespec as core::convert::From<libc::unix::timespec>>::from (2 samples, 0.26%)std::sys::unix::time::Timespec::new (2 samples, 0.26%)clock_getres (1 samples, 0.13%)std::sys::unix::time::inner::<impl std::sys::unix::time::Timespec>::now (55 samples, 7.12%)std::sys::unix::time::i..clock_gettime (49 samples, 6.34%)clock_gettimecore::result::Result<T,E>::ok (2 samples, 0.26%)core::ptr::drop_in_place<prometheus::histogram::HistogramTimer> (87 samples, 11.25%)core::ptr::drop_in_place<prometheus::..prometheus::histogram::HistogramTimer::observe (86 samples, 11.13%)prometheus::histogram::HistogramTimer..std::time::Instant::duration_since (7 samples, 0.91%)s..std::time::Instant::checked_duration_since (7 samples, 0.91%)s..std::sys::unix::time::inner::Instant::checked_sub_instant (7 samples, 0.91%)s..std::sys::unix::time::Timespec::sub_timespec (5 samples, 0.65%)core::ptr::drop_in_place<tokio::sync::semaphore::Semaphore::acquire_owned::{{closure}}> (2 samples, 0.26%)<tokio::runtime::time::entry::TimerEntry as core::ops::drop::Drop>::drop (1 samples, 0.13%)core::ptr::drop_in_place<tokio::runtime::scheduler::Handle> (6 samples, 0.78%)core::ptr::drop_in_place<alloc::sync::Arc<tokio::runtime::scheduler::current_thread::Handle>> (6 samples, 0.78%)<alloc::sync::Arc<T,A> as core::ops::drop::Drop>::drop (6 samples, 0.78%)core::sync::atomic::AtomicUsize::fetch_sub (3 samples, 0.39%)core::sync::atomic::atomic_sub (3 samples, 0.39%)core::ptr::drop_in_place<tokio::time::timeout::Timeout<tokio::sync::semaphore::Semaphore::acquire_owned::{{closure}}>> (21 samples, 2.72%)core::p..core::ptr::drop_in_place<tokio::time::sleep::Sleep> (19 samples, 2.46%)core::..core::ptr::drop_in_place<tokio::runtime::time::entry::TimerEntry> (19 samples, 2.46%)core::..tokio::runtime::time::<impl tokio::runtime::time::handle::Handle>::clear_entry (11 samples, 1.42%)tok..prometheus::histogram::Histogram::start_timer (43 samples, 5.56%)prometheus::histo..std::sys::unix::time::inner::<impl std::sys::unix::time::Timespec>::now (42 samples, 5.43%)std::sys::unix::t..clock_gettime (40 samples, 5.17%)clock_gettime<std::sys::unix::time::Timespec as core::convert::From<libc::unix::timespec>>::from (1 samples, 0.13%)std::sys::unix::time::Timespec::new (1 samples, 0.13%)std::sys::unix::time::inner::<impl std::sys::unix::time::Timespec>::now (45 samples, 5.82%)std::sys::unix::ti..clock_gettime (44 samples, 5.69%)clock_gettimetokio::time::instant::Instant::checked_add (1 samples, 0.13%)std::time::SystemTime::checked_add (1 samples, 0.13%)std::sys::unix::time::SystemTime::checked_add_duration (1 samples, 0.13%)std::sys::unix::time::Timespec::checked_add_duration (1 samples, 0.13%)tokio::time::sleep::Sleep::new_timeout (20 samples, 2.59%)tokio::..pageserver::page_cache::PageCache::try_get_pinned_slot_permit::{{closure}} (361 samples, 46.70%)pageserver::page_cache::PageCache::try_get_pinned_slot_permit::{{closure}}tokio::time::timeout::timeout (90 samples, 11.64%)tokio::time::timeout::timeouttokio::util::trace::caller_location (1 samples, 0.13%)pageserver::page_cache::PageCache::try_insert_mapping (1 samples, 0.13%)std::collections::hash::map::HashMap<K,V,S>::entry (1 samples, 0.13%)hashbrown::rustc_entry::<impl hashbrown::map::HashMap<K,V,S,A>>::rustc_entry (1 samples, 0.13%)hashbrown::raw::RawTable<T,A>::find (1 samples, 0.13%)hashbrown::raw::RawTableInner<A>::find_inner (1 samples, 0.13%)hashbrown::raw::h2 (1 samples, 0.13%)<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::finish (24 samples, 3.10%)<std::co..<core::hash::sip::SipHasher13 as core::hash::Hasher>::finish (24 samples, 3.10%)<core::h..<core::hash::sip::Hasher<S> as core::hash::Hasher>::finish (24 samples, 3.10%)<core::h..<core::hash::sip::Sip13Rounds as core::hash::sip::Sip>::d_rounds (24 samples, 3.10%)<core::h..core::num::<impl u64>::wrapping_add (13 samples, 1.68%)cor..hashbrown::map::make_hash (28 samples, 3.62%)hashbrown:..core::hash::BuildHasher::hash_one (28 samples, 3.62%)core::hash..core::hash::impls::<impl core::hash::Hash for &T>::hash (4 samples, 0.52%)core::hash::impls::<impl core::hash::Hash for (T,B)>::hash (4 samples, 0.52%)<pageserver::page_cache::FileId as core::hash::Hash>::hash (4 samples, 0.52%)core::hash::impls::<impl core::hash::Hash for u64>::hash (4 samples, 0.52%)core::hash::Hasher::write_u64 (4 samples, 0.52%)<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write (4 samples, 0.52%)<core::hash::sip::SipHasher13 as core::hash::Hasher>::write (4 samples, 0.52%)<core::hash::sip::Hasher<S> as core::hash::Hasher>::write (4 samples, 0.52%)<core::hash::sip::Sip13Rounds as core::hash::sip::Sip>::c_rounds (3 samples, 0.39%)core::num::<impl u64>::rotate_left (3 samples, 0.39%)std::collections::hash::map::HashMap<K,V,S>::get (42 samples, 5.43%)std::collections:..hashbrown::map::HashMap<K,V,S,A>::get (42 samples, 5.43%)hashbrown::map::H..hashbrown::map::HashMap<K,V,S,A>::get_inner (42 samples, 5.43%)hashbrown::map::H..hashbrown::raw::RawTable<T,A>::get (14 samples, 1.81%)hash..hashbrown::raw::RawTable<T,A>::find (14 samples, 1.81%)hash..hashbrown::raw::RawTableInner<A>::find_inner (14 samples, 1.81%)hash..hashbrown::raw::RawTable<T,A>::find::{{closure}} (14 samples, 1.81%)hash..hashbrown::raw::RawTable<T,A>::bucket (14 samples, 1.81%)hash..hashbrown::raw::Bucket<T>::from_base_index (14 samples, 1.81%)hash..core::ptr::mut_ptr::<impl *mut T>::sub (14 samples, 1.81%)core..core::ptr::mut_ptr::<impl *mut T>::offset (14 samples, 1.81%)core..pageserver::page_cache::PageCache::search_mapping (58 samples, 7.50%)pageserver::page_cache::..std::sync::rwlock::RwLock<T>::read (16 samples, 2.07%)std::..std::sys::unix::locks::futex_rwlock::RwLock::read (16 samples, 2.07%)std::..core::sync::atomic::AtomicU32::compare_exchange_weak (16 samples, 2.07%)core:..core::sync::atomic::atomic_compare_exchange_weak (16 samples, 2.07%)core:..alloc::sync::Arc<T>::new (16 samples, 2.07%)alloc..alloc::boxed::Box<T>::new (16 samples, 2.07%)alloc..alloc::alloc::exchange_malloc (2 samples, 0.26%)<alloc::alloc::Global as core::alloc::Allocator>::allocate (2 samples, 0.26%)alloc::alloc::Global::alloc_impl (2 samples, 0.26%)alloc::alloc::alloc (2 samples, 0.26%)<alloc::alloc::Global as core::alloc::Allocator>::deallocate (10 samples, 1.29%)<a..alloc::alloc::dealloc (10 samples, 1.29%)al..__libc_free (7 samples, 0.91%)_..pageserver::page_cache::SlotInner::coalesce_readers_permit (39 samples, 5.05%)pageserver::pag..core::ptr::drop_in_place<alloc::sync::Weak<pageserver::page_cache::PinnedSlotsPermit>> (23 samples, 2.98%)core::pt..<alloc::sync::Weak<T,A> as core::ops::drop::Drop>::drop (23 samples, 2.98%)<alloc::..core::sync::atomic::AtomicUsize::fetch_sub (13 samples, 1.68%)cor..core::sync::atomic::atomic_sub (13 samples, 1.68%)cor..<tokio::sync::batch_semaphore::Acquire as core::future::future::Future>::poll (17 samples, 2.20%)<toki..tokio::util::wake::wake_by_ref_arc_raw (8 samples, 1.03%)t..mio::sys::unix::waker::eventfd::Waker::wake (8 samples, 1.03%)m..<&std::fs::File as std::io::Write>::write (8 samples, 1.03%)<..std::sys::unix::fs::File::write (8 samples, 1.03%)s..std::sys::unix::fd::FileDesc::write (8 samples, 1.03%)s..__write (8 samples, 1.03%)_..core::ptr::drop_in_place<tokio::sync::batch_semaphore::Acquire> (1 samples, 0.13%)<tokio::sync::batch_semaphore::Acquire as core::ops::drop::Drop>::drop (1 samples, 0.13%)pageserver::page_cache::PageCache::read_immutable_buf::{{closure}} (535 samples, 69.21%)pageserver::page_cache::PageCache::read_immutable_buf::{{closure}}pageserver::page_cache::PageCache::lock_for_read::{{closure}} (533 samples, 68.95%)pageserver::page_cache::PageCache::lock_for_read::{{closure}}pageserver::page_cache::PageCache::try_lock_for_read::{{closure}} (170 samples, 21.99%)pageserver::page_cache::PageCache::try_lock_for_read::{{closure}}tokio::sync::rwlock::RwLock<T>::read::{{closure}} (38 samples, 4.92%)tokio::sync::rw..tokio::sync::rwlock::RwLock<T>::read::{{closure}}::{{closure}} (38 samples, 4.92%)tokio::sync::rw..tokio::sync::batch_semaphore::Semaphore::acquire (20 samples, 2.59%)tokio::..tokio::runtime::scheduler::current_thread::Context::enter (758 samples, 98.06%)tokio::runtime::scheduler::current_thread::Context::entertokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}} (758 samples, 98.06%)tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}}tokio::runtime::coop::budget (758 samples, 98.06%)tokio::runtime::coop::budgettokio::runtime::coop::with_budget (758 samples, 98.06%)tokio::runtime::coop::with_budgettokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}}::{{closure}} (757 samples, 97.93%)tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}}::{{closure}}<core::pin::Pin<P> as core::future::future::Future>::poll (757 samples, 97.93%)<core::pin::Pin<P> as core::future::future::Future>::poll<core::pin::Pin<P> as core::future::future::Future>::poll (757 samples, 97.93%)<core::pin::Pin<P> as core::future::future::Future>::pollpageserver::tenant::blob_io::tests::test_arrays_random_size::{{closure}} (757 samples, 97.93%)pageserver::tenant::blob_io::tests::test_arrays_random_size::{{closure}}pageserver::tenant::blob_io::tests::round_trip_test::{{closure}} (756 samples, 97.80%)pageserver::tenant::blob_io::tests::round_trip_test::{{closure}}pageserver::tenant::blob_io::<impl pageserver::tenant::block_io::BlockCursor>::read_blob::{{closure}} (735 samples, 95.08%)pageserver::tenant::blob_io::<impl pageserver::tenant::block_io::BlockCursor>::read_blob::{{closure}}pageserver::tenant::blob_io::<impl pageserver::tenant::block_io::BlockCursor>::read_blob_into_buf::{{closure}} (716 samples, 92.63%)pageserver::tenant::blob_io::<impl pageserver::tenant::block_io::BlockCursor>::read_blob_into_buf::{{closure}}pageserver::tenant::block_io::BlockCursor::read_blk::{{closure}} (623 samples, 80.60%)pageserver::tenant::block_io::BlockCursor::read_blk::{{closure}}pageserver::tenant::block_io::BlockReaderRef::read_blk::{{closure}} (606 samples, 78.40%)pageserver::tenant::block_io::BlockReaderRef::read_blk::{{closure}}pageserver::tenant::block_io::FileBlockReader::read_blk::{{closure}} (590 samples, 76.33%)pageserver::tenant::block_io::FileBlockReader::read_blk::{{closure}}pageserver::tenant::block_io::FileBlockReader::fill_buffer::{{closure}} (23 samples, 2.98%)pageserv..pageserver::virtual_file::VirtualFile::read_exact_at::{{closure}} (23 samples, 2.98%)pageserv..pageserver::virtual_file::VirtualFile::read_at::{{closure}} (23 samples, 2.98%)pageserv..pageserver::virtual_file::VirtualFile::with_file::{{closure}} (23 samples, 2.98%)pageserv..prometheus::histogram::Histogram::observe_closure_duration (23 samples, 2.98%)promethe..pageserver::virtual_file::VirtualFile::with_file::{{closure}}::{{closure}} (23 samples, 2.98%)pageserv..pageserver::virtual_file::VirtualFile::read_at::{{closure}}::{{closure}} (23 samples, 2.98%)pageserv..<std::fs::File as std::os::unix::fs::FileExt>::read_at (23 samples, 2.98%)<std::fs..std::sys::unix::fs::File::read_at (23 samples, 2.98%)std::sys..std::sys::unix::fd::FileDesc::read_at (23 samples, 2.98%)std::sys..pread (23 samples, 2.98%)preadall (773 samples, 100%)tenant::blob_io (773 samples, 100.00%)tenant::blob_iostd::sys::unix::thread::Thread::new::thread_start (773 samples, 100.00%)std::sys::unix::thread::Thread::new::thread_start<alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once (773 samples, 100.00%)<alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once<alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once (773 samples, 100.00%)<alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_oncecore::ops::function::FnOnce::call_once{{vtable.shim}} (773 samples, 100.00%)core::ops::function::FnOnce::call_once{{vtable.shim}}std::thread::Builder::spawn_unchecked_::{{closure}} (773 samples, 100.00%)std::thread::Builder::spawn_unchecked_::{{closure}}std::panic::catch_unwind (773 samples, 100.00%)std::panic::catch_unwindstd::panicking::try (773 samples, 100.00%)std::panicking::trystd::panicking::try::do_call (773 samples, 100.00%)std::panicking::try::do_call<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once (773 samples, 100.00%)<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_oncestd::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}} (773 samples, 100.00%)std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}std::sys_common::backtrace::__rust_begin_short_backtrace (773 samples, 100.00%)std::sys_common::backtrace::__rust_begin_short_backtracetest::run_test::{{closure}} (773 samples, 100.00%)test::run_test::{{closure}}test::run_test::{{closure}} (773 samples, 100.00%)test::run_test::{{closure}}test::run_test_in_process (773 samples, 100.00%)test::run_test_in_processstd::panic::catch_unwind (773 samples, 100.00%)std::panic::catch_unwindstd::panicking::try (773 samples, 100.00%)std::panicking::trystd::panicking::try::do_call (773 samples, 100.00%)std::panicking::try::do_call<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once (773 samples, 100.00%)<core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_oncetest::run_test_in_process::{{closure}} (773 samples, 100.00%)test::run_test_in_process::{{closure}}test::__rust_begin_short_backtrace (773 samples, 100.00%)test::__rust_begin_short_backtracecore::ops::function::FnOnce::call_once (773 samples, 100.00%)core::ops::function::FnOnce::call_oncecore::ops::function::FnOnce::call_once (773 samples, 100.00%)core::ops::function::FnOnce::call_oncepageserver::tenant::blob_io::tests::test_arrays_random_size::{{closure}} (773 samples, 100.00%)pageserver::tenant::blob_io::tests::test_arrays_random_size::{{closure}}pageserver::tenant::blob_io::tests::test_arrays_random_size (773 samples, 100.00%)pageserver::tenant::blob_io::tests::test_arrays_random_sizetokio::runtime::runtime::Runtime::block_on (773 samples, 100.00%)tokio::runtime::runtime::Runtime::block_ontokio::runtime::scheduler::current_thread::CurrentThread::block_on (773 samples, 100.00%)tokio::runtime::scheduler::current_thread::CurrentThread::block_ontokio::runtime::scheduler::current_thread::CoreGuard::block_on (773 samples, 100.00%)tokio::runtime::scheduler::current_thread::CoreGuard::block_ontokio::runtime::scheduler::current_thread::CoreGuard::enter (773 samples, 100.00%)tokio::runtime::scheduler::current_thread::CoreGuard::entertokio::macros::scoped_tls::ScopedKey<T>::set (773 samples, 100.00%)tokio::macros::scoped_tls::ScopedKey<T>::settokio::runtime::scheduler::current_thread::CoreGuard::enter::{{closure}} (773 samples, 100.00%)tokio::runtime::scheduler::current_thread::CoreGuard::enter::{{closure}}tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}} (773 samples, 100.00%)tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}tokio::runtime::scheduler::current_thread::Context::park (14 samples, 1.81%)toki..tokio::runtime::time::Driver::park_internal (13 samples, 1.68%)tok..tokio::runtime::io::Driver::turn (12 samples, 1.55%)tok..mio::poll::Poll::poll (11 samples, 1.42%)mio..epoll_wait (11 samples, 1.42%)epo.. \ No newline at end of file diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 5b0c140d00..68865b1684 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -49,6 +49,8 @@ const PID_FILE_NAME: &str = "pageserver.pid"; const FEATURES: &[&str] = &[ #[cfg(feature = "testing")] "testing", + #[cfg(feature = "profiling")] + "profiling", ]; fn version() -> String { diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 3f74694ef2..9b8fbf082c 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -24,6 +24,7 @@ pub mod virtual_file; pub mod walingest; pub mod walrecord; pub mod walredo; +pub mod profiling; pub mod failpoint_support; diff --git a/pageserver/src/profiling.rs b/pageserver/src/profiling.rs new file mode 100644 index 0000000000..fd27c994d0 --- /dev/null +++ b/pageserver/src/profiling.rs @@ -0,0 +1,85 @@ +//! +//! Support for profiling +//! +//! This relies on a modified version of the 'pprof-rs' crate. That's not very +//! nice, so to avoid a hard dependency on that, this is an optional feature. +//! + +/// The actual implementation is in the `profiling_impl` submodule. If the profiling +/// feature is not enabled, it's just a dummy implementation that panics if you +/// try to enabled profiling in the configuration. +pub use profiling_impl::*; + +#[cfg(feature = "profiling")] +mod profiling_impl { + use super::*; + use pprof; + use std::marker::PhantomData; + + /// Start profiling the current thread. Returns a guard object; + /// the profiling continues until the guard is dropped. + /// + /// Note: profiling is not re-entrant. If you call 'profpoint_start' while + /// profiling is already started, nothing happens, and the profiling will be + /// stopped when either guard object is dropped. + #[inline] + pub fn profpoint_start() -> Option { + pprof::start_profiling(); + Some(ProfilingGuard(PhantomData)) + } + + /// A hack to remove Send and Sync from the ProfilingGuard. Because the + /// profiling is attached to current thread. + //// + /// See comments in https://github.com/rust-lang/rust/issues/68318 + type PhantomUnsend = std::marker::PhantomData<*mut u8>; + + pub struct ProfilingGuard(PhantomUnsend); + + impl Drop for ProfilingGuard { + fn drop(&mut self) { + pprof::stop_profiling(); + } + } + + /// Initialize the profiler. This must be called before any 'profpoint_start' calls. + pub fn init_profiler<'a>() -> Option> { + Some(pprof::ProfilerGuardBuilder::default().build().unwrap()) + } + + /// Exit the profiler. Writes the flamegraph to current workdir. + pub fn exit_profiler(profiler_guard: &Option) { + // Write out the flamegraph + if let Some(profiler_guard) = profiler_guard { + if let Ok(report) = profiler_guard.report().build() { + // this gets written under the workdir + let file = std::fs::File::create("flamegraph.svg").unwrap(); + let mut options = pprof::flamegraph::Options::default(); + options.image_width = Some(2500); + report.flamegraph_with_options(file, &mut options).unwrap(); + } + } + } +} + +/// Dummy implementation when compiling without profiling feature or for non-linux OSes. +#[cfg(not(feature = "profiling"))] +mod profiling_impl { + pub struct DummyProfilerGuard; + + impl Drop for DummyProfilerGuard { + fn drop(&mut self) { + // do nothing, this exists to calm Clippy down + } + } + + pub fn profpoint_start() -> Option { + None + } + + pub fn init_profiler() -> Option { + None + } + + // pub fn exit_profiler(profiler_guard: &Option) {} +} diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index 82b4ccf47a..050eeba7fe 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -269,6 +269,8 @@ mod tests { let rdr = FileBlockReader::new(file); let rdr = BlockReaderRef::FileBlockReader(&rdr); let rdr = BlockCursor::new(rdr); + + let prof_guard = crate::profiling::profpoint_start(); for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() { let blob_read = rdr.read_blob(*offset, &ctx).await?; assert_eq!( @@ -276,6 +278,7 @@ mod tests { "mismatch for idx={idx} at offset={offset}" ); } + drop(prof_guard); println!("read buffered={} time: {:?}", BUFFERED, now.elapsed()); Ok(()) @@ -331,6 +334,10 @@ mod tests { #[tokio::test] async fn test_arrays_random_size() -> Result<(), Error> { + + let profiler_guard = crate::profiling::init_profiler(); + + // crate::page_cache::init(10000); let mut rng = rand::rngs::StdRng::seed_from_u64(42); let blobs = (0..1024*1024) .map(|_| { @@ -348,6 +355,9 @@ mod tests { round_trip_test::(&blobs).await?; round_trip_test::(&blobs).await?; + + #[cfg(feature = "profiling")] + crate::profiling::exit_profiler(&profiler_guard); Ok(()) }