diff --git a/Cargo.lock b/Cargo.lock
index e36b462d8e..f7ce2bf543 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -26,18 +26,21 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "anyhow"
 version = "1.0.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
 
-[[package]]
-name = "arc-swap"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e906254e445520903e7fc9da4f709886c84ae4bc4ddaf0e093188d66df4dc820"
-
 [[package]]
 name = "async-trait"
 version = "0.1.50"
@@ -298,7 +301,7 @@ version = "2.33.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
 dependencies = [
- "ansi_term",
+ "ansi_term 0.11.0",
  "atty",
  "bitflags",
  "strsim",
@@ -387,26 +390,6 @@ dependencies = [
  "rustc_version",
 ]
 
-[[package]]
-name = "crossbeam-channel"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
-dependencies = [
- "cfg-if 1.0.0",
- "crossbeam-utils",
-]
-
-[[package]]
-name = "crossbeam-utils"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
-dependencies = [
- "cfg-if 1.0.0",
- "lazy_static",
-]
-
 [[package]]
 name = "crypto-mac"
 version = "0.10.0"
@@ -445,16 +428,6 @@ dependencies = [
  "dirs-sys",
 ]
 
-[[package]]
-name = "dirs-next"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
-dependencies = [
- "cfg-if 1.0.0",
- "dirs-sys-next",
-]
-
 [[package]]
 name = "dirs-sys"
 version = "0.3.6"
@@ -466,17 +439,6 @@ dependencies = [
  "winapi",
 ]
 
-[[package]]
-name = "dirs-sys-next"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
-dependencies = [
- "libc",
- "redox_users",
- "winapi",
-]
-
 [[package]]
 name = "dlv-list"
 version = "0.2.3"
@@ -956,6 +918,15 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
+[[package]]
+name = "matchers"
+version = "0.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1"
+dependencies = [
+ "regex-automata",
+]
+
 [[package]]
 name = "matches"
 version = "0.1.8"
@@ -1224,6 +1195,7 @@ dependencies = [
  "thiserror",
  "tokio",
  "toml",
+ "tracing",
  "workspace_hack",
  "zenith_metrics",
  "zenith_utils",
@@ -1531,6 +1503,15 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+dependencies = [
+ "regex-syntax",
+]
+
 [[package]]
 name = "regex-syntax"
 version = "0.6.25"
@@ -1689,12 +1670,6 @@ dependencies = [
  "webpki",
 ]
 
-[[package]]
-name = "rustversion"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
-
 [[package]]
"ryu" version = "1.0.5" @@ -1852,6 +1827,15 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "sharded-slab" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.0.0" @@ -1890,59 +1874,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527" -[[package]] -name = "slog" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06" - -[[package]] -name = "slog-async" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c60813879f820c85dbc4eabf3269befe374591289019775898d56a81a804fbdc" -dependencies = [ - "crossbeam-channel", - "slog", - "take_mut", - "thread_local", -] - -[[package]] -name = "slog-scope" -version = "4.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786" -dependencies = [ - "arc-swap", - "lazy_static", - "slog", -] - -[[package]] -name = "slog-stdlog" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a" -dependencies = [ - "log", - "slog", - "slog-scope", -] - -[[package]] -name = "slog-term" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95c1e7e5aab61ced6006149ea772770b84a0d16ce0f7885def313e4829946d76" -dependencies = [ - "atty", - "chrono", - "slog", - "term", - "thread_local", -] - [[package]] name = "smallvec" version = "1.6.1" @@ -1998,12 +1929,6 @@ dependencies = [ "unicode-xid", ] -[[package]] -name = "take_mut" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" - [[package]] name = "tap" version = "1.0.1" @@ -2035,17 +1960,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" -dependencies = [ - "dirs-next", - "rustversion", - "winapi", -] - [[package]] name = "termcolor" version = "1.1.2" @@ -2223,24 +2137,79 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.26" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" +checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" dependencies = [ "cfg-if 1.0.0", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] -name = "tracing-core" +name = "tracing-attributes" version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052" +checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" dependencies = [ "lazy_static", ] +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71" +dependencies = [ + "ansi_term 0.12.1", + "chrono", + "lazy_static", + "matchers", + "regex", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + [[package]] name = "try-lock" version = "0.2.3" @@ -2605,14 +2574,12 @@ dependencies = [ "rustls-split", "serde", "serde_json", - "slog", - "slog-async", - "slog-scope", - "slog-stdlog", - "slog-term", "tempfile", "thiserror", "tokio", + "tracing", + "tracing-log", + "tracing-subscriber", "webpki", "workspace_hack", "zenith_metrics", diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 0e5a82df88..abbade4355 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -35,6 +35,7 @@ scopeguard = "1.1.0" rust-s3 = { version = "0.27.0-rc4", features = ["no-verify-ssl"] } async-trait = "0.1" const_format = "0.2.21" +tracing = "0.1.27" postgres_ffi = { path = "../postgres_ffi" } zenith_metrics = { path = "../zenith_metrics" } diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 5389d609a5..a4ee89918c 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -31,7 +31,7 @@ use zenith_utils::lsn::Lsn; pub struct Basebackup<'a> { ar: Builder<&'a mut dyn Write>, timeline: &'a Arc, - lsn: Lsn, + pub lsn: Lsn, prev_record_lsn: Lsn, } @@ -97,7 +97,6 @@ impl<'a> Basebackup<'a> { pub fn send_tarball(&mut self) -> anyhow::Result<()> { // Create pgdata subdirs structure for dir in pg_constants::PGDATA_SUBDIRS.iter() { - info!("send subdir {:?}", *dir); let header = new_tar_header_dir(*dir)?; self.ar.append(&header, &mut io::empty())?; } diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c763f98a7f..d6cf86ba24 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -2,7 +2,6 @@ // Main entry point for the Page Server executable // -use log::*; use pageserver::defaults::*; use serde::{Deserialize, Serialize}; use std::{ @@ -12,6 +11,7 @@ use std::{ str::FromStr, thread, }; +use tracing::*; use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType}; use anyhow::{bail, ensure, Context, Result}; @@ -447,7 +447,7 @@ fn main() -> Result<()> { fn start_pageserver(conf: &'static PageServerConf) -> Result<()> { // Initialize logger - let (_scope_guard, log_file) = logging::init(LOG_FILE_NAME, conf.daemonize)?; + let log_file = logging::init(LOG_FILE_NAME, conf.daemonize)?; // TODO: Check that it looks like a valid repository before going further @@ -480,7 +480,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> { match daemonize.start() { Ok(_) => info!("Success, daemonized"), - Err(e) => error!("could 
diff --git a/pageserver/src/branches.rs b/pageserver/src/branches.rs
index 57adf479ca..4a30e760b5 100644
--- a/pageserver/src/branches.rs
+++ b/pageserver/src/branches.rs
@@ -14,12 +14,12 @@ use std::{
     str::FromStr,
     sync::Arc,
 };
-use zenith_utils::zid::{ZTenantId, ZTimelineId};
+use tracing::*;
 
-use log::*;
 use zenith_utils::crashsafe_dir;
 use zenith_utils::logging;
 use zenith_utils::lsn::Lsn;
+use zenith_utils::zid::{ZTenantId, ZTimelineId};
 
 use crate::tenant_mgr;
 use crate::walredo::WalRedoManager;
@@ -100,7 +100,7 @@ pub struct PointInTime {
 pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
     // Initialize logger
     // use true as daemonize parameter because otherwise we pollute zenith cli output with a few pages long output of info messages
-    let (_scope_guard, _log_file) = logging::init(LOG_FILE_NAME, true)?;
+    let _log_file = logging::init(LOG_FILE_NAME, true)?;
 
     // We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
     // process during repository initialization.
@@ -176,7 +176,7 @@ fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
 // to get bootstrap data for timeline initialization.
 //
 fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
-    info!("running initdb... ");
+    info!("running initdb in {}... ", initdbpath.display());
 
     let initdb_path = conf.pg_bin_dir().join("initdb");
     let initdb_output = Command::new(initdb_path)
@@ -195,7 +195,6 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
             String::from_utf8_lossy(&initdb_output.stderr)
         );
     }
-    info!("initdb succeeded");
 
     Ok(())
 }
@@ -210,6 +209,8 @@ fn bootstrap_timeline(
     tli: ZTimelineId,
     repo: &dyn Repository,
 ) -> Result<()> {
+    let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
+
     let initdb_path = conf.tenant_path(&tenantid).join("tmp");
 
     // Init temporarily repo to get bootstrap data
@@ -218,8 +219,6 @@ fn bootstrap_timeline(
 
     let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
 
-    info!("bootstrap_timeline {:?} at lsn {}", pgdata_path, lsn);
-
     // Import the contents of the data directory at the initial checkpoint
     // LSN, and any WAL after that.
     let timeline = repo.create_empty_timeline(tli)?;
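The `bootstrap_timeline` hunk above relies on tracing's span guards: every event emitted while the guard is alive is attached to the span and inherits its fields, which is why the per-message timeline/tenant interpolation can be dropped. A sketch of the pattern (hypothetical IDs, not part of the patch):

```rust
use tracing::{info, info_span};

fn bootstrap(timeline_id: u64, tenant_id: u64) {
    // `.entered()` returns a guard; the span is exited when the guard drops.
    let _enter = info_span!("bootstrapping", timeline = %timeline_id, tenant = %tenant_id).entered();

    info!("running initdb"); // rendered inside the "bootstrapping" span
} // guard dropped here, span exited
```

One caveat: an entered-span guard should not be held across an `.await`, which is why the async handlers in routes.rs below enter their spans inside `spawn_blocking` closures instead.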
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index cd6b84b22f..8b15e438da 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -6,6 +7,7 @@ use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use routerify::{ext::RequestExt, RouterBuilder};
+use tracing::*;
 use zenith_utils::auth::JwtAuth;
 use zenith_utils::http::endpoint::attach_openapi_ui;
 use zenith_utils::http::endpoint::auth_middleware;
@@ -98,6 +99,7 @@ async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<B
     check_permission(&request, Some(tenantid))?;
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("branch_list", tenant = %tenantid).entered();
         crate::branches::get_branches(get_config(&request), &tenantid)
     })
     .await
@@ -126,11 +129,12 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
 // TODO add to swagger
 async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
     let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
-    let branch_name: &str = get_request_param(&request, "branch_name")?;
+    let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
     let conf = get_state(&request).conf;
-    let path = conf.branch_path(branch_name, &tenantid);
+    let path = conf.branch_path(&branch_name, &tenantid);
 
     let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
         let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
         BranchInfo::from_path(path, conf, &tenantid, &repo)
     })
@@ -144,10 +148,13 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
     // check for management permission
     check_permission(&request, None)?;
 
-    let response_data =
-        tokio::task::spawn_blocking(move || crate::branches::get_tenants(get_config(&request)))
-            .await
-            .map_err(ApiError::from_err)??;
+    let response_data = tokio::task::spawn_blocking(move || {
+        let _enter = info_span!("tenant_list").entered();
+        crate::branches::get_tenants(get_config(&request))
+    })
+    .await
+    .map_err(ApiError::from_err)??;
+
     Ok(json_response(StatusCode::OK, response_data)?)
 }
 
@@ -158,6 +165,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<B
diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -…,… +…,… @@ fn save_metadata(
     ) -> Result<()> {
+        let _enter = info_span!("saving metadata").entered();
         let path = metadata_path(conf, timelineid, tenantid);
         // use OpenOptions to ensure file presence is consistent with first_save
         let mut file = OpenOptions::new()
@@ -371,8 +380,6 @@ impl LayeredRepository {
             .create_new(first_save)
             .open(&path)?;
 
-        info!("saving metadata {}", path.display());
-
         let mut metadata_bytes = TimelineMetadata::ser(data)?;
         assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
@@ -686,13 +693,7 @@ impl Timeline for LayeredTimeline {
         let segsize;
         if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
             segsize = layer.get_seg_size(lsn)?;
-            trace!(
-                "get_seg_size: {} at {}/{} -> {}",
-                seg,
-                self.timelineid,
-                lsn,
-                segsize
-            );
+            trace!("get_seg_size: {} at {} -> {}", seg, lsn, segsize);
         } else {
             if segno == 0 {
                 return Ok(None);
@@ -794,7 +795,7 @@ impl Timeline for LayeredTimeline {
                 result.insert(new_relish);
                 trace!("List object {}", new_relish);
             } else {
-                trace!("Filter out droped object {}", new_relish);
+                trace!("Filtered out dropped object {}", new_relish);
             }
         }
 
@@ -972,6 +973,8 @@ impl Timeline for LayeredTimeline {
     fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
         let mut total_blocks: usize = 0;
 
+        let _enter = info_span!("calc logical size", %lsn).entered();
+
         // list of all relations in this timeline, including ancestor timelines
         let all_rels = self.list_rels(0, 0, lsn)?;
@@ -1042,11 +1045,8 @@ impl LayeredTimeline {
     /// Returns all timeline-related files that were found and loaded.
     ///
     fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<Vec<PathBuf>> {
-        info!(
-            "loading layer map for timeline {} into memory",
-            self.timelineid
-        );
         let mut layers = self.layers.lock().unwrap();
+        let mut num_layers = 0;
 
         let (imgfilenames, deltafilenames) =
             filename::list_files(self.conf, self.timelineid, self.tenantid)?;
@@ -1066,14 +1066,10 @@ impl LayeredTimeline {
             let layer = ImageLayer::new(self.conf, self.timelineid, self.tenantid, filename);
 
-            info!(
-                "found layer {} {} on timeline {}",
-                layer.get_seg_tag(),
-                layer.get_start_lsn(),
-                self.timelineid
-            );
+            trace!("found layer {}", layer.filename().display());
             local_layers.push(layer.path());
             layers.insert_historic(Arc::new(layer));
+            num_layers += 1;
         }
 
         for filename in &deltafilenames {
@@ -1090,14 +1086,12 @@ impl LayeredTimeline {
             let layer = DeltaLayer::new(self.conf, self.timelineid, self.tenantid, filename);
 
-            info!(
-                "found layer {} on timeline {}",
-                layer.filename().display(),
-                self.timelineid,
-            );
+            trace!("found layer {}", layer.filename().display());
             local_layers.push(layer.path());
             layers.insert_historic(Arc::new(layer));
+            num_layers += 1;
         }
+        info!("loaded layer map with {} layers", num_layers);
 
         Ok(local_layers)
     }
@@ -1148,12 +1142,7 @@ impl LayeredTimeline {
         lsn: Lsn,
         self_layers: &MutexGuard<LayerMap>,
     ) -> Result<Option<(Arc<dyn Layer>, Lsn)>> {
-        trace!(
-            "get_layer_for_read called for {} at {}/{}",
-            seg,
-            self.timelineid,
-            lsn
-        );
+        trace!("get_layer_for_read called for {} at {}", seg, lsn);
 
         // If you requested a page at an older LSN, before the branch point, dig into
         // the right ancestor timeline. This can only happen if you launch a read-only
@@ -1271,17 +1260,15 @@ impl LayeredTimeline {
             // First modification on this timeline
             start_lsn = self.ancestor_lsn + 1;
             trace!(
-                "creating layer for write for {} at branch point {}/{}",
+                "creating layer for write for {} at branch point {}",
                 seg,
-                self.timelineid,
                 start_lsn
             );
         } else {
             start_lsn = prev_layer.get_end_lsn();
             trace!(
-                "creating layer for write for {} after previous layer {}/{}",
+                "creating layer for write for {} after previous layer {}",
                 seg,
-                self.timelineid,
                 start_lsn
             );
         }
@@ -1340,11 +1327,7 @@ impl LayeredTimeline {
             prev: prev_record_lsn,
         } = self.last_record_lsn.load();
 
-        trace!(
-            "checkpointing timeline {} at {}",
-            self.timelineid,
-            last_record_lsn
-        );
+        trace!("checkpoint starting at {}", last_record_lsn);
 
         // Take the in-memory layer with the oldest WAL record. If it's older
        // than the threshold, write it out to disk as a new image and delta file.
@@ -1520,11 +1503,11 @@ impl LayeredTimeline {
         let now = Instant::now();
         let mut result: GcResult = Default::default();
 
-        info!(
-            "running GC on timeline {}, cutoff {}",
-            self.timelineid, cutoff
-        );
-        info!("retain_lsns: {:?}", retain_lsns);
+        let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();
+
+        info!("GC starting");
+
+        debug!("retain_lsns: {:?}", retain_lsns);
 
         let mut layers_to_remove: Vec<Arc<dyn Layer>> = Vec::new();
 
@@ -1786,10 +1769,9 @@ impl LayeredTimeline {
         if data.records.is_empty() {
             if let Some(img) = &data.page_img {
                 trace!(
-                    "found page image for blk {} in {} at {}/{}, no WAL redo required",
+                    "found page image for blk {} in {} at {}, no WAL redo required",
                     blknum,
                     rel,
-                    self.timelineid,
                     request_lsn
                 );
                 Ok(img.clone())
@@ -1815,9 +1797,9 @@ impl LayeredTimeline {
                 Ok(ZERO_PAGE.clone())
             } else {
                 if data.page_img.is_some() {
-                    trace!("found {} WAL records and a base image for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
+                    trace!("found {} WAL records and a base image for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
                 } else {
-                    trace!("found {} WAL records that will init the page for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
+                    trace!("found {} WAL records that will init the page for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
                 }
                 let img = self.walredo_mgr.request_redo(
                     rel,
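On the GC hunk above: once the `garbage collection` span is entered with `timeline`/`tenant`/`cutoff` fields, the events inside it no longer need to repeat those values in their message text; the subscriber prints them as span context. A compact sketch (placeholder values, not part of the patch):

```rust
use tracing::{debug, info, info_span};

fn gc_example(timelineid: u32, tenantid: u32, cutoff: u64) {
    let _enter = info_span!("gc", timeline = %timelineid, tenant = %tenantid, cutoff = %cutoff)
        .entered();

    info!("GC starting"); // span fields already say which timeline/tenant
    debug!("retain_lsns: {:?}", [1u64, 2, 3]); // demoted from info! by this patch
}
```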
diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs
index 214c392fb6..8c44ac6296 100644
--- a/pageserver/src/layered_repository/inmemory_layer.rs
+++ b/pageserver/src/layered_repository/inmemory_layer.rs
@@ -579,9 +579,8 @@ impl InMemoryLayer {
     /// After completion, self is non-writeable, but not frozen.
     pub fn freeze(self: Arc<Self>, cutoff_lsn: Lsn) -> Result {
         info!(
-            "freezing in memory layer {} on timeline {} at {} (oldest {})",
+            "freezing in-memory layer {} at {} (oldest {})",
             self.filename().display(),
-            self.timelineid,
             cutoff_lsn,
             self.oldest_pending_lsn
         );
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index d592a83993..32b8746f09 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -13,7 +13,6 @@
 use anyhow::{anyhow, bail, ensure, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
-use log::*;
 use regex::Regex;
 use std::net::TcpListener;
 use std::str;
@@ -21,6 +20,7 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::thread;
 use std::{io, net::TcpStream};
+use tracing::*;
 use zenith_metrics::{register_histogram_vec, HistogramVec};
 use zenith_utils::auth::{self, JwtAuth};
 use zenith_utils::auth::{Claims, Scope};
@@ -194,7 +194,7 @@ pub fn thread_main(
         let local_auth = auth.clone();
         thread::spawn(move || {
             if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
-                error!("page server thread exiting with error: {:#}", err);
+                error!(%err, "page server thread exited with error");
             }
         });
     }
@@ -260,6 +260,8 @@ impl PageServerHandler {
         timelineid: ZTimelineId,
         tenantid: ZTenantId,
     ) -> anyhow::Result<()> {
+        let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
+
         // Check that the timeline exists
         let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
@@ -267,7 +269,7 @@ impl PageServerHandler {
         pgb.write_message(&BeMessage::CopyBothResponse)?;
 
         while let Some(message) = pgb.read_message()? {
-            trace!("query({:?}): {:?}", timelineid, message);
+            trace!("query: {:?}", message);
 
             let copy_data_bytes = match message {
                 FeMessage::CopyData(bytes) => bytes,
@@ -363,6 +365,8 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamExistsRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
+
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -378,6 +382,7 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamNblocksRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -397,6 +402,8 @@ impl PageServerHandler {
         timeline: &dyn Timeline,
         req: &PagestreamGetPageRequest,
     ) -> Result<PagestreamBeMessage> {
+        let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
+            .entered();
         let tag = RelishTag::Relation(req.rel);
         let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
 
@@ -414,17 +421,20 @@ impl PageServerHandler {
         lsn: Option<Lsn>,
         tenantid: ZTenantId,
     ) -> anyhow::Result<()> {
+        let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
+        let _enter = span.enter();
+
         // check that the timeline exists
         let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
-        /* switch client to COPYOUT */
+        // switch client to COPYOUT
         pgb.write_message(&BeMessage::CopyOutResponse)?;
-        info!("sent CopyOut");
 
         /* Send a tarball of the latest layer on the timeline */
         {
             let mut writer = CopyDataSink { pgb };
             let mut basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
+            span.record("lsn", &basebackup.lsn.to_string().as_str());
             basebackup.send_tarball()?;
         }
         pgb.write_message(&BeMessage::CopyDone)?;
 
@@ -529,11 +539,6 @@ impl postgres_backend::Handler for PageServerHandler {
             None
         };
 
-        info!(
-            "got basebackup command. tenantid=\"{}\" timelineid=\"{}\" lsn=\"{:#?}\"",
-            tenantid, timelineid, lsn
-        );
-
         // Check that the timeline exists
         self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
         pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
@@ -551,6 +556,9 @@
 
         self.check_permission(Some(tenantid))?;
 
+        let _enter =
+            info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
+
         // Check that the timeline exists
         tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
 
@@ -573,6 +581,9 @@
 
         self.check_permission(Some(tenantid))?;
 
+        let _enter =
+            info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
+
         let branch = branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
         let branch = serde_json::to_vec(&branch)?;
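The basebackup span in page_service.rs declares `lsn = field::Empty` and records it later, because the LSN is only known once `Basebackup::new` has resolved it. The pattern in isolation (placeholder value, not part of the patch):

```rust
use tracing::{field, info_span};

fn basebackup_span_example() {
    // Declare `lsn` up front so the field exists on the span...
    let span = info_span!("basebackup", lsn = field::Empty);
    let _enter = span.enter();

    // ...and fill it in once the backup LSN has been resolved.
    let resolved_lsn: u64 = 0x16B9188;
    span.record("lsn", &resolved_lsn);
}
```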
diff --git a/pageserver/src/restore_local_repo.rs b/pageserver/src/restore_local_repo.rs
index dfe3edd7ac..ec6ee61d52 100644
--- a/pageserver/src/restore_local_repo.rs
+++ b/pageserver/src/restore_local_repo.rs
@@ -2,7 +2,6 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! zenith Timeline.
 //!
-use log::*;
 use postgres_ffi::nonrelfile_utils::clogpage_precedes;
 use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
 use std::cmp::min;
@@ -13,6 +12,7 @@ use std::path::Path;
 
 use anyhow::{bail, Result};
 use bytes::{Buf, Bytes};
+use tracing::*;
 
 use crate::relish::*;
 use crate::repository::*;
@@ -139,6 +139,7 @@ fn import_relfile(
     dboid: Oid,
 ) -> Result<()> {
     // Does it look like a relation file?
+    trace!("importing rel file {}", path.display());
 
     let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
     if let Err(e) = p {
@@ -166,14 +167,14 @@ fn import_relfile(
             }
 
             // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
+            Err(err) => match err.kind() {
                 std::io::ErrorKind::UnexpectedEof => {
                     // reached EOF. That's expected.
                     // FIXME: maybe check that we read the full length of the file?
                     break;
                 }
                 _ => {
-                    bail!("error reading file {}: {:#}", path.display(), e);
+                    bail!("error reading file {}: {:#}", path.display(), err);
                 }
             },
         };
@@ -200,7 +201,7 @@ fn import_nonrel_file(
     // read the whole file
     file.read_to_end(&mut buffer)?;
 
-    info!("importing non-rel file {}", path.display());
+    trace!("importing non-rel file {}", path.display());
 
     timeline.put_page_image(tag, 0, lsn, Bytes::copy_from_slice(&buffer[..]))?;
     Ok(())
@@ -217,7 +218,7 @@ fn import_control_file(timeline: &dyn Timeline, lsn: Lsn, path: &Path) -> Result
     // read the whole file
     file.read_to_end(&mut buffer)?;
 
-    info!("importing control file {}", path.display());
+    trace!("importing control file {}", path.display());
 
     // Import it as ControlFile
     timeline.put_page_image(
@@ -244,7 +245,7 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
     let mut buf: [u8; 8192] = [0u8; 8192];
     let segno = u32::from_str_radix(path.file_name().unwrap().to_str().unwrap(), 16)?;
 
-    info!("importing slru file {}", path.display());
+    trace!("importing slru file {}", path.display());
 
     let mut rpageno = 0;
     loop {
@@ -260,14 +261,14 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
             }
 
             // TODO: UnexpectedEof is expected
-            Err(e) => match e.kind() {
+            Err(err) => match err.kind() {
                 std::io::ErrorKind::UnexpectedEof => {
                     // reached EOF. That's expected.
                     // FIXME: maybe check that we read the full length of the file?
                     break;
                 }
                 _ => {
-                    bail!("error reading file {}: {:#}", path.display(), e);
+                    bail!("error reading file {}: {:#}", path.display(), err);
                 }
             },
         };
diff --git a/pageserver/src/walreceiver.rs b/pageserver/src/walreceiver.rs
index db0f2806ae..0fd9dfdf27 100644
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -12,7 +12,6 @@
 use crate::waldecoder::*;
 use crate::PageServerConf;
 use anyhow::{bail, Error, Result};
 use lazy_static::lazy_static;
-use log::*;
 use postgres::fallible_iterator::FallibleIterator;
 use postgres::replication::ReplicationIter;
 use postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
@@ -27,6 +26,7 @@ use std::thread;
 use std::thread::sleep;
 use std::thread_local;
 use std::time::{Duration, SystemTime};
+use tracing::*;
 use zenith_utils::lsn::Lsn;
 use zenith_utils::zid::ZTenantId;
 use zenith_utils::zid::ZTimelineId;
@@ -96,10 +96,8 @@ fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
 // This is the entry point for the WAL receiver thread.
 //
 fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
-    info!(
-        "WAL receiver thread started for timeline : '{}'",
-        timelineid
-    );
+    let _enter = info_span!("WAL receiver", timeline = %timelineid, tenant = %tenantid).entered();
+    info!("WAL receiver thread started");
 
     //
     // Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
@@ -169,8 +167,8 @@ fn walreceiver_main(
     startpoint += startpoint.calc_padding(8u32);
 
     info!(
-        "last_record_lsn {} starting replication from {} for timeline {}, server is at {}...",
-        last_rec_lsn, startpoint, timelineid, end_of_wal
+        "last_record_lsn {} starting replication from {}, server is at {}...",
+        last_rec_lsn, startpoint, end_of_wal
     );
 
     let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
@@ -198,6 +196,8 @@ fn walreceiver_main(
         waldecoder.feed_bytes(data);
 
         while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
+            let _enter = info_span!("processing record", lsn = %lsn).entered();
+
             // Save old checkpoint value to compare with it after decoding WAL record
             let old_checkpoint_bytes = checkpoint.encode();
             let decoded = decode_wal_record(recdata.clone());
diff --git a/walkeeper/src/bin/wal_acceptor.rs b/walkeeper/src/bin/wal_acceptor.rs
index 5bd64cb8a6..bfb09db5cb 100644
--- a/walkeeper/src/bin/wal_acceptor.rs
+++ b/walkeeper/src/bin/wal_acceptor.rs
@@ -129,7 +129,7 @@ fn main() -> Result<()> {
 
 fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
     let log_filename = conf.data_dir.join("wal_acceptor.log");
-    let (_scope_guard, log_file) = logging::init(log_filename, conf.daemonize)?;
+    let log_file = logging::init(log_filename, conf.daemonize)?;
 
     let http_listener = TcpListener::bind(conf.listen_http_addr.clone()).map_err(|e| {
         error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
diff --git a/zenith_utils/Cargo.toml b/zenith_utils/Cargo.toml
index 22c1c9bab6..6571fae042 100644
--- a/zenith_utils/Cargo.toml
+++ b/zenith_utils/Cargo.toml
@@ -18,12 +18,9 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 thiserror = "1.0"
 tokio = "1.11"
-
-slog-async = "2.6.0"
-slog-stdlog = "4.1.0"
-slog-scope = "4.4.0"
-slog-term = "2.8.0"
-slog = "2.7.0"
+tracing = "0.1"
+tracing-log = "0.1"
+tracing-subscriber = "0.2"
 
 zenith_metrics = { path = "../zenith_metrics" }
 workspace_hack = { path = "../workspace_hack" }
diff --git a/zenith_utils/src/logging.rs b/zenith_utils/src/logging.rs
index c6ed35cbf4..53dbfc305d 100644
--- a/zenith_utils/src/logging.rs
+++ b/zenith_utils/src/logging.rs
@@ -1,4 +1,3 @@
-use slog::{Drain, Level};
 use std::{
     fs::{File, OpenOptions},
     path::Path,
@@ -6,10 +5,12 @@
 
 use anyhow::{Context, Result};
 
-pub fn init(
-    log_filename: impl AsRef<Path>,
-    daemonize: bool,
-) -> Result<(slog_scope::GlobalLoggerGuard, File)> {
+use tracing::subscriber::set_global_default;
+use tracing_log::LogTracer;
+use tracing_subscriber::fmt;
+use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry};
+
+pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
     // Don't open the same file for output multiple times;
     // the different fds could overwrite each other's output.
     let log_file = OpenOptions::new()
@@ -18,30 +19,38 @@
         .open(&log_filename)
         .with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;
 
+    let default_filter_str = "info";
+
+    // We fall back to printing all spans at info-level or above if
+    // the RUST_LOG environment variable is not set.
+    let env_filter =
+        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_filter_str));
+
     // we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
     // if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
     // for example to be in line with docker log command which expects logs coming from stdout
-    let guard = if daemonize {
-        let decorator = slog_term::PlainSyncDecorator::new(log_file.try_clone()?);
-        let drain = slog_term::FullFormat::new(decorator)
-            .build()
-            .filter_level(Level::Info)
-            .fuse();
-        let logger = slog::Logger::root(drain, slog::o!());
-        slog_scope::set_global_logger(logger)
+    //
+    // TODO: perhaps use a more human-readable format when !daemonize
+    if daemonize {
+        let file_for_writer = log_file.try_clone()?;
+
+        let fmt_layer = fmt::layer()
+            .pretty()
+            .with_target(false) // don't include event targets
+            .with_ansi(false) // don't use colors in log file
+            .with_writer(move || file_for_writer.try_clone().unwrap());
+        let subscriber = Registry::default().with(env_filter).with(fmt_layer);
+
+        set_global_default(subscriber).expect("Failed to set subscriber");
     } else {
-        let decorator = slog_term::TermDecorator::new().build();
-        let drain = slog_term::FullFormat::new(decorator)
-            .build()
-            .filter_level(Level::Info)
-            .fuse();
-        let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
-        let logger = slog::Logger::root(drain, slog::o!());
-        slog_scope::set_global_logger(logger)
-    };
+        let fmt_layer = fmt::layer().with_target(false); // don't include event targets
+        let subscriber = Registry::default().with(env_filter).with(fmt_layer);
 
-    // initialise forwarding of std log calls
-    slog_stdlog::init()?;
+        set_global_default(subscriber).expect("Failed to set subscriber");
+    }
 
-    Ok((guard, log_file))
+    // Redirect all `log`'s events to our subscriber
+    LogTracer::init().expect("Failed to set logger");
+
+    Ok(log_file)
 }
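For reviewers, the new wiring in zenith_utils/src/logging.rs in one self-contained sketch: an `EnvFilter` honoring `RUST_LOG` (e.g. `RUST_LOG=pageserver=debug,info`) with an `info` fallback, an `fmt` layer on a `Registry`, and `LogTracer` bridging events from the `log` crate. Assumes tracing 0.1, tracing-subscriber 0.2 and tracing-log 0.1 as pinned above; illustrative only, not the patch's exact code:

```rust
use tracing::{info, subscriber::set_global_default};
use tracing_log::LogTracer;
use tracing_subscriber::{fmt, layer::SubscriberExt, EnvFilter, Registry};

fn main() {
    // Honor RUST_LOG when set, otherwise default to info-level, as in init().
    let env_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));

    let subscriber = Registry::default()
        .with(env_filter)
        .with(fmt::layer().with_target(false));
    set_global_default(subscriber).expect("failed to set global subscriber");

    // Route `log` macro calls from dependencies into tracing.
    LogTracer::init().expect("failed to initialize LogTracer");

    info!("logging initialized");
}
```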