Use tracing crate to have more context in log messages.

Whenever we start processing a request, we now enter a tracing "span"
that includes context information like the tenant and timeline ID, and
the operation we're performing. That context information gets attached
to every log message we create within the span. That way, we don't need
to include basic context information like that in every log message, and
it also becomes easier to filter the logs programmatically.

This removes the eplicit timeline and tenant IDs from most log messages,
as you get that information from the enclosing span now.

Also improve log messages in general, dialing down the level of some
messages that are not very useful, and adding information to others.

We now obey the RUST_LOG env variable, if it's set.

The 'tracing' crate allows for different log formatters, like JSON or
bunyan output. The one we use now is human-readable multi-line format,
which is nice when reading the log directly, but hard for
post-processing.  For production, we'll probably want JSON output and
some tools for working with it, but that's left as a TODO. The log
format is easy to change.
This commit is contained in:
Heikki Linnakangas
2021-10-11 08:59:06 +03:00
parent bf58f7f649
commit 7216f22609
14 changed files with 234 additions and 261 deletions

233
Cargo.lock generated
View File

@@ -26,18 +26,21 @@ dependencies = [
"winapi",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anyhow"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
[[package]]
name = "arc-swap"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e906254e445520903e7fc9da4f709886c84ae4bc4ddaf0e093188d66df4dc820"
[[package]]
name = "async-trait"
version = "0.1.50"
@@ -298,7 +301,7 @@ version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
"ansi_term",
"ansi_term 0.11.0",
"atty",
"bitflags",
"strsim",
@@ -387,26 +390,6 @@ dependencies = [
"rustc_version",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if 1.0.0",
"lazy_static",
]
[[package]]
name = "crypto-mac"
version = "0.10.0"
@@ -445,16 +428,6 @@ dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if 1.0.0",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys"
version = "0.3.6"
@@ -466,17 +439,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "dlv-list"
version = "0.2.3"
@@ -956,6 +918,15 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "matchers"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1"
dependencies = [
"regex-automata",
]
[[package]]
name = "matches"
version = "0.1.8"
@@ -1224,6 +1195,7 @@ dependencies = [
"thiserror",
"tokio",
"toml",
"tracing",
"workspace_hack",
"zenith_metrics",
"zenith_utils",
@@ -1531,6 +1503,15 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
@@ -1689,12 +1670,6 @@ dependencies = [
"webpki",
]
[[package]]
name = "rustversion"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
[[package]]
name = "ryu"
version = "1.0.5"
@@ -1852,6 +1827,15 @@ dependencies = [
"opaque-debug",
]
[[package]]
name = "sharded-slab"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.0.0"
@@ -1890,59 +1874,6 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527"
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c60813879f820c85dbc4eabf3269befe374591289019775898d56a81a804fbdc"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "slog-stdlog"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a"
dependencies = [
"log",
"slog",
"slog-scope",
]
[[package]]
name = "slog-term"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95c1e7e5aab61ced6006149ea772770b84a0d16ce0f7885def313e4829946d76"
dependencies = [
"atty",
"chrono",
"slog",
"term",
"thread_local",
]
[[package]]
name = "smallvec"
version = "1.6.1"
@@ -1998,12 +1929,6 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tap"
version = "1.0.1"
@@ -2035,17 +1960,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
"dirs-next",
"rustversion",
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.2"
@@ -2223,24 +2137,79 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
[[package]]
name = "tracing"
version = "0.1.26"
version = "0.1.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d"
checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
dependencies = [
"cfg-if 1.0.0",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-core"
name = "tracing-attributes"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052"
checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
dependencies = [
"lazy_static",
]
[[package]]
name = "tracing-log"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
"lazy_static",
"log",
"tracing-core",
]
[[package]]
name = "tracing-serde"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b"
dependencies = [
"serde",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71"
dependencies = [
"ansi_term 0.12.1",
"chrono",
"lazy_static",
"matchers",
"regex",
"serde",
"serde_json",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
"tracing-serde",
]
[[package]]
name = "try-lock"
version = "0.2.3"
@@ -2605,14 +2574,12 @@ dependencies = [
"rustls-split",
"serde",
"serde_json",
"slog",
"slog-async",
"slog-scope",
"slog-stdlog",
"slog-term",
"tempfile",
"thiserror",
"tokio",
"tracing",
"tracing-log",
"tracing-subscriber",
"webpki",
"workspace_hack",
"zenith_metrics",

View File

@@ -35,6 +35,7 @@ scopeguard = "1.1.0"
rust-s3 = { version = "0.27.0-rc4", features = ["no-verify-ssl"] }
async-trait = "0.1"
const_format = "0.2.21"
tracing = "0.1.27"
postgres_ffi = { path = "../postgres_ffi" }
zenith_metrics = { path = "../zenith_metrics" }

View File

@@ -31,7 +31,7 @@ use zenith_utils::lsn::Lsn;
pub struct Basebackup<'a> {
ar: Builder<&'a mut dyn Write>,
timeline: &'a Arc<dyn Timeline>,
lsn: Lsn,
pub lsn: Lsn,
prev_record_lsn: Lsn,
}
@@ -97,7 +97,6 @@ impl<'a> Basebackup<'a> {
pub fn send_tarball(&mut self) -> anyhow::Result<()> {
// Create pgdata subdirs structure
for dir in pg_constants::PGDATA_SUBDIRS.iter() {
info!("send subdir {:?}", *dir);
let header = new_tar_header_dir(*dir)?;
self.ar.append(&header, &mut io::empty())?;
}

View File

@@ -2,7 +2,6 @@
// Main entry point for the Page Server executable
//
use log::*;
use pageserver::defaults::*;
use serde::{Deserialize, Serialize};
use std::{
@@ -12,6 +11,7 @@ use std::{
str::FromStr,
thread,
};
use tracing::*;
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType};
use anyhow::{bail, ensure, Context, Result};
@@ -447,7 +447,7 @@ fn main() -> Result<()> {
fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
// Initialize logger
let (_scope_guard, log_file) = logging::init(LOG_FILE_NAME, conf.daemonize)?;
let log_file = logging::init(LOG_FILE_NAME, conf.daemonize)?;
// TODO: Check that it looks like a valid repository before going further
@@ -480,7 +480,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
match daemonize.start() {
Ok(_) => info!("Success, daemonized"),
Err(e) => error!("could not daemonize: {:#}", e),
Err(err) => error!(%err, "could not daemonize"),
}
}

View File

@@ -14,12 +14,12 @@ use std::{
str::FromStr,
sync::Arc,
};
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use tracing::*;
use log::*;
use zenith_utils::crashsafe_dir;
use zenith_utils::logging;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use crate::tenant_mgr;
use crate::walredo::WalRedoManager;
@@ -100,7 +100,7 @@ pub struct PointInTime {
pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
// Initialize logger
// use true as daemonize parameter because otherwise we pollute zenith cli output with a few pages long output of info messages
let (_scope_guard, _log_file) = logging::init(LOG_FILE_NAME, true)?;
let _log_file = logging::init(LOG_FILE_NAME, true)?;
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
// process during repository initialization.
@@ -176,7 +176,7 @@ fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// to get bootstrap data for timeline initialization.
//
fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
info!("running initdb... ");
info!("running initdb in {}... ", initdbpath.display());
let initdb_path = conf.pg_bin_dir().join("initdb");
let initdb_output = Command::new(initdb_path)
@@ -195,7 +195,6 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
String::from_utf8_lossy(&initdb_output.stderr)
);
}
info!("initdb succeeded");
Ok(())
}
@@ -210,6 +209,8 @@ fn bootstrap_timeline(
tli: ZTimelineId,
repo: &dyn Repository,
) -> Result<()> {
let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
let initdb_path = conf.tenant_path(&tenantid).join("tmp");
// Init temporarily repo to get bootstrap data
@@ -218,8 +219,6 @@ fn bootstrap_timeline(
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
info!("bootstrap_timeline {:?} at lsn {}", pgdata_path, lsn);
// Import the contents of the data directory at the initial checkpoint
// LSN, and any WAL after that.
let timeline = repo.create_empty_timeline(tli)?;

View File

@@ -6,6 +6,7 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use routerify::{ext::RequestExt, RouterBuilder};
use tracing::*;
use zenith_utils::auth::JwtAuth;
use zenith_utils::http::endpoint::attach_openapi_ui;
use zenith_utils::http::endpoint::auth_middleware;
@@ -98,6 +99,7 @@ async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Bo
check_permission(&request, Some(request_data.tenant_id))?;
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("/branch_create", name = %request_data.name, tenant = %request_data.tenant_id, startpoint=%request_data.start_point).entered();
branches::create_branch(
get_config(&request),
&request_data.name,
@@ -116,6 +118,7 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
check_permission(&request, Some(tenantid))?;
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("branch_list", tenant = %tenantid).entered();
crate::branches::get_branches(get_config(&request), &tenantid)
})
.await
@@ -126,11 +129,12 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
// TODO add to swagger
async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
let branch_name: &str = get_request_param(&request, "branch_name")?;
let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
let conf = get_state(&request).conf;
let path = conf.branch_path(branch_name, &tenantid);
let path = conf.branch_path(&branch_name, &tenantid);
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
BranchInfo::from_path(path, conf, &tenantid, &repo)
})
@@ -144,10 +148,13 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
// check for management permission
check_permission(&request, None)?;
let response_data =
tokio::task::spawn_blocking(move || crate::branches::get_tenants(get_config(&request)))
.await
.map_err(ApiError::from_err)??;
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_list").entered();
crate::branches::get_tenants(get_config(&request))
})
.await
.map_err(ApiError::from_err)??;
Ok(json_response(StatusCode::OK, response_data)?)
}
@@ -158,6 +165,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
let request_data: TenantCreateRequest = json_request(&mut request).await?;
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
})
.await

View File

@@ -15,9 +15,9 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bookfile::Book;
use bytes::Bytes;
use lazy_static::lazy_static;
use log::*;
use postgres_ffi::pg_constants::BLCKSZ;
use serde::{Deserialize, Serialize};
use tracing::*;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
@@ -243,6 +243,10 @@ impl LayeredRepository {
None
};
let _enter =
info_span!("loading timeline", timeline = %timelineid, tenant = %self.tenantid)
.entered();
let mut timeline = LayeredTimeline::new(
self.conf,
metadata,
@@ -316,7 +320,11 @@ impl LayeredRepository {
// bytes of WAL since last checkpoint.
{
let timelines = self.timelines.lock().unwrap();
for (_timelineid, timeline) in timelines.iter() {
for (timelineid, timeline) in timelines.iter() {
let _entered =
info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid)
.entered();
STORAGE_TIME
.with_label_values(&["checkpoint_timed"])
.observe_closure_duration(|| {
@@ -364,6 +372,7 @@ impl LayeredRepository {
data: &TimelineMetadata,
first_save: bool,
) -> Result<()> {
let _enter = info_span!("saving metadata").entered();
let path = metadata_path(conf, timelineid, tenantid);
// use OpenOptions to ensure file presence is consistent with first_save
let mut file = OpenOptions::new()
@@ -371,8 +380,6 @@ impl LayeredRepository {
.create_new(first_save)
.open(&path)?;
info!("saving metadata {}", path.display());
let mut metadata_bytes = TimelineMetadata::ser(data)?;
assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
@@ -686,13 +693,7 @@ impl Timeline for LayeredTimeline {
let segsize;
if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
segsize = layer.get_seg_size(lsn)?;
trace!(
"get_seg_size: {} at {}/{} -> {}",
seg,
self.timelineid,
lsn,
segsize
);
trace!("get_seg_size: {} at {} -> {}", seg, lsn, segsize);
} else {
if segno == 0 {
return Ok(None);
@@ -794,7 +795,7 @@ impl Timeline for LayeredTimeline {
result.insert(new_relish);
trace!("List object {}", new_relish);
} else {
trace!("Filter out droped object {}", new_relish);
trace!("Filtered out dropped object {}", new_relish);
}
}
@@ -972,6 +973,8 @@ impl Timeline for LayeredTimeline {
fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
let mut total_blocks: usize = 0;
let _enter = info_span!("calc logical size", %lsn).entered();
// list of all relations in this timeline, including ancestor timelines
let all_rels = self.list_rels(0, 0, lsn)?;
@@ -1042,11 +1045,8 @@ impl LayeredTimeline {
/// Returns all timeline-related files that were found and loaded.
///
fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<Vec<PathBuf>> {
info!(
"loading layer map for timeline {} into memory",
self.timelineid
);
let mut layers = self.layers.lock().unwrap();
let mut num_layers = 0;
let (imgfilenames, deltafilenames) =
filename::list_files(self.conf, self.timelineid, self.tenantid)?;
@@ -1066,14 +1066,10 @@ impl LayeredTimeline {
let layer = ImageLayer::new(self.conf, self.timelineid, self.tenantid, filename);
info!(
"found layer {} {} on timeline {}",
layer.get_seg_tag(),
layer.get_start_lsn(),
self.timelineid
);
trace!("found layer {}", layer.filename().display());
local_layers.push(layer.path());
layers.insert_historic(Arc::new(layer));
num_layers += 1;
}
for filename in &deltafilenames {
@@ -1090,14 +1086,12 @@ impl LayeredTimeline {
let layer = DeltaLayer::new(self.conf, self.timelineid, self.tenantid, filename);
info!(
"found layer {} on timeline {}",
layer.filename().display(),
self.timelineid,
);
trace!("found layer {}", layer.filename().display());
local_layers.push(layer.path());
layers.insert_historic(Arc::new(layer));
num_layers += 1;
}
info!("loaded layer map with {} layers", num_layers);
Ok(local_layers)
}
@@ -1148,12 +1142,7 @@ impl LayeredTimeline {
lsn: Lsn,
self_layers: &MutexGuard<LayerMap>,
) -> Result<Option<(Arc<dyn Layer>, Lsn)>> {
trace!(
"get_layer_for_read called for {} at {}/{}",
seg,
self.timelineid,
lsn
);
trace!("get_layer_for_read called for {} at {}", seg, lsn);
// If you requested a page at an older LSN, before the branch point, dig into
// the right ancestor timeline. This can only happen if you launch a read-only
@@ -1271,17 +1260,15 @@ impl LayeredTimeline {
// First modification on this timeline
start_lsn = self.ancestor_lsn + 1;
trace!(
"creating layer for write for {} at branch point {}/{}",
"creating layer for write for {} at branch point {}",
seg,
self.timelineid,
start_lsn
);
} else {
start_lsn = prev_layer.get_end_lsn();
trace!(
"creating layer for write for {} after previous layer {}/{}",
"creating layer for write for {} after previous layer {}",
seg,
self.timelineid,
start_lsn
);
}
@@ -1340,11 +1327,7 @@ impl LayeredTimeline {
prev: prev_record_lsn,
} = self.last_record_lsn.load();
trace!(
"checkpointing timeline {} at {}",
self.timelineid,
last_record_lsn
);
trace!("checkpoint starting at {}", last_record_lsn);
// Take the in-memory layer with the oldest WAL record. If it's older
// than the threshold, write it out to disk as a new image and delta file.
@@ -1520,11 +1503,11 @@ impl LayeredTimeline {
let now = Instant::now();
let mut result: GcResult = Default::default();
info!(
"running GC on timeline {}, cutoff {}",
self.timelineid, cutoff
);
info!("retain_lsns: {:?}", retain_lsns);
let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();
info!("GC starting");
debug!("retain_lsns: {:?}", retain_lsns);
let mut layers_to_remove: Vec<Arc<dyn Layer>> = Vec::new();
@@ -1786,10 +1769,9 @@ impl LayeredTimeline {
if data.records.is_empty() {
if let Some(img) = &data.page_img {
trace!(
"found page image for blk {} in {} at {}/{}, no WAL redo required",
"found page image for blk {} in {} at {}, no WAL redo required",
blknum,
rel,
self.timelineid,
request_lsn
);
Ok(img.clone())
@@ -1815,9 +1797,9 @@ impl LayeredTimeline {
Ok(ZERO_PAGE.clone())
} else {
if data.page_img.is_some() {
trace!("found {} WAL records and a base image for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
trace!("found {} WAL records and a base image for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
} else {
trace!("found {} WAL records that will init the page for blk {} in {} at {}/{}, performing WAL redo", data.records.len(), blknum, rel, self.timelineid, request_lsn);
trace!("found {} WAL records that will init the page for blk {} in {} at {}, performing WAL redo", data.records.len(), blknum, rel, request_lsn);
}
let img = self.walredo_mgr.request_redo(
rel,

View File

@@ -579,9 +579,8 @@ impl InMemoryLayer {
/// After completion, self is non-writeable, but not frozen.
pub fn freeze(self: Arc<Self>, cutoff_lsn: Lsn) -> Result<FreezeLayers> {
info!(
"freezing in memory layer {} on timeline {} at {} (oldest {})",
"freezing in-memory layer {} at {} (oldest {})",
self.filename().display(),
self.timelineid,
cutoff_lsn,
self.oldest_pending_lsn
);

View File

@@ -13,7 +13,6 @@
use anyhow::{anyhow, bail, ensure, Result};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use log::*;
use regex::Regex;
use std::net::TcpListener;
use std::str;
@@ -21,6 +20,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::thread;
use std::{io, net::TcpStream};
use tracing::*;
use zenith_metrics::{register_histogram_vec, HistogramVec};
use zenith_utils::auth::{self, JwtAuth};
use zenith_utils::auth::{Claims, Scope};
@@ -194,7 +194,7 @@ pub fn thread_main(
let local_auth = auth.clone();
thread::spawn(move || {
if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
error!("page server thread exiting with error: {:#}", err);
error!(%err, "page server thread exited with error");
}
});
}
@@ -260,6 +260,8 @@ impl PageServerHandler {
timelineid: ZTimelineId,
tenantid: ZTenantId,
) -> anyhow::Result<()> {
let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
// Check that the timeline exists
let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
@@ -267,7 +269,7 @@ impl PageServerHandler {
pgb.write_message(&BeMessage::CopyBothResponse)?;
while let Some(message) = pgb.read_message()? {
trace!("query({:?}): {:?}", timelineid, message);
trace!("query: {:?}", message);
let copy_data_bytes = match message {
FeMessage::CopyData(bytes) => bytes,
@@ -363,6 +365,8 @@ impl PageServerHandler {
timeline: &dyn Timeline,
req: &PagestreamExistsRequest,
) -> Result<PagestreamBeMessage> {
let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
let tag = RelishTag::Relation(req.rel);
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
@@ -378,6 +382,7 @@ impl PageServerHandler {
timeline: &dyn Timeline,
req: &PagestreamNblocksRequest,
) -> Result<PagestreamBeMessage> {
let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
let tag = RelishTag::Relation(req.rel);
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
@@ -397,6 +402,8 @@ impl PageServerHandler {
timeline: &dyn Timeline,
req: &PagestreamGetPageRequest,
) -> Result<PagestreamBeMessage> {
let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
.entered();
let tag = RelishTag::Relation(req.rel);
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
@@ -414,17 +421,20 @@ impl PageServerHandler {
lsn: Option<Lsn>,
tenantid: ZTenantId,
) -> anyhow::Result<()> {
let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
let _enter = span.enter();
// check that the timeline exists
let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
/* switch client to COPYOUT */
// switch client to COPYOUT
pgb.write_message(&BeMessage::CopyOutResponse)?;
info!("sent CopyOut");
/* Send a tarball of the latest layer on the timeline */
{
let mut writer = CopyDataSink { pgb };
let mut basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
span.record("lsn", &basebackup.lsn.to_string().as_str());
basebackup.send_tarball()?;
}
pgb.write_message(&BeMessage::CopyDone)?;
@@ -529,11 +539,6 @@ impl postgres_backend::Handler for PageServerHandler {
None
};
info!(
"got basebackup command. tenantid=\"{}\" timelineid=\"{}\" lsn=\"{:#?}\"",
tenantid, timelineid, lsn
);
// Check that the timeline exists
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
@@ -551,6 +556,9 @@ impl postgres_backend::Handler for PageServerHandler {
self.check_permission(Some(tenantid))?;
let _enter =
info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
// Check that the timeline exists
tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
@@ -573,6 +581,9 @@ impl postgres_backend::Handler for PageServerHandler {
self.check_permission(Some(tenantid))?;
let _enter =
info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
let branch =
branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
let branch = serde_json::to_vec(&branch)?;

View File

@@ -2,7 +2,6 @@
//! Import data and WAL from a PostgreSQL data directory and WAL segments into
//! zenith Timeline.
//!
use log::*;
use postgres_ffi::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
use std::cmp::min;
@@ -13,6 +12,7 @@ use std::path::Path;
use anyhow::{bail, Result};
use bytes::{Buf, Bytes};
use tracing::*;
use crate::relish::*;
use crate::repository::*;
@@ -139,6 +139,7 @@ fn import_relfile(
dboid: Oid,
) -> Result<()> {
// Does it look like a relation file?
trace!("importing rel file {}", path.display());
let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
if let Err(e) = p {
@@ -166,14 +167,14 @@ fn import_relfile(
}
// TODO: UnexpectedEof is expected
Err(e) => match e.kind() {
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
// FIXME: maybe check that we read the full length of the file?
break;
}
_ => {
bail!("error reading file {}: {:#}", path.display(), e);
bail!("error reading file {}: {:#}", path.display(), err);
}
},
};
@@ -200,7 +201,7 @@ fn import_nonrel_file(
// read the whole file
file.read_to_end(&mut buffer)?;
info!("importing non-rel file {}", path.display());
trace!("importing non-rel file {}", path.display());
timeline.put_page_image(tag, 0, lsn, Bytes::copy_from_slice(&buffer[..]))?;
Ok(())
@@ -217,7 +218,7 @@ fn import_control_file(timeline: &dyn Timeline, lsn: Lsn, path: &Path) -> Result
// read the whole file
file.read_to_end(&mut buffer)?;
info!("importing control file {}", path.display());
trace!("importing control file {}", path.display());
// Import it as ControlFile
timeline.put_page_image(
@@ -244,7 +245,7 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
let mut buf: [u8; 8192] = [0u8; 8192];
let segno = u32::from_str_radix(path.file_name().unwrap().to_str().unwrap(), 16)?;
info!("importing slru file {}", path.display());
trace!("importing slru file {}", path.display());
let mut rpageno = 0;
loop {
@@ -260,14 +261,14 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
}
// TODO: UnexpectedEof is expected
Err(e) => match e.kind() {
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
// FIXME: maybe check that we read the full length of the file?
break;
}
_ => {
bail!("error reading file {}: {:#}", path.display(), e);
bail!("error reading file {}: {:#}", path.display(), err);
}
},
};

View File

@@ -12,7 +12,6 @@ use crate::waldecoder::*;
use crate::PageServerConf;
use anyhow::{bail, Error, Result};
use lazy_static::lazy_static;
use log::*;
use postgres::fallible_iterator::FallibleIterator;
use postgres::replication::ReplicationIter;
use postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
@@ -27,6 +26,7 @@ use std::thread;
use std::thread::sleep;
use std::thread_local;
use std::time::{Duration, SystemTime};
use tracing::*;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
@@ -96,10 +96,8 @@ fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
// This is the entry point for the WAL receiver thread.
//
fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
info!(
"WAL receiver thread started for timeline : '{}'",
timelineid
);
let _enter = info_span!("WAL receiver", timeline = %timelineid, tenant = %tenantid).entered();
info!("WAL receiver thread started");
//
// Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
@@ -169,8 +167,8 @@ fn walreceiver_main(
startpoint += startpoint.calc_padding(8u32);
info!(
"last_record_lsn {} starting replication from {} for timeline {}, server is at {}...",
last_rec_lsn, startpoint, timelineid, end_of_wal
"last_record_lsn {} starting replication from {}, server is at {}...",
last_rec_lsn, startpoint, end_of_wal
);
let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
@@ -198,6 +196,8 @@ fn walreceiver_main(
waldecoder.feed_bytes(data);
while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
let _enter = info_span!("processing record", lsn = %lsn).entered();
// Save old checkpoint value to compare with it after decoding WAL record
let old_checkpoint_bytes = checkpoint.encode();
let decoded = decode_wal_record(recdata.clone());

View File

@@ -129,7 +129,7 @@ fn main() -> Result<()> {
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
let log_filename = conf.data_dir.join("wal_acceptor.log");
let (_scope_guard, log_file) = logging::init(log_filename, conf.daemonize)?;
let log_file = logging::init(log_filename, conf.daemonize)?;
let http_listener = TcpListener::bind(conf.listen_http_addr.clone()).map_err(|e| {
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);

View File

@@ -18,12 +18,9 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
thiserror = "1.0"
tokio = "1.11"
slog-async = "2.6.0"
slog-stdlog = "4.1.0"
slog-scope = "4.4.0"
slog-term = "2.8.0"
slog = "2.7.0"
tracing = "0.1"
tracing-log = "0.1"
tracing-subscriber = "0.2"
zenith_metrics = { path = "../zenith_metrics" }
workspace_hack = { path = "../workspace_hack" }

View File

@@ -1,4 +1,3 @@
use slog::{Drain, Level};
use std::{
fs::{File, OpenOptions},
path::Path,
@@ -6,10 +5,12 @@ use std::{
use anyhow::{Context, Result};
pub fn init(
log_filename: impl AsRef<Path>,
daemonize: bool,
) -> Result<(slog_scope::GlobalLoggerGuard, File)> {
use tracing::subscriber::set_global_default;
use tracing_log::LogTracer;
use tracing_subscriber::fmt;
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry};
pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
// Don't open the same file for output multiple times;
// the different fds could overwrite each other's output.
let log_file = OpenOptions::new()
@@ -18,30 +19,38 @@ pub fn init(
.open(&log_filename)
.with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;
let default_filter_str = "info";
// We fall back to printing all spans at info-level or above if
// the RUST_LOG environment variable is not set.
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_filter_str));
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
// for example to be in line with docker log command which expects logs comimg from stdout
let guard = if daemonize {
let decorator = slog_term::PlainSyncDecorator::new(log_file.try_clone()?);
let drain = slog_term::FullFormat::new(decorator)
.build()
.filter_level(Level::Info)
.fuse();
let logger = slog::Logger::root(drain, slog::o!());
slog_scope::set_global_logger(logger)
//
// TODO: perhaps use a more human-readable format when !daemonize
if daemonize {
let x = log_file.try_clone().unwrap();
let fmt_layer = fmt::layer()
.pretty()
.with_target(false) // don't include event targets
.with_ansi(false) // don't use colors in log file
.with_writer(move || x.try_clone().unwrap());
let subscriber = Registry::default().with(env_filter).with(fmt_layer);
set_global_default(subscriber).expect("Failed to set subscriber");
} else {
let decorator = slog_term::TermDecorator::new().build();
let drain = slog_term::FullFormat::new(decorator)
.build()
.filter_level(Level::Info)
.fuse();
let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
let logger = slog::Logger::root(drain, slog::o!());
slog_scope::set_global_logger(logger)
};
let fmt_layer = fmt::layer().with_target(false); // don't include event targets
let subscriber = Registry::default().with(env_filter).with(fmt_layer);
// initialise forwarding of std log calls
slog_stdlog::init()?;
set_global_default(subscriber).expect("Failed to set subscriber");
}
Ok((guard, log_file))
// Redirect all `log`'s events to our subscriber
LogTracer::init().expect("Failed to set logger");
Ok(log_file)
}