Log connection ids in safekeeper instead of thread ids.

Fixes build on macOS (which doesn't have nix gettid) after 0d8ced8534.
This commit is contained in:
Arseny Sher
2023-03-10 00:09:28 +04:00
committed by Arseny Sher
parent d1a0f2f0eb
commit 965837df53
6 changed files with 32 additions and 12 deletions

1
Cargo.lock generated
View File

@@ -3333,7 +3333,6 @@ dependencies = [
"humantime",
"hyper",
"metrics",
"nix",
"once_cell",
"parking_lot",
"postgres",

View File

@@ -19,7 +19,6 @@ git-version.workspace = true
hex.workspace = true
humantime.workspace = true
hyper.workspace = true
nix.workspace = true
once_cell.workspace = true
parking_lot.workspace = true
postgres.workspace = true

View File

@@ -8,6 +8,7 @@ use tracing::{info, info_span, Instrument};
use crate::auth::check_permission;
use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
use crate::wal_service::ConnectionId;
use crate::{GlobalTimelines, SafeKeeperConf};
use postgres_backend::QueryError;
use postgres_backend::{self, PostgresBackend};
@@ -28,6 +29,8 @@ pub struct SafekeeperPostgresHandler {
pub tenant_id: Option<TenantId>,
pub timeline_id: Option<TimelineId>,
pub ttid: TenantTimelineId,
/// Unique connection id is logged in spans for observability.
pub conn_id: ConnectionId,
claims: Option<Claims>,
}
@@ -181,13 +184,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
}
impl SafekeeperPostgresHandler {
pub fn new(conf: SafeKeeperConf) -> Self {
pub fn new(conf: SafeKeeperConf, conn_id: u32) -> Self {
SafekeeperPostgresHandler {
conf,
appname: None,
tenant_id: None,
timeline_id: None,
ttid: TenantTimelineId::empty(),
conn_id,
claims: None,
}
}

View File

@@ -1,4 +1,5 @@
use remote_storage::RemoteStorageConfig;
use std::path::PathBuf;
use std::time::Duration;
use storage_broker::Uri;

View File

@@ -7,10 +7,10 @@ use crate::safekeeper::AcceptorProposerMessage;
use crate::safekeeper::ProposerAcceptorMessage;
use crate::safekeeper::ServerInfo;
use crate::timeline::Timeline;
use crate::wal_service::ConnectionId;
use crate::GlobalTimelines;
use anyhow::{anyhow, Context};
use bytes::BytesMut;
use nix::unistd::gettid;
use postgres_backend::CopyStreamHandlerEnd;
use postgres_backend::PostgresBackend;
use postgres_backend::PostgresBackendReader;
@@ -70,7 +70,7 @@ impl SafekeeperPostgresHandler {
let peer_addr = *pgb.get_peer_addr();
let res = tokio::select! {
// todo: add read|write .context to these errors
r = read_network(self.ttid, &mut pgb_reader, peer_addr, msg_tx, &mut acceptor_handle, msg_rx, reply_tx) => r,
r = read_network(self.ttid, self.conn_id, &mut pgb_reader, peer_addr, msg_tx, &mut acceptor_handle, msg_rx, reply_tx) => r,
r = write_network(pgb, reply_rx) => r,
};
@@ -119,6 +119,7 @@ async fn read_message(
/// tell the error.
async fn read_network(
ttid: TenantTimelineId,
conn_id: ConnectionId,
pgb_reader: &mut PostgresBackendReader,
peer_addr: SocketAddr,
msg_tx: Sender<ProposerAcceptorMessage>,
@@ -151,7 +152,8 @@ async fn read_network(
};
*acceptor_handle = Some(
WalAcceptor::spawn(tli.clone(), msg_rx, reply_tx).context("spawn WalAcceptor thread")?,
WalAcceptor::spawn(tli.clone(), msg_rx, reply_tx, conn_id)
.context("spawn WalAcceptor thread")?,
);
// Forward all messages to WalAcceptor
@@ -205,6 +207,7 @@ impl WalAcceptor {
tli: Arc<Timeline>,
msg_rx: Receiver<ProposerAcceptorMessage>,
reply_tx: Sender<AcceptorProposerMessage>,
conn_id: ConnectionId,
) -> anyhow::Result<JoinHandle<anyhow::Result<()>>> {
let thread_name = format!("WAL acceptor {}", tli.ttid);
thread::Builder::new()
@@ -223,7 +226,7 @@ impl WalAcceptor {
let span_ttid = wa.tli.ttid; // satisfy borrow checker
runtime.block_on(
wa.run()
.instrument(info_span!("WAL acceptor", tid = %gettid(), ttid = %span_ttid)),
.instrument(info_span!("WAL acceptor", cid = %conn_id, ttid = %span_ttid)),
)
})
.map_err(anyhow::Error::from)

View File

@@ -3,7 +3,6 @@
//! receive WAL from wal_proposer and send it to WAL receivers
//!
use anyhow::{Context, Result};
use nix::unistd::gettid;
use postgres_backend::QueryError;
use std::{future, thread};
use tokio::net::TcpStream;
@@ -27,17 +26,19 @@ pub fn thread_main(conf: SafeKeeperConf, pg_listener: std::net::TcpListener) {
// Tokio's from_std won't do this for us, per its comment.
pg_listener.set_nonblocking(true)?;
let listener = tokio::net::TcpListener::from_std(pg_listener)?;
let mut connection_count: ConnectionCount = 0;
loop {
match listener.accept().await {
Ok((socket, peer_addr)) => {
debug!("accepted connection from {}", peer_addr);
let conf = conf.clone();
let conn_id = issue_connection_id(&mut connection_count);
let _ = thread::Builder::new()
.name("WAL service thread".into())
.spawn(move || {
if let Err(err) = handle_socket(socket, conf) {
if let Err(err) = handle_socket(socket, conf, conn_id) {
error!("connection handler exited: {}", err);
}
})
@@ -54,8 +55,12 @@ pub fn thread_main(conf: SafeKeeperConf, pg_listener: std::net::TcpListener) {
/// This is run by `thread_main` above, inside a background thread.
///
fn handle_socket(socket: TcpStream, conf: SafeKeeperConf) -> Result<(), QueryError> {
let _enter = info_span!("", tid = %gettid()).entered();
fn handle_socket(
socket: TcpStream,
conf: SafeKeeperConf,
conn_id: ConnectionId,
) -> Result<(), QueryError> {
let _enter = info_span!("", cid = %conn_id).entered();
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -68,7 +73,7 @@ fn handle_socket(socket: TcpStream, conf: SafeKeeperConf) -> Result<(), QueryErr
None => AuthType::Trust,
Some(_) => AuthType::NeonJWT,
};
let mut conn_handler = SafekeeperPostgresHandler::new(conf);
let mut conn_handler = SafekeeperPostgresHandler::new(conf, conn_id);
let pgbackend = PostgresBackend::new(socket, auth_type, None)?;
// libpq protocol between safekeeper and walproposer / pageserver
// We don't use shutdown.
@@ -79,3 +84,12 @@ fn handle_socket(socket: TcpStream, conf: SafeKeeperConf) -> Result<(), QueryErr
Ok(())
}
/// Unique WAL service connection ids are logged in spans for observability.
pub type ConnectionId = u32;
pub type ConnectionCount = u32;
pub fn issue_connection_id(count: &mut ConnectionCount) -> ConnectionId {
*count = count.wrapping_add(1);
*count
}