diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index fa6d1e496a..18d6aee68d 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -362,6 +362,11 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?, + trace_read_requests: settings + .remove("trace_read_requests") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'trace_read_requests' as bool")?, }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") @@ -424,6 +429,11 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?, + trace_read_requests: settings + .get("trace_read_requests") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'trace_read_requests' as bool")?, }) .send()? .error_from_body()?; diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index e5bd46f260..af9be2d456 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -73,6 +73,7 @@ pub struct TenantCreateRequest { pub walreceiver_connect_timeout: Option, pub lagging_wal_timeout: Option, pub max_lsn_wal_lag: Option, + pub trace_read_requests: Option, } #[serde_as] @@ -112,6 +113,7 @@ pub struct TenantConfigRequest { pub walreceiver_connect_timeout: Option, pub lagging_wal_timeout: Option, pub max_lsn_wal_lag: Option, + pub trace_read_requests: Option, } impl TenantConfigRequest { @@ -130,6 +132,7 @@ impl TenantConfigRequest { walreceiver_connect_timeout: None, lagging_wal_timeout: None, max_lsn_wal_lag: None, + trace_read_requests: None, } } } diff --git a/libs/utils/src/id.rs b/libs/utils/src/id.rs index f245f7c3d4..7ce324614d 100644 --- a/libs/utils/src/id.rs +++ b/libs/utils/src/id.rs @@ -204,6 +204,17 @@ pub struct TenantId(Id); id_newtype!(TenantId); +/// Neon Connection Id identifies long-lived connections (for example a pagestream +/// connection with the page_service). Is used for better logging and tracing +/// +/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look +/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`. +/// See [`Id`] for alternative ways to serialize it. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)] +pub struct ConnectionId(Id); + +id_newtype!(ConnectionId); + // A pair uniquely identifying Neon instance. #[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct TenantTimelineId { diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 747e63af2b..f40b608da1 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -8,6 +8,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; use remote_storage::RemoteStorageConfig; use std::env; use utils::crashsafe::path_with_suffix_extension; +use utils::id::ConnectionId; use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; @@ -414,6 +415,22 @@ impl PageServerConf { ) } + pub fn traces_path(&self) -> PathBuf { + self.workdir.join("traces") + } + + pub fn trace_path( + &self, + tenant_id: &TenantId, + timeline_id: &TimelineId, + connection_id: &ConnectionId, + ) -> PathBuf { + self.traces_path() + .join(tenant_id.to_string()) + .join(timeline_id.to_string()) + .join(connection_id.to_string()) + } + /// Points to a place in pageserver's local directory, /// where certain timeline's metadata file should be located. pub fn metadata_path(&self, timeline_id: TimelineId, tenant_id: TenantId) -> PathBuf { diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 14ea054577..db581efc7d 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -618,6 +618,7 @@ async fn tenant_create_handler(mut request: Request) -> Result) -> Result) -> Result bool { + let tenant_conf = self.tenant_conf.read().unwrap(); + tenant_conf + .trace_read_requests + .unwrap_or(self.conf.default_tenant_conf.trace_read_requests) + } + pub fn update_tenant_config(&self, new_tenant_conf: TenantConfOpt) { self.tenant_conf.write().unwrap().update(&new_tenant_conf); } @@ -1666,6 +1673,7 @@ pub mod harness { walreceiver_connect_timeout: Some(tenant_conf.walreceiver_connect_timeout), lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout), max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag), + trace_read_requests: Some(tenant_conf.trace_read_requests), } } } diff --git a/pageserver/src/tenant_config.rs b/pageserver/src/tenant_config.rs index dc1b9353a6..dd3792450d 100644 --- a/pageserver/src/tenant_config.rs +++ b/pageserver/src/tenant_config.rs @@ -82,6 +82,7 @@ pub struct TenantConf { /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update, /// to avoid eager reconnects. pub max_lsn_wal_lag: NonZeroU64, + pub trace_read_requests: bool, } /// Same as TenantConf, but this struct preserves the information about @@ -105,6 +106,7 @@ pub struct TenantConfOpt { #[serde(with = "humantime_serde")] pub lagging_wal_timeout: Option, pub max_lsn_wal_lag: Option, + pub trace_read_requests: Option, } impl TenantConfOpt { @@ -138,6 +140,9 @@ impl TenantConfOpt { .lagging_wal_timeout .unwrap_or(global_conf.lagging_wal_timeout), max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag), + trace_read_requests: self + .trace_read_requests + .unwrap_or(global_conf.trace_read_requests), } } @@ -207,6 +212,7 @@ impl TenantConf { .expect("cannot parse default walreceiver lagging wal timeout"), max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG) .expect("cannot parse default max walreceiver Lsn wal lag"), + trace_read_requests: false, } } @@ -232,6 +238,7 @@ impl TenantConf { .unwrap(), max_lsn_wal_lag: NonZeroU64::new(defaults::DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG) .unwrap(), + trace_read_requests: false, } } } diff --git a/pageserver/src/trace.rs b/pageserver/src/trace.rs new file mode 100644 index 0000000000..9e466dd9b0 --- /dev/null +++ b/pageserver/src/trace.rs @@ -0,0 +1,36 @@ +use bytes::Bytes; +use std::{ + fs::{create_dir_all, File}, + io::{BufWriter, Write}, + path::PathBuf, +}; + +pub struct Tracer { + writer: BufWriter, +} + +impl Drop for Tracer { + fn drop(&mut self) { + self.flush() + } +} + +impl Tracer { + pub fn new(path: PathBuf) -> Self { + let parent = path.parent().expect("failed to parse parent path"); + create_dir_all(parent).expect("failed to create trace dir"); + + let file = File::create(path).expect("failed to create trace file"); + Tracer { + writer: BufWriter::new(file), + } + } + + pub fn trace(&mut self, msg: &Bytes) { + self.writer.write_all(msg).expect("failed to write trace"); + } + + pub fn flush(&mut self) { + self.writer.flush().expect("failed to flush trace file"); + } +} diff --git a/test_runner/performance/test_read_trace.py b/test_runner/performance/test_read_trace.py new file mode 100644 index 0000000000..a5bd0b8de6 --- /dev/null +++ b/test_runner/performance/test_read_trace.py @@ -0,0 +1,31 @@ +from contextlib import closing + +from fixtures.neon_fixtures import NeonEnvBuilder + + +# This test demonstrates how to collect a read trace. It's useful until +# it gets replaced by a test that actually does stuff with the trace. +def test_read_request_tracing(neon_env_builder: NeonEnvBuilder): + neon_env_builder.num_safekeepers = 1 + env = neon_env_builder.init_start() + + tenant, _ = env.neon_cli.create_tenant( + conf={ + "trace_read_requests": "true", + } + ) + + timeline = env.neon_cli.create_timeline("test_trace_replay", tenant_id=tenant) + pg = env.postgres.create_start("test_trace_replay", "main", tenant) + + with closing(pg.connect()) as conn: + with conn.cursor() as cur: + cur.execute("create table t (i integer);") + cur.execute(f"insert into t values (generate_series(1,{10000}));") + cur.execute("select count(*) from t;") + + # Stop pg so we drop the connection and flush the traces + pg.stop() + + trace_path = env.repo_dir / "traces" / str(tenant) / str(timeline) + assert trace_path.exists()