mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
Based on https://github.com/neondatabase/neon/pull/11139 ## Problem We want to export performance traces from the pageserver in OTEL format. End goal is to see them in Grafana. ## Summary of changes https://github.com/neondatabase/neon/pull/11139 introduces the infrastructure required to run the otel collector alongside the pageserver. ### Design Requirements: 1. We'd like to avoid implementing our own performance tracing stack if possible and use the `tracing` crate if possible. 2. Ideally, we'd like zero overhead of a sampling rate of zero and be a be able to change the tracing config for a tenant on the fly. 3. We should leave the current span hierarchy intact. This includes adding perf traces without modifying existing tracing. To satisfy (3) (and (2) in part) a separate span hierarchy is used. `RequestContext` gains an optional `perf_span` member that's only set when the request was chosen by sampling. All perf span related methods added to `RequestContext` are no-ops for requests that are not sampled. This on its own is not enough for (3), so performance spans use a separate tracing subscriber. The `tracing` crate doesn't have great support for this, so there's a fair amount of boilerplate to override the subscriber at all points of the perf span lifecycle. ### Perf Impact [Periodic pagebench](https://neonprod.grafana.net/d/ddqtbfykfqfi8d/e904990?orgId=1&from=2025-02-08T14:15:59.362Z&to=2025-03-10T14:15:59.362Z&timezone=utc) shows no statistically significant regression with a sample ratio of 0. There's an annotation on the dashboard on 2025-03-06. ### Overview of changes: 1. Clean up the `RequestContext` API a bit. Namely, get rid of the `RequestContext::extend` API and use the builder instead. 2. Add pageserver level configs for tracing: sampling ratio, otel endpoint, etc. 3. Introduce some perf span tracking utilities and expose them via `RequestContext`. 
We add a `tracing::Span` wrapper to be used for perf spans and a `tracing::Instrumented` equivalent for it. See doc comments for reason. 4. Set up OTEL tracing infra according to configuration. A separate runtime is used for the collector. 5. Add perf traces to the read path. ## Refs - epic https://github.com/neondatabase/neon/issues/9873 --------- Co-authored-by: Christian Schwarz <christian@neon.tech>
42 lines
1010 B
TOML
42 lines
1010 B
TOML
# Manifest for the `pageserver_api` crate.
[package]
name = "pageserver_api"
version = "0.1.0"
edition = "2024"
# Inherited from the workspace root manifest rather than set per crate.
license.workspace = true

[features]
# See pageserver/Cargo.toml
# Enabling `testing` activates the optional `nix` dependency (`dep:nix`).
testing = ["dep:nix"]

# Every entry inherits its version/source from the workspace manifest; only
# extra features or `optional` are specified here. Keys are kept sorted.
[dependencies]
anyhow.workspace = true
byteorder.workspace = true
bytes.workspace = true
camino = { workspace = true, features = ["serde1"] }
chrono = { workspace = true, features = ["serde"] }
const_format.workspace = true
enum-map.workspace = true
hex.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
itertools.workspace = true
# Optional: only pulled in by the `testing` feature (`dep:nix`).
nix = { workspace = true, optional = true }
postgres_backend.workspace = true
postgres_ffi.workspace = true
rand.workspace = true
remote_storage.workspace = true
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
storage_broker.workspace = true
strum.workspace = true
strum_macros.workspace = true
thiserror.workspace = true
tracing-utils.workspace = true
utils.workspace = true

# Dependencies used only by tests, examples and benchmarks of this crate.
[dev-dependencies]
bincode.workspace = true
# NOTE(review): `rand` is also listed under [dependencies], so this entry
# looks redundant — confirm before removing.
rand.workspace = true