Fix

Proper fix
2026-07-04 04:30:38 +00:00 · 2024-04-10 11:51:10 +02:00 · 2024-04-10 11:48:44 +02:00
53 changed files with 1170 additions and 1608 deletions
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -150,7 +150,7 @@ runs:

        # Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
        # and to keep files on the host to upload them to the database
-        time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/"
+        time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"

        # Generate redirect
        cat <<EOF > ${WORKDIR}/index.html
--- a/.github/workflows/approved-for-ci-run.yml
+++ b/.github/workflows/approved-for-ci-run.yml
@@ -18,7 +18,6 @@ on:

 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: false

 env:
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/build-build-tools-image.yml
+++ b/.github/workflows/build-build-tools-image.yml
@@ -21,7 +21,6 @@ defaults:

 concurrency:
  group: build-build-tools-image-${{ inputs.image-tag }}
-  cancel-in-progress: false

 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -20,7 +20,6 @@ defaults:

 concurrency:
  group: pin-build-tools-image-${{ inputs.from-tag }}
-  cancel-in-progress: false

 permissions: {}

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2932,9 +2932,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "measured"
-version = "0.0.21"
+version = "0.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "652bc741286361c06de8cb4d89b21a6437f120c508c51713663589eeb9928ac5"
+checksum = "3cbf033874bea03565f2449572c8640ca37ec26300455faf36001f24755da452"
 dependencies = [
 "bytes",
 "crossbeam-utils",
@@ -2950,9 +2950,9 @@ dependencies = [

 [[package]]
 name = "measured-derive"
-version = "0.0.21"
+version = "0.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea497f33e1e856a376c32ad916f69a0bd3c597db1f912a399f842b01a4a685d"
+checksum = "be9e29b682b38f8af2a89f960455054ab1a9f5a06822f6f3500637ad9fa57def"
 dependencies = [
 "heck 0.5.0",
 "proc-macro2",
@@ -2962,9 +2962,9 @@ dependencies = [

 [[package]]
 name = "measured-process"
-version = "0.0.21"
+version = "0.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b364ccb66937a814b6b2ad751d1a2f7a9d5a78c761144036825fb36bb0771000"
+checksum = "a20849acdd04c5d6a88f565559044546904648a1842a2937cfff0b48b4ca7ef2"
 dependencies = [
 "libc",
 "measured",
@@ -4322,7 +4322,6 @@ dependencies = [
 "itertools",
 "lasso",
 "md5",
- "measured",
 "metrics",
 "native-tls",
 "once_cell",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -107,8 +107,8 @@ lasso = "0.7"
 leaky-bucket = "1.0.1"
 libc = "0.2"
 md5 = "0.7.0"
-measured = { version = "0.0.21", features=["lasso"] }
-measured-process = { version = "0.0.21" }
+measured = { version = "0.0.20", features=["lasso"] }
+measured-process = { version = "0.0.20" }
 memoffset = "0.8"
 native-tls = "0.2"
 nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -58,12 +58,6 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
    && mv protoc/include/google /usr/local/include/google \
    && rm -rf protoc.zip protoc

-# s5cmd
-ENV S5CMD_VERSION=2.2.2
-RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
-    && chmod +x s5cmd \
-    && mv s5cmd /usr/local/bin/s5cmd
-
 # LLVM
 ENV LLVM_VERSION=17
 RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,8 +6,8 @@ use std::path::Path;
 use anyhow::Result;

 use crate::pg_helpers::escape_conf_value;
-use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
-use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
+use crate::pg_helpers::PgOptionsSerialize;
+use compute_api::spec::{ComputeMode, ComputeSpec};

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -92,27 +92,6 @@ pub fn write_postgres_conf(
        }
    }

-    if cfg!(target_os = "linux") {
-        // Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is
-        // disabled), then the control plane has enabled swap and we should set
-        // dynamic_shared_memory_type = 'mmap'.
-        //
-        // This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
-        let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
-            // ignore any errors - they may be expected to occur under certain situations (e.g. when
-            // not running in Linux).
-            .unwrap_or_else(|_| String::new());
-        if overcommit_memory_contents.trim() == "2" {
-            let opt = GenericOption {
-                name: "dynamic_shared_memory_type".to_owned(),
-                value: Some("mmap".to_owned()),
-                vartype: "enum".to_owned(),
-            };
-
-            write!(file, "{}", opt.to_pg_setting())?;
-        }
-    }
-
    // If there are any extra options in the 'settings' field, append those
    if spec.cluster.settings.is_some() {
        writeln!(file, "# Managed by compute_ctl: begin")?;
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -44,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
    format!("'{}'", res)
 }

-pub trait GenericOptionExt {
+trait GenericOptionExt {
    fn to_pg_option(&self) -> String;
    fn to_pg_setting(&self) -> String;
 }
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -1231,7 +1231,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
    match ComputeControlPlane::load(env.clone()) {
        Ok(cplane) => {
            for (_k, node) in cplane.endpoints {
-                if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
+                if let Err(e) = node.stop(if immediate { "immediate" } else { "fast " }, false) {
                    eprintln!("postgres stop failed: {e:#}");
                }
            }
--- a/libs/metrics/src/hll.rs
+++ b/libs/metrics/src/hll.rs
@@ -7,19 +7,14 @@
 //! use significantly less memory than this, but can only approximate the cardinality.

 use std::{
-    hash::{BuildHasher, BuildHasherDefault, Hash},
-    sync::atomic::AtomicU8,
+    collections::HashMap,
+    hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
+    sync::{atomic::AtomicU8, Arc, RwLock},
 };

-use measured::{
-    label::{LabelGroupVisitor, LabelName, LabelValue, LabelVisitor},
-    metric::{
-        group::{Encoding, MetricValue},
-        name::MetricNameEncoder,
-        Metric, MetricType, MetricVec,
-    },
-    text::TextEncoder,
-    LabelGroup,
+use prometheus::{
+    core::{self, Describer},
+    proto, Opts,
 };
 use twox_hash::xxh3;

@@ -98,25 +93,203 @@ macro_rules! register_hll {
 /// ```
 ///
 /// See <https://en.wikipedia.org/wiki/HyperLogLog#Practical_considerations> for estimates on alpha
-pub type HyperLogLogVec<L, const N: usize> = MetricVec<HyperLogLogState<N>, L>;
-pub type HyperLogLog<const N: usize> = Metric<HyperLogLogState<N>>;
-
-pub struct HyperLogLogState<const N: usize> {
-    shards: [AtomicU8; N],
+#[derive(Clone)]
+pub struct HyperLogLogVec<const N: usize> {
+    core: Arc<HyperLogLogVecCore<N>>,
 }
-impl<const N: usize> Default for HyperLogLogState<N> {
-    fn default() -> Self {
-        #[allow(clippy::declare_interior_mutable_const)]
-        const ZERO: AtomicU8 = AtomicU8::new(0);
-        Self { shards: [ZERO; N] }
+
+struct HyperLogLogVecCore<const N: usize> {
+    pub children: RwLock<HashMap<u64, HyperLogLog<N>, BuildHasherDefault<xxh3::Hash64>>>,
+    pub desc: core::Desc,
+    pub opts: Opts,
+}
+
+impl<const N: usize> core::Collector for HyperLogLogVec<N> {
+    fn desc(&self) -> Vec<&core::Desc> {
+        vec![&self.core.desc]
+    }
+
+    fn collect(&self) -> Vec<proto::MetricFamily> {
+        let mut m = proto::MetricFamily::default();
+        m.set_name(self.core.desc.fq_name.clone());
+        m.set_help(self.core.desc.help.clone());
+        m.set_field_type(proto::MetricType::GAUGE);
+
+        let mut metrics = Vec::new();
+        for child in self.core.children.read().unwrap().values() {
+            child.core.collect_into(&mut metrics);
+        }
+        m.set_metric(metrics);
+
+        vec![m]
    }
 }

-impl<const N: usize> MetricType for HyperLogLogState<N> {
-    type Metadata = ();
+impl<const N: usize> HyperLogLogVec<N> {
+    /// Create a new [`HyperLogLogVec`] based on the provided
+    /// [`Opts`] and partitioned by the given label names. At least one label name must be
+    /// provided.
+    pub fn new(opts: Opts, label_names: &[&str]) -> prometheus::Result<Self> {
+        assert!(N.is_power_of_two());
+        let variable_names = label_names.iter().map(|s| (*s).to_owned()).collect();
+        let opts = opts.variable_labels(variable_names);
+
+        let desc = opts.describe()?;
+        let v = HyperLogLogVecCore {
+            children: RwLock::new(HashMap::default()),
+            desc,
+            opts,
+        };
+
+        Ok(Self { core: Arc::new(v) })
+    }
+
+    /// `get_metric_with_label_values` returns the [`HyperLogLog<N>`] for the given slice
+    /// of label values (same order as the VariableLabels in Desc). If that combination of
+    /// label values is accessed for the first time, a new [`HyperLogLog<N>`] is created.
+    ///
+    /// An error is returned if the number of label values is not the same as the
+    /// number of VariableLabels in Desc.
+    pub fn get_metric_with_label_values(
+        &self,
+        vals: &[&str],
+    ) -> prometheus::Result<HyperLogLog<N>> {
+        self.core.get_metric_with_label_values(vals)
+    }
+
+    /// `with_label_values` works as `get_metric_with_label_values`, but panics if an error
+    /// occurs.
+    pub fn with_label_values(&self, vals: &[&str]) -> HyperLogLog<N> {
+        self.get_metric_with_label_values(vals).unwrap()
+    }
 }

-impl<const N: usize> HyperLogLogState<N> {
+impl<const N: usize> HyperLogLogVecCore<N> {
+    pub fn get_metric_with_label_values(
+        &self,
+        vals: &[&str],
+    ) -> prometheus::Result<HyperLogLog<N>> {
+        let h = self.hash_label_values(vals)?;
+
+        if let Some(metric) = self.children.read().unwrap().get(&h).cloned() {
+            return Ok(metric);
+        }
+
+        self.get_or_create_metric(h, vals)
+    }
+
+    pub(crate) fn hash_label_values(&self, vals: &[&str]) -> prometheus::Result<u64> {
+        if vals.len() != self.desc.variable_labels.len() {
+            return Err(prometheus::Error::InconsistentCardinality {
+                expect: self.desc.variable_labels.len(),
+                got: vals.len(),
+            });
+        }
+
+        let mut h = xxh3::Hash64::default();
+        for val in vals {
+            h.write(val.as_bytes());
+        }
+
+        Ok(h.finish())
+    }
+
+    fn get_or_create_metric(
+        &self,
+        hash: u64,
+        label_values: &[&str],
+    ) -> prometheus::Result<HyperLogLog<N>> {
+        let mut children = self.children.write().unwrap();
+        // Check again whether the metric exists now that we hold the write lock.
+        if let Some(metric) = children.get(&hash).cloned() {
+            return Ok(metric);
+        }
+
+        let metric = HyperLogLog::with_opts_and_label_values(&self.opts, label_values)?;
+        children.insert(hash, metric.clone());
+        Ok(metric)
+    }
+}
+
+/// HLL is a probabilistic cardinality measure.
+///
+/// How to use this time-series for a metric name `my_metrics_total_hll`:
+///
+/// ```promql
+/// # harmonic mean
+/// 1 / (
+///     sum (
+///         2 ^ -(
+///             # HLL merge operation
+///             max (my_metrics_total_hll{}) by (hll_shard, other_labels...)
+///         )
+///     ) without (hll_shard)
+/// )
+/// * alpha
+/// * shards_count
+/// * shards_count
+/// ```
+///
+/// If you want an estimate over time, you can use the following query:
+///
+/// ```promql
+/// # harmonic mean
+/// 1 / (
+///     sum (
+///         2 ^ -(
+///             # HLL merge operation
+///             max (
+///                 max_over_time(my_metrics_total_hll{}[$__rate_interval])
+///             ) by (hll_shard, other_labels...)
+///         )
+///     ) without (hll_shard)
+/// )
+/// * alpha
+/// * shards_count
+/// * shards_count
+/// ```
+///
+/// In the case of low cardinality, you might want to use the linear counting approximation:
+///
+/// ```promql
+/// # LinearCounting(m, V) = m log (m / V)
+/// shards_count * ln(shards_count /
+///     # calculate V = how many shards contain a 0
+///     count(max (my_metrics_total_hll{}) by (hll_shard, other_labels...) == 0) without (hll_shard)
+/// )
+/// ```
+///
+/// See <https://en.wikipedia.org/wiki/HyperLogLog#Practical_considerations> for estimates on alpha
+#[derive(Clone)]
+pub struct HyperLogLog<const N: usize> {
+    core: Arc<HyperLogLogCore<N>>,
+}
+
+impl<const N: usize> HyperLogLog<N> {
+    /// Create a [`HyperLogLog`] with the `name` and `help` arguments.
+    pub fn new<S1: Into<String>, S2: Into<String>>(name: S1, help: S2) -> prometheus::Result<Self> {
+        assert!(N.is_power_of_two());
+        let opts = Opts::new(name, help);
+        Self::with_opts(opts)
+    }
+
+    /// Create a [`HyperLogLog`] with the `opts` options.
+    pub fn with_opts(opts: Opts) -> prometheus::Result<Self> {
+        Self::with_opts_and_label_values(&opts, &[])
+    }
+
+    fn with_opts_and_label_values(opts: &Opts, label_values: &[&str]) -> prometheus::Result<Self> {
+        let desc = opts.describe()?;
+        let labels = make_label_pairs(&desc, label_values)?;
+
+        let v = HyperLogLogCore {
+            shards: [0; N].map(AtomicU8::new),
+            desc,
+            labels,
+        };
+        Ok(Self { core: Arc::new(v) })
+    }
+
    pub fn measure(&self, item: &impl Hash) {
        // changing the hasher will break compatibility with previous measurements.
        self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));
@@ -126,11 +299,42 @@ impl<const N: usize> HyperLogLogState<N> {
        let p = N.ilog2() as u8;
        let j = hash & (N as u64 - 1);
        let rho = (hash >> p).leading_zeros() as u8 + 1 - p;
-        self.shards[j as usize].fetch_max(rho, std::sync::atomic::Ordering::Relaxed);
+        self.core.shards[j as usize].fetch_max(rho, std::sync::atomic::Ordering::Relaxed);
+    }
+}
+
+struct HyperLogLogCore<const N: usize> {
+    shards: [AtomicU8; N],
+    desc: core::Desc,
+    labels: Vec<proto::LabelPair>,
+}
+
+impl<const N: usize> core::Collector for HyperLogLog<N> {
+    fn desc(&self) -> Vec<&core::Desc> {
+        vec![&self.core.desc]
    }

-    fn take_sample(&self) -> [u8; N] {
-        self.shards.each_ref().map(|x| {
+    fn collect(&self) -> Vec<proto::MetricFamily> {
+        let mut m = proto::MetricFamily::default();
+        m.set_name(self.core.desc.fq_name.clone());
+        m.set_help(self.core.desc.help.clone());
+        m.set_field_type(proto::MetricType::GAUGE);
+
+        let mut metrics = Vec::new();
+        self.core.collect_into(&mut metrics);
+        m.set_metric(metrics);
+
+        vec![m]
+    }
+}
+
+impl<const N: usize> HyperLogLogCore<N> {
+    fn collect_into(&self, metrics: &mut Vec<proto::Metric>) {
+        self.shards.iter().enumerate().for_each(|(i, x)| {
+            let mut shard_label = proto::LabelPair::default();
+            shard_label.set_name("hll_shard".to_owned());
+            shard_label.set_value(format!("{i}"));
+
            // We reset the counter to 0 so we can perform a cardinality measure over any time slice in prometheus.

            // This seems like it would be a race condition,
@@ -140,90 +344,85 @@ impl<const N: usize> HyperLogLogState<N> {

            // TODO: maybe we shouldn't reset this on every collect, instead, only after a time window.
            // this would mean that a dev port-forwarding the metrics url won't break the sampling.
-            x.swap(0, std::sync::atomic::Ordering::Relaxed)
+            let v = x.swap(0, std::sync::atomic::Ordering::Relaxed);
+
+            let mut m = proto::Metric::default();
+            let mut c = proto::Gauge::default();
+            c.set_value(v as f64);
+            m.set_gauge(c);
+
+            let mut labels = Vec::with_capacity(self.labels.len() + 1);
+            labels.extend_from_slice(&self.labels);
+            labels.push(shard_label);
+
+            m.set_label(labels);
+            metrics.push(m);
        })
    }
 }
-impl<W: std::io::Write, const N: usize> measured::metric::MetricEncoding<TextEncoder<W>>
-    for HyperLogLogState<N>
-{
-    fn write_type(
-        name: impl MetricNameEncoder,
-        enc: &mut TextEncoder<W>,
-    ) -> Result<(), std::io::Error> {
-        enc.write_type(&name, measured::text::MetricType::Gauge)
+
+fn make_label_pairs(
+    desc: &core::Desc,
+    label_values: &[&str],
+) -> prometheus::Result<Vec<proto::LabelPair>> {
+    if desc.variable_labels.len() != label_values.len() {
+        return Err(prometheus::Error::InconsistentCardinality {
+            expect: desc.variable_labels.len(),
+            got: label_values.len(),
+        });
    }
-    fn collect_into(
-        &self,
-        _: &(),
-        labels: impl LabelGroup,
-        name: impl MetricNameEncoder,
-        enc: &mut TextEncoder<W>,
-    ) -> Result<(), std::io::Error> {
-        struct I64(i64);
-        impl LabelValue for I64 {
-            fn visit<V: LabelVisitor>(&self, v: V) -> V::Output {
-                v.write_int(self.0)
-            }
-        }

-        struct HllShardLabel {
-            hll_shard: i64,
-        }
-
-        impl LabelGroup for HllShardLabel {
-            fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
-                const LE: &LabelName = LabelName::from_str("hll_shard");
-                v.write_value(LE, &I64(self.hll_shard));
-            }
-        }
-
-        self.take_sample()
-            .into_iter()
-            .enumerate()
-            .try_for_each(|(hll_shard, val)| {
-                enc.write_metric_value(
-                    name.by_ref(),
-                    labels.by_ref().compose_with(HllShardLabel {
-                        hll_shard: hll_shard as i64,
-                    }),
-                    MetricValue::Int(val as i64),
-                )
-            })
+    let total_len = desc.variable_labels.len() + desc.const_label_pairs.len();
+    if total_len == 0 {
+        return Ok(vec![]);
    }
+
+    if desc.variable_labels.is_empty() {
+        return Ok(desc.const_label_pairs.clone());
+    }
+
+    let mut label_pairs = Vec::with_capacity(total_len);
+    for (i, n) in desc.variable_labels.iter().enumerate() {
+        let mut label_pair = proto::LabelPair::default();
+        label_pair.set_name(n.clone());
+        label_pair.set_value(label_values[i].to_owned());
+        label_pairs.push(label_pair);
+    }
+
+    for label_pair in &desc.const_label_pairs {
+        label_pairs.push(label_pair.clone());
+    }
+    label_pairs.sort();
+    Ok(label_pairs)
 }

 #[cfg(test)]
 mod tests {
    use std::collections::HashSet;

-    use measured::{label::StaticLabelSet, FixedCardinalityLabel};
+    use prometheus::{proto, Opts};
    use rand::{rngs::StdRng, Rng, SeedableRng};
    use rand_distr::{Distribution, Zipf};

    use crate::HyperLogLogVec;

-    #[derive(FixedCardinalityLabel, Clone, Copy)]
-    #[label(singleton = "x")]
-    enum Label {
-        A,
-        B,
+    fn collect(hll: &HyperLogLogVec<32>) -> Vec<proto::Metric> {
+        let mut metrics = vec![];
+        hll.core
+            .children
+            .read()
+            .unwrap()
+            .values()
+            .for_each(|c| c.core.collect_into(&mut metrics));
+        metrics
    }
-
-    fn collect(hll: &HyperLogLogVec<StaticLabelSet<Label>, 32>) -> ([u8; 32], [u8; 32]) {
-        // cannot go through the `hll.collect_family_into` interface yet...
-        // need to see if I can fix the conflicting impls problem in measured.
-        (
-            hll.get_metric(hll.with_labels(Label::A)).take_sample(),
-            hll.get_metric(hll.with_labels(Label::B)).take_sample(),
-        )
-    }
-
-    fn get_cardinality(samples: &[[u8; 32]]) -> f64 {
+    fn get_cardinality(metrics: &[proto::Metric], filter: impl Fn(&proto::Metric) -> bool) -> f64 {
        let mut buckets = [0.0; 32];
-        for &sample in samples {
-            for (i, m) in sample.into_iter().enumerate() {
-                buckets[i] = f64::max(buckets[i], m as f64);
+        for metric in metrics.chunks_exact(32) {
+            if filter(&metric[0]) {
+                for (i, m) in metric.iter().enumerate() {
+                    buckets[i] = f64::max(buckets[i], m.get_gauge().get_value());
+                }
            }
        }

@@ -238,7 +437,7 @@ mod tests {
    }

    fn test_cardinality(n: usize, dist: impl Distribution<f64>) -> ([usize; 3], [f64; 3]) {
-        let hll = HyperLogLogVec::<StaticLabelSet<Label>, 32>::new();
+        let hll = HyperLogLogVec::<32>::new(Opts::new("foo", "bar"), &["x"]).unwrap();

        let mut iter = StdRng::seed_from_u64(0x2024_0112).sample_iter(dist);
        let mut set_a = HashSet::new();
@@ -246,20 +445,18 @@ mod tests {

        for x in iter.by_ref().take(n) {
            set_a.insert(x.to_bits());
-            hll.get_metric(hll.with_labels(Label::A))
-                .measure(&x.to_bits());
+            hll.with_label_values(&["a"]).measure(&x.to_bits());
        }
        for x in iter.by_ref().take(n) {
            set_b.insert(x.to_bits());
-            hll.get_metric(hll.with_labels(Label::B))
-                .measure(&x.to_bits());
+            hll.with_label_values(&["b"]).measure(&x.to_bits());
        }
        let merge = &set_a | &set_b;

-        let (a, b) = collect(&hll);
-        let len = get_cardinality(&[a, b]);
-        let len_a = get_cardinality(&[a]);
-        let len_b = get_cardinality(&[b]);
+        let metrics = collect(&hll);
+        let len = get_cardinality(&metrics, |_| true);
+        let len_a = get_cardinality(&metrics, |l| l.get_label()[0].get_value() == "a");
+        let len_b = get_cardinality(&metrics, |l| l.get_label()[0].get_value() == "b");

        ([merge.len(), set_a.len(), set_b.len()], [len, len_a, len_b])
    }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -5,7 +5,7 @@
 #![deny(clippy::undocumented_unsafe_blocks)]

 use measured::{
-    label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels},
+    label::{LabelGroupVisitor, LabelName, NoLabels},
    metric::{
        counter::CounterState,
        gauge::GaugeState,
@@ -40,7 +40,7 @@ pub mod launch_timestamp;
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};
 mod hll;
-pub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};
+pub use hll::{HyperLogLog, HyperLogLogVec};
 #[cfg(target_os = "linux")]
 pub mod more_process_metrics;

@@ -421,171 +421,3 @@ pub type IntCounterPair = GenericCounterPair<AtomicU64>;

 /// A guard for [`IntCounterPair`] that will decrement the gauge on drop
 pub type IntCounterPairGuard = GenericCounterPairGuard<AtomicU64>;
-
-pub trait CounterPairAssoc {
-    const INC_NAME: &'static MetricName;
-    const DEC_NAME: &'static MetricName;
-
-    const INC_HELP: &'static str;
-    const DEC_HELP: &'static str;
-
-    type LabelGroupSet: LabelGroupSet;
-}
-
-pub struct CounterPairVec<A: CounterPairAssoc> {
-    vec: measured::metric::MetricVec<MeasuredCounterPairState, A::LabelGroupSet>,
-}
-
-impl<A: CounterPairAssoc> Default for CounterPairVec<A>
-where
-    A::LabelGroupSet: Default,
-{
-    fn default() -> Self {
-        Self {
-            vec: Default::default(),
-        }
-    }
-}
-
-impl<A: CounterPairAssoc> CounterPairVec<A> {
-    pub fn guard(
-        &self,
-        labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>,
-    ) -> MeasuredCounterPairGuard<'_, A> {
-        let id = self.vec.with_labels(labels);
-        self.vec.get_metric(id).inc.inc();
-        MeasuredCounterPairGuard { vec: &self.vec, id }
-    }
-    pub fn inc(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) {
-        let id = self.vec.with_labels(labels);
-        self.vec.get_metric(id).inc.inc();
-    }
-    pub fn dec(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) {
-        let id = self.vec.with_labels(labels);
-        self.vec.get_metric(id).dec.inc();
-    }
-    pub fn remove_metric(
-        &self,
-        labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>,
-    ) -> Option<MeasuredCounterPairState> {
-        let id = self.vec.with_labels(labels);
-        self.vec.remove_metric(id)
-    }
-}
-
-impl<T, A> ::measured::metric::group::MetricGroup<T> for CounterPairVec<A>
-where
-    T: ::measured::metric::group::Encoding,
-    A: CounterPairAssoc,
-    ::measured::metric::counter::CounterState: ::measured::metric::MetricEncoding<T>,
-{
-    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {
-        // write decrement first to avoid a race condition where inc - dec < 0
-        T::write_help(enc, A::DEC_NAME, A::DEC_HELP)?;
-        self.vec
-            .collect_family_into(A::DEC_NAME, &mut Dec(&mut *enc))?;
-
-        T::write_help(enc, A::INC_NAME, A::INC_HELP)?;
-        self.vec
-            .collect_family_into(A::INC_NAME, &mut Inc(&mut *enc))?;
-
-        Ok(())
-    }
-}
-
-#[derive(MetricGroup, Default)]
-pub struct MeasuredCounterPairState {
-    pub inc: CounterState,
-    pub dec: CounterState,
-}
-
-impl measured::metric::MetricType for MeasuredCounterPairState {
-    type Metadata = ();
-}
-
-pub struct MeasuredCounterPairGuard<'a, A: CounterPairAssoc> {
-    vec: &'a measured::metric::MetricVec<MeasuredCounterPairState, A::LabelGroupSet>,
-    id: measured::metric::LabelId<A::LabelGroupSet>,
-}
-
-impl<A: CounterPairAssoc> Drop for MeasuredCounterPairGuard<'_, A> {
-    fn drop(&mut self) {
-        self.vec.get_metric(self.id).dec.inc();
-    }
-}
-
-/// [`MetricEncoding`] for [`MeasuredCounterPairState`] that only writes the inc counter to the inner encoder.
-struct Inc<T>(T);
-/// [`MetricEncoding`] for [`MeasuredCounterPairState`] that only writes the dec counter to the inner encoder.
-struct Dec<T>(T);
-
-impl<T: Encoding> Encoding for Inc<T> {
-    type Err = T::Err;
-
-    fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {
-        self.0.write_help(name, help)
-    }
-
-    fn write_metric_value(
-        &mut self,
-        name: impl MetricNameEncoder,
-        labels: impl LabelGroup,
-        value: MetricValue,
-    ) -> Result<(), Self::Err> {
-        self.0.write_metric_value(name, labels, value)
-    }
-}
-
-impl<T: Encoding> MetricEncoding<Inc<T>> for MeasuredCounterPairState
-where
-    CounterState: MetricEncoding<T>,
-{
-    fn write_type(name: impl MetricNameEncoder, enc: &mut Inc<T>) -> Result<(), T::Err> {
-        CounterState::write_type(name, &mut enc.0)
-    }
-    fn collect_into(
-        &self,
-        metadata: &(),
-        labels: impl LabelGroup,
-        name: impl MetricNameEncoder,
-        enc: &mut Inc<T>,
-    ) -> Result<(), T::Err> {
-        self.inc.collect_into(metadata, labels, name, &mut enc.0)
-    }
-}
-
-impl<T: Encoding> Encoding for Dec<T> {
-    type Err = T::Err;
-
-    fn write_help(&mut self, name: impl MetricNameEncoder, help: &str) -> Result<(), Self::Err> {
-        self.0.write_help(name, help)
-    }
-
-    fn write_metric_value(
-        &mut self,
-        name: impl MetricNameEncoder,
-        labels: impl LabelGroup,
-        value: MetricValue,
-    ) -> Result<(), Self::Err> {
-        self.0.write_metric_value(name, labels, value)
-    }
-}
-
-/// Write the dec counter to the encoder
-impl<T: Encoding> MetricEncoding<Dec<T>> for MeasuredCounterPairState
-where
-    CounterState: MetricEncoding<T>,
-{
-    fn write_type(name: impl MetricNameEncoder, enc: &mut Dec<T>) -> Result<(), T::Err> {
-        CounterState::write_type(name, &mut enc.0)
-    }
-    fn collect_into(
-        &self,
-        metadata: &(),
-        labels: impl LabelGroup,
-        name: impl MetricNameEncoder,
-        enc: &mut Dec<T>,
-    ) -> Result<(), T::Err> {
-        self.dec.collect_into(metadata, labels, name, &mut enc.0)
-    }
-}
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -49,8 +49,6 @@ char	   *neon_auth_token;
 int			readahead_buffer_size = 128;
 int			flush_every_n_requests = 8;

-int         neon_protocol_version;
-
 static int	n_reconnect_attempts = 0;
 static int	max_reconnect_attempts = 60;
 static int	stripe_size;
@@ -846,14 +844,6 @@ pg_init_libpagestore(void)
 							PGC_USERSET,
 							0,	/* no flags required */
 							NULL, (GucIntAssignHook) &readahead_buffer_resize, NULL);
-	DefineCustomIntVariable("neon.protocol_version",
-							"Version of compute<->page server protocol",
-							NULL,
-							&neon_protocol_version,
-							NEON_PROTOCOL_VERSION, 1, 2,
-							PGC_USERSET,
-							0,	/* no flags required */
-							NULL, NULL, NULL);

 	relsize_hash_init();

--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -28,13 +28,6 @@
 #define MAX_SHARDS 128
 #define MAX_PAGESERVER_CONNSTRING_SIZE 256

-/*
- * Currently, the protocol version is not sent to the server.
- * So it is critical that format of existing commands is not changed.
- * New protocol versions can just add new commands.
- */
-#define NEON_PROTOCOL_VERSION  2
-
 typedef enum
 {
 	/* pagestore_client -> pagestore */
@@ -44,12 +37,6 @@ typedef enum
 	T_NeonDbSizeRequest,
 	T_NeonGetSlruSegmentRequest,

-	T_NeonExistsV2Request = 10, /* new protocol message tags start from 10 */
-	T_NeonNblocksV2Request,
-	T_NeonGetPageV2Request,
-	T_NeonDbSizeV2Request,
-	T_NeonGetSlruSegmentV2Request,
-
 	/* pagestore -> pagestore_client */
 	T_NeonExistsResponse = 100,
 	T_NeonNblocksResponse,
@@ -82,33 +69,18 @@ typedef enum {
 	SLRU_MULTIXACT_OFFSETS
 } SlruKind;

-/*--
- * supertype of all the Neon*Request structs below.
+/*
+ * supertype of all the Neon*Request structs below
 *
- * All requests contain two LSNs:
- *
- * lsn:                request page (or relation size, etc) at this LSN
- * not_modified_since: Hint that the page hasn't been modified between
- *                     this LSN and the request LSN (`lsn`).
- *
- * To request the latest version of a page, you can use MAX_LSN as the request
- * LSN.
- *
- * If you don't know any better, you can always set 'not_modified_since' equal
- * to 'lsn', but providing a lower value can speed up processing the request
- * in the pageserver, as it doesn't need to wait for the WAL to arrive, and it
- * can skip traversing through recent layers which we know to not contain any
- * versions for the requested page.
- *
- * These structs describe the V2 of these requests. The old V1 protocol contained
- * just one LSN and a boolean 'latest' flag. If the neon_protocol_version GUC is
- * set to 1, we will convert these to the V1 requests before sending.
+ * If 'latest' is true, we are requesting the latest page version, and 'lsn'
+ * is just a hint to the server that we know there are no versions of the page
+ * (or relation size, for exists/nblocks requests) later than the 'lsn'.
 */
 typedef struct
 {
 	NeonMessageTag tag;
-	XLogRecPtr	lsn;
-	XLogRecPtr	not_modified_since;
+	bool		latest;			/* if true, request latest page version */
+	XLogRecPtr	lsn;			/* request page version @ this LSN */
 } NeonRequest;

 typedef struct
@@ -221,7 +193,6 @@ extern int	readahead_buffer_size;
 extern char *neon_timeline;
 extern char *neon_tenant;
 extern int32 max_cluster_size;
-extern int  neon_protocol_version;

 extern shardno_t get_shard_number(BufferTag* tag);

@@ -254,14 +225,14 @@ extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
 extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 					  char *buffer);
 extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
-										 XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer);
+										 XLogRecPtr request_lsn, bool request_latest, char *buffer);
 extern void neon_write(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum, char *buffer, bool skipFsync);
 #else
 extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 					  void *buffer);
 extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
-										 XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer);
+										 XLogRecPtr request_lsn, bool request_latest, void *buffer);
 extern void neon_write(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum, const void *buffer, bool skipFsync);
 #endif
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -168,8 +168,8 @@ typedef enum PrefetchStatus
 typedef struct PrefetchRequest
 {
 	BufferTag	buftag;			/* must be first entry in the struct */
-	XLogRecPtr	request_lsn;
-	XLogRecPtr	not_modified_since;
+	XLogRecPtr	effective_request_lsn;
+	XLogRecPtr	actual_request_lsn;
 	NeonResponse *response;		/* may be null */
 	PrefetchStatus status;
 	shardno_t   shard_no;
@@ -269,17 +269,19 @@ static PrefetchState *MyPState;
 	) \
 )

+static XLogRecPtr prefetch_lsn = 0;
+
 static bool compact_prefetch_buffers(void);
 static void consume_prefetch_responses(void);
-static uint64 prefetch_register_buffer(BufferTag tag, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since);
+static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
 static bool prefetch_read(PrefetchRequest *slot);
-static void prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since);
+static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
 static bool prefetch_wait_for(uint64 ring_index);
 static void prefetch_cleanup_trailing_unused(void);
 static inline void prefetch_set_unused(uint64 ring_index);

-static void neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
-								 XLogRecPtr *request_lsn, XLogRecPtr *not_modified_since);
+static XLogRecPtr neon_get_request_lsn(bool *latest, NRelFileInfo rinfo,
+									   ForkNumber forknum, BlockNumber blkno);

 static bool
 compact_prefetch_buffers(void)
@@ -336,8 +338,8 @@ compact_prefetch_buffers(void)
 		target_slot->shard_no = source_slot->shard_no;
 		target_slot->status = source_slot->status;
 		target_slot->response = source_slot->response;
-		target_slot->request_lsn = source_slot->request_lsn;
-		target_slot->not_modified_since = source_slot->not_modified_since;
+		target_slot->effective_request_lsn = source_slot->effective_request_lsn;
+		target_slot->actual_request_lsn = source_slot->actual_request_lsn;
 		target_slot->my_ring_index = empty_ring_index;

 		prfh_delete(MyPState->prf_hash, source_slot);
@@ -356,8 +358,7 @@ compact_prefetch_buffers(void)
 		};
 		source_slot->response = NULL;
 		source_slot->my_ring_index = 0;
-		source_slot->request_lsn = InvalidXLogRecPtr;
-		source_slot->not_modified_since = InvalidXLogRecPtr;
+		source_slot->effective_request_lsn = 0;

 		/* update bookkeeping */
 		n_moved++;
@@ -683,35 +684,54 @@ prefetch_set_unused(uint64 ring_index)
 }

 static void
-prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since)
+prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn)
 {
 	bool		found;
 	NeonGetPageRequest request = {
 		.req.tag = T_NeonGetPageRequest,
-		/* lsn and not_modified_since are filled in below */
+		.req.latest = false,
+		.req.lsn = 0,
 		.rinfo = BufTagGetNRelFileInfo(slot->buftag),
 		.forknum = slot->buftag.forkNum,
 		.blkno = slot->buftag.blockNum,
 	};

-	Assert(((force_request_lsn != NULL)  == (force_not_modified_since != NULL)));
-
-	if (force_request_lsn)
+	if (force_lsn && force_latest)
 	{
-		request.req.lsn = *force_request_lsn;
-		request.req.not_modified_since = *force_not_modified_since;
-		slot->request_lsn = *force_request_lsn;
-		slot->not_modified_since = *force_not_modified_since;
+		request.req.lsn = *force_lsn;
+		request.req.latest = *force_latest;
+		slot->actual_request_lsn = slot->effective_request_lsn = *force_lsn;
 	}
 	else
 	{
-		neon_get_request_lsn(BufTagGetNRelFileInfo(slot->buftag),
-							 slot->buftag.forkNum,
-							 slot->buftag.blockNum,
-							 &request.req.lsn,
-							 &request.req.not_modified_since);
-		slot->request_lsn = request.req.lsn;
-		slot->not_modified_since = request.req.not_modified_since;
+		XLogRecPtr	lsn = neon_get_request_lsn(
+											   &request.req.latest,
+											   BufTagGetNRelFileInfo(slot->buftag),
+											   slot->buftag.forkNum,
+											   slot->buftag.blockNum
+			);
+
+		/*
+		 * Note: effective_request_lsn is potentially higher than the
+		 * requested LSN, but still correct:
+		 *
+		 * We know there are no changes between the actual requested LSN and
+		 * the value of effective_request_lsn: If there were, the page would
+		 * have been in cache and evicted between those LSN values, which then
+		 * would have had to result in a larger request LSN for this page.
+		 *
+		 * It is possible that a concurrent backend loads the page, modifies
+		 * it and then evicts it again, but the LSN of that eviction cannot be
+		 * smaller than the current WAL insert/redo pointer, which is already
+		 * larger than this prefetch_lsn. So in any case, that would
+		 * invalidate this cache.
+		 *
+		 * The best LSN to use for effective_request_lsn would be
+		 * XLogCtl->Insert.RedoRecPtr, but that's expensive to access.
+		 */
+		slot->actual_request_lsn = request.req.lsn = lsn;
+		prefetch_lsn = Max(prefetch_lsn, lsn);
+		slot->effective_request_lsn = prefetch_lsn;
 	}

 	Assert(slot->response == NULL);
@@ -729,6 +749,7 @@ prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRe
 	/* update slot state */
 	slot->status = PRFS_REQUESTED;

+
 	prfh_insert(MyPState->prf_hash, slot, &found);
 	Assert(!found);
 }
@@ -738,25 +759,22 @@ prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRe
 *
 * Register that we may want the contents of BufferTag in the near future.
 *
- * If force_request_lsn and force_not_modified_since are not NULL, those
- * values are sent to the pageserver. If they are NULL, we utilize the
- * lastWrittenLsn -infrastructure to fill them in.
+ * If force_latest and force_lsn are both non-NULL, those values are sent to
+ * the pageserver. If either is NULL, we use the lastWrittenLsn infrastructure
+ * to compute these values instead.
 *
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */

 static uint64
-prefetch_register_buffer(BufferTag tag, XLogRecPtr *force_request_lsn,
-						 XLogRecPtr *force_not_modified_since)
+prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn)
 {
 	uint64		ring_index;
 	PrefetchRequest req;
 	PrefetchRequest *slot;
 	PrfHashEntry *entry;

-	Assert(((force_request_lsn != NULL)  == (force_not_modified_since != NULL)));
-
 	/* use an intermediate PrefetchRequest struct to ensure correct alignment */
 	req.buftag = tag;
 Retry:
@@ -777,31 +795,37 @@ Retry:
 		 * If we want a specific lsn, we do not accept requests that were made
 		 * with a potentially different LSN.
 		 */
-		if (force_request_lsn)
+		if (force_latest && force_lsn)
 		{
 			/*
-			 * The not_changed_since..request_lsn range of each request is
-			 * effectively a claim that the page has not been modified between
-			 * whose LSNs.  Therefore, if the range of the old request in the
-			 * queue overlaps with the new request, we know that the the page
-			 * hasn't been modified in the union of the ranges.  We can reuse
-			 * the old request in that case.
-			 *
-			 * The new request's LSN should never be older than the old one,
-			 * so don't bother checking that case.
+			 * if we want the latest version, any existing request with
+			 * effective_request_lsn >= the requested lsn is OK
 			 */
-			if (*force_request_lsn >= slot->not_modified_since &&
-				*force_not_modified_since <= slot->request_lsn)
+			if (*force_latest)
 			{
-				/* the old request overlaps with the new one; keep it */
+				if (*force_lsn > slot->effective_request_lsn)
+				{
+					if (!prefetch_wait_for(ring_index))
+						goto Retry;
+					prefetch_set_unused(ring_index);
+					entry = NULL;
+				}
+
 			}
+
+			/*
+			 * if we don't want the latest version, only accept requests with
+			 * the exact same LSN
+			 */
 			else
 			{
-				/* Wait for the old request to finish and discard it */
-				if (!prefetch_wait_for(ring_index))
-					goto Retry;
-				prefetch_set_unused(ring_index);
-				entry = NULL;
+				if (*force_lsn != slot->effective_request_lsn)
+				{
+					if (!prefetch_wait_for(ring_index))
+						goto Retry;
+					prefetch_set_unused(ring_index);
+					entry = NULL;
+				}
 			}
 		}

@@ -897,7 +921,7 @@ Retry:
 	slot->shard_no = get_shard_number(&tag);
 	slot->my_ring_index = ring_index;

-	prefetch_do_request(slot, force_request_lsn, force_not_modified_since);
+	prefetch_do_request(slot, force_latest, force_lsn);
 	Assert(slot->status == PRFS_REQUESTED);
 	Assert(MyPState->ring_last <= ring_index &&
 		   ring_index < MyPState->ring_unused);
@@ -973,66 +997,7 @@ nm_pack_request(NeonRequest *msg)
 	StringInfoData s;

 	initStringInfo(&s);
-
-	if (neon_protocol_version >= 2)
-	{
-		pq_sendbyte(&s, msg->tag);
-		pq_sendint64(&s, msg->lsn);
-		pq_sendint64(&s, msg->not_modified_since);
-	}
-	else
-	{
-		NeonMessageTag tag;
-		bool		latest;
-		XLogRecPtr	lsn;
-
-		/*
-		 * In primary, we always request the latest page version.
-		 */
-		if (!RecoveryInProgress())
-		{
-			latest = true;
-			lsn = msg->not_modified_since;
-		}
-		else
-		{
-			/*
-			 * In the current protocol, we cannot represent that we want to read
-			 * page at LSN X, and we know that it hasn't been modified since Y. We
-			 * can either use 'not_modified_lsn' as the request LSN, and risk
-			 * getting an error if that LSN is too old and has already fallen out
-			 * of the pageserver's GC horizon, or we can send 'request_lsn',
-			 * causing the pageserver to possibly wait for the recent WAL to
-			 * arrive unnecessarily. Or something in between. We choose to use the
-			 * old LSN and risk GC errors, because that's what we've done
-			 * historically.
-			 */
-			latest = false;
-			lsn = msg->not_modified_since;
-		}
-
-		switch(msg->tag)
-		{
-			case T_NeonExistsV2Request:
-				tag = T_NeonExistsRequest;
-				break;
-			case T_NeonNblocksV2Request:
-				tag = T_NeonNblocksRequest;
-				break;
-			case T_NeonGetPageV2Request:
-				tag = T_NeonGetPageRequest;
-				break;
-			case T_NeonDbSizeV2Request:
-				tag = T_NeonDbSizeRequest;
-				break;
-			case T_NeonGetSlruSegmentV2Request:
-				tag = T_NeonGetSlruSegmentRequest;
-				break;
-		}
-		pq_sendbyte(&s, tag);
-		pq_sendbyte(&s, latest);
-		pq_sendint64(&s, lsn);
-	}
+	pq_sendbyte(&s, msg->tag);

 	switch (messageTag(msg))
 	{
@@ -1041,6 +1006,8 @@ nm_pack_request(NeonRequest *msg)
 			{
 				NeonExistsRequest *msg_req = (NeonExistsRequest *) msg;

+				pq_sendbyte(&s, msg_req->req.latest);
+				pq_sendint64(&s, msg_req->req.lsn);
 				pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
@@ -1052,6 +1019,8 @@ nm_pack_request(NeonRequest *msg)
 			{
 				NeonNblocksRequest *msg_req = (NeonNblocksRequest *) msg;

+				pq_sendbyte(&s, msg_req->req.latest);
+				pq_sendint64(&s, msg_req->req.lsn);
 				pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
@@ -1063,6 +1032,8 @@ nm_pack_request(NeonRequest *msg)
 			{
 				NeonDbSizeRequest *msg_req = (NeonDbSizeRequest *) msg;

+				pq_sendbyte(&s, msg_req->req.latest);
+				pq_sendint64(&s, msg_req->req.lsn);
 				pq_sendint32(&s, msg_req->dbNode);

 				break;
@@ -1071,6 +1042,8 @@ nm_pack_request(NeonRequest *msg)
 			{
 				NeonGetPageRequest *msg_req = (NeonGetPageRequest *) msg;

+				pq_sendbyte(&s, msg_req->req.latest);
+				pq_sendint64(&s, msg_req->req.lsn);
 				pq_sendint32(&s, NInfoGetSpcOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetDbOid(msg_req->rinfo));
 				pq_sendint32(&s, NInfoGetRelNumber(msg_req->rinfo));
@@ -1084,6 +1057,8 @@ nm_pack_request(NeonRequest *msg)
 			{
 				NeonGetSlruSegmentRequest *msg_req = (NeonGetSlruSegmentRequest *) msg;

+				pq_sendbyte(&s, msg_req->req.latest);
+				pq_sendint64(&s, msg_req->req.lsn);
 				pq_sendbyte(&s, msg_req->kind);
 				pq_sendint32(&s, msg_req->segno);

@@ -1234,7 +1209,7 @@ nm_to_string(NeonMessage *msg)
 				appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
 				appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
 				appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-				appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+				appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
 				appendStringInfoChar(&s, '}');
 				break;
 			}
@@ -1247,7 +1222,7 @@ nm_to_string(NeonMessage *msg)
 				appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
 				appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
 				appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-				appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+				appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
 				appendStringInfoChar(&s, '}');
 				break;
 			}
@@ -1261,7 +1236,7 @@ nm_to_string(NeonMessage *msg)
 				appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
 				appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno);
 				appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-				appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+				appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
 				appendStringInfoChar(&s, '}');
 				break;
 			}
@@ -1272,7 +1247,7 @@ nm_to_string(NeonMessage *msg)
 				appendStringInfoString(&s, "{\"type\": \"NeonDbSizeRequest\"");
 				appendStringInfo(&s, ", \"dbnode\": \"%u\"", msg_req->dbNode);
 				appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-				appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+				appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
 				appendStringInfoChar(&s, '}');
 				break;
 			}
@@ -1284,7 +1259,7 @@ nm_to_string(NeonMessage *msg)
 				appendStringInfo(&s, ", \"kind\": %u", msg_req->kind);
 				appendStringInfo(&s, ", \"segno\": %u", msg_req->segno);
 				appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
-				appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
+				appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest);
 				appendStringInfoChar(&s, '}');
 				break;
 			}
@@ -1556,36 +1531,44 @@ nm_adjust_lsn(XLogRecPtr lsn)
 /*
 * Return LSN for requesting pages and number of blocks from page server
 */
-static void
-neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
-					 XLogRecPtr *request_lsn, XLogRecPtr *not_modified_since)
+static XLogRecPtr
+neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno)
 {
+	XLogRecPtr	lsn;
+
 	if (RecoveryInProgress())
 	{
-		/* Request the page at the last replayed LSN. */
-		*request_lsn = GetXLogReplayRecPtr(NULL);
-		*not_modified_since = GetLastWrittenLSN(rinfo, forknum, blkno);
+		/*
+		 * We don't know if WAL has been generated but not yet replayed, so
+		 * we're conservative in our estimates about latest pages.
+		 */
+		*latest = false;

-		neon_log(DEBUG1, "neon_get_request_lsn request lsn %X/%X, not_modified_since %X/%X",
-				 LSN_FORMAT_ARGS(*request_lsn), LSN_FORMAT_ARGS(*not_modified_since));
+		/*
+		 * Get the last written LSN of this page.
+		 */
+		lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
+		lsn = nm_adjust_lsn(lsn);
+
+		neon_log(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
+			 (uint32) ((lsn) >> 32), (uint32) (lsn));
 	}
 	else
 	{
-		XLogRecPtr	last_written_lsn;
 		XLogRecPtr	flushlsn;

 		/*
-		 * Use the latest LSN that was evicted from the buffer cache as the
-		 * 'not_modified_since' hint. Any pages modified by later WAL records
-		 * must still in the buffer cache, so our request cannot concern
-		 * those.
+		 * Use the latest LSN that was evicted from the buffer cache. Any
+		 * pages modified by later WAL records must still be in the buffer cache,
+		 * so our request cannot concern those.
 		 */
-		last_written_lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
-		Assert(last_written_lsn != InvalidXLogRecPtr);
+		*latest = true;
+		lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
+		Assert(lsn != InvalidXLogRecPtr);
 		neon_log(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
-				 LSN_FORMAT_ARGS(last_written_lsn));
+			 (uint32) ((lsn) >> 32), (uint32) (lsn));

-		last_written_lsn = nm_adjust_lsn(last_written_lsn);
+		lsn = nm_adjust_lsn(lsn);

 		/*
 		 * Is it possible that the last-written LSN is ahead of last flush
@@ -1600,25 +1583,16 @@ neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 #else
 		flushlsn = GetFlushRecPtr();
 #endif
-		if (last_written_lsn > flushlsn)
+		if (lsn > flushlsn)
 		{
 			neon_log(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
-					 LSN_FORMAT_ARGS(last_written_lsn),
-					 LSN_FORMAT_ARGS(flushlsn));
-			XLogFlush(last_written_lsn);
-			flushlsn = last_written_lsn;
+				 (uint32) (lsn >> 32), (uint32) lsn,
+				 (uint32) (flushlsn >> 32), (uint32) flushlsn);
+			XLogFlush(lsn);
 		}
-
-		/*
-		 * Request the latest version of the page. The most up-to-date request
-		 * LSN we could use would be the current insert LSN, but to avoid the
-		 * overhead of looking it up, use 'flushlsn' instead. This relies on the
-		 * assumption that if the page was modified since the last WAL flush, it
-		 * should still be in the buffer cache, and we wouldn't be requesting it.
-		 */
-		*request_lsn = flushlsn;
-		*not_modified_since = last_written_lsn;
 	}
+
+	return lsn;
 }

 /*
@@ -1630,8 +1604,8 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 	bool		exists;
 	NeonResponse *resp;
 	BlockNumber n_blocks;
+	bool		latest;
 	XLogRecPtr	request_lsn;
-	XLogRecPtr	not_modified_since;

 	switch (reln->smgr_relpersistence)
 	{
@@ -1686,13 +1660,12 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 		return false;
 	}

-	neon_get_request_lsn(InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO,
-						 &request_lsn, &not_modified_since);
+	request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO);
 	{
 		NeonExistsRequest request = {
 			.req.tag = T_NeonExistsRequest,
+			.req.latest = latest,
 			.req.lsn = request_lsn,
-			.req.not_modified_since = not_modified_since,
 			.rinfo = InfoFromSMgrRel(reln),
 		.forknum = forkNum};

@@ -2129,10 +2102,10 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
 void
 #if PG_MAJORVERSION_NUM < 16
 neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-				 XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer)
+				 XLogRecPtr request_lsn, bool request_latest, char *buffer)
 #else
 neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-				 XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer)
+				 XLogRecPtr request_lsn, bool request_latest, void *buffer)
 #endif
 {
 	NeonResponse *resp;
@@ -2175,28 +2148,15 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	if (entry != NULL)
 	{
 		slot = entry->slot;
-		/*
-		 * The not_changed_since..request_lsn range of each request is
-		 * effectively a claim that the page has not been modified between
-		 * those LSNs. Therefore, if the range of the old request in the queue
-		 * overlaps with the new request, we know that the the page hasn't
-		 * been modified in the union of the ranges. We can reuse the old
-		 * request in that case.
-		 *
-		 * The new request's LSN should never be older than the old one,
-		 * so don't bother checking that case.
-		 */
-		if (request_lsn >= slot->not_modified_since &&
-			not_modified_since <= slot->request_lsn)
+		if (slot->effective_request_lsn >= request_lsn)
 		{
 			ring_index = slot->my_ring_index;
 			pgBufferUsage.prefetch.hits += 1;
 		}
-		else
+		else					/* the current prefetch LSN is not large
+								 * enough, so drop the prefetch */
 		{
 			/*
-			 * Cannot use this prefetch, discard it
-			 *
 			 * We can't drop cache for not-yet-received requested items. It is
 			 * unlikely this happens, but it can happen if prefetch distance
 			 * is large enough and a backend didn't consume all prefetch
@@ -2221,8 +2181,8 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		{
 			pgBufferUsage.prefetch.misses += 1;

-			ring_index = prefetch_register_buffer(buftag, &request_lsn,
-												  &not_modified_since);
+			ring_index = prefetch_register_buffer(buftag, &request_latest,
+												  &request_lsn);
 			slot = GetPrfSlot(ring_index);
 		}
 		else
@@ -2286,8 +2246,8 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer
 neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
 #endif
 {
+	bool		latest;
 	XLogRecPtr	request_lsn;
-	XLogRecPtr	not_modified_since;

 	switch (reln->smgr_relpersistence)
 	{
@@ -2312,9 +2272,8 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 		return;
 	}

-	neon_get_request_lsn(InfoFromSMgrRel(reln), forkNum, blkno,
-						 &request_lsn, &not_modified_since);
-	neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsn, not_modified_since, buffer);
+	request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forkNum, blkno);
+	neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsn, latest, buffer);

 #ifdef DEBUG_COMPARE_LOCAL
 	if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
@@ -2483,8 +2442,8 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 {
 	NeonResponse *resp;
 	BlockNumber n_blocks;
+	bool		latest;
 	XLogRecPtr	request_lsn;
-	XLogRecPtr	not_modified_since;

 	switch (reln->smgr_relpersistence)
 	{
@@ -2511,13 +2470,12 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 		return n_blocks;
 	}

-	neon_get_request_lsn(InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO,
-						 &request_lsn, &not_modified_since);
+	request_lsn = neon_get_request_lsn(&latest, InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO);
 	{
 		NeonNblocksRequest request = {
 			.req.tag = T_NeonNblocksRequest,
+			.req.latest = latest,
 			.req.lsn = request_lsn,
-			.req.not_modified_since = not_modified_since,
 			.rinfo = InfoFromSMgrRel(reln),
 			.forknum = forknum,
 		};
@@ -2565,17 +2523,16 @@ neon_dbsize(Oid dbNode)
 {
 	NeonResponse *resp;
 	int64		db_size;
-	XLogRecPtr	request_lsn,
-		not_modified_since;
+	XLogRecPtr	request_lsn;
+	bool		latest;
 	NRelFileInfo dummy_node = {0};

-	neon_get_request_lsn(dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO,
-						 &request_lsn, &not_modified_since);
+	request_lsn = neon_get_request_lsn(&latest, dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO);
 	{
 		NeonDbSizeRequest request = {
 			.req.tag = T_NeonDbSizeRequest,
+			.req.latest = latest,
 			.req.lsn = request_lsn,
-			.req.not_modified_since = not_modified_since,
 			.dbNode = dbNode,
 		};

@@ -2648,6 +2605,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	 * the most recently inserted WAL record's LSN.
 	 */
 	lsn = GetXLogInsertRecPtr();
+
 	lsn = nm_adjust_lsn(lsn);

 	/*
@@ -2847,23 +2805,14 @@ neon_end_unlogged_build(SMgrRelation reln)
 static int
 neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
 {
-	XLogRecPtr request_lsn,
-		not_modified_since;
-
-	if (RecoveryInProgress())
-		request_lsn = GetXLogReplayRecPtr(NULL);
-	else
-		request_lsn = GetXLogInsertRecPtr();
-	request_lsn = nm_adjust_lsn(request_lsn);
-
+	XLogRecPtr request_lsn;
 	/*
-	 * GetRedoStartLsn() returns LSN of basebackup. We know that the SLRU
-	 * segment has not changed since the basebackup, because in order to
-	 * modify it, we would have had to download it already. And once
-	 * downloaded, we never evict SLRU segments from local disk.
+	 * GetRedoStartLsn() returns LSN of basebackup.
+	 * We need to download SLRU segments only once after node startup,
+	 * then SLRUs are maintained locally.
 	 */
-	not_modified_since = GetRedoStartLsn();
-
+	request_lsn = GetRedoStartLsn();
+	request_lsn = nm_adjust_lsn(request_lsn);
 	SlruKind kind;

    if (STRPREFIX(path, "pg_xact"))
@@ -2878,8 +2827,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
 	NeonResponse *resp;
 	NeonGetSlruSegmentRequest request = {
 		.req.tag = T_NeonGetSlruSegmentRequest,
+		.req.latest = false,
 		.req.lsn = request_lsn,
-		.req.not_modified_since = not_modified_since,

 		.kind = kind,
 		.segno = segno
@@ -3007,9 +2956,6 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 {
 	BlockNumber relsize;

-	/* This is only used in WAL replay */
-	Assert(RecoveryInProgress());
-
 	/* Extend the relation if we know its size */
 	if (get_cached_relsize(rinfo, forknum, &relsize))
 	{
@@ -3028,12 +2974,13 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		 * This length is later reused when we open the smgr to read the
 		 * block, which is fine and expected.
 		 */
+
 		NeonResponse *response;
 		NeonNblocksResponse *nbresponse;
 		NeonNblocksRequest request = {
 			.req = (NeonRequest) {
 				.lsn = end_recptr,
-				.not_modified_since = end_recptr,
+				.latest = false,
 				.tag = T_NeonNblocksRequest,
 			},
 			.rinfo = rinfo,
--- a/pgxn/neon_test_utils/neontest.c
+++ b/pgxn/neon_test_utils/neontest.c
@@ -48,10 +48,10 @@ PG_FUNCTION_INFO_V1(neon_xlogflush);
 */
 #if PG_MAJORVERSION_NUM < 16
 typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-									   XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer);
+									   XLogRecPtr request_lsn, bool request_latest, char *buffer);
 #else
 typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-									   XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer);
+									   XLogRecPtr request_lsn, bool request_latest, void *buffer);
 #endif

 static neon_read_at_lsn_type neon_read_at_lsn_ptr;
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -1191,13 +1191,13 @@ files = [

 [[package]]
 name = "idna"
-version = "3.7"
+version = "3.3"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.5"
 files = [
-    {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
-    {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
+    {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
+    {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
 ]

 [[package]]
@@ -2182,7 +2182,6 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2653,16 +2652,6 @@ files = [
    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
    {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
    {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
-    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
-    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
-    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
-    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
-    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -44,7 +44,6 @@ ipnet.workspace = true
 itertools.workspace = true
 lasso = { workspace = true, features = ["multi-threaded"] }
 md5.workspace = true
-measured = { workspace = true, features = ["lasso"] }
 metrics.workspace = true
 once_cell.workspace = true
 opentelemetry.workspace = true
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -13,7 +13,7 @@ use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
 use crate::console::{AuthSecret, NodeInfo};
 use crate::context::RequestMonitoring;
 use crate::intern::EndpointIdInt;
-use crate::metrics::Metrics;
+use crate::metrics::{AUTH_RATE_LIMIT_HITS, ENDPOINTS_AUTH_RATE_LIMITED};
 use crate::proxy::connect_compute::ComputeConnectBackend;
 use crate::proxy::NeonOptions;
 use crate::stream::Stream;
@@ -210,12 +210,8 @@ impl AuthenticationConfig {
                enabled = self.rate_limiter_enabled,
                "rate limiting authentication"
            );
-            Metrics::get().proxy.requests_auth_rate_limits_total.inc();
-            Metrics::get()
-                .proxy
-                .endpoints_auth_rate_limits
-                .get_metric()
-                .measure(endpoint);
+            AUTH_RATE_LIMIT_HITS.inc();
+            ENDPOINTS_AUTH_RATE_LIMITED.measure(endpoint);

            if self.rate_limiter_enabled {
                return Err(auth::AuthError::too_many_connections());
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -4,7 +4,7 @@ use crate::{
    auth::password_hack::parse_endpoint_param,
    context::RequestMonitoring,
    error::{ReportableError, UserFacingError},
-    metrics::{Metrics, SniKind},
+    metrics::NUM_CONNECTION_ACCEPTED_BY_SNI,
    proxy::NeonOptions,
    serverless::SERVERLESS_DRIVER_SNI,
    EndpointId, RoleName,
@@ -144,22 +144,21 @@ impl ComputeUserInfoMaybeEndpoint {
            ctx.set_endpoint_id(ep.clone());
        }

-        let metrics = Metrics::get();
        info!(%user, "credentials");
        if sni.is_some() {
            info!("Connection with sni");
-            metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni);
+            NUM_CONNECTION_ACCEPTED_BY_SNI
+                .with_label_values(&["sni"])
+                .inc();
        } else if endpoint.is_some() {
-            metrics
-                .proxy
-                .accepted_connections_by_sni
-                .inc(SniKind::NoSni);
+            NUM_CONNECTION_ACCEPTED_BY_SNI
+                .with_label_values(&["no_sni"])
+                .inc();
            info!("Connection without sni");
        } else {
-            metrics
-                .proxy
-                .accepted_connections_by_sni
-                .inc(SniKind::PasswordHack);
+            NUM_CONNECTION_ACCEPTED_BY_SNI
+                .with_label_values(&["password_hack"])
+                .inc();
            info!("Connection with password hack");
        }

--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -176,12 +176,7 @@ async fn task_main(
                    .context("failed to set socket option")?;

                info!(%peer_addr, "serving");
-                let ctx = RequestMonitoring::new(
-                    session_id,
-                    peer_addr.ip(),
-                    proxy::metrics::Protocol::SniRouter,
-                    "sni",
-                );
+                let ctx = RequestMonitoring::new(session_id, peer_addr.ip(), "sni_router", "sni");
                handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await
            }
            .unwrap_or_else(|e| {
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -18,8 +18,7 @@ use proxy::config::ProjectInfoCacheOptions;
 use proxy::console;
 use proxy::context::parquet::ParquetUploadArgs;
 use proxy::http;
-use proxy::http::health_server::AppMetrics;
-use proxy::metrics::Metrics;
+use proxy::metrics::NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT;
 use proxy::rate_limiter::AuthRateLimiter;
 use proxy::rate_limiter::EndpointRateLimiter;
 use proxy::rate_limiter::RateBucketInfo;
@@ -252,18 +251,14 @@ async fn main() -> anyhow::Result<()> {

    info!("Version: {GIT_VERSION}");
    info!("Build_tag: {BUILD_TAG}");
-    let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {
-        revision: GIT_VERSION,
-        build_tag: BUILD_TAG,
-    });
+    ::metrics::set_build_info_metric(GIT_VERSION, BUILD_TAG);

-    let jemalloc = match proxy::jemalloc::MetricRecorder::new() {
-        Ok(t) => Some(t),
-        Err(e) => {
-            tracing::error!(error = ?e, "could not start jemalloc metrics loop");
-            None
+    match proxy::jemalloc::MetricRecorder::new(prometheus::default_registry()) {
+        Ok(t) => {
+            t.start();
        }
-    };
+        Err(e) => tracing::error!(error = ?e, "could not start jemalloc metrics loop"),
+    }

    let args = ProxyCliArgs::parse();
    let config = build_config(&args)?;
@@ -303,27 +298,27 @@ async fn main() -> anyhow::Result<()> {
        ),
        aws_credentials_provider,
    ));
-    let regional_redis_client = match (args.redis_host, args.redis_port) {
-        (Some(host), Some(port)) => Some(
-            ConnectionWithCredentialsProvider::new_with_credentials_provider(
-                host,
-                port,
-                elasticache_credentials_provider.clone(),
+    let redis_notifications_client =
+        match (args.redis_notifications, (args.redis_host, args.redis_port)) {
+            (Some(url), _) => {
+                info!("Starting redis notifications listener ({url})");
+                Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url))
+            }
+            (None, (Some(host), Some(port))) => Some(
+                ConnectionWithCredentialsProvider::new_with_credentials_provider(
+                    host,
+                    port,
+                    elasticache_credentials_provider.clone(),
+                ),
            ),
-        ),
-        (None, None) => {
-            warn!("Redis events from console are disabled");
-            None
-        }
-        _ => {
-            bail!("redis-host and redis-port must be specified together");
-        }
-    };
-    let redis_notifications_client = if let Some(url) = args.redis_notifications {
-        Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url))
-    } else {
-        regional_redis_client.clone()
-    };
+            (None, (None, None)) => {
+                warn!("Redis is disabled");
+                None
+            }
+            _ => {
+                bail!("redis-host and redis-port must be specified together");
+            }
+        };

    // Check that we can bind to address before further initialization
    let http_address: SocketAddr = args.http.parse()?;
@@ -342,7 +337,8 @@ async fn main() -> anyhow::Result<()> {
    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
    let cancel_map = CancelMap::default();

-    let redis_publisher = match &regional_redis_client {
+    // let redis_notifications_client = redis_notifications_client.map(|x| Box::leak(Box::new(x)));
+    let redis_publisher = match &redis_notifications_client {
        Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
            redis_publisher.clone(),
            args.region.clone(),
@@ -355,7 +351,7 @@ async fn main() -> anyhow::Result<()> {
    >::new(
        cancel_map.clone(),
        redis_publisher,
-        proxy::metrics::CancellationSource::FromClient,
+        NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT,
    ));

    // client facing tasks. these will exit on error or on cancellation
@@ -393,14 +389,7 @@ async fn main() -> anyhow::Result<()> {
    // maintenance tasks. these never return unless there's an error
    let mut maintenance_tasks = JoinSet::new();
    maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone()));
-    maintenance_tasks.spawn(http::health_server::task_main(
-        http_listener,
-        AppMetrics {
-            jemalloc,
-            neon_metrics,
-            proxy: proxy::metrics::Metrics::get(),
-        },
-    ));
+    maintenance_tasks.spawn(http::health_server::task_main(http_listener));
    maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));

    if let Some(metrics_config) = &config.metric_collection {
@@ -417,16 +406,14 @@ async fn main() -> anyhow::Result<()> {
            if let Some(redis_notifications_client) = redis_notifications_client {
                let cache = api.caches.project_info.clone();
                maintenance_tasks.spawn(notifications::task_main(
-                    redis_notifications_client,
+                    redis_notifications_client.clone(),
                    cache.clone(),
                    cancel_map.clone(),
                    args.region.clone(),
                ));
                maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
-            }
-            if let Some(regional_redis_client) = regional_redis_client {
                let cache = api.caches.endpoints_cache.clone();
-                let con = regional_redis_client;
+                let con = redis_notifications_client.clone();
                maintenance_tasks.spawn(async move { cache.do_read(con).await });
            }
        }
@@ -529,15 +516,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
            } = args.wake_compute_lock.parse()?;
            info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
            let locks = Box::leak(Box::new(
-                console::locks::ApiLocks::new(
-                    "wake_compute_lock",
-                    permits,
-                    shards,
-                    timeout,
-                    epoch,
-                    &Metrics::get().wake_compute_lock,
-                )
-                .unwrap(),
+                console::locks::ApiLocks::new("wake_compute_lock", permits, shards, timeout, epoch)
+                    .unwrap(),
            ));
            tokio::spawn(locks.garbage_collect_worker());

--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -18,29 +18,18 @@ use crate::{
    config::EndpointCacheConfig,
    context::RequestMonitoring,
    intern::{BranchIdInt, EndpointIdInt, ProjectIdInt},
-    metrics::{Metrics, RedisErrors},
+    metrics::REDIS_BROKEN_MESSAGES,
    rate_limiter::GlobalRateLimiter,
    redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider,
    EndpointId,
 };

 #[derive(Deserialize, Debug, Clone)]
-pub struct ControlPlaneEventKey {
-    endpoint_created: Option<EndpointCreated>,
-    branch_created: Option<BranchCreated>,
-    project_created: Option<ProjectCreated>,
-}
-#[derive(Deserialize, Debug, Clone)]
-struct EndpointCreated {
-    endpoint_id: String,
-}
-#[derive(Deserialize, Debug, Clone)]
-struct BranchCreated {
-    branch_id: String,
-}
-#[derive(Deserialize, Debug, Clone)]
-struct ProjectCreated {
-    project_id: String,
+#[serde(rename_all(deserialize = "snake_case"))]
+pub enum ControlPlaneEventKey {
+    EndpointCreated,
+    BranchCreated,
+    ProjectCreated,
 }

 pub struct EndpointsCache {
@@ -95,19 +84,18 @@ impl EndpointsCache {
                .contains(&ProjectIdInt::from(&endpoint.as_project()))
        }
    }
-    fn insert_event(&self, key: ControlPlaneEventKey) {
+    fn insert_event(&self, key: ControlPlaneEventKey, value: String) {
        // Do not do normalization here, we expect the events to be normalized.
-        if let Some(endpoint_created) = key.endpoint_created {
-            self.endpoints
-                .insert(EndpointIdInt::from(&endpoint_created.endpoint_id.into()));
-        }
-        if let Some(branch_created) = key.branch_created {
-            self.branches
-                .insert(BranchIdInt::from(&branch_created.branch_id.into()));
-        }
-        if let Some(project_created) = key.project_created {
-            self.projects
-                .insert(ProjectIdInt::from(&project_created.project_id.into()));
+        match key {
+            ControlPlaneEventKey::EndpointCreated => {
+                self.endpoints.insert(EndpointIdInt::from(&value.into()));
+            }
+            ControlPlaneEventKey::BranchCreated => {
+                self.branches.insert(BranchIdInt::from(&value.into()));
+            }
+            ControlPlaneEventKey::ProjectCreated => {
+                self.projects.insert(ProjectIdInt::from(&value.into()));
+            }
        }
    }
    pub async fn do_read(
@@ -124,7 +112,6 @@ impl EndpointsCache {
            if let Err(e) = self.read_from_stream(&mut con, &mut last_id).await {
                tracing::error!("error reading from redis: {:?}", e);
            }
-            tokio::time::sleep(self.config.retry_interval).await;
        }
    }
    async fn read_from_stream(
@@ -145,16 +132,15 @@ impl EndpointsCache {
        self.batch_read(
            con,
            StreamReadOptions::default()
-                .count(self.config.default_batch_size)
+                .count(self.config.initial_batch_size)
                .block(self.config.xread_timeout.as_millis() as usize),
            last_id,
            false,
        )
        .await
    }
-    fn parse_key_value(value: &Value) -> anyhow::Result<ControlPlaneEventKey> {
-        let s: String = FromRedisValue::from_redis_value(value)?;
-        Ok(serde_json::from_str(&s)?)
+    fn parse_key_value(key: &str, value: &Value) -> anyhow::Result<(ControlPlaneEventKey, String)> {
+        Ok((serde_json::from_str(key)?, String::from_redis_value(value)?))
    }
    async fn batch_read(
        &self,
@@ -168,59 +154,37 @@ impl EndpointsCache {
            let mut res: StreamReadReply = conn
                .xread_options(&[&self.config.stream_name], &[last_id.as_str()], &opts)
                .await?;
-
-            if res.keys.is_empty() {
-                if return_when_finish {
-                    anyhow::bail!(
-                        "Redis stream {} is empty, cannot be used to filter endpoints",
-                        self.config.stream_name
-                    );
-                }
-                // If we are not returning when finish, we should wait for more data.
-                continue;
-            }
            if res.keys.len() != 1 {
                anyhow::bail!("Cannot read from redis stream {}", self.config.stream_name);
            }

            let res = res.keys.pop().expect("Checked length above");
-            let len = res.ids.len();
+
+            if return_when_finish && res.ids.len() <= self.config.default_batch_size {
+                break;
+            }
            for x in res.ids {
                total += 1;
-                for (_, v) in x.map {
-                    let key = match Self::parse_key_value(&v) {
+                for (k, v) in x.map {
+                    let (key, value) = match Self::parse_key_value(&k, &v) {
                        Ok(x) => x,
                        Err(e) => {
-                            Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
-                                channel: &self.config.stream_name,
-                            });
-                            tracing::error!("error parsing value {v:?}: {e:?}");
+                            REDIS_BROKEN_MESSAGES
+                                .with_label_values(&[&self.config.stream_name])
+                                .inc();
+                            tracing::error!("error parsing key-value {k}-{v:?}: {e:?}");
                            continue;
                        }
                    };
-                    self.insert_event(key);
+                    self.insert_event(key, value);
                }
                if total.is_power_of_two() {
                    tracing::debug!("endpoints read {}", total);
                }
                *last_id = x.id;
            }
-            if return_when_finish && len <= self.config.default_batch_size {
-                break;
-            }
        }
        tracing::info!("read {} endpoints/branches/projects from redis", total);
        Ok(())
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::ControlPlaneEventKey;
-
-    #[test]
-    fn test() {
-        let s = "{\"branch_created\":null,\"endpoint_created\":{\"endpoint_id\":\"ep-rapid-thunder-w0qqw2q9\"},\"project_created\":null,\"type\":\"endpoint_created\"}";
-        let _: ControlPlaneEventKey = serde_json::from_str(s).unwrap();
-    }
-}
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -10,7 +10,7 @@ use uuid::Uuid;

 use crate::{
    error::ReportableError,
-    metrics::{CancellationRequest, CancellationSource, Metrics},
+    metrics::NUM_CANCELLATION_REQUESTS,
    redis::cancellation_publisher::{
        CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
    },
@@ -28,7 +28,7 @@ pub struct CancellationHandler<P> {
    client: P,
    /// This field used for the monitoring purposes.
    /// Represents the source of the cancellation request.
-    from: CancellationSource,
+    from: &'static str,
 }

 #[derive(Debug, Error)]
@@ -89,13 +89,9 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
        // NB: we should immediately release the lock after cloning the token.
        let Some(cancel_closure) = self.map.get(&key).and_then(|x| x.clone()) else {
            tracing::warn!("query cancellation key not found: {key}");
-            Metrics::get()
-                .proxy
-                .cancellation_requests_total
-                .inc(CancellationRequest {
-                    source: self.from,
-                    kind: crate::metrics::CancellationOutcome::NotFound,
-                });
+            NUM_CANCELLATION_REQUESTS
+                .with_label_values(&[self.from, "not_found"])
+                .inc();
            match self.client.try_publish(key, session_id).await {
                Ok(()) => {} // do nothing
                Err(e) => {
@@ -107,13 +103,9 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
            }
            return Ok(());
        };
-        Metrics::get()
-            .proxy
-            .cancellation_requests_total
-            .inc(CancellationRequest {
-                source: self.from,
-                kind: crate::metrics::CancellationOutcome::Found,
-            });
+        NUM_CANCELLATION_REQUESTS
+            .with_label_values(&[self.from, "found"])
+            .inc();
        info!("cancelling query per user's request using key {key}");
        cancel_closure.try_cancel_query().await
    }
@@ -130,7 +122,7 @@ impl<P: CancellationPublisher> CancellationHandler<P> {
 }

 impl CancellationHandler<()> {
-    pub fn new(map: CancelMap, from: CancellationSource) -> Self {
+    pub fn new(map: CancelMap, from: &'static str) -> Self {
        Self {
            map,
            client: (),
@@ -140,7 +132,7 @@ impl CancellationHandler<()> {
 }

 impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
-    pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: CancellationSource) -> Self {
+    pub fn new(map: CancelMap, client: Option<Arc<Mutex<P>>>, from: &'static str) -> Self {
        Self { map, client, from }
    }
 }
@@ -200,13 +192,15 @@ impl<P> Drop for Session<P> {

 #[cfg(test)]
 mod tests {
+    use crate::metrics::NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS;
+
    use super::*;

    #[tokio::test]
    async fn check_session_drop() -> anyhow::Result<()> {
        let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
            CancelMap::default(),
-            CancellationSource::FromRedis,
+            NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS,
        ));

        let session = cancellation_handler.clone().get_session();
@@ -220,7 +214,7 @@ mod tests {

    #[tokio::test]
    async fn cancel_session_noop_regression() {
-        let handler = CancellationHandler::<()>::new(Default::default(), CancellationSource::Local);
+        let handler = CancellationHandler::<()>::new(Default::default(), "local");
        handler
            .cancel_session(
                CancelKeyData {
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -4,11 +4,12 @@ use crate::{
    console::{errors::WakeComputeError, messages::MetricsAuxInfo},
    context::RequestMonitoring,
    error::{ReportableError, UserFacingError},
-    metrics::{Metrics, NumDbConnectionsGuard},
+    metrics::NUM_DB_CONNECTIONS_GAUGE,
    proxy::neon_option,
 };
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
+use metrics::IntCounterPairGuard;
 use pq_proto::StartupMessageParams;
 use std::{io, net::SocketAddr, time::Duration};
 use thiserror::Error;
@@ -248,7 +249,7 @@ pub struct PostgresConnection {
    /// Labels for proxy's metrics.
    pub aux: MetricsAuxInfo,

-    _guage: NumDbConnectionsGuard<'static>,
+    _guage: IntCounterPairGuard,
 }

 impl ConnCfg {
@@ -294,7 +295,9 @@ impl ConnCfg {
            params,
            cancel_closure,
            aux,
-            _guage: Metrics::get().proxy.db_connections.guard(ctx.protocol),
+            _guage: NUM_DB_CONNECTIONS_GAUGE
+                .with_label_values(&[ctx.protocol])
+                .guard(),
        };

        Ok(connection)
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -328,15 +328,13 @@ pub struct EndpointCacheConfig {
    /// Disable cache.
    /// If true, cache is ignored, but reports all statistics.
    pub disable_cache: bool,
-    /// Retry interval for the stream read operation.
-    pub retry_interval: Duration,
 }

 impl EndpointCacheConfig {
    /// Default options for [`crate::console::provider::NodeInfoCache`].
    /// Notice that by default the limiter is empty, which means that cache is disabled.
    pub const CACHE_DEFAULT_OPTIONS: &'static str =
-        "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
+        "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s";

    /// Parse cache options passed via cmdline.
    /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
@@ -347,7 +345,6 @@ impl EndpointCacheConfig {
        let mut stream_name = None;
        let mut limiter_info = vec![];
        let mut disable_cache = false;
-        let mut retry_interval = None;

        for option in options.split(',') {
            let (key, value) = option
@@ -361,7 +358,6 @@ impl EndpointCacheConfig {
                "stream_name" => stream_name = Some(value.to_string()),
                "limiter_info" => limiter_info.push(RateBucketInfo::from_str(value)?),
                "disable_cache" => disable_cache = value.parse()?,
-                "retry_interval" => retry_interval = Some(humantime::parse_duration(value)?),
                unknown => bail!("unknown key: {unknown}"),
            }
        }
@@ -374,7 +370,6 @@ impl EndpointCacheConfig {
            stream_name: stream_name.context("missing `stream_name`")?,
            disable_cache,
            limiter_info,
-            retry_interval: retry_interval.context("missing `retry_interval`")?,
        })
    }
 }
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -1,4 +1,3 @@
-use measured::FixedCardinalityLabel;
 use serde::{Deserialize, Serialize};
 use std::fmt;

@@ -103,7 +102,7 @@ pub struct MetricsAuxInfo {
    pub cold_start_info: ColdStartInfo,
 }

-#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy, FixedCardinalityLabel)]
+#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
 #[serde(rename_all = "snake_case")]
 pub enum ColdStartInfo {
    #[default]
@@ -111,11 +110,9 @@ pub enum ColdStartInfo {
    /// Compute was already running
    Warm,
    #[serde(rename = "pool_hit")]
-    #[label(rename = "pool_hit")]
    /// Compute was not running but there was an available VM
    VmPoolHit,
    #[serde(rename = "pool_miss")]
-    #[label(rename = "pool_miss")]
    /// Compute was not running and there were no VMs available
    VmPoolMiss,

--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -13,11 +13,10 @@ use crate::{
    config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions},
    context::RequestMonitoring,
    intern::ProjectIdInt,
-    metrics::ApiLockMetrics,
    scram, EndpointCacheKey,
 };
 use dashmap::DashMap;
-use std::{sync::Arc, time::Duration};
+use std::{convert::Infallible, sync::Arc, time::Duration};
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio::time::Instant;
 use tracing::info;
@@ -447,7 +446,10 @@ pub struct ApiLocks {
    permits: usize,
    timeout: Duration,
    epoch: std::time::Duration,
-    metrics: &'static ApiLockMetrics,
+    registered: prometheus::IntCounter,
+    unregistered: prometheus::IntCounter,
+    reclamation_lag: prometheus::Histogram,
+    lock_acquire_lag: prometheus::Histogram,
 }

 impl ApiLocks {
@@ -457,15 +459,55 @@ impl ApiLocks {
        shards: usize,
        timeout: Duration,
        epoch: std::time::Duration,
-        metrics: &'static ApiLockMetrics,
    ) -> prometheus::Result<Self> {
+        let registered = prometheus::IntCounter::with_opts(
+            prometheus::Opts::new(
+                "semaphores_registered",
+                "Number of semaphores registered in this api lock",
+            )
+            .namespace(name),
+        )?;
+        prometheus::register(Box::new(registered.clone()))?;
+        let unregistered = prometheus::IntCounter::with_opts(
+            prometheus::Opts::new(
+                "semaphores_unregistered",
+                "Number of semaphores unregistered in this api lock",
+            )
+            .namespace(name),
+        )?;
+        prometheus::register(Box::new(unregistered.clone()))?;
+        let reclamation_lag = prometheus::Histogram::with_opts(
+            prometheus::HistogramOpts::new(
+                "reclamation_lag_seconds",
+                "Time it takes to reclaim unused semaphores in the api lock",
+            )
+            .namespace(name)
+            // 1us -> 65ms
+            // benchmarks on my mac indicate it's usually in the range of 256us and 512us
+            .buckets(prometheus::exponential_buckets(1e-6, 2.0, 16)?),
+        )?;
+        prometheus::register(Box::new(reclamation_lag.clone()))?;
+        let lock_acquire_lag = prometheus::Histogram::with_opts(
+            prometheus::HistogramOpts::new(
+                "semaphore_acquire_seconds",
+                "Time it takes to reclaim unused semaphores in the api lock",
+            )
+            .namespace(name)
+            // 0.1ms -> 6s
+            .buckets(prometheus::exponential_buckets(1e-4, 2.0, 16)?),
+        )?;
+        prometheus::register(Box::new(lock_acquire_lag.clone()))?;
+
        Ok(Self {
            name,
            node_locks: DashMap::with_shard_amount(shards),
            permits,
            timeout,
            epoch,
-            metrics,
+            lock_acquire_lag,
+            registered,
+            unregistered,
+            reclamation_lag,
        })
    }

@@ -485,7 +527,7 @@ impl ApiLocks {
                self.node_locks
                    .entry(key.clone())
                    .or_insert_with(|| {
-                        self.metrics.semaphores_registered.inc();
+                        self.registered.inc();
                        Arc::new(Semaphore::new(self.permits))
                    })
                    .clone()
@@ -493,9 +535,8 @@ impl ApiLocks {
        };
        let permit = tokio::time::timeout_at(now + self.timeout, semaphore.acquire_owned()).await;

-        self.metrics
-            .semaphore_acquire_seconds
-            .observe(now.elapsed().as_secs_f64());
+        self.lock_acquire_lag
+            .observe((Instant::now() - now).as_secs_f64());

        Ok(WakeComputePermit {
            permit: Some(permit??),
@@ -520,13 +561,13 @@ impl ApiLocks {
                    "performing epoch reclamation on api lock"
                );
                let mut lock = shard.write();
-                let timer = self.metrics.reclamation_lag_seconds.start_timer();
+                let timer = self.reclamation_lag.start_timer();
                let count = lock
                    .extract_if(|_, semaphore| Arc::strong_count(semaphore.get_mut()) == 1)
                    .count();
                drop(lock);
-                self.metrics.semaphores_unregistered.inc_by(count as u64);
-                timer.observe();
+                self.unregistered.inc_by(count as u64);
+                timer.observe_duration()
            }
        }
    }
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -7,14 +7,14 @@ use super::{
    NodeInfo,
 };
 use crate::{
-    auth::backend::ComputeUserInfo,
-    compute,
-    console::messages::ColdStartInfo,
-    http,
-    metrics::{CacheOutcome, Metrics},
-    scram, Normalize,
+    auth::backend::ComputeUserInfo, compute, console::messages::ColdStartInfo, http, scram,
+    Normalize,
+};
+use crate::{
+    cache::Cached,
+    context::RequestMonitoring,
+    metrics::{ALLOWED_IPS_BY_CACHE_OUTCOME, ALLOWED_IPS_NUMBER},
 };
-use crate::{cache::Cached, context::RequestMonitoring};
 use futures::TryFutureExt;
 use std::sync::Arc;
 use tokio::time::Instant;
@@ -107,10 +107,7 @@ impl Api {
                Some(secret)
            };
            let allowed_ips = body.allowed_ips.unwrap_or_default();
-            Metrics::get()
-                .proxy
-                .allowed_ips_number
-                .observe(allowed_ips.len() as f64);
+            ALLOWED_IPS_NUMBER.observe(allowed_ips.len() as f64);
            Ok(AuthInfo {
                secret,
                allowed_ips,
@@ -225,16 +222,14 @@ impl super::Api for Api {
    ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
        let normalized_ep = &user_info.endpoint.normalize();
        if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) {
-            Metrics::get()
-                .proxy
-                .allowed_ips_cache_misses
-                .inc(CacheOutcome::Hit);
+            ALLOWED_IPS_BY_CACHE_OUTCOME
+                .with_label_values(&["hit"])
+                .inc();
            return Ok((allowed_ips, None));
        }
-        Metrics::get()
-            .proxy
-            .allowed_ips_cache_misses
-            .inc(CacheOutcome::Miss);
+        ALLOWED_IPS_BY_CACHE_OUTCOME
+            .with_label_values(&["miss"])
+            .inc();
        let auth_info = self.do_get_auth_info(ctx, user_info).await?;
        let allowed_ips = Arc::new(auth_info.allowed_ips);
        let user = &user_info.user;
--- a/proxy/src/context.rs
+++ b/proxy/src/context.rs
@@ -12,7 +12,9 @@ use crate::{
    console::messages::{ColdStartInfo, MetricsAuxInfo},
    error::ErrorKind,
    intern::{BranchIdInt, ProjectIdInt},
-    metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol},
+    metrics::{
+        bool_to_str, LatencyTimer, ENDPOINT_ERRORS_BY_KIND, ERROR_BY_KIND, NUM_INVALID_ENDPOINTS,
+    },
    DbName, EndpointId, RoleName,
 };

@@ -29,7 +31,7 @@ static LOG_CHAN: OnceCell<mpsc::WeakUnboundedSender<RequestData>> = OnceCell::ne
 pub struct RequestMonitoring {
    pub peer_addr: IpAddr,
    pub session_id: Uuid,
-    pub protocol: Protocol,
+    pub protocol: &'static str,
    first_packet: chrono::DateTime<Utc>,
    region: &'static str,
    pub span: Span,
@@ -67,7 +69,7 @@ impl RequestMonitoring {
    pub fn new(
        session_id: Uuid,
        peer_addr: IpAddr,
-        protocol: Protocol,
+        protocol: &'static str,
        region: &'static str,
    ) -> Self {
        let span = info_span!(
@@ -105,7 +107,7 @@ impl RequestMonitoring {

    #[cfg(test)]
    pub fn test() -> Self {
-        RequestMonitoring::new(Uuid::now_v7(), [127, 0, 0, 1].into(), Protocol::Tcp, "test")
+        RequestMonitoring::new(Uuid::now_v7(), [127, 0, 0, 1].into(), "test", "test")
    }

    pub fn console_application_name(&self) -> String {
@@ -141,9 +143,9 @@ impl RequestMonitoring {
    pub fn set_endpoint_id(&mut self, endpoint_id: EndpointId) {
        if self.endpoint_id.is_none() {
            self.span.record("ep", display(&endpoint_id));
-            let metric = &Metrics::get().proxy.connecting_endpoints;
-            let label = metric.with_labels(self.protocol);
-            metric.get_metric(label).measure(&endpoint_id);
+            crate::metrics::CONNECTING_ENDPOINTS
+                .with_label_values(&[self.protocol])
+                .measure(&endpoint_id);
            self.endpoint_id = Some(endpoint_id);
        }
    }
@@ -165,11 +167,13 @@ impl RequestMonitoring {
    }

    pub fn set_error_kind(&mut self, kind: ErrorKind) {
-        Metrics::get().proxy.errors_total.inc(kind);
+        ERROR_BY_KIND
+            .with_label_values(&[kind.to_metric_label()])
+            .inc();
        if let Some(ep) = &self.endpoint_id {
-            let metric = &Metrics::get().proxy.endpoints_affected_by_errors;
-            let label = metric.with_labels(kind);
-            metric.get_metric(label).measure(ep);
+            ENDPOINT_ERRORS_BY_KIND
+                .with_label_values(&[kind.to_metric_label()])
+                .measure(ep);
        }
        self.error_kind = Some(kind);
    }
@@ -183,19 +187,10 @@ impl RequestMonitoring {

 impl Drop for RequestMonitoring {
    fn drop(&mut self) {
-        let outcome = if self.success {
-            ConnectOutcome::Success
-        } else {
-            ConnectOutcome::Failed
-        };
-        Metrics::get()
-            .proxy
-            .invalid_endpoints_total
-            .inc(InvalidEndpointsGroup {
-                protocol: self.protocol,
-                rejected: self.rejected.into(),
-                outcome,
-            });
+        let outcome = if self.success { "success" } else { "failure" };
+        NUM_INVALID_ENDPOINTS
+            .with_label_values(&[self.protocol, bool_to_str(self.rejected), outcome])
+            .inc();
        if let Some(tx) = self.sender.take() {
            let _: Result<(), _> = tx.send(RequestData::from(&*self));
        }
--- a/proxy/src/context/parquet.rs
+++ b/proxy/src/context/parquet.rs
@@ -111,7 +111,7 @@ impl From<&RequestMonitoring> for RequestData {
                super::AuthMethod::ScramSha256Plus => "scram_sha_256_plus",
                super::AuthMethod::Cleartext => "cleartext",
            }),
-            protocol: value.protocol.as_str(),
+            protocol: value.protocol,
            region: value.region,
            error: value.error_kind.as_ref().map(|e| e.to_metric_label()),
            success: value.success,
--- a/proxy/src/error.rs
+++ b/proxy/src/error.rs
@@ -1,7 +1,5 @@
 use std::{error::Error as StdError, fmt, io};

-use measured::FixedCardinalityLabel;
-
 /// Upcast (almost) any error into an opaque [`io::Error`].
 pub fn io_error(e: impl Into<Box<dyn StdError + Send + Sync>>) -> io::Error {
    io::Error::new(io::ErrorKind::Other, e)
@@ -31,29 +29,24 @@ pub trait UserFacingError: ReportableError {
    }
 }

-#[derive(Copy, Clone, Debug, Eq, PartialEq, FixedCardinalityLabel)]
-#[label(singleton = "type")]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum ErrorKind {
    /// Wrong password, unknown endpoint, protocol violation, etc...
    User,

    /// Network error between user and proxy. Not necessarily user error
-    #[label(rename = "clientdisconnect")]
    ClientDisconnect,

    /// Proxy self-imposed user rate limits
-    #[label(rename = "ratelimit")]
    RateLimit,

    /// Proxy self-imposed service-wise rate limits
-    #[label(rename = "serviceratelimit")]
    ServiceRateLimit,

    /// internal errors
    Service,

    /// Error communicating with control plane
-    #[label(rename = "controlplane")]
    ControlPlane,

    /// Postgres error
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -13,11 +13,7 @@ pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
 use tokio::time::Instant;
 use tracing::trace;

-use crate::{
-    metrics::{ConsoleRequest, Metrics},
-    rate_limiter,
-    url::ApiUrl,
-};
+use crate::{metrics::CONSOLE_REQUEST_LATENCY, rate_limiter, url::ApiUrl};
 use reqwest_middleware::RequestBuilder;

 /// This is the preferred way to create new http clients,
@@ -94,14 +90,13 @@ impl Endpoint {

    /// Execute a [request](reqwest::Request).
    pub async fn execute(&self, request: Request) -> Result<Response, Error> {
-        let _timer = Metrics::get()
-            .proxy
-            .console_request_latency
-            .start_timer(ConsoleRequest {
-                request: request.url().path(),
-            });
-
-        self.client.execute(request).await
+        let path = request.url().path().to_string();
+        let start = Instant::now();
+        let res = self.client.execute(request).await;
+        CONSOLE_REQUEST_LATENCY
+            .with_label_values(&[&path])
+            .observe(start.elapsed().as_secs_f64());
+        res
    }
 }

--- a/proxy/src/http/health_server.rs
+++ b/proxy/src/http/health_server.rs
@@ -1,49 +1,30 @@
 use anyhow::{anyhow, bail};
-use hyper::{header::CONTENT_TYPE, Body, Request, Response, StatusCode};
-use measured::{text::BufferedTextEncoder, MetricGroup};
-use metrics::NeonMetrics;
-use std::{
-    convert::Infallible,
-    net::TcpListener,
-    sync::{Arc, Mutex},
-};
-use tracing::{info, info_span};
+use hyper::{Body, Request, Response, StatusCode};
+use std::{convert::Infallible, net::TcpListener};
+use tracing::info;
 use utils::http::{
-    endpoint::{self, request_span},
+    endpoint::{self, prometheus_metrics_handler, request_span},
    error::ApiError,
    json::json_response,
    RouterBuilder, RouterService,
 };

-use crate::jemalloc;
-
 async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(StatusCode::OK, "")
 }

-fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper::Body, ApiError> {
-    let state = Arc::new(Mutex::new(PrometheusHandler {
-        encoder: BufferedTextEncoder::new(),
-        metrics,
-    }));
-
+fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
    endpoint::make_router()
-        .get("/metrics", move |r| {
-            let state = state.clone();
-            request_span(r, move |b| prometheus_metrics_handler(b, state))
-        })
+        .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
        .get("/v1/status", status_handler)
 }

-pub async fn task_main(
-    http_listener: TcpListener,
-    metrics: AppMetrics,
-) -> anyhow::Result<Infallible> {
+pub async fn task_main(http_listener: TcpListener) -> anyhow::Result<Infallible> {
    scopeguard::defer! {
        info!("http has shut down");
    }

-    let service = || RouterService::new(make_router(metrics).build()?);
+    let service = || RouterService::new(make_router().build()?);

    hyper::Server::from_tcp(http_listener)?
        .serve(service().map_err(|e| anyhow!(e))?)
@@ -51,57 +32,3 @@ pub async fn task_main(

    bail!("hyper server without shutdown handling cannot shutdown successfully");
 }
-
-struct PrometheusHandler {
-    encoder: BufferedTextEncoder,
-    metrics: AppMetrics,
-}
-
-#[derive(MetricGroup)]
-pub struct AppMetrics {
-    #[metric(namespace = "jemalloc")]
-    pub jemalloc: Option<jemalloc::MetricRecorder>,
-    #[metric(flatten)]
-    pub neon_metrics: NeonMetrics,
-    #[metric(flatten)]
-    pub proxy: &'static crate::metrics::Metrics,
-}
-
-async fn prometheus_metrics_handler(
-    _req: Request<Body>,
-    state: Arc<Mutex<PrometheusHandler>>,
-) -> Result<Response<Body>, ApiError> {
-    let started_at = std::time::Instant::now();
-
-    let span = info_span!("blocking");
-    let body = tokio::task::spawn_blocking(move || {
-        let _span = span.entered();
-
-        let mut state = state.lock().unwrap();
-        let PrometheusHandler { encoder, metrics } = &mut *state;
-
-        metrics
-            .collect_group_into(&mut *encoder)
-            .unwrap_or_else(|infallible| match infallible {});
-
-        let body = encoder.finish();
-
-        tracing::info!(
-            bytes = body.len(),
-            elapsed_ms = started_at.elapsed().as_millis(),
-            "responded /metrics"
-        );
-
-        body
-    })
-    .await
-    .unwrap();
-
-    let response = Response::builder()
-        .status(200)
-        .header(CONTENT_TYPE, "text/plain; version=0.0.4")
-        .body(Body::from(body))
-        .unwrap();
-
-    Ok(response)
-}
--- a/proxy/src/jemalloc.rs
+++ b/proxy/src/jemalloc.rs
@@ -1,45 +1,27 @@
-use std::marker::PhantomData;
+use std::time::Duration;

-use measured::{
-    label::NoLabels,
-    metric::{
-        gauge::GaugeState, group::Encoding, group::MetricValue, name::MetricNameEncoder,
-        MetricEncoding, MetricFamilyEncoding, MetricType,
-    },
-    text::TextEncoder,
-    LabelGroup, MetricGroup,
-};
+use metrics::IntGauge;
+use prometheus::{register_int_gauge_with_registry, Registry};
 use tikv_jemalloc_ctl::{config, epoch, epoch_mib, stats, version};

 pub struct MetricRecorder {
    epoch: epoch_mib,
-    inner: Metrics,
-}
-
-#[derive(MetricGroup)]
-struct Metrics {
-    active_bytes: JemallocGaugeFamily<stats::active_mib>,
-    allocated_bytes: JemallocGaugeFamily<stats::allocated_mib>,
-    mapped_bytes: JemallocGaugeFamily<stats::mapped_mib>,
-    metadata_bytes: JemallocGaugeFamily<stats::metadata_mib>,
-    resident_bytes: JemallocGaugeFamily<stats::resident_mib>,
-    retained_bytes: JemallocGaugeFamily<stats::retained_mib>,
-}
-
-impl<Enc: Encoding> MetricGroup<Enc> for MetricRecorder
-where
-    Metrics: MetricGroup<Enc>,
-{
-    fn collect_group_into(&self, enc: &mut Enc) -> Result<(), Enc::Err> {
-        if self.epoch.advance().is_ok() {
-            self.inner.collect_group_into(enc)?;
-        }
-        Ok(())
-    }
+    active: stats::active_mib,
+    active_gauge: IntGauge,
+    allocated: stats::allocated_mib,
+    allocated_gauge: IntGauge,
+    mapped: stats::mapped_mib,
+    mapped_gauge: IntGauge,
+    metadata: stats::metadata_mib,
+    metadata_gauge: IntGauge,
+    resident: stats::resident_mib,
+    resident_gauge: IntGauge,
+    retained: stats::retained_mib,
+    retained_gauge: IntGauge,
 }

 impl MetricRecorder {
-    pub fn new() -> Result<Self, anyhow::Error> {
+    pub fn new(registry: &Registry) -> Result<Self, anyhow::Error> {
        tracing::info!(
            config = config::malloc_conf::read()?,
            version = version::read()?,
@@ -48,69 +30,71 @@ impl MetricRecorder {

        Ok(Self {
            epoch: epoch::mib()?,
-            inner: Metrics {
-                active_bytes: JemallocGaugeFamily(stats::active::mib()?),
-                allocated_bytes: JemallocGaugeFamily(stats::allocated::mib()?),
-                mapped_bytes: JemallocGaugeFamily(stats::mapped::mib()?),
-                metadata_bytes: JemallocGaugeFamily(stats::metadata::mib()?),
-                resident_bytes: JemallocGaugeFamily(stats::resident::mib()?),
-                retained_bytes: JemallocGaugeFamily(stats::retained::mib()?),
-            },
+            active: stats::active::mib()?,
+            active_gauge: register_int_gauge_with_registry!(
+                "jemalloc_active_bytes",
+                "Total number of bytes in active pages allocated by the process",
+                registry
+            )?,
+            allocated: stats::allocated::mib()?,
+            allocated_gauge: register_int_gauge_with_registry!(
+                "jemalloc_allocated_bytes",
+                "Total number of bytes allocated by the process",
+                registry
+            )?,
+            mapped: stats::mapped::mib()?,
+            mapped_gauge: register_int_gauge_with_registry!(
+                "jemalloc_mapped_bytes",
+                "Total number of bytes in active extents mapped by the allocator",
+                registry
+            )?,
+            metadata: stats::metadata::mib()?,
+            metadata_gauge: register_int_gauge_with_registry!(
+                "jemalloc_metadata_bytes",
+                "Total number of bytes dedicated to jemalloc metadata",
+                registry
+            )?,
+            resident: stats::resident::mib()?,
+            resident_gauge: register_int_gauge_with_registry!(
+                "jemalloc_resident_bytes",
+                "Total number of bytes in physically resident data pages mapped by the allocator",
+                registry
+            )?,
+            retained: stats::retained::mib()?,
+            retained_gauge: register_int_gauge_with_registry!(
+                "jemalloc_retained_bytes",
+                "Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system",
+                registry
+            )?,
+        })
+    }
+
+    fn _poll(&self) -> Result<(), anyhow::Error> {
+        self.epoch.advance()?;
+        self.active_gauge.set(self.active.read()? as i64);
+        self.allocated_gauge.set(self.allocated.read()? as i64);
+        self.mapped_gauge.set(self.mapped.read()? as i64);
+        self.metadata_gauge.set(self.metadata.read()? as i64);
+        self.resident_gauge.set(self.resident.read()? as i64);
+        self.retained_gauge.set(self.retained.read()? as i64);
+        Ok(())
+    }
+
+    #[inline]
+    pub fn poll(&self) {
+        if let Err(error) = self._poll() {
+            tracing::warn!(%error, "Failed to poll jemalloc stats");
+        }
+    }
+
+    pub fn start(self) -> tokio::task::JoinHandle<()> {
+        tokio::task::spawn(async move {
+            let mut interval = tokio::time::interval(Duration::from_secs(15));
+            interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+            loop {
+                self.poll();
+                interval.tick().await;
+            }
        })
    }
 }
-
-struct JemallocGauge<T>(PhantomData<T>);
-
-impl<T> Default for JemallocGauge<T> {
-    fn default() -> Self {
-        JemallocGauge(PhantomData)
-    }
-}
-impl<T> MetricType for JemallocGauge<T> {
-    type Metadata = T;
-}
-
-struct JemallocGaugeFamily<T>(T);
-impl<M, T: Encoding> MetricFamilyEncoding<T> for JemallocGaugeFamily<M>
-where
-    JemallocGauge<M>: MetricEncoding<T, Metadata = M>,
-{
-    fn collect_family_into(&self, name: impl MetricNameEncoder, enc: &mut T) -> Result<(), T::Err> {
-        JemallocGauge::write_type(&name, enc)?;
-        JemallocGauge(PhantomData).collect_into(&self.0, NoLabels, name, enc)
-    }
-}
-
-macro_rules! jemalloc_gauge {
-    ($stat:ident, $mib:ident) => {
-        impl<W: std::io::Write> MetricEncoding<TextEncoder<W>> for JemallocGauge<stats::$mib> {
-            fn write_type(
-                name: impl MetricNameEncoder,
-                enc: &mut TextEncoder<W>,
-            ) -> Result<(), std::io::Error> {
-                GaugeState::write_type(name, enc)
-            }
-
-            fn collect_into(
-                &self,
-                mib: &stats::$mib,
-                labels: impl LabelGroup,
-                name: impl MetricNameEncoder,
-                enc: &mut TextEncoder<W>,
-            ) -> Result<(), std::io::Error> {
-                if let Ok(v) = mib.read() {
-                    enc.write_metric_value(name, labels, MetricValue::Int(v as i64))?;
-                }
-                Ok(())
-            }
-        }
-    };
-}
-
-jemalloc_gauge!(active, active_mib);
-jemalloc_gauge!(allocated, allocated_mib);
-jemalloc_gauge!(mapped, mapped_mib);
-jemalloc_gauge!(metadata, metadata_mib);
-jemalloc_gauge!(resident, resident_mib);
-jemalloc_gauge!(retained, retained_mib);
--- a/proxy/src/metrics.rs
+++ b/proxy/src/metrics.rs
@@ -1,359 +1,188 @@
-use std::sync::OnceLock;
-
-use lasso::ThreadedRodeo;
-use measured::{
-    label::StaticLabelSet,
-    metric::{histogram::Thresholds, name::MetricName},
-    Counter, CounterVec, FixedCardinalityLabel, Gauge, GaugeVec, Histogram, HistogramVec,
-    LabelGroup, MetricGroup,
+use ::metrics::{
+    exponential_buckets, register_histogram, register_histogram_vec, register_hll_vec,
+    register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge,
+    register_int_gauge_vec, Histogram, HistogramVec, HyperLogLogVec, IntCounterPairVec,
+    IntCounterVec, IntGauge, IntGaugeVec,
+};
+use metrics::{
+    register_hll, register_int_counter, register_int_counter_pair, HyperLogLog, IntCounter,
+    IntCounterPair,
 };
-use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};

+use once_cell::sync::Lazy;
 use tokio::time::{self, Instant};

 use crate::console::messages::ColdStartInfo;

-#[derive(MetricGroup)]
-pub struct Metrics {
-    #[metric(namespace = "proxy")]
-    pub proxy: ProxyMetrics,
+pub static NUM_DB_CONNECTIONS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_opened_db_connections_total",
+        "Number of opened connections to a database.",
+        "proxy_closed_db_connections_total",
+        "Number of closed connections to a database.",
+        &["protocol"],
+    )
+    .unwrap()
+});

-    #[metric(namespace = "wake_compute_lock")]
-    pub wake_compute_lock: ApiLockMetrics,
+pub static NUM_CLIENT_CONNECTION_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_opened_client_connections_total",
+        "Number of opened connections from a client.",
+        "proxy_closed_client_connections_total",
+        "Number of closed connections from a client.",
+        &["protocol"],
+    )
+    .unwrap()
+});

-    // the one metric not called proxy_....
-    pub semaphore_control_plane_limit: GaugeVec<StaticLabelSet<RateLimit>>,
-}
+pub static NUM_CONNECTION_REQUESTS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_accepted_connections_total",
+        "Number of client connections accepted.",
+        "proxy_closed_connections_total",
+        "Number of client connections closed.",
+        &["protocol"],
+    )
+    .unwrap()
+});

-impl Metrics {
-    pub fn get() -> &'static Self {
-        static SELF: OnceLock<Metrics> = OnceLock::new();
-        SELF.get_or_init(|| Metrics {
-            proxy: ProxyMetrics::default(),
-            wake_compute_lock: ApiLockMetrics::new(),
-            semaphore_control_plane_limit: GaugeVec::default(),
-        })
-    }
-}
+pub static COMPUTE_CONNECTION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "proxy_compute_connection_latency_seconds",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // http/ws/tcp, cold start kind, success/failed, client/client_and_cplane
+        // 3 * 6 * 2 * 2 = 72 counters
+        &["protocol", "cold_start_info", "outcome", "excluded"],
+        // largest bucket = 2^15 * 0.5ms = 16.4s (16 buckets, exponents 0..=15)
+        exponential_buckets(0.0005, 2.0, 16).unwrap(),
+    )
+    .unwrap()
+});

-#[derive(MetricGroup)]
-#[metric(new())]
-pub struct ProxyMetrics {
-    #[metric(flatten)]
-    pub db_connections: CounterPairVec<NumDbConnectionsGauge>,
-    #[metric(flatten)]
-    pub client_connections: CounterPairVec<NumClientConnectionsGauge>,
-    #[metric(flatten)]
-    pub connection_requests: CounterPairVec<NumConnectionRequestsGauge>,
-    #[metric(flatten)]
-    pub http_endpoint_pools: HttpEndpointPools,
-
-    /// Time it took for proxy to establish a connection to the compute endpoint.
-    // largest bucket = 2^16 * 0.5ms = 32s
-    #[metric(metadata = Thresholds::exponential_buckets(0.0005, 2.0))]
-    pub compute_connection_latency_seconds: HistogramVec<ComputeConnectionLatencySet, 16>,
-
-    /// Time it took for proxy to receive a response from control plane.
-    #[metric(
+pub static CONSOLE_REQUEST_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "proxy_console_request_latency",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // proxy_wake_compute/proxy_get_role_info
+        &["request"],
        // largest bucket = 2^16 * 0.2ms = 13s
-        metadata = Thresholds::exponential_buckets(0.0002, 2.0),
-    )]
-    pub console_request_latency: HistogramVec<ConsoleRequestSet, 16>,
+        exponential_buckets(0.0002, 2.0, 16).unwrap(),
+    )
+    .unwrap()
+});

-    /// Time it takes to acquire a token to call console plane.
-    // largest bucket = 3^16 * 0.05ms = 2.15s
-    #[metric(metadata = Thresholds::exponential_buckets(0.00005, 3.0))]
-    pub control_plane_token_acquire_seconds: Histogram<16>,
+pub static ALLOWED_IPS_BY_CACHE_OUTCOME: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_allowed_ips_cache_misses",
+        "Number of cache hits/misses for allowed ips",
+        // hit/miss
+        &["outcome"],
+    )
+    .unwrap()
+});

-    /// Size of the HTTP request body lengths.
-    // smallest bucket = 16 bytes
-    // largest bucket = 4^12 * 16 bytes = 256MB
-    #[metric(metadata = Thresholds::exponential_buckets(16.0, 4.0))]
-    pub http_conn_content_length_bytes: HistogramVec<StaticLabelSet<HttpDirection>, 12>,
+pub static RATE_LIMITER_ACQUIRE_LATENCY: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "proxy_control_plane_token_acquire_seconds",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // largest bucket = 3^15 * 0.05ms = 717s (16 buckets, exponents 0..=15)
+        exponential_buckets(0.00005, 3.0, 16).unwrap(),
+    )
+    .unwrap()
+});

-    /// Time it takes to reclaim unused connection pools.
-    #[metric(metadata = Thresholds::exponential_buckets(1e-6, 2.0))]
-    pub http_pool_reclaimation_lag_seconds: Histogram<16>,
+pub static RATE_LIMITER_LIMIT: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
+        "semaphore_control_plane_limit",
+        "Current limit of the semaphore control plane",
+        &["limit"], // 2 counters
+    )
+    .unwrap()
+});

-    /// Number of opened connections to a database.
-    pub http_pool_opened_connections: Gauge,
+pub static NUM_CONNECTION_ACCEPTED_BY_SNI: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_accepted_connections_by_sni",
+        "Number of connections (per sni).",
+        &["kind"],
+    )
+    .unwrap()
+});

-    /// Number of cache hits/misses for allowed ips.
-    pub allowed_ips_cache_misses: CounterVec<StaticLabelSet<CacheOutcome>>,
+pub static ALLOWED_IPS_NUMBER: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "proxy_allowed_ips_number",
+        "Number of allowed ips",
+        vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0],
+    )
+    .unwrap()
+});

-    /// Number of allowed ips
-    #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))]
-    pub allowed_ips_number: Histogram<10>,
+pub static HTTP_CONTENT_LENGTH: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "proxy_http_conn_content_length_bytes",
+        "Number of bytes the HTTP response content consumes",
+        // request/response
+        &["direction"],
+        // smallest bucket = 16 bytes
+        // largest bucket = 4^11 * 16 bytes = 64MB
+        exponential_buckets(16.0, 4.0, 12).unwrap()
+    )
+    .unwrap()
+});

-    /// Number of connections (per sni).
-    pub accepted_connections_by_sni: CounterVec<StaticLabelSet<SniKind>>,
+pub static GC_LATENCY: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "proxy_http_pool_reclaimation_lag_seconds",
+        "Time it takes to reclaim unused connection pools",
+        // 1us -> 33ms
+        exponential_buckets(1e-6, 2.0, 16).unwrap(),
+    )
+    .unwrap()
+});

-    /// Number of connection failures (per kind).
-    pub connection_failures_total: CounterVec<StaticLabelSet<ConnectionFailureKind>>,
+pub static ENDPOINT_POOLS: Lazy<IntCounterPair> = Lazy::new(|| {
+    register_int_counter_pair!(
+        "proxy_http_pool_endpoints_registered_total",
+        "Number of endpoints we have registered pools for",
+        "proxy_http_pool_endpoints_unregistered_total",
+        "Number of endpoints we have unregistered pools for",
+    )
+    .unwrap()
+});

-    /// Number of wake-up failures (per kind).
-    pub connection_failures_breakdown: CounterVec<ConnectionFailuresBreakdownSet>,
+pub static NUM_OPEN_CLIENTS_IN_HTTP_POOL: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
+        "proxy_http_pool_opened_connections",
+        "Number of opened connections to a database.",
+    )
+    .unwrap()
+});

-    /// Number of bytes sent/received between all clients and backends.
-    pub io_bytes: CounterVec<StaticLabelSet<Direction>>,
+pub static NUM_CANCELLATION_REQUESTS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_cancellation_requests_total",
+        "Number of cancellation requests (per found/not_found).",
+        &["source", "kind"],
+    )
+    .unwrap()
+});

-    /// Number of errors by a given classification.
-    pub errors_total: CounterVec<StaticLabelSet<crate::error::ErrorKind>>,
+pub static NUM_INVALID_ENDPOINTS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_invalid_endpoints_total",
+        "Number of invalid endpoints (per protocol, per rejected).",
+        // http/ws/tcp, true/false, success/failure
+        // TODO(anna): the last dimension is just a proxy to what we actually want to measure.
+        // We need to measure whether the endpoint was found by cplane or not.
+        &["protocol", "rejected", "outcome"],
+    )
+    .unwrap()
+});

-    /// Number of cancellation requests (per found/not_found).
-    pub cancellation_requests_total: CounterVec<CancellationRequestSet>,
-
-    /// Number of errors by a given classification
-    pub redis_errors_total: CounterVec<RedisErrorsSet>,
-
-    /// Number of TLS handshake failures
-    pub tls_handshake_failures: Counter,
-
-    /// Number of connection requests affected by authentication rate limits
-    pub requests_auth_rate_limits_total: Counter,
-
-    /// HLL approximate cardinality of endpoints that are connecting
-    pub connecting_endpoints: HyperLogLogVec<StaticLabelSet<Protocol>, 32>,
-
-    /// Number of endpoints affected by errors of a given classification
-    pub endpoints_affected_by_errors: HyperLogLogVec<StaticLabelSet<crate::error::ErrorKind>, 32>,
-
-    /// Number of endpoints affected by authentication rate limits
-    pub endpoints_auth_rate_limits: HyperLogLog<32>,
-
-    /// Number of invalid endpoints (per protocol, per rejected).
-    pub invalid_endpoints_total: CounterVec<InvalidEndpointsSet>,
-}
-
-#[derive(MetricGroup)]
-#[metric(new())]
-pub struct ApiLockMetrics {
-    /// Number of semaphores registered in this api lock
-    pub semaphores_registered: Counter,
-    /// Number of semaphores unregistered in this api lock
-    pub semaphores_unregistered: Counter,
-    /// Time it takes to reclaim unused semaphores in the api lock
-    #[metric(metadata = Thresholds::exponential_buckets(1e-6, 2.0))]
-    pub reclamation_lag_seconds: Histogram<16>,
-    /// Time it takes to acquire a semaphore lock
-    #[metric(metadata = Thresholds::exponential_buckets(1e-4, 2.0))]
-    pub semaphore_acquire_seconds: Histogram<16>,
-}
-
-impl Default for ProxyMetrics {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "direction")]
-pub enum HttpDirection {
-    Request,
-    Response,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "direction")]
-pub enum Direction {
-    Tx,
-    Rx,
-}
-
-#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
-#[label(singleton = "protocol")]
-pub enum Protocol {
-    Http,
-    Ws,
-    Tcp,
-    SniRouter,
-}
-
-impl Protocol {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Protocol::Http => "http",
-            Protocol::Ws => "ws",
-            Protocol::Tcp => "tcp",
-            Protocol::SniRouter => "sni_router",
-        }
-    }
-}
-
-impl std::fmt::Display for Protocol {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.as_str())
-    }
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-pub enum Bool {
-    True,
-    False,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "outcome")]
-pub enum Outcome {
-    Success,
-    Failed,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "outcome")]
-pub enum CacheOutcome {
-    Hit,
-    Miss,
-}
-
-#[derive(LabelGroup)]
-#[label(set = ConsoleRequestSet)]
-pub struct ConsoleRequest<'a> {
-    #[label(dynamic_with = ThreadedRodeo, default)]
-    pub request: &'a str,
-}
-
-#[derive(MetricGroup, Default)]
-pub struct HttpEndpointPools {
-    /// Number of endpoints we have registered pools for
-    pub http_pool_endpoints_registered_total: Counter,
-    /// Number of endpoints we have unregistered pools for
-    pub http_pool_endpoints_unregistered_total: Counter,
-}
-
-pub struct HttpEndpointPoolsGuard<'a> {
-    dec: &'a Counter,
-}
-
-impl Drop for HttpEndpointPoolsGuard<'_> {
-    fn drop(&mut self) {
-        self.dec.inc();
-    }
-}
-
-impl HttpEndpointPools {
-    pub fn guard(&self) -> HttpEndpointPoolsGuard {
-        self.http_pool_endpoints_registered_total.inc();
-        HttpEndpointPoolsGuard {
-            dec: &self.http_pool_endpoints_unregistered_total,
-        }
-    }
-}
-pub struct NumDbConnectionsGauge;
-impl CounterPairAssoc for NumDbConnectionsGauge {
-    const INC_NAME: &'static MetricName = MetricName::from_str("opened_db_connections_total");
-    const DEC_NAME: &'static MetricName = MetricName::from_str("closed_db_connections_total");
-    const INC_HELP: &'static str = "Number of opened connections to a database.";
-    const DEC_HELP: &'static str = "Number of closed connections to a database.";
-    type LabelGroupSet = StaticLabelSet<Protocol>;
-}
-pub type NumDbConnectionsGuard<'a> = metrics::MeasuredCounterPairGuard<'a, NumDbConnectionsGauge>;
-
-pub struct NumClientConnectionsGauge;
-impl CounterPairAssoc for NumClientConnectionsGauge {
-    const INC_NAME: &'static MetricName = MetricName::from_str("opened_client_connections_total");
-    const DEC_NAME: &'static MetricName = MetricName::from_str("closed_client_connections_total");
-    const INC_HELP: &'static str = "Number of opened connections from a client.";
-    const DEC_HELP: &'static str = "Number of closed connections from a client.";
-    type LabelGroupSet = StaticLabelSet<Protocol>;
-}
-pub type NumClientConnectionsGuard<'a> =
-    metrics::MeasuredCounterPairGuard<'a, NumClientConnectionsGauge>;
-
-pub struct NumConnectionRequestsGauge;
-impl CounterPairAssoc for NumConnectionRequestsGauge {
-    const INC_NAME: &'static MetricName = MetricName::from_str("accepted_connections_total");
-    const DEC_NAME: &'static MetricName = MetricName::from_str("closed_connections_total");
-    const INC_HELP: &'static str = "Number of client connections accepted.";
-    const DEC_HELP: &'static str = "Number of client connections closed.";
-    type LabelGroupSet = StaticLabelSet<Protocol>;
-}
-pub type NumConnectionRequestsGuard<'a> =
-    metrics::MeasuredCounterPairGuard<'a, NumConnectionRequestsGauge>;
-
-#[derive(LabelGroup)]
-#[label(set = ComputeConnectionLatencySet)]
-pub struct ComputeConnectionLatencyGroup {
-    protocol: Protocol,
-    cold_start_info: ColdStartInfo,
-    outcome: ConnectOutcome,
-    excluded: LatencyExclusions,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-pub enum LatencyExclusions {
-    Client,
-    ClientAndCplane,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "limit")]
-pub enum RateLimit {
-    Actual,
-    Expected,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "kind")]
-pub enum SniKind {
-    Sni,
-    NoSni,
-    PasswordHack,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "kind")]
-pub enum ConnectionFailureKind {
-    ComputeCached,
-    ComputeUncached,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-#[label(singleton = "kind")]
-pub enum WakeupFailureKind {
-    BadComputeAddress,
-    ApiTransportError,
-    QuotaExceeded,
-    ApiConsoleLocked,
-    ApiConsoleBadRequest,
-    ApiConsoleOtherServerError,
-    ApiConsoleOtherError,
-    TimeoutError,
-}
-
-#[derive(LabelGroup)]
-#[label(set = ConnectionFailuresBreakdownSet)]
-pub struct ConnectionFailuresBreakdownGroup {
-    pub kind: WakeupFailureKind,
-    pub retry: Bool,
-}
-
-#[derive(LabelGroup, Copy, Clone)]
-#[label(set = RedisErrorsSet)]
-pub struct RedisErrors<'a> {
-    #[label(dynamic_with = ThreadedRodeo, default)]
-    pub channel: &'a str,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-pub enum CancellationSource {
-    FromClient,
-    FromRedis,
-    Local,
-}
-
-#[derive(FixedCardinalityLabel, Copy, Clone)]
-pub enum CancellationOutcome {
-    NotFound,
-    Found,
-}
-
-#[derive(LabelGroup)]
-#[label(set = CancellationRequestSet)]
-pub struct CancellationRequest {
-    pub source: CancellationSource,
-    pub kind: CancellationOutcome,
-}
+pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT: &str = "from_client";
+pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS: &str = "from_redis";

 pub enum Waiting {
    Cplane,
@@ -368,6 +197,20 @@ struct Accumulated {
    compute: time::Duration,
 }

+enum Outcome {
+    Success,
+    Failed,
+}
+
+impl Outcome {
+    fn as_str(&self) -> &'static str {
+        match self {
+            Outcome::Success => "success",
+            Outcome::Failed => "failed",
+        }
+    }
+}
+
 pub struct LatencyTimer {
    // time since the stopwatch was started
    start: time::Instant,
@@ -376,9 +219,9 @@ pub struct LatencyTimer {
    // accumulated time on the stopwatch
    accumulated: Accumulated,
    // label data
-    protocol: Protocol,
+    protocol: &'static str,
    cold_start_info: ColdStartInfo,
-    outcome: ConnectOutcome,
+    outcome: Outcome,
 }

 pub struct LatencyTimerPause<'a> {
@@ -388,7 +231,7 @@ pub struct LatencyTimerPause<'a> {
 }

 impl LatencyTimer {
-    pub fn new(protocol: Protocol) -> Self {
+    pub fn new(protocol: &'static str) -> Self {
        Self {
            start: time::Instant::now(),
            stop: None,
@@ -396,7 +239,7 @@ impl LatencyTimer {
            protocol,
            cold_start_info: ColdStartInfo::Unknown,
            // assume failed unless otherwise specified
-            outcome: ConnectOutcome::Failed,
+            outcome: Outcome::Failed,
        }
    }

@@ -417,7 +260,7 @@ impl LatencyTimer {
        self.stop = Some(time::Instant::now());

        // success
-        self.outcome = ConnectOutcome::Success;
+        self.outcome = Outcome::Success;
    }
 }

@@ -432,62 +275,128 @@ impl Drop for LatencyTimerPause<'_> {
    }
 }

-#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
-pub enum ConnectOutcome {
-    Success,
-    Failed,
-}
-
 impl Drop for LatencyTimer {
    fn drop(&mut self) {
        let duration = self
            .stop
            .unwrap_or_else(time::Instant::now)
            .duration_since(self.start);
-
-        let metric = &Metrics::get().proxy.compute_connection_latency_seconds;
-
-        // Excluding client communication from the accumulated time.
-        metric.observe(
-            ComputeConnectionLatencyGroup {
-                protocol: self.protocol,
-                cold_start_info: self.cold_start_info,
-                outcome: self.outcome,
-                excluded: LatencyExclusions::Client,
-            },
-            duration
-                .saturating_sub(self.accumulated.client)
-                .as_secs_f64(),
-        );
-
+        // Excluding client communication from the accumulated time.
+        COMPUTE_CONNECTION_LATENCY
+            .with_label_values(&[
+                self.protocol,
+                self.cold_start_info.as_str(),
+                self.outcome.as_str(),
+                "client",
+            ])
+            .observe((duration.saturating_sub(self.accumulated.client)).as_secs_f64());
        // Exclude client and cplane communication from the accumulated time.
        let accumulated_total = self.accumulated.client + self.accumulated.cplane;
-        metric.observe(
-            ComputeConnectionLatencyGroup {
-                protocol: self.protocol,
-                cold_start_info: self.cold_start_info,
-                outcome: self.outcome,
-                excluded: LatencyExclusions::ClientAndCplane,
-            },
-            duration.saturating_sub(accumulated_total).as_secs_f64(),
-        );
+        COMPUTE_CONNECTION_LATENCY
+            .with_label_values(&[
+                self.protocol,
+                self.cold_start_info.as_str(),
+                self.outcome.as_str(),
+                "client_and_cplane",
+            ])
+            .observe((duration.saturating_sub(accumulated_total)).as_secs_f64());
    }
 }

-impl From<bool> for Bool {
-    fn from(value: bool) -> Self {
-        if value {
-            Bool::True
-        } else {
-            Bool::False
-        }
+pub static NUM_CONNECTION_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_connection_failures_total",
+        "Number of connection failures (per kind).",
+        &["kind"],
+    )
+    .unwrap()
+});
+
+pub static NUM_WAKEUP_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_connection_failures_breakdown",
+        "Number of wake-up failures (per kind).",
+        &["retry", "kind"],
+    )
+    .unwrap()
+});
+
+pub static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_io_bytes",
+        "Number of bytes sent/received between all clients and backends.",
+        &["direction"],
+    )
+    .unwrap()
+});
+
+pub const fn bool_to_str(x: bool) -> &'static str {
+    if x {
+        "true"
+    } else {
+        "false"
    }
 }

-#[derive(LabelGroup)]
-#[label(set = InvalidEndpointsSet)]
-pub struct InvalidEndpointsGroup {
-    pub protocol: Protocol,
-    pub rejected: Bool,
-    pub outcome: ConnectOutcome,
-}
+pub static CONNECTING_ENDPOINTS: Lazy<HyperLogLogVec<32>> = Lazy::new(|| {
+    register_hll_vec!(
+        32,
+        "proxy_connecting_endpoints",
+        "HLL approximate cardinality of endpoints that are connecting",
+        &["protocol"],
+    )
+    .unwrap()
+});
+
+pub static ERROR_BY_KIND: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_errors_total",
+        "Number of errors by a given classification",
+        &["type"],
+    )
+    .unwrap()
+});
+
+pub static ENDPOINT_ERRORS_BY_KIND: Lazy<HyperLogLogVec<32>> = Lazy::new(|| {
+    register_hll_vec!(
+        32,
+        "proxy_endpoints_affected_by_errors",
+        "Number of endpoints affected by errors of a given classification",
+        &["type"],
+    )
+    .unwrap()
+});
+
+pub static REDIS_BROKEN_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_redis_errors_total",
+        "Number of errors by a given classification",
+        &["channel"],
+    )
+    .unwrap()
+});
+
+pub static TLS_HANDSHAKE_FAILURES: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "proxy_tls_handshake_failures",
+        "Number of TLS handshake failures",
+    )
+    .unwrap()
+});
+
+pub static ENDPOINTS_AUTH_RATE_LIMITED: Lazy<HyperLogLog<32>> = Lazy::new(|| {
+    register_hll!(
+        32,
+        "proxy_endpoints_auth_rate_limits",
+        "Number of endpoints affected by authentication rate limits",
+    )
+    .unwrap()
+});
+
+pub static AUTH_RATE_LIMIT_HITS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "proxy_requests_auth_rate_limits_total",
+        "Number of connection requests affected by authentication rate limits",
+    )
+    .unwrap()
+});
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -15,7 +15,7 @@ use crate::{
    config::{ProxyConfig, TlsConfig},
    context::RequestMonitoring,
    error::ReportableError,
-    metrics::{Metrics, NumClientConnectionsGuard},
+    metrics::{NUM_CLIENT_CONNECTION_GAUGE, NUM_CONNECTION_REQUESTS_GAUGE},
    protocol2::WithClientIp,
    proxy::handshake::{handshake, HandshakeData},
    rate_limiter::EndpointRateLimiter,
@@ -24,6 +24,7 @@ use crate::{
 };
 use futures::TryFutureExt;
 use itertools::Itertools;
+use metrics::IntCounterPairGuard;
 use once_cell::sync::OnceCell;
 use pq_proto::{BeMessage as Be, StartupMessageParams};
 use regex::Regex;
@@ -78,10 +79,9 @@ pub async fn task_main(
    {
        let (socket, peer_addr) = accept_result?;

-        let conn_gauge = Metrics::get()
-            .proxy
-            .client_connections
-            .guard(crate::metrics::Protocol::Tcp);
+        let conn_gauge = NUM_CLIENT_CONNECTION_GAUGE
+            .with_label_values(&["tcp"])
+            .guard();

        let session_id = uuid::Uuid::new_v4();
        let cancellation_handler = Arc::clone(&cancellation_handler);
@@ -113,12 +113,7 @@ pub async fn task_main(
                },
            };

-            let mut ctx = RequestMonitoring::new(
-                    session_id,
-                    peer_addr,
-                    crate::metrics::Protocol::Tcp,
-                    &config.region,
-                );
+            let mut ctx = RequestMonitoring::new(session_id, peer_addr, "tcp", &config.region);
            let span = ctx.span.clone();

            let res = handle_client(
@@ -242,17 +237,14 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    stream: S,
    mode: ClientMode,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    conn_gauge: NumClientConnectionsGuard<'static>,
+    conn_gauge: IntCounterPairGuard,
 ) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
-    info!(
-        protocol = %ctx.protocol,
-        "handling interactive connection from client"
-    );
+    info!("handling interactive connection from client");

-    let metrics = &Metrics::get().proxy;
    let proto = ctx.protocol;
-    // let _client_gauge = metrics.client_connections.guard(proto);
-    let _request_gauge = metrics.connection_requests.guard(proto);
+    let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
+        .with_label_values(&[proto])
+        .guard();

    let tls = config.tls_config.as_ref();

--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -4,7 +4,7 @@ use crate::{
    console::{self, errors::WakeComputeError, CachedNodeInfo, NodeInfo},
    context::RequestMonitoring,
    error::ReportableError,
-    metrics::{ConnectionFailureKind, Metrics},
+    metrics::NUM_CONNECTION_FAILURES,
    proxy::{
        retry::{retry_after, ShouldRetry},
        wake_compute::wake_compute,
@@ -27,10 +27,10 @@ pub fn invalidate_cache(node_info: console::CachedNodeInfo) -> NodeInfo {
        warn!("invalidating stalled compute node info cache entry");
    }
    let label = match is_cached {
-        true => ConnectionFailureKind::ComputeCached,
-        false => ConnectionFailureKind::ComputeUncached,
+        true => "compute_cached",
+        false => "compute_uncached",
    };
-    Metrics::get().proxy.connection_failures_total.inc(label);
+    NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();

    node_info.invalidate()
 }
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -2,10 +2,11 @@ use crate::{
    cancellation,
    compute::PostgresConnection,
    console::messages::MetricsAuxInfo,
-    metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard},
+    metrics::NUM_BYTES_PROXIED_COUNTER,
    stream::Stream,
    usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS},
 };
+use metrics::IntCounterPairGuard;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::info;
 use utils::measured_stream::MeasuredStream;
@@ -22,25 +23,24 @@ pub async fn proxy_pass(
        branch_id: aux.branch_id,
    });

-    let metrics = &Metrics::get().proxy.io_bytes;
-    let m_sent = metrics.with_labels(Direction::Tx);
+    let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx"]);
    let mut client = MeasuredStream::new(
        client,
        |_| {},
        |cnt| {
            // Number of bytes we sent to the client (outbound).
-            metrics.get_metric(m_sent).inc_by(cnt as u64);
+            m_sent.inc_by(cnt as u64);
            usage.record_egress(cnt as u64);
        },
    );

-    let m_recv = metrics.with_labels(Direction::Rx);
+    let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx"]);
    let mut compute = MeasuredStream::new(
        compute,
        |_| {},
        |cnt| {
            // Number of bytes the client sent to the compute node (inbound).
-            metrics.get_metric(m_recv).inc_by(cnt as u64);
+            m_recv.inc_by(cnt as u64);
        },
    );

@@ -60,8 +60,8 @@ pub struct ProxyPassthrough<P, S> {
    pub compute: PostgresConnection,
    pub aux: MetricsAuxInfo,

-    pub req: NumConnectionRequestsGuard<'static>,
-    pub conn: NumClientConnectionsGuard<'static>,
+    pub req: IntCounterPairGuard,
+    pub conn: IntCounterPairGuard,
    pub cancel: cancellation::Session<P>,
 }

--- a/proxy/src/proxy/wake_compute.rs
+++ b/proxy/src/proxy/wake_compute.rs
@@ -1,6 +1,6 @@
 use crate::console::{errors::WakeComputeError, provider::CachedNodeInfo};
 use crate::context::RequestMonitoring;
-use crate::metrics::{ConnectionFailuresBreakdownGroup, Metrics, WakeupFailureKind};
+use crate::metrics::{bool_to_str, NUM_WAKEUP_FAILURES};
 use crate::proxy::retry::retry_after;
 use hyper::StatusCode;
 use std::ops::ControlFlow;
@@ -57,46 +57,39 @@ pub fn handle_try_wake(

 fn report_error(e: &WakeComputeError, retry: bool) {
    use crate::console::errors::ApiError;
+    let retry = bool_to_str(retry);
    let kind = match e {
-        WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
-        WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
+        WakeComputeError::BadComputeAddress(_) => "bad_compute_address",
+        WakeComputeError::ApiError(ApiError::Transport(_)) => "api_transport_error",
        WakeComputeError::ApiError(ApiError::Console {
            status: StatusCode::LOCKED,
            ref text,
        }) if text.contains("written data quota exceeded")
            || text.contains("the limit for current plan reached") =>
        {
-            WakeupFailureKind::QuotaExceeded
+            "quota_exceeded"
        }
        WakeComputeError::ApiError(ApiError::Console {
            status: StatusCode::UNPROCESSABLE_ENTITY,
            ref text,
        }) if text.contains("compute time quota of non-primary branches is exceeded") => {
-            WakeupFailureKind::QuotaExceeded
+            "quota_exceeded"
        }
        WakeComputeError::ApiError(ApiError::Console {
            status: StatusCode::LOCKED,
            ..
-        }) => WakeupFailureKind::ApiConsoleLocked,
+        }) => "api_console_locked",
        WakeComputeError::ApiError(ApiError::Console {
            status: StatusCode::BAD_REQUEST,
            ..
-        }) => WakeupFailureKind::ApiConsoleBadRequest,
+        }) => "api_console_bad_request",
        WakeComputeError::ApiError(ApiError::Console { status, .. })
            if status.is_server_error() =>
        {
-            WakeupFailureKind::ApiConsoleOtherServerError
+            "api_console_other_server_error"
        }
-        WakeComputeError::ApiError(ApiError::Console { .. }) => {
-            WakeupFailureKind::ApiConsoleOtherError
-        }
-        WakeComputeError::TimeoutError => WakeupFailureKind::TimeoutError,
+        WakeComputeError::ApiError(ApiError::Console { .. }) => "api_console_other_error",
+        WakeComputeError::TimeoutError => "timeout_error",
    };
-    Metrics::get()
-        .proxy
-        .connection_failures_breakdown
-        .inc(ConnectionFailuresBreakdownGroup {
-            kind,
-            retry: retry.into(),
-        });
+    NUM_WAKEUP_FAILURES.with_label_values(&[retry, kind]).inc();
 }
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -17,13 +17,7 @@ use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
 use tokio::time::{timeout, Duration, Instant};
 use tracing::info;

-use crate::{
-    intern::EndpointIdInt,
-    {
-        metrics::{Metrics, RateLimit},
-        EndpointId,
-    },
-};
+use crate::{intern::EndpointIdInt, EndpointId};

 use super::{
    limit_algorithm::{LimitAlgorithm, Sample},
@@ -463,9 +457,12 @@ impl Limiter {
            }
            new_limit
        };
-        let metric = &Metrics::get().semaphore_control_plane_limit;
-        metric.set(RateLimit::Expected, new_limit as i64);
-        metric.set(RateLimit::Actual, actual_limit as i64);
+        crate::metrics::RATE_LIMITER_LIMIT
+            .with_label_values(&["expected"])
+            .set(new_limit as i64);
+        crate::metrics::RATE_LIMITER_LIMIT
+            .with_label_values(&["actual"])
+            .set(actual_limit as i64);
        self.limits.store(new_limit, Ordering::Release);
        #[cfg(test)]
        if let Some(n) = &self.notifier {
@@ -522,10 +519,7 @@ impl reqwest_middleware::Middleware for Limiter {
        extensions: &mut task_local_extensions::Extensions,
        next: reqwest_middleware::Next<'_>,
    ) -> reqwest_middleware::Result<reqwest::Response> {
-        let timer = Metrics::get()
-            .proxy
-            .control_plane_token_acquire_seconds
-            .start_timer();
+        let start = Instant::now();
        let token = self
            .acquire_timeout(self.config.timeout)
            .await
@@ -539,12 +533,8 @@ impl reqwest_middleware::Middleware for Limiter {
                    .into(),
                )
            })?;
-        let duration = timer.observe();
-        info!(
-            ?duration,
-            "waiting for token to connect to the control plane"
-        );
-
+        info!(duration = ?start.elapsed(), "waiting for token to connect to the control plane");
+        crate::metrics::RATE_LIMITER_ACQUIRE_LATENCY.observe(start.elapsed().as_secs_f64());
        match next.run(req, extensions).await {
            Ok(response) => {
                self.release(token, Some(Outcome::from_reqwest_response(&response)))
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -11,7 +11,7 @@ use crate::{
    cache::project_info::ProjectInfoCache,
    cancellation::{CancelMap, CancellationHandler},
    intern::{ProjectIdInt, RoleNameInt},
-    metrics::{Metrics, RedisErrors},
+    metrics::{NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS, REDIS_BROKEN_MESSAGES},
 };

 const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
@@ -104,9 +104,9 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
        let msg: Notification = match serde_json::from_str(&payload) {
            Ok(msg) => msg,
            Err(e) => {
-                Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
-                    channel: msg.get_channel_name(),
-                });
+                REDIS_BROKEN_MESSAGES
+                    .with_label_values(&[msg.get_channel_name()])
+                    .inc();
                tracing::error!("broken message: {e}");
                return Ok(());
            }
@@ -183,7 +183,7 @@ where
        cache,
        Arc::new(CancellationHandler::<()>::new(
            cancel_map,
-            crate::metrics::CancellationSource::FromRedis,
+            NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS,
        )),
        region_id,
    );
--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -32,7 +32,7 @@ use tokio_util::task::TaskTracker;
 use crate::cancellation::CancellationHandlerMain;
 use crate::config::ProxyConfig;
 use crate::context::RequestMonitoring;
-use crate::metrics::Metrics;
+use crate::metrics::{NUM_CLIENT_CONNECTION_GAUGE, TLS_HANDSHAKE_FAILURES};
 use crate::protocol2::WithClientIp;
 use crate::proxy::run_until_cancelled;
 use crate::rate_limiter::EndpointRateLimiter;
@@ -156,10 +156,9 @@ async fn connection_handler(
 ) {
    let session_id = uuid::Uuid::new_v4();

-    let _gauge = Metrics::get()
-        .proxy
-        .client_connections
-        .guard(crate::metrics::Protocol::Http);
+    let _gauge = NUM_CLIENT_CONNECTION_GAUGE
+        .with_label_values(&["http"])
+        .guard();

    // handle PROXY protocol
    let mut conn = WithClientIp::new(conn);
@@ -182,13 +181,13 @@ async fn connection_handler(
        }
        // The handshake failed
        Ok(Err(e)) => {
-            Metrics::get().proxy.tls_handshake_failures.inc();
+            TLS_HANDSHAKE_FAILURES.inc();
            warn!(?session_id, %peer_addr, "failed to accept TLS connection: {e:?}");
            return;
        }
        // The handshake timed out
        Err(e) => {
-            Metrics::get().proxy.tls_handshake_failures.inc();
+            TLS_HANDSHAKE_FAILURES.inc();
            warn!(?session_id, %peer_addr, "failed to accept TLS connection: {e:?}");
            return;
        }
@@ -275,13 +274,7 @@ async fn request_handler(

    // Check if the request is a websocket upgrade request.
    if hyper_tungstenite::is_upgrade_request(&request) {
-        let ctx = RequestMonitoring::new(
-            session_id,
-            peer_addr,
-            crate::metrics::Protocol::Ws,
-            &config.region,
-        );
-
+        let ctx = RequestMonitoring::new(session_id, peer_addr, "ws", &config.region);
        let span = ctx.span.clone();
        info!(parent: &span, "performing websocket upgrade");

@@ -309,12 +302,7 @@ async fn request_handler(
        // Return the response so the spawned future can continue.
        Ok(response)
    } else if request.uri().path() == "/sql" && *request.method() == Method::POST {
-        let ctx = RequestMonitoring::new(
-            session_id,
-            peer_addr,
-            crate::metrics::Protocol::Http,
-            &config.region,
-        );
+        let ctx = RequestMonitoring::new(session_id, peer_addr, "http", &config.region);
        let span = ctx.span.clone();

        sql_over_http::handle(config, ctx, request, backend, http_cancellation_token)
--- a/proxy/src/serverless/conn_pool.rs
+++ b/proxy/src/serverless/conn_pool.rs
@@ -1,5 +1,6 @@
 use dashmap::DashMap;
 use futures::{future::poll_fn, Future};
+use metrics::IntCounterPairGuard;
 use parking_lot::RwLock;
 use rand::Rng;
 use smallvec::SmallVec;
@@ -15,13 +16,13 @@ use std::{
 use tokio::time::Instant;
 use tokio_postgres::tls::NoTlsStream;
 use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket};
-use tokio_util::sync::CancellationToken;

 use crate::console::messages::{ColdStartInfo, MetricsAuxInfo};
-use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
+use crate::metrics::{ENDPOINT_POOLS, GC_LATENCY, NUM_OPEN_CLIENTS_IN_HTTP_POOL};
 use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
 use crate::{
-    auth::backend::ComputeUserInfo, context::RequestMonitoring, DbName, EndpointCacheKey, RoleName,
+    auth::backend::ComputeUserInfo, context::RequestMonitoring, metrics::NUM_DB_CONNECTIONS_GAUGE,
+    DbName, EndpointCacheKey, RoleName,
 };

 use tracing::{debug, error, warn, Span};
@@ -77,7 +78,7 @@ pub struct EndpointConnPool<C: ClientInnerExt> {
    pools: HashMap<(DbName, RoleName), DbUserConnPool<C>>,
    total_conns: usize,
    max_conns: usize,
-    _guard: HttpEndpointPoolsGuard<'static>,
+    _guard: IntCounterPairGuard,
    global_connections_count: Arc<AtomicUsize>,
    global_pool_size_max_conns: usize,
 }
@@ -109,11 +110,7 @@ impl<C: ClientInnerExt> EndpointConnPool<C> {
            let removed = old_len - new_len;
            if removed > 0 {
                global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);
-                Metrics::get()
-                    .proxy
-                    .http_pool_opened_connections
-                    .get_metric()
-                    .dec_by(removed as i64);
+                NUM_OPEN_CLIENTS_IN_HTTP_POOL.sub(removed as i64);
            }
            *total_conns -= removed;
            removed > 0
@@ -159,11 +156,7 @@ impl<C: ClientInnerExt> EndpointConnPool<C> {
                pool.total_conns += 1;
                pool.global_connections_count
                    .fetch_add(1, atomic::Ordering::Relaxed);
-                Metrics::get()
-                    .proxy
-                    .http_pool_opened_connections
-                    .get_metric()
-                    .inc();
+                NUM_OPEN_CLIENTS_IN_HTTP_POOL.inc();
            }

            pool.total_conns
@@ -183,11 +176,7 @@ impl<C: ClientInnerExt> Drop for EndpointConnPool<C> {
        if self.total_conns > 0 {
            self.global_connections_count
                .fetch_sub(self.total_conns, atomic::Ordering::Relaxed);
-            Metrics::get()
-                .proxy
-                .http_pool_opened_connections
-                .get_metric()
-                .dec_by(self.total_conns as i64);
+            NUM_OPEN_CLIENTS_IN_HTTP_POOL.sub(self.total_conns as i64);
        }
    }
 }
@@ -226,11 +215,7 @@ impl<C: ClientInnerExt> DbUserConnPool<C> {
            removed += 1;
        }
        global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);
-        Metrics::get()
-            .proxy
-            .http_pool_opened_connections
-            .get_metric()
-            .dec_by(removed as i64);
+        NUM_OPEN_CLIENTS_IN_HTTP_POOL.sub(removed as i64);
        conn
    }
 }
@@ -318,10 +303,7 @@ impl<C: ClientInnerExt> GlobalConnPool<C> {
        // acquire a random shard lock
        let mut shard = self.global_pool.shards()[shard].write();

-        let timer = Metrics::get()
-            .proxy
-            .http_pool_reclaimation_lag_seconds
-            .start_timer();
+        let timer = GC_LATENCY.start_timer();
        let current_len = shard.len();
        let mut clients_removed = 0;
        shard.retain(|endpoint, x| {
@@ -349,7 +331,7 @@ impl<C: ClientInnerExt> GlobalConnPool<C> {

        let new_len = shard.len();
        drop(shard);
-        timer.observe();
+        timer.observe_duration();

        // Do logging outside of the lock.
        if clients_removed > 0 {
@@ -357,11 +339,7 @@ impl<C: ClientInnerExt> GlobalConnPool<C> {
                .global_connections_count
                .fetch_sub(clients_removed, atomic::Ordering::Relaxed)
                - clients_removed;
-            Metrics::get()
-                .proxy
-                .http_pool_opened_connections
-                .get_metric()
-                .dec_by(clients_removed as i64);
+            NUM_OPEN_CLIENTS_IN_HTTP_POOL.sub(clients_removed as i64);
            info!("pool: performed global pool gc. removed {clients_removed} clients, total number of clients in pool is {size}");
        }
        let removed = current_len - new_len;
@@ -432,7 +410,7 @@ impl<C: ClientInnerExt> GlobalConnPool<C> {
            pools: HashMap::new(),
            total_conns: 0,
            max_conns: self.config.pool_options.max_conns_per_endpoint,
-            _guard: Metrics::get().proxy.http_endpoint_pools.guard(),
+            _guard: ENDPOINT_POOLS.guard(),
            global_connections_count: self.global_connections_count.clone(),
            global_pool_size_max_conns: self.config.pool_options.max_total_conns,
        }));
@@ -472,7 +450,9 @@ pub fn poll_client<C: ClientInnerExt>(
    conn_id: uuid::Uuid,
    aux: MetricsAuxInfo,
 ) -> Client<C> {
-    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol);
+    let conn_gauge = NUM_DB_CONNECTIONS_GAUGE
+        .with_label_values(&[ctx.protocol])
+        .guard();
    let mut session_id = ctx.session_id;
    let (tx, mut rx) = tokio::sync::watch::channel(session_id);

@@ -489,32 +469,15 @@ pub fn poll_client<C: ClientInnerExt>(

    let db_user = conn_info.db_and_user();
    let idle = global_pool.get_idle_timeout();
-    let cancel = CancellationToken::new();
-    let cancelled = cancel.clone().cancelled_owned();
-
    tokio::spawn(
    async move {
        let _conn_gauge = conn_gauge;
        let mut idle_timeout = pin!(tokio::time::sleep(idle));
-        let mut cancelled = pin!(cancelled);
-
        poll_fn(move |cx| {
-            if cancelled.as_mut().poll(cx).is_ready() {
-                info!("connection dropped");
-                return Poll::Ready(())
-            }
-
-            match rx.has_changed() {
-                Ok(true) => {
-                    session_id = *rx.borrow_and_update();
-                    info!(%session_id, "changed session");
-                    idle_timeout.as_mut().reset(Instant::now() + idle);
-                }
-                Err(_) => {
-                    info!("connection dropped");
-                    return Poll::Ready(())
-                }
-                _ => {}
+            if matches!(rx.has_changed(), Ok(true)) {
+                session_id = *rx.borrow_and_update();
+                info!(%session_id, "changed session");
+                idle_timeout.as_mut().reset(Instant::now() + idle);
            }

            // 5 minute idle connection timeout
@@ -569,7 +532,6 @@ pub fn poll_client<C: ClientInnerExt>(
    let inner = ClientInner {
        inner: client,
        session: tx,
-        cancel,
        aux,
        conn_id,
    };
@@ -579,18 +541,10 @@ pub fn poll_client<C: ClientInnerExt>(
 struct ClientInner<C: ClientInnerExt> {
    inner: C,
    session: tokio::sync::watch::Sender<uuid::Uuid>,
-    cancel: CancellationToken,
    aux: MetricsAuxInfo,
    conn_id: uuid::Uuid,
 }

-impl<C: ClientInnerExt> Drop for ClientInner<C> {
-    fn drop(&mut self) {
-        // on client drop, tell the conn to shut down
-        self.cancel.cancel();
-    }
-}
-
 pub trait ClientInnerExt: Sync + Send + 'static {
    fn is_closed(&self) -> bool;
    fn get_process_id(&self) -> i32;
@@ -743,7 +697,6 @@ mod tests {
        ClientInner {
            inner: client,
            session: tokio::sync::watch::Sender::new(uuid::Uuid::new_v4()),
-            cancel: CancellationToken::new(),
            aux: MetricsAuxInfo {
                endpoint_id: (&EndpointId::from("endpoint")).into(),
                project_id: (&ProjectId::from("project")).into(),
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -43,8 +43,8 @@ use crate::context::RequestMonitoring;
 use crate::error::ErrorKind;
 use crate::error::ReportableError;
 use crate::error::UserFacingError;
-use crate::metrics::HttpDirection;
-use crate::metrics::Metrics;
+use crate::metrics::HTTP_CONTENT_LENGTH;
+use crate::metrics::NUM_CONNECTION_REQUESTS_GAUGE;
 use crate::proxy::run_until_cancelled;
 use crate::proxy::NeonOptions;
 use crate::serverless::backend::HttpConnError;
@@ -494,11 +494,10 @@ async fn handle_inner(
    request: Request<Incoming>,
    backend: Arc<PoolingBackend>,
 ) -> Result<Response<Full<Bytes>>, SqlOverHttpError> {
-    let _requeset_gauge = Metrics::get().proxy.connection_requests.guard(ctx.protocol);
-    info!(
-        protocol = %ctx.protocol,
-        "handling interactive connection from client"
-    );
+    let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
+        .with_label_values(&[ctx.protocol])
+        .guard();
+    info!("handling interactive connection from client");

    //
    // Determine the destination and connection params
@@ -521,10 +520,9 @@ async fn handle_inner(
        None => MAX_REQUEST_SIZE + 1,
    };
    info!(request_content_length, "request size in bytes");
-    Metrics::get()
-        .proxy
-        .http_conn_content_length_bytes
-        .observe(HttpDirection::Request, request_content_length as f64);
+    HTTP_CONTENT_LENGTH
+        .with_label_values(&["request"])
+        .observe(request_content_length as f64);

    // we don't have a streaming request support yet so this is to prevent OOM
    // from a malicious user sending an extremely large request body
@@ -609,10 +607,9 @@ async fn handle_inner(
    // count the egress bytes - we miss the TLS and header overhead but oh well...
    // moving this later in the stack is going to be a lot of effort and ehhhh
    metrics.record_egress(len as u64);
-    Metrics::get()
-        .proxy
-        .http_conn_content_length_bytes
-        .observe(HttpDirection::Response, len as f64);
+    HTTP_CONTENT_LENGTH
+        .with_label_values(&["response"])
+        .observe(len as f64);

    Ok(response)
 }
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -3,7 +3,7 @@ use crate::{
    config::ProxyConfig,
    context::RequestMonitoring,
    error::{io_error, ReportableError},
-    metrics::Metrics,
+    metrics::NUM_CLIENT_CONNECTION_GAUGE,
    proxy::{handle_client, ClientMode},
    rate_limiter::EndpointRateLimiter,
 };
@@ -139,10 +139,9 @@ pub async fn serve_websocket(
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
    let websocket = websocket.await?;
-    let conn_gauge = Metrics::get()
-        .proxy
-        .client_connections
-        .guard(crate::metrics::Protocol::Ws);
+    let conn_gauge = NUM_CLIENT_CONNECTION_GAUGE
+        .with_label_values(&["ws"])
+        .guard();

    let res = handle_client(
        config,
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -1,6 +1,6 @@
 use crate::config::TlsServerEndPoint;
 use crate::error::{ErrorKind, ReportableError, UserFacingError};
-use crate::metrics::Metrics;
+use crate::metrics::TLS_HANDSHAKE_FAILURES;
 use bytes::BytesMut;

 use pq_proto::framed::{ConnectionError, Framed};
@@ -228,7 +228,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Stream<S> {
            Stream::Raw { raw } => Ok(tokio_rustls::TlsAcceptor::from(cfg)
                .accept(raw)
                .await
-                .inspect_err(|_| Metrics::get().proxy.tls_handshake_failures.inc())?),
+                .inspect_err(|_| TLS_HANDSHAKE_FAILURES.inc())?),
            Stream::Tls { .. } => Err(StreamUpgradeError::AlreadyTls),
        }
    }
--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -17,8 +17,6 @@ use crate::service::Config;

 const SLOWDOWN_DELAY: Duration = Duration::from_secs(5);

-const NOTIFY_REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
-
 pub(crate) const API_CONCURRENCY: usize = 32;

 struct UnshardedComputeHookTenant {
@@ -244,10 +242,6 @@ pub(super) struct ComputeHook {

    // This lock is only used in testing enviroments, to serialize calls into neon_lock
    neon_local_lock: tokio::sync::Mutex<()>,
-
-    // We share a client across all notifications to enable connection re-use etc when
-    // sending large numbers of notifications
-    client: reqwest::Client,
 }

 impl ComputeHook {
@@ -257,18 +251,12 @@ impl ComputeHook {
            .clone()
            .map(|jwt| format!("Bearer {}", jwt));

-        let client = reqwest::ClientBuilder::new()
-            .timeout(NOTIFY_REQUEST_TIMEOUT)
-            .build()
-            .expect("Failed to construct HTTP client");
-
        Self {
            state: Default::default(),
            config,
            authorization_header,
            neon_local_lock: Default::default(),
            api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY),
-            client,
        }
    }

@@ -322,11 +310,12 @@ impl ComputeHook {

    async fn do_notify_iteration(
        &self,
+        client: &reqwest::Client,
        url: &String,
        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
-        let req = self.client.request(Method::PUT, url);
+        let req = client.request(Method::PUT, url);
        let req = if let Some(value) = &self.authorization_header {
            req.header(reqwest::header::AUTHORIZATION, value)
        } else {
@@ -392,6 +381,8 @@ impl ComputeHook {
        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
+        let client = reqwest::Client::new();
+
        // We hold these semaphore units across all retries, rather than only across each
        // HTTP request: this is to preserve fairness and avoid a situation where a retry might
        // time out waiting for a semaphore.
@@ -403,7 +394,7 @@ impl ComputeHook {
            .map_err(|_| NotifyError::ShuttingDown)?;

        backoff::retry(
-            || self.do_notify_iteration(url, reconfigure_request, cancel),
+            || self.do_notify_iteration(&client, url, reconfigure_request, cancel),
            |e| {
                matches!(
                    e,
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -192,6 +192,9 @@ def test_backward_compatibility(
    assert not breaking_changes_allowed, "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"


+# Forward compatibility is broken due to https://github.com/neondatabase/neon/pull/6530
+# The test is disabled until the next release deployment
+@pytest.mark.xfail
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,5 +1,5 @@
 {
-  "postgres-v16": "261497dd63ace434045058b1453bcbaaa83f23e5",
-  "postgres-v15": "85d809c124a898847a97d66a211f7d5ef4f8e0cb",
-  "postgres-v14": "d9149dc59abcbeeb26293707509aef51752db28f"
+  "postgres-v16": "3946b2e2ea71d07af092099cb5bcae76a69b90d6",
+  "postgres-v15": "64b8c7bccc6b77e04795e2d4cf6ad82dc8d987ed",
+  "postgres-v14": "a7b4c66156bce00afa60e5592d4284ba9e40b4cf"
 }
Author	SHA1	Message	Date
Anna Khanova	de4449281d	Fix	2024-04-10 11:51:10 +02:00
Anna Khanova	2876eaba61	Proper fix	2024-04-10 11:48:44 +02:00