From bef5954fd7b8ea43cac6f43a111d437cd7a360ad Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 8 May 2025 17:46:57 +0100 Subject: [PATCH] feat(proxy): track SNI usage by protocol, including for http (#11863) ## Problem We want to see how many users of the legacy serverless driver are still using the old URL for SQL-over-HTTP traffic. ## Summary of changes Adds a protocol field to the connections_by_sni metric. Ensures it's incremented for sql-over-http. --- proxy/src/auth/credentials.rs | 29 ++++++++++++++------------- proxy/src/metrics.rs | 15 +++++++++++--- proxy/src/serverless/mod.rs | 1 + proxy/src/serverless/sql_over_http.rs | 28 +++++++++++++++++++++++++- test_runner/fixtures/neon_fixtures.py | 8 ++++---- 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index 183976374a..526d0df7f2 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -12,9 +12,9 @@ use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; use crate::context::RequestContext; use crate::error::{ReportableError, UserFacingError}; -use crate::metrics::{Metrics, SniKind}; +use crate::metrics::{Metrics, SniGroup, SniKind}; use crate::proxy::NeonOptions; -use crate::serverless::SERVERLESS_DRIVER_SNI; +use crate::serverless::{AUTH_BROKER_SNI, SERVERLESS_DRIVER_SNI}; use crate::types::{EndpointId, RoleName}; #[derive(Debug, Error, PartialEq, Eq, Clone)] @@ -65,7 +65,7 @@ pub(crate) fn endpoint_sni(sni: &str, common_names: &HashSet) -> Option< if !common_names.contains(common_name) { return None; } - if subdomain == SERVERLESS_DRIVER_SNI { + if subdomain == SERVERLESS_DRIVER_SNI || subdomain == AUTH_BROKER_SNI { return None; } Some(EndpointId::from(subdomain)) @@ -128,22 +128,23 @@ impl ComputeUserInfoMaybeEndpoint { let metrics = Metrics::get(); debug!(%user, "credentials"); - if sni.is_some() { + + let protocol = ctx.protocol(); + let kind = if sni.is_some() { debug!("Connection with sni"); - metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni); + SniKind::Sni } else if endpoint.is_some() { - metrics - .proxy - .accepted_connections_by_sni - .inc(SniKind::NoSni); debug!("Connection without sni"); + SniKind::NoSni } else { - metrics - .proxy - .accepted_connections_by_sni - .inc(SniKind::PasswordHack); debug!("Connection with password hack"); - } + SniKind::PasswordHack + }; + + metrics + .proxy + .accepted_connections_by_sni + .inc(SniGroup { protocol, kind }); let options = NeonOptions::parse_params(params); diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index e5fc0b724b..4b22c912eb 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -115,8 +115,8 @@ pub struct ProxyMetrics { #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0]))] pub allowed_vpc_endpoint_ids: Histogram<10>, - /// Number of connections (per sni). - pub accepted_connections_by_sni: CounterVec>, + /// Number of connections, by the method we used to determine the endpoint. + pub accepted_connections_by_sni: CounterVec, /// Number of connection failures (per kind). pub connection_failures_total: CounterVec>, @@ -342,11 +342,20 @@ pub enum LatencyExclusions { ClientCplaneComputeRetry, } +#[derive(LabelGroup)] +#[label(set = SniSet)] +pub struct SniGroup { + pub protocol: Protocol, + pub kind: SniKind, +} + #[derive(FixedCardinalityLabel, Copy, Clone)] -#[label(singleton = "kind")] pub enum SniKind { + /// Domain name based routing. SNI for libpq/websockets. Host for HTTP Sni, + /// Metadata based routing. `options` for libpq/websockets. Header for HTTP NoSni, + /// Metadata based routing, using the password field. PasswordHack, } diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 6f24ad3dec..2a7069b1c2 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -56,6 +56,7 @@ use crate::serverless::backend::PoolingBackend; use crate::serverless::http_util::{api_error_into_response, json_response}; pub(crate) const SERVERLESS_DRIVER_SNI: &str = "api"; +pub(crate) const AUTH_BROKER_SNI: &str = "apiauth"; pub async fn task_main( config: &'static ProxyConfig, diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index fee5942b7e..dfaeedaeae 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -38,7 +38,7 @@ use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig}; use crate::context::RequestContext; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::http::{ReadBodyError, read_body_with_limit}; -use crate::metrics::{HttpDirection, Metrics}; +use crate::metrics::{HttpDirection, Metrics, SniGroup, SniKind}; use crate::proxy::{NeonOptions, run_until_cancelled}; use crate::serverless::backend::HttpConnError; use crate::types::{DbName, RoleName}; @@ -227,6 +227,32 @@ fn get_conn_info( } } + // check the URL that was used, for metrics + { + let host_endpoint = headers + // get the host header + .get("host") + // extract the domain + .and_then(|h| { + let (host, _port) = h.to_str().ok()?.split_once(':')?; + Some(host) + }) + // get the endpoint prefix + .map(|h| h.split_once('.').map_or(h, |(prefix, _)| prefix)); + + let kind = if host_endpoint == Some(&*endpoint) { + SniKind::Sni + } else { + SniKind::NoSni + }; + + let protocol = ctx.protocol(); + Metrics::get() + .proxy + .accepted_connections_by_sni + .inc(SniGroup { protocol, kind }); + } + ctx.set_user_agent( headers .get(hyper::header::USER_AGENT) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index aa468d9386..1b4562c0b3 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3835,7 +3835,7 @@ class NeonAuthBroker: external_http_port: int, auth_backend: NeonAuthBroker.ProxyV1, ): - self.domain = "apiauth.local.neon.build" # resolves to 127.0.0.1 + self.domain = "local.neon.build" # resolves to 127.0.0.1 self.host = "127.0.0.1" self.http_port = http_port self.external_http_port = external_http_port @@ -3852,7 +3852,7 @@ class NeonAuthBroker: # generate key of it doesn't exist crt_path = self.test_output_dir / "proxy.crt" key_path = self.test_output_dir / "proxy.key" - generate_proxy_tls_certs("apiauth.local.neon.build", key_path, crt_path) + generate_proxy_tls_certs(f"apiauth.{self.domain}", key_path, crt_path) args = [ str(self.neon_binpath / "proxy"), @@ -3896,10 +3896,10 @@ class NeonAuthBroker: log.info(f"Executing http query: {query}") - connstr = f"postgresql://{user}@{self.domain}/postgres" + connstr = f"postgresql://{user}@ep-foo-bar-1234.{self.domain}/postgres" async with httpx.AsyncClient(verify=str(self.test_output_dir / "proxy.crt")) as client: response = await client.post( - f"https://{self.domain}:{self.external_http_port}/sql", + f"https://apiauth.{self.domain}:{self.external_http_port}/sql", json={"query": query, "params": args}, headers={ "Neon-Connection-String": connstr,