From 9a081c230f6b1d2bff7fea1e201631a7e7ee4328 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 30 May 2024 12:02:38 +0100 Subject: [PATCH] proxy: lazily parse startup pg params (#7905) ## Problem proxy params being a `HashMap` when it contains just ``` application_name: psql database: neondb user: neondb_owner ``` is quite wasteful allocation wise. ## Summary of changes Keep the params in the wire protocol form, eg: ``` application_name\0psql\0database\0neondb\0user\0neondb_owner\0 ``` Using a linear search for the map is fast enough at small sizes, which is the normal case. --- Cargo.lock | 1 + libs/pq_proto/Cargo.toml | 1 + libs/pq_proto/src/lib.rs | 80 +++++++++++++++------------ proxy/src/serverless/sql_over_http.rs | 4 +- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 794486e2e1..44edbabaf6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4113,6 +4113,7 @@ version = "0.1.0" dependencies = [ "byteorder", "bytes", + "itertools", "pin-project-lite", "postgres-protocol", "rand 0.8.5", diff --git a/libs/pq_proto/Cargo.toml b/libs/pq_proto/Cargo.toml index 6eeb3bafef..8afabe670e 100644 --- a/libs/pq_proto/Cargo.toml +++ b/libs/pq_proto/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] bytes.workspace = true byteorder.workspace = true +itertools.workspace = true pin-project-lite.workspace = true postgres-protocol.workspace = true rand.workspace = true diff --git a/libs/pq_proto/src/lib.rs b/libs/pq_proto/src/lib.rs index f8e578c6f2..cee3742017 100644 --- a/libs/pq_proto/src/lib.rs +++ b/libs/pq_proto/src/lib.rs @@ -7,8 +7,9 @@ pub mod framed; use byteorder::{BigEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::{borrow::Cow, collections::HashMap, fmt, io, str}; +use std::{borrow::Cow, fmt, io, str}; // re-export for use in utils pageserver_feedback.rs pub use postgres_protocol::PG_EPOCH; @@ -50,20 +51,37 @@ pub enum FeStartupPacket { }, } +#[derive(Debug, Clone, Default)] +pub struct StartupMessageParamsBuilder { + params: BytesMut, +} + +impl StartupMessageParamsBuilder { + /// Set parameter's value by its name. + /// name and value must not contain a \0 byte + pub fn insert(&mut self, name: &str, value: &str) { + self.params.put(name.as_bytes()); + self.params.put(&b"\0"[..]); + self.params.put(value.as_bytes()); + self.params.put(&b"\0"[..]); + } + + pub fn freeze(self) -> StartupMessageParams { + StartupMessageParams { + params: self.params.freeze(), + } + } +} + #[derive(Debug, Clone, Default)] pub struct StartupMessageParams { - params: HashMap, + params: Bytes, } impl StartupMessageParams { - /// Set parameter's value by its name. - pub fn insert(&mut self, name: &str, value: &str) { - self.params.insert(name.to_owned(), value.to_owned()); - } - /// Get parameter's value by its name. pub fn get(&self, name: &str) -> Option<&str> { - self.params.get(name).map(|s| s.as_str()) + self.iter().find_map(|(k, v)| (k == name).then_some(v)) } /// Split command-line options according to PostgreSQL's logic, @@ -117,15 +135,19 @@ impl StartupMessageParams { /// Iterate through key-value pairs in an arbitrary order. pub fn iter(&self) -> impl Iterator { - self.params.iter().map(|(k, v)| (k.as_str(), v.as_str())) + let params = + std::str::from_utf8(&self.params).expect("should be validated as utf8 already"); + params.split_terminator('\0').tuples() } // This function is mostly useful in tests. #[doc(hidden)] pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self { - Self { - params: pairs.map(|(k, v)| (k.to_owned(), v.to_owned())).into(), + let mut b = StartupMessageParamsBuilder::default(); + for (k, v) in pairs { + b.insert(k, v) } + b.freeze() } } @@ -350,35 +372,21 @@ impl FeStartupPacket { (major_version, minor_version) => { // StartupMessage - // Parse pairs of null-terminated strings (key, value). - // See `postgres: ProcessStartupPacket, build_startup_packet`. - let mut tokens = str::from_utf8(&msg) - .map_err(|_e| { - ProtocolError::BadMessage("StartupMessage params: invalid utf-8".to_owned()) - })? - .strip_suffix('\0') // drop packet's own null - .ok_or_else(|| { - ProtocolError::Protocol( - "StartupMessage params: missing null terminator".to_string(), - ) - })? - .split_terminator('\0'); - - let mut params = HashMap::new(); - while let Some(name) = tokens.next() { - let value = tokens.next().ok_or_else(|| { - ProtocolError::Protocol( - "StartupMessage params: key without value".to_string(), - ) - })?; - - params.insert(name.to_owned(), value.to_owned()); - } + let s = str::from_utf8(&msg).map_err(|_e| { + ProtocolError::BadMessage("StartupMessage params: invalid utf-8".to_owned()) + })?; + let s = s.strip_suffix('\0').ok_or_else(|| { + ProtocolError::Protocol( + "StartupMessage params: missing null terminator".to_string(), + ) + })?; FeStartupPacket::StartupMessage { major_version, minor_version, - params: StartupMessageParams { params }, + params: StartupMessageParams { + params: msg.slice_ref(s.as_bytes()), + }, } } }; diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 9a7cdc8577..9d6a475aeb 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -17,7 +17,7 @@ use hyper1::http::HeaderValue; use hyper1::Response; use hyper1::StatusCode; use hyper1::{HeaderMap, Request}; -use pq_proto::StartupMessageParams; +use pq_proto::StartupMessageParamsBuilder; use serde_json::json; use serde_json::Value; use tokio::time; @@ -193,7 +193,7 @@ fn get_conn_info( let mut options = Option::None; - let mut params = StartupMessageParams::default(); + let mut params = StartupMessageParamsBuilder::default(); params.insert("user", &username); params.insert("database", &dbname); for (key, value) in pairs {