From ce6bbca8d7cd6eec44c11269fe9aa4484316dde9 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 17 Jul 2025 12:21:24 +0100 Subject: [PATCH] make use of esc string opt --- libs/proxy/json/benches/escape.rs | 2 +- libs/proxy/json/src/lib.rs | 12 ++++----- libs/proxy/json/src/macros.rs | 8 ++++++ libs/proxy/json/src/str.rs | 26 ++++++++++-------- proxy/src/logging.rs | 39 ++++++++++++++++----------- proxy/src/serverless/sql_over_http.rs | 26 +++++++++--------- 6 files changed, 66 insertions(+), 47 deletions(-) diff --git a/libs/proxy/json/benches/escape.rs b/libs/proxy/json/benches/escape.rs index 43571b52bd..f8b4a15f3b 100644 --- a/libs/proxy/json/benches/escape.rs +++ b/libs/proxy/json/benches/escape.rs @@ -19,7 +19,7 @@ struct Bar { pub fn escape(c: &mut Criterion) { c.bench_function("small", |b| bench_json_encode_inner(b, "object_key")); c.bench_function("small_static", |b| { - bench_json_encode_inner(b, &json::EscapedStr::from_static("object_key")); + bench_json_encode_inner(b, json::esc!("object_key")); }); c.bench_function("large_fmt", |b| { let value = Foo { diff --git a/libs/proxy/json/src/lib.rs b/libs/proxy/json/src/lib.rs index b4b2cf1a68..77b8374b92 100644 --- a/libs/proxy/json/src/lib.rs +++ b/libs/proxy/json/src/lib.rs @@ -278,14 +278,14 @@ impl<'buf> ListSer<'buf> { #[cfg(test)] mod tests { - use crate::{Null, ValueSer}; + use crate::{Null, ValueSer, esc}; #[test] fn object() { let mut buf = vec![]; let mut object = ValueSer::new(&mut buf).object(); - object.entry("foo", "bar"); - object.entry("baz", Null); + object.entry(esc!("foo"), "bar"); + object.entry(esc!("baz"), Null); object.finish(); assert_eq!(buf, br#"{"foo":"bar","baz":null}"#); @@ -306,8 +306,8 @@ mod tests { fn object_macro() { let res = crate::value_to_string!(|obj| { crate::value_as_object!(|obj| { - obj.entry("foo", "bar"); - obj.entry("baz", Null); + obj.entry(esc!("foo"), "bar"); + obj.entry(esc!("baz"), Null); }) }); @@ -363,7 +363,7 @@ mod tests { let entry = obj.key("2"); let entry = { let mut nested_obj = entry.object(); - nested_obj.entry("foo", "bar"); + nested_obj.entry(esc!("foo"), "bar"); nested_obj.rollback() }; diff --git a/libs/proxy/json/src/macros.rs b/libs/proxy/json/src/macros.rs index d3b5cfed10..b83b3d4755 100644 --- a/libs/proxy/json/src/macros.rs +++ b/libs/proxy/json/src/macros.rs @@ -84,3 +84,11 @@ macro_rules! value_as_list { res }}; } + +/// A helper macro that ensures the provided string literal does not need escaping. +#[macro_export] +macro_rules! esc { + ($str:literal) => { + const { $crate::EscapedStr::from_static($str) } + }; +} diff --git a/libs/proxy/json/src/str.rs b/libs/proxy/json/src/str.rs index 67fb2950b1..115f238ff4 100644 --- a/libs/proxy/json/src/str.rs +++ b/libs/proxy/json/src/str.rs @@ -8,14 +8,12 @@ //! //! With modifications by Conrad Ludgate on behalf of Databricks. -use std::{ - borrow::Cow, - fmt::{self, Write}, -}; +use std::fmt::{self, Write}; use crate::{KeyEncoder, ValueEncoder, ValueSer}; -pub struct EscapedStr(Cow<'static, [u8]>); +#[repr(transparent)] +pub struct EscapedStr([u8]); impl EscapedStr { /// Assumes the string does not need escaping. @@ -23,31 +21,37 @@ impl EscapedStr { /// # Panics /// /// This will panic if the string does need escaping. - pub const fn from_static(s: &'static str) -> Self { + #[inline(always)] + pub const fn from_static(s: &'static str) -> &'static Self { let bytes = s.as_bytes(); let mut i = 0; while i < bytes.len() { let byte = bytes[i]; - let escape = ESCAPE[byte as usize]; - assert!(escape == 0, "a character in the string needed escaping"); + if byte < 0x20 || byte == b'"' || byte == b'\\' { + panic!("the string needs escaping"); + } i += 1; } - Self(Cow::Borrowed(bytes)) + // safety: this EscapedStr is transparent over [u8]. + unsafe { std::mem::transmute::<&[u8], &EscapedStr>(bytes) } } /// Escapes the string eagerly. - pub fn escape(s: &str) -> Self { + pub fn escape(s: &str) -> Box { let mut writer = Vec::with_capacity(s.len()); Collect { buf: &mut writer } .write_str(s) .expect("formatting should not error"); - Self(Cow::Owned(writer)) + let bytes = writer.into_boxed_slice(); + + // safety: this EscapedStr is transparent over [u8]. + unsafe { std::mem::transmute::, Box>(bytes) } } } diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index d4fd826c13..cff08f0a85 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -552,21 +552,23 @@ impl EventFormatter { let serializer = json::ValueSer::new(&mut self.logline_buffer); json::value_as_object!(|serializer| { // Timestamp comes first, so raw lines can be sorted by timestamp. - serializer.entry("timestamp", &*timestamp); + serializer.entry(json::esc!("timestamp"), &*timestamp); // Level next. - serializer.entry("level", meta.level().as_str()); + serializer.entry(json::esc!("level"), meta.level().as_str()); // Message next. - let mut message_extractor = - MessageFieldExtractor::new(serializer.key("message"), skipped_field_indices); + let mut message_extractor = MessageFieldExtractor::new( + serializer.key(json::esc!("message")), + skipped_field_indices, + ); event.record(&mut message_extractor); message_extractor.finish(); // Direct message fields. { let mut message_skipper = MessageFieldSkipper::new( - serializer.key("fields").object(), + serializer.key(json::esc!("fields")).object(), skipped_field_indices, ); event.record(&mut message_skipper); @@ -579,7 +581,7 @@ impl EventFormatter { let mut extracted = ExtractedSpanFields::new(extract_fields); - let spans = serializer.key("spans"); + let spans = serializer.key(json::esc!("spans")); json::value_as_object!(|spans| { let parent_spans = ctx .event_span(event) @@ -599,6 +601,8 @@ impl EventFormatter { json::value_as_object!(|span_fields| { for (field, value) in std::iter::zip(span.metadata().fields(), values) { if let Some(value) = value { + // we don't use entry syntax here, as that's intended for literal keys only. + // the field name might need escaping, and entry would panic in that case. span_fields.entry(field.name(), value); } } @@ -610,37 +614,37 @@ impl EventFormatter { let pid = std::process::id(); // Skip adding pid 1 to reduce noise for services running in containers. if pid != 1 { - serializer.entry("process_id", pid); + serializer.entry(json::esc!("process_id"), pid); } - THREAD_ID.with(|tid| serializer.entry("thread_id", tid)); + THREAD_ID.with(|tid| serializer.entry(json::esc!("thread_id"), tid)); // TODO: tls cache? name could change if let Some(thread_name) = std::thread::current().name() && !thread_name.is_empty() && thread_name != "tokio-runtime-worker" { - serializer.entry("thread_name", thread_name); + serializer.entry(json::esc!("thread_name"), thread_name); } if let Some(task_id) = tokio::task::try_id() { - serializer.entry("task_id", format_args!("{task_id}")); + serializer.entry(json::esc!("task_id"), format_args!("{task_id}")); } - serializer.entry("target", meta.target()); + serializer.entry(json::esc!("target"), meta.target()); // Skip adding module if it's the same as target. if let Some(module) = meta.module_path() && module != meta.target() { - serializer.entry("module", module); + serializer.entry(json::esc!("module"), module); } if let Some(file) = meta.file() { if let Some(line) = meta.line() { - serializer.entry("src", format_args!("{file}:{line}")); + serializer.entry(json::esc!("src"), format_args!("{file}:{line}")); } else { - serializer.entry("src", file); + serializer.entry(json::esc!("src"), file); } } @@ -649,13 +653,16 @@ impl EventFormatter { let otel_spanref = otel_context.span(); let span_context = otel_spanref.span_context(); if span_context.is_valid() { - serializer.entry("trace_id", format_args!("{}", span_context.trace_id())); + serializer.entry( + json::esc!("trace_id"), + format_args!("{}", span_context.trace_id()), + ); } } if extracted.has_values() { // TODO: add fields from event, too? - let extract = serializer.key("extract"); + let extract = serializer.key(json::esc!("extract")); json::value_as_object!(|extract| { for (key, value) in std::iter::zip(extracted.names, extracted.values) { if let Some(value) = value { diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 8a14f804b6..0e85c03efd 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -875,7 +875,7 @@ async fn query_batch_to_json( headers: HttpHeaders, ) -> Result { let json_output = json::value_to_string!(|obj| json::value_as_object!(|obj| { - let results = obj.key("results"); + let results = obj.key(json::esc!("results")); json::value_as_list!(|results| { query_batch(config, cancel, tx, queries, headers, results).await?; }); @@ -900,17 +900,17 @@ async fn query_to_json( .map_err(SqlOverHttpError::Postgres)?; let query_acknowledged = Instant::now(); - let mut json_fields = output.key("fields").list(); + let mut json_fields = output.key(json::esc!("fields")).list(); for c in row_stream.statement.columns() { let json_field = json_fields.entry(); json::value_as_object!(|json_field| { - json_field.entry("name", c.name()); - json_field.entry("dataTypeID", c.type_().oid()); - json_field.entry("tableID", c.table_oid()); - json_field.entry("columnID", c.column_id()); - json_field.entry("dataTypeSize", c.type_size()); - json_field.entry("dataTypeModifier", c.type_modifier()); - json_field.entry("format", "text"); + json_field.entry(json::esc!("name"), c.name()); + json_field.entry(json::esc!("dataTypeID"), c.type_().oid()); + json_field.entry(json::esc!("tableID"), c.table_oid()); + json_field.entry(json::esc!("columnID"), c.column_id()); + json_field.entry(json::esc!("dataTypeSize"), c.type_size()); + json_field.entry(json::esc!("dataTypeModifier"), c.type_modifier()); + json_field.entry(json::esc!("format"), "text"); }); } json_fields.finish(); @@ -922,7 +922,7 @@ async fn query_to_json( // around to get a command tag. Also check that the response is not too // big. let mut rows = 0; - let mut json_rows = output.key("rows").list(); + let mut json_rows = output.key(json::esc!("rows")).list(); while let Some(row) = row_stream.next().await { let row = row.map_err(SqlOverHttpError::Postgres)?; @@ -971,9 +971,9 @@ async fn query_to_json( "finished executing query" ); - output.entry("command", command_tag_name); - output.entry("rowCount", command_tag_count); - output.entry("rowAsArray", array_mode); + output.entry(json::esc!("command"), command_tag_name); + output.entry(json::esc!("rowCount"), command_tag_count); + output.entry(json::esc!("rowAsArray"), array_mode); output.finish(); Ok(ready)