make use of esc string opt

Conrad Ludgate
2025-07-17 12:21:24 +01:00
parent d6f4dc4949
commit ce6bbca8d7
6 changed files with 66 additions and 47 deletions
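In short, JSON object keys that were previously passed as plain string literals (escaped at runtime), or wrapped by hand with &json::EscapedStr::from_static(...), now go through the new json::esc! macro, which checks at compile time that the literal needs no escaping. A minimal before/after sketch, adapted from the crate's own tests below and assuming the crate is consumed under the name json, as the other call sites in this commit do:

    use json::{Null, ValueSer, esc};

    fn main() {
        let mut buf = vec![];
        let mut object = ValueSer::new(&mut buf).object();
        // Before this commit the keys were plain &str literals, e.g. object.entry("foo", "bar"),
        // escaped at runtime; esc!("foo") now yields a compile-time-checked &EscapedStr.
        object.entry(esc!("foo"), "bar");
        object.entry(esc!("baz"), Null);
        object.finish();
        assert_eq!(buf, br#"{"foo":"bar","baz":null}"#);
    }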

View File

@@ -19,7 +19,7 @@ struct Bar {
pub fn escape(c: &mut Criterion) {
c.bench_function("small", |b| bench_json_encode_inner(b, "object_key"));
c.bench_function("small_static", |b| {
-bench_json_encode_inner(b, &json::EscapedStr::from_static("object_key"));
+bench_json_encode_inner(b, json::esc!("object_key"));
});
c.bench_function("large_fmt", |b| {
let value = Foo {

View File

@@ -278,14 +278,14 @@ impl<'buf> ListSer<'buf> {
#[cfg(test)]
mod tests {
-use crate::{Null, ValueSer};
+use crate::{Null, ValueSer, esc};
#[test]
fn object() {
let mut buf = vec![];
let mut object = ValueSer::new(&mut buf).object();
-object.entry("foo", "bar");
-object.entry("baz", Null);
+object.entry(esc!("foo"), "bar");
+object.entry(esc!("baz"), Null);
object.finish();
assert_eq!(buf, br#"{"foo":"bar","baz":null}"#);
@@ -306,8 +306,8 @@ mod tests {
fn object_macro() {
let res = crate::value_to_string!(|obj| {
crate::value_as_object!(|obj| {
-obj.entry("foo", "bar");
-obj.entry("baz", Null);
+obj.entry(esc!("foo"), "bar");
+obj.entry(esc!("baz"), Null);
})
});
@@ -363,7 +363,7 @@ mod tests {
let entry = obj.key("2");
let entry = {
let mut nested_obj = entry.object();
-nested_obj.entry("foo", "bar");
+nested_obj.entry(esc!("foo"), "bar");
nested_obj.rollback()
};

View File

@@ -84,3 +84,11 @@ macro_rules! value_as_list {
res
}};
}
+/// A helper macro that ensures the provided string literal does not need escaping.
+#[macro_export]
+macro_rules! esc {
+($str:literal) => {
+const { $crate::EscapedStr::from_static($str) }
+};
+}
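Because the expansion is wrapped in a const block, a literal that would need escaping turns the from_static panic into a compile-time error instead of a runtime one. A short usage sketch, again assuming the crate is used as json (the commented-out call is illustrative only and would be rejected by the compiler):

    // Fine: no byte in the literal needs JSON escaping.
    let key: &'static json::EscapedStr = json::esc!("object_key");

    // Would not compile: the embedded quotes need escaping, so from_static
    // panics during constant evaluation of the const block.
    // let bad = json::esc!("not \"fine\"");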

View File

@@ -8,14 +8,12 @@
//!
//! With modifications by Conrad Ludgate on behalf of Databricks.
-use std::{
-borrow::Cow,
-fmt::{self, Write},
-};
+use std::fmt::{self, Write};
use crate::{KeyEncoder, ValueEncoder, ValueSer};
-pub struct EscapedStr(Cow<'static, [u8]>);
+#[repr(transparent)]
+pub struct EscapedStr([u8]);
impl EscapedStr {
/// Assumes the string does not need escaping.
@@ -23,31 +21,37 @@ impl EscapedStr {
/// # Panics
///
/// This will panic if the string does need escaping.
-pub const fn from_static(s: &'static str) -> Self {
+#[inline(always)]
+pub const fn from_static(s: &'static str) -> &'static Self {
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
let byte = bytes[i];
-let escape = ESCAPE[byte as usize];
-assert!(escape == 0, "a character in the string needed escaping");
+if byte < 0x20 || byte == b'"' || byte == b'\\' {
+panic!("the string needs escaping");
+}
i += 1;
}
-Self(Cow::Borrowed(bytes))
+// safety: this EscapedStr is transparent over [u8].
+unsafe { std::mem::transmute::<&[u8], &EscapedStr>(bytes) }
}
/// Escapes the string eagerly.
-pub fn escape(s: &str) -> Self {
+pub fn escape(s: &str) -> Box<Self> {
let mut writer = Vec::with_capacity(s.len());
Collect { buf: &mut writer }
.write_str(s)
.expect("formatting should not error");
-Self(Cow::Owned(writer))
+let bytes = writer.into_boxed_slice();
+// safety: this EscapedStr is transparent over [u8].
+unsafe { std::mem::transmute::<Box<[u8]>, Box<EscapedStr>>(bytes) }
}
}
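The representation change above swaps the owned Cow<'static, [u8]> for an unsized newtype: #[repr(transparent)] over [u8] guarantees identical layout (including the fat pointer's length metadata), which is what makes the two transmutes sound. It also lets from_static stay const and hand out a borrowed &'static EscapedStr, while escape returns an owned Box<EscapedStr>; between them they cover the two cases the old Cow held, without a discriminant. A stripped-down sketch of the same pattern, using a hypothetical Escaped type rather than this crate's:

    #[repr(transparent)]
    struct Escaped([u8]);

    impl Escaped {
        const fn from_static(s: &'static str) -> &'static Escaped {
            // SAFETY: Escaped is #[repr(transparent)] over [u8], so &[u8] and
            // &Escaped have identical layout, including the length metadata.
            unsafe { std::mem::transmute::<&[u8], &Escaped>(s.as_bytes()) }
        }

        fn from_owned(bytes: Vec<u8>) -> Box<Escaped> {
            // SAFETY: same layout argument, this time for Box<[u8]> -> Box<Escaped>.
            unsafe { std::mem::transmute::<Box<[u8]>, Box<Escaped>>(bytes.into_boxed_slice()) }
        }
    }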

View File

@@ -552,21 +552,23 @@ impl EventFormatter {
let serializer = json::ValueSer::new(&mut self.logline_buffer);
json::value_as_object!(|serializer| {
// Timestamp comes first, so raw lines can be sorted by timestamp.
-serializer.entry("timestamp", &*timestamp);
+serializer.entry(json::esc!("timestamp"), &*timestamp);
// Level next.
-serializer.entry("level", meta.level().as_str());
+serializer.entry(json::esc!("level"), meta.level().as_str());
// Message next.
-let mut message_extractor =
-MessageFieldExtractor::new(serializer.key("message"), skipped_field_indices);
+let mut message_extractor = MessageFieldExtractor::new(
+serializer.key(json::esc!("message")),
+skipped_field_indices,
+);
event.record(&mut message_extractor);
message_extractor.finish();
// Direct message fields.
{
let mut message_skipper = MessageFieldSkipper::new(
-serializer.key("fields").object(),
+serializer.key(json::esc!("fields")).object(),
skipped_field_indices,
);
event.record(&mut message_skipper);
@@ -579,7 +581,7 @@ impl EventFormatter {
let mut extracted = ExtractedSpanFields::new(extract_fields);
-let spans = serializer.key("spans");
+let spans = serializer.key(json::esc!("spans"));
json::value_as_object!(|spans| {
let parent_spans = ctx
.event_span(event)
@@ -599,6 +601,8 @@ impl EventFormatter {
json::value_as_object!(|span_fields| {
for (field, value) in std::iter::zip(span.metadata().fields(), values) {
if let Some(value) = value {
+// we don't use the esc! macro here, as it's intended for literal keys only.
+// the field name might need escaping, and esc! would panic in that case.
span_fields.entry(field.name(), value);
}
}
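The comment added in this hunk marks the boundary between the two kinds of keys: literal keys go through esc! and are validated at compile time, while dynamic keys such as field.name() are passed as &str and escaped at runtime (presumably via the KeyEncoder trait imported in the escape module). A minimal sketch of both flavors, composed with the value_to_string!/value_as_object! macros the same way the tests and the sql-over-http code below use them:

    let out = json::value_to_string!(|obj| json::value_as_object!(|obj| {
        // Literal key: validated at compile time by esc!, written with no escaping work.
        obj.entry(json::esc!("level"), "INFO");

        // Dynamic key: not a literal, so esc! does not apply; the &str key is
        // escaped at runtime if it needs it.
        let field_name: &str = "user.name";
        obj.entry(field_name, "dynamic value");
    }));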
@@ -610,37 +614,37 @@ impl EventFormatter {
let pid = std::process::id();
// Skip adding pid 1 to reduce noise for services running in containers.
if pid != 1 {
-serializer.entry("process_id", pid);
+serializer.entry(json::esc!("process_id"), pid);
}
-THREAD_ID.with(|tid| serializer.entry("thread_id", tid));
+THREAD_ID.with(|tid| serializer.entry(json::esc!("thread_id"), tid));
// TODO: tls cache? name could change
if let Some(thread_name) = std::thread::current().name()
&& !thread_name.is_empty()
&& thread_name != "tokio-runtime-worker"
{
-serializer.entry("thread_name", thread_name);
+serializer.entry(json::esc!("thread_name"), thread_name);
}
if let Some(task_id) = tokio::task::try_id() {
-serializer.entry("task_id", format_args!("{task_id}"));
+serializer.entry(json::esc!("task_id"), format_args!("{task_id}"));
}
-serializer.entry("target", meta.target());
+serializer.entry(json::esc!("target"), meta.target());
// Skip adding module if it's the same as target.
if let Some(module) = meta.module_path()
&& module != meta.target()
{
-serializer.entry("module", module);
+serializer.entry(json::esc!("module"), module);
}
if let Some(file) = meta.file() {
if let Some(line) = meta.line() {
-serializer.entry("src", format_args!("{file}:{line}"));
+serializer.entry(json::esc!("src"), format_args!("{file}:{line}"));
} else {
-serializer.entry("src", file);
+serializer.entry(json::esc!("src"), file);
}
}
@@ -649,13 +653,16 @@ impl EventFormatter {
let otel_spanref = otel_context.span();
let span_context = otel_spanref.span_context();
if span_context.is_valid() {
-serializer.entry("trace_id", format_args!("{}", span_context.trace_id()));
+serializer.entry(
+json::esc!("trace_id"),
+format_args!("{}", span_context.trace_id()),
+);
}
}
if extracted.has_values() {
// TODO: add fields from event, too?
-let extract = serializer.key("extract");
+let extract = serializer.key(json::esc!("extract"));
json::value_as_object!(|extract| {
for (key, value) in std::iter::zip(extracted.names, extracted.values) {
if let Some(value) = value {

View File

@@ -875,7 +875,7 @@ async fn query_batch_to_json(
headers: HttpHeaders,
) -> Result<String, SqlOverHttpError> {
let json_output = json::value_to_string!(|obj| json::value_as_object!(|obj| {
-let results = obj.key("results");
+let results = obj.key(json::esc!("results"));
json::value_as_list!(|results| {
query_batch(config, cancel, tx, queries, headers, results).await?;
});
@@ -900,17 +900,17 @@ async fn query_to_json<T: GenericClient>(
.map_err(SqlOverHttpError::Postgres)?;
let query_acknowledged = Instant::now();
-let mut json_fields = output.key("fields").list();
+let mut json_fields = output.key(json::esc!("fields")).list();
for c in row_stream.statement.columns() {
let json_field = json_fields.entry();
json::value_as_object!(|json_field| {
-json_field.entry("name", c.name());
-json_field.entry("dataTypeID", c.type_().oid());
-json_field.entry("tableID", c.table_oid());
-json_field.entry("columnID", c.column_id());
-json_field.entry("dataTypeSize", c.type_size());
-json_field.entry("dataTypeModifier", c.type_modifier());
-json_field.entry("format", "text");
+json_field.entry(json::esc!("name"), c.name());
+json_field.entry(json::esc!("dataTypeID"), c.type_().oid());
+json_field.entry(json::esc!("tableID"), c.table_oid());
+json_field.entry(json::esc!("columnID"), c.column_id());
+json_field.entry(json::esc!("dataTypeSize"), c.type_size());
+json_field.entry(json::esc!("dataTypeModifier"), c.type_modifier());
+json_field.entry(json::esc!("format"), "text");
});
}
json_fields.finish();
@@ -922,7 +922,7 @@ async fn query_to_json<T: GenericClient>(
// around to get a command tag. Also check that the response is not too
// big.
let mut rows = 0;
-let mut json_rows = output.key("rows").list();
+let mut json_rows = output.key(json::esc!("rows")).list();
while let Some(row) = row_stream.next().await {
let row = row.map_err(SqlOverHttpError::Postgres)?;
@@ -971,9 +971,9 @@ async fn query_to_json<T: GenericClient>(
"finished executing query"
);
-output.entry("command", command_tag_name);
-output.entry("rowCount", command_tag_count);
-output.entry("rowAsArray", array_mode);
+output.entry(json::esc!("command"), command_tag_name);
+output.entry(json::esc!("rowCount"), command_tag_count);
+output.entry(json::esc!("rowAsArray"), array_mode);
output.finish();
Ok(ready)