Handle escaped dot in json path in the QueryParser. (#1682)

This commit is contained in:
Paul Masurel
2022-11-16 07:18:34 +09:00
committed by GitHub
parent ca6231170e
commit 2a39289a1b
3 changed files with 126 additions and 6 deletions

View File

@@ -261,6 +261,39 @@ pub struct JsonTermWriter<'a> {
path_stack: Vec<usize>, path_stack: Vec<usize>,
} }
/// Breaks a query-parser json path into its segments, honouring `\` as an
/// escape character.
///
/// An unescaped `.` ends the current segment. A `\` makes the character that
/// follows it literal (so `\.` is a plain dot and `\\` is a plain backslash);
/// the backslash itself is never emitted. A `\` that is the very last
/// character of the input has nothing to escape and is simply dropped.
///
/// Examples:
/// - `k8s.node` becomes `["k8s", "node"]`.
/// - `k8s\.node` becomes `["k8s.node"]`.
///
/// The result always holds at least one segment; empty segments are kept
/// (e.g. `toto.` becomes `["toto", ""]`).
fn split_json_path(json_path: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut current = String::new();
    let mut chars = json_path.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Consume the escaped character right away and keep it verbatim.
                if let Some(escaped) = chars.next() {
                    current.push(escaped);
                }
            }
            // Segment boundary: hand over the accumulated text and start afresh.
            '.' => segments.push(std::mem::take(&mut current)),
            other => current.push(other),
        }
    }
    // Whatever is left (possibly empty) is the final segment.
    segments.push(current);
    segments
}
impl<'a> JsonTermWriter<'a> { impl<'a> JsonTermWriter<'a> {
pub fn from_field_and_json_path( pub fn from_field_and_json_path(
field: Field, field: Field,
@@ -269,8 +302,8 @@ impl<'a> JsonTermWriter<'a> {
) -> Self { ) -> Self {
term_buffer.set_field_and_type(field, Type::Json); term_buffer.set_field_and_type(field, Type::Json);
let mut json_term_writer = Self::wrap(term_buffer); let mut json_term_writer = Self::wrap(term_buffer);
for segment in json_path.split('.') { for segment in split_json_path(json_path) {
json_term_writer.push_path_segment(segment); json_term_writer.push_path_segment(&segment);
} }
json_term_writer json_term_writer
} }
@@ -350,7 +383,7 @@ impl<'a> JsonTermWriter<'a> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::JsonTermWriter; use super::{split_json_path, JsonTermWriter};
use crate::schema::{Field, Type}; use crate::schema::{Field, Type};
use crate::Term; use crate::Term;
@@ -495,4 +528,48 @@ mod tests {
json_writer.set_str("pink"); json_writer.set_str("pink");
assert_eq!(json_writer.path(), b"color\x01hue"); assert_eq!(json_writer.path(), b"color\x01hue");
} }
#[test]
fn test_split_json_path_simple() {
    // A single unescaped dot separates the path into two segments.
    let segments = split_json_path("titi.toto");
    assert_eq!(segments, vec!["titi", "toto"]);
}
#[test]
fn test_split_json_path_single_segment() {
    // Without any dot the whole input is the one and only segment.
    let segments = split_json_path("toto");
    assert_eq!(segments, vec!["toto"]);
}
#[test]
fn test_split_json_path_trailing_dot() {
    // A dot at the very end leaves an empty final segment behind.
    let segments = split_json_path("toto.");
    assert_eq!(segments, vec!["toto", ""]);
}
#[test]
fn test_split_json_path_heading_dot() {
    // A dot at the very start produces an empty first segment.
    let segments = split_json_path(".toto");
    assert_eq!(segments, vec!["", "toto"]);
}
#[test]
fn test_split_json_path_escaped_dot() {
    // An escaped dot stays inside the segment instead of splitting it.
    let one_escape = split_json_path(r#"toto\.titi"#);
    assert_eq!(one_escape, vec!["toto.titi"]);

    // Several escaped dots in the same path still give a single segment.
    let two_escapes = split_json_path(r#"k8s\.container\.name"#);
    assert_eq!(two_escapes, vec!["k8s.container.name"]);
}
#[test]
fn test_split_json_path_escaped_backslash() {
    // A doubled backslash collapses into one literal backslash.
    let segments = split_json_path(r#"toto\\titi"#);
    assert_eq!(segments, vec![r#"toto\titi"#]);
}
#[test]
fn test_split_json_path_escaped_normal_letter() {
    // Escaping an ordinary letter just drops the backslash.
    let segments = split_json_path(r#"toto\titi"#);
    assert_eq!(segments, vec![r#"tototiti"#]);
}
} }

View File

@@ -58,13 +58,15 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
#[cfg(feature = "mmap")] #[cfg(feature = "mmap")]
#[cfg(test)] #[cfg(test)]
mod tests_mmap { mod tests_mmap {
use crate::schema::{self, Schema}; use crate::collector::Count;
use crate::query::QueryParser;
use crate::schema::{Schema, STORED, TEXT};
use crate::{Index, Term}; use crate::{Index, Term};
#[test] #[test]
fn test_advance_delete_bug() -> crate::Result<()> { fn test_advance_delete_bug() -> crate::Result<()> {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT); let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_from_tempdir(schema_builder.build())?; let index = Index::create_from_tempdir(schema_builder.build())?;
let mut index_writer = index.writer_for_tests()?; let mut index_writer = index.writer_for_tests()?;
// there must be one deleted document in the segment // there must be one deleted document in the segment
@@ -75,7 +77,26 @@ mod tests_mmap {
index_writer.add_document(doc!(text_field=>"c"))?; index_writer.add_document(doc!(text_field=>"c"))?;
} }
index_writer.commit()?; index_writer.commit()?;
index_writer.commit()?;
Ok(()) Ok(())
} }
#[test]
fn test_json_field_espace() {
    // Schema with one JSON field named "json", flagged TEXT | STORED.
    let mut builder = Schema::builder();
    let json_field = builder.add_json_field("json", TEXT | STORED);
    let index = Index::create_in_ram(builder.build());

    // Index a single document whose first key contains literal dots.
    let mut writer = index.writer_for_tests().unwrap();
    let document = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
    writer.add_document(doc!(json_field=>document)).unwrap();
    writer.commit().unwrap();

    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    assert_eq!(searcher.num_docs(), 1);

    // The dots of the key are escaped in the query so that the key is
    // addressed as one path segment; the document is expected to match.
    let query_parser = QueryParser::for_index(&index, Vec::new());
    let escaped_query = query_parser
        .parse_query(r#"json.k8s\.container\.name:prometheus"#)
        .unwrap();
    let hit_count = searcher.search(&escaped_query, &Count).unwrap();
    assert_eq!(hit_count, 1);
}
} }

View File

@@ -1062,6 +1062,28 @@ mod test {
); );
} }
/// Test helper: parses `query` into a logical AST, expects it to be a single
/// leaf holding a term literal, and returns that term's value bytes decoded
/// as a UTF-8 string.
///
/// Panics if parsing fails, if the AST is not a term leaf, or if the value
/// bytes are not valid UTF-8.
fn extract_query_term_json_path(query: &str) -> String {
    let logical_ast = parse_query_to_logical_ast(query, false).unwrap();
    let term = match logical_ast {
        LogicalAst::Leaf(literal) => match *literal {
            LogicalLiteral::Term(term) => term,
            _ => panic!(),
        },
        _ => panic!(),
    };
    let value_bytes = term.value_bytes();
    std::str::from_utf8(value_bytes).unwrap().to_string()
}
#[test]
fn test_json_field_query_with_espaced_dot() {
    // Unescaped dots: every key becomes its own path segment, and the
    // segments show up separated by `\u{1}` in the term value.
    let unescaped = extract_query_term_json_path(r#"json.k8s.node.name:hello"#);
    assert_eq!(unescaped, "k8s\u{1}node\u{1}name\0shello");

    // Escaped dots: the dots remain part of one single path segment.
    let escaped = extract_query_term_json_path(r#"json.k8s\.node\.name:hello"#);
    assert_eq!(escaped, "k8s.node.name\0shello");
}
#[test] #[test]
fn test_json_field_possibly_a_number() { fn test_json_field_possibly_a_number() {
test_parse_query_to_logical_ast_helper( test_parse_query_to_logical_ast_helper(