mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Handle escaped dot in json path in the QueryParser. (#1682)
This commit is contained in:
@@ -261,6 +261,39 @@ pub struct JsonTermWriter<'a> {
|
||||
path_stack: Vec<usize>,
|
||||
}
|
||||
|
||||
/// Splits a json path supplied to the query parser into its segments, in
/// such a way that `.` can be escaped.
///
/// An unescaped `.` closes the current segment and starts a new one. A
/// backslash escapes the character that follows it: that character is
/// appended to the current segment verbatim and the backslash is dropped.
///
/// In other words,
/// - `k8s.node` ends up as `["k8s", "node"]`.
/// - `k8s\.node` ends up as `["k8s.node"]`.
/// - `k8s\\node` ends up as `["k8s\node"]`.
/// - `k8s\node` ends up as `["k8snode"]`.
///
/// Leading, trailing and consecutive unescaped dots yield empty segments
/// (`.toto` ends up as `["", "toto"]`), so the returned vector always holds
/// at least one segment; the empty path ends up as `[""]`.
///
/// A trailing backslash has nothing left to escape. It is kept as a literal
/// `\` instead of being silently discarded, so that `toto\` and `toto` do not
/// collapse into the same path.
fn split_json_path(json_path: &str) -> Vec<String> {
    let mut json_path_segments = Vec::new();
    let mut buffer = String::new();
    let mut chars = json_path.chars();
    // The iterator is advanced manually because an escape consumes two
    // characters at once.
    while let Some(ch) = chars.next() {
        match ch {
            // Escape: take the next character literally. When the backslash
            // is the last character of the path, keep the backslash itself.
            '\\' => buffer.push(chars.next().unwrap_or('\\')),
            // Unescaped separator: flush the current segment (possibly empty).
            '.' => json_path_segments.push(std::mem::take(&mut buffer)),
            _ => buffer.push(ch),
        }
    }
    // Whatever remains is the last segment, even when it is empty.
    json_path_segments.push(buffer);
    json_path_segments
}
|
||||
|
||||
impl<'a> JsonTermWriter<'a> {
|
||||
pub fn from_field_and_json_path(
|
||||
field: Field,
|
||||
@@ -269,8 +302,8 @@ impl<'a> JsonTermWriter<'a> {
|
||||
) -> Self {
|
||||
term_buffer.set_field_and_type(field, Type::Json);
|
||||
let mut json_term_writer = Self::wrap(term_buffer);
|
||||
for segment in json_path.split('.') {
|
||||
json_term_writer.push_path_segment(segment);
|
||||
for segment in split_json_path(json_path) {
|
||||
json_term_writer.push_path_segment(&segment);
|
||||
}
|
||||
json_term_writer
|
||||
}
|
||||
@@ -350,7 +383,7 @@ impl<'a> JsonTermWriter<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::JsonTermWriter;
|
||||
use super::{split_json_path, JsonTermWriter};
|
||||
use crate::schema::{Field, Type};
|
||||
use crate::Term;
|
||||
|
||||
@@ -495,4 +528,48 @@ mod tests {
|
||||
json_writer.set_str("pink");
|
||||
assert_eq!(json_writer.path(), b"color\x01hue");
|
||||
}
|
||||
|
||||
#[test]
fn test_split_json_path_simple() {
    // One unescaped dot separates exactly two segments.
    assert_eq!(split_json_path("titi.toto"), vec!["titi", "toto"]);
}
|
||||
|
||||
#[test]
fn test_split_json_path_single_segment() {
    // A path without any dot is returned as a single segment.
    assert_eq!(split_json_path("toto"), vec!["toto"]);
}
|
||||
|
||||
#[test]
fn test_split_json_path_trailing_dot() {
    // A dot at the very end opens a final, empty segment.
    assert_eq!(split_json_path("toto."), vec!["toto", ""]);
}
|
||||
|
||||
#[test]
fn test_split_json_path_heading_dot() {
    // A dot at the very start closes an initial, empty segment.
    assert_eq!(split_json_path(".toto"), vec!["", "toto"]);
}
|
||||
|
||||
#[test]
fn test_split_json_path_escaped_dot() {
    // An escaped dot stays inside its segment instead of splitting the path.
    assert_eq!(split_json_path(r#"toto\.titi"#), vec!["toto.titi"]);
    assert_eq!(
        split_json_path(r#"k8s\.container\.name"#),
        vec!["k8s.container.name"]
    );
}
|
||||
|
||||
#[test]
fn test_split_json_path_escaped_backslash() {
    // Two backslashes in the input collapse to a single literal backslash.
    assert_eq!(split_json_path(r#"toto\\titi"#), vec![r#"toto\titi"#]);
}
|
||||
|
||||
#[test]
fn test_split_json_path_escaped_normal_letter() {
    // Escaping an ordinary letter drops the backslash and keeps the letter.
    assert_eq!(split_json_path(r#"toto\titi"#), vec![r#"tototiti"#]);
}
|
||||
}
|
||||
|
||||
@@ -58,13 +58,15 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
|
||||
#[cfg(feature = "mmap")]
|
||||
#[cfg(test)]
|
||||
mod tests_mmap {
|
||||
use crate::schema::{self, Schema};
|
||||
use crate::collector::Count;
|
||||
use crate::query::QueryParser;
|
||||
use crate::schema::{Schema, STORED, TEXT};
|
||||
use crate::{Index, Term};
|
||||
|
||||
#[test]
|
||||
fn test_advance_delete_bug() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||
let index = Index::create_from_tempdir(schema_builder.build())?;
|
||||
let mut index_writer = index.writer_for_tests()?;
|
||||
// there must be one deleted document in the segment
|
||||
@@ -75,7 +77,26 @@ mod tests_mmap {
|
||||
index_writer.add_document(doc!(text_field=>"c"))?;
|
||||
}
|
||||
index_writer.commit()?;
|
||||
index_writer.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
fn test_json_field_espace() {
    // Index a single document whose JSON object has a key containing dots.
    let mut schema_builder = Schema::builder();
    let json_field = schema_builder.add_json_field("json", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer = index.writer_for_tests().unwrap();
    let payload = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
    writer.add_document(doc!(json_field=>payload)).unwrap();
    writer.commit().unwrap();

    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    assert_eq!(searcher.num_docs(), 1);

    // The dots of the key are escaped in the query, so the whole
    // `k8s.container.name` is addressed as a single path segment.
    let query = QueryParser::for_index(&index, Vec::new())
        .parse_query(r#"json.k8s\.container\.name:prometheus"#)
        .unwrap();
    assert_eq!(searcher.search(&query, &Count).unwrap(), 1);
}
|
||||
}
|
||||
|
||||
@@ -1062,6 +1062,28 @@ mod test {
|
||||
);
|
||||
}
|
||||
|
||||
fn extract_query_term_json_path(query: &str) -> String {
|
||||
let LogicalAst::Leaf(literal) = parse_query_to_logical_ast(query, false).unwrap() else {
|
||||
panic!();
|
||||
};
|
||||
let LogicalLiteral::Term(term) = *literal else {
|
||||
panic!();
|
||||
};
|
||||
std::str::from_utf8(term.value_bytes()).unwrap().to_string()
|
||||
}
|
||||
|
||||
#[test]
fn test_json_field_query_with_espaced_dot() {
    // (query, expected term value) pairs: unescaped dots separate path
    // segments, escaped dots stay inside a single segment.
    let cases = [
        (
            r#"json.k8s.node.name:hello"#,
            "k8s\u{1}node\u{1}name\0shello",
        ),
        (r#"json.k8s\.node\.name:hello"#, "k8s.node.name\0shello"),
    ];
    for (query, expected_term_value) in cases {
        assert_eq!(extract_query_term_json_path(query), expected_term_value);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_json_field_possibly_a_number() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
|
||||
Reference in New Issue
Block a user