mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-27 04:29:58 +00:00
Handle escaped dot in json path in the QueryParser. (#1682)
This commit is contained in:
@@ -261,6 +261,39 @@ pub struct JsonTermWriter<'a> {
|
|||||||
path_stack: Vec<usize>,
|
path_stack: Vec<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Splits a json path supplied to the query parser in such a way that
/// `.` can be escaped.
///
/// A backslash escapes the character that follows it: the escaped character is
/// copied verbatim into the current segment and the backslash itself is dropped.
/// An unescaped `.` closes the current segment and starts a new one, so the
/// returned vector always contains at least one (possibly empty) segment.
///
/// In other words,
/// - `k8s.node` ends up as `["k8s", "node"]`.
/// - `k8s\.node` ends up as `["k8s.node"]`.
/// - `k8s\\node` ends up as `["k8s\node"]`.
/// - `.node` ends up as `["", "node"]` and `node.` as `["node", ""]`.
///
/// A dangling backslash at the very end of the path has nothing to escape; it
/// is kept as a literal backslash instead of being silently discarded.
fn split_json_path(json_path: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut current = String::new();
    let mut chars = json_path.chars();
    while let Some(ch) = chars.next() {
        match ch {
            // Consume the escaped character right away; when the backslash is
            // the last character of the input, fall back to the backslash itself.
            '\\' => current.push(chars.next().unwrap_or('\\')),
            // Unescaped separator: hand the finished segment over and start afresh.
            '.' => segments.push(std::mem::take(&mut current)),
            _ => current.push(ch),
        }
    }
    // Whatever is left (possibly nothing) forms the last segment.
    segments.push(current);
    segments
}
|
||||||
|
|
||||||
impl<'a> JsonTermWriter<'a> {
|
impl<'a> JsonTermWriter<'a> {
|
||||||
pub fn from_field_and_json_path(
|
pub fn from_field_and_json_path(
|
||||||
field: Field,
|
field: Field,
|
||||||
@@ -269,8 +302,8 @@ impl<'a> JsonTermWriter<'a> {
|
|||||||
) -> Self {
|
) -> Self {
|
||||||
term_buffer.set_field_and_type(field, Type::Json);
|
term_buffer.set_field_and_type(field, Type::Json);
|
||||||
let mut json_term_writer = Self::wrap(term_buffer);
|
let mut json_term_writer = Self::wrap(term_buffer);
|
||||||
for segment in json_path.split('.') {
|
for segment in split_json_path(json_path) {
|
||||||
json_term_writer.push_path_segment(segment);
|
json_term_writer.push_path_segment(&segment);
|
||||||
}
|
}
|
||||||
json_term_writer
|
json_term_writer
|
||||||
}
|
}
|
||||||
@@ -350,7 +383,7 @@ impl<'a> JsonTermWriter<'a> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::JsonTermWriter;
|
use super::{split_json_path, JsonTermWriter};
|
||||||
use crate::schema::{Field, Type};
|
use crate::schema::{Field, Type};
|
||||||
use crate::Term;
|
use crate::Term;
|
||||||
|
|
||||||
@@ -495,4 +528,48 @@ mod tests {
|
|||||||
json_writer.set_str("pink");
|
json_writer.set_str("pink");
|
||||||
assert_eq!(json_writer.path(), b"color\x01hue");
|
assert_eq!(json_writer.path(), b"color\x01hue");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_simple() {
    // A single unescaped dot separates exactly two segments.
    let segments = split_json_path("titi.toto");
    assert_eq!(segments, ["titi", "toto"]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_single_segment() {
    // Without any dot the whole path is returned as one segment.
    let segments = split_json_path("toto");
    assert_eq!(segments, ["toto"]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_trailing_dot() {
    // A dot at the end of the path yields an empty last segment.
    let segments = split_json_path("toto.");
    assert_eq!(segments, ["toto", ""]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_heading_dot() {
    // A dot at the start of the path yields an empty first segment.
    let segments = split_json_path(".toto");
    assert_eq!(segments, ["", "toto"]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_escaped_dot() {
    // One escaped dot: the dot stays inside the segment, the backslash is removed.
    let one_escape = split_json_path(r#"toto\.titi"#);
    assert_eq!(one_escape, ["toto.titi"]);

    // Several escaped dots in a row of keys still produce a single segment.
    let two_escapes = split_json_path(r#"k8s\.container\.name"#);
    assert_eq!(two_escapes, ["k8s.container.name"]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_escaped_backslash() {
    // A doubled backslash collapses into one literal backslash.
    let segments = split_json_path(r#"toto\\titi"#);
    assert_eq!(segments, [r#"toto\titi"#]);
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_json_path_escaped_normal_letter() {
    // Escaping an ordinary character just drops the backslash.
    let segments = split_json_path(r#"toto\titi"#);
    assert_eq!(segments, [r#"tototiti"#]);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,13 +58,15 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
|
|||||||
#[cfg(feature = "mmap")]
|
#[cfg(feature = "mmap")]
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests_mmap {
|
mod tests_mmap {
|
||||||
use crate::schema::{self, Schema};
|
use crate::collector::Count;
|
||||||
|
use crate::query::QueryParser;
|
||||||
|
use crate::schema::{Schema, STORED, TEXT};
|
||||||
use crate::{Index, Term};
|
use crate::{Index, Term};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_advance_delete_bug() -> crate::Result<()> {
|
fn test_advance_delete_bug() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_from_tempdir(schema_builder.build())?;
|
let index = Index::create_from_tempdir(schema_builder.build())?;
|
||||||
let mut index_writer = index.writer_for_tests()?;
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
// there must be one deleted document in the segment
|
// there must be one deleted document in the segment
|
||||||
@@ -75,7 +77,26 @@ mod tests_mmap {
|
|||||||
index_writer.add_document(doc!(text_field=>"c"))?;
|
index_writer.add_document(doc!(text_field=>"c"))?;
|
||||||
}
|
}
|
||||||
index_writer.commit()?;
|
index_writer.commit()?;
|
||||||
index_writer.commit()?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_json_field_espace() {
    // Index one document whose JSON object has a key containing literal dots.
    let mut builder = Schema::builder();
    let field = builder.add_json_field("json", TEXT | STORED);
    let schema = builder.build();
    let index = Index::create_in_ram(schema);
    let mut writer = index.writer_for_tests().unwrap();
    let payload = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
    writer.add_document(doc!(field=>payload)).unwrap();
    writer.commit().unwrap();

    let index_reader = index.reader().unwrap();
    let searcher = index_reader.searcher();
    assert_eq!(searcher.num_docs(), 1);

    // The query escapes the dots of the key; the document must be found.
    let parser = QueryParser::for_index(&index, Vec::new());
    let escaped_query = parser
        .parse_query(r#"json.k8s\.container\.name:prometheus"#)
        .unwrap();
    let hit_count = searcher.search(&escaped_query, &Count).unwrap();
    assert_eq!(hit_count, 1);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1062,6 +1062,28 @@ mod test {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn extract_query_term_json_path(query: &str) -> String {
|
||||||
|
let LogicalAst::Leaf(literal) = parse_query_to_logical_ast(query, false).unwrap() else {
|
||||||
|
panic!();
|
||||||
|
};
|
||||||
|
let LogicalLiteral::Term(term) = *literal else {
|
||||||
|
panic!();
|
||||||
|
};
|
||||||
|
std::str::from_utf8(term.value_bytes()).unwrap().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_json_field_query_with_espaced_dot() {
    // Unescaped dots: each dot becomes a `\u{1}` separator in the term's path.
    let unescaped = extract_query_term_json_path(r#"json.k8s.node.name:hello"#);
    assert_eq!(unescaped, "k8s\u{1}node\u{1}name\0shello");

    // Escaped dots: the dots are kept verbatim inside the path.
    let escaped = extract_query_term_json_path(r#"json.k8s\.node\.name:hello"#);
    assert_eq!(escaped, "k8s.node.name\0shello");
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_json_field_possibly_a_number() {
|
fn test_json_field_possibly_a_number() {
|
||||||
test_parse_query_to_logical_ast_helper(
|
test_parse_query_to_logical_ast_helper(
|
||||||
|
|||||||
Reference in New Issue
Block a user