mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
fix merge panic for JSON fields (#2284)
Root cause was the positions buffer had residue positions from the previous term, when the terms were alternating between having and not having positions in JSON (terms have positions, but not numerics). Fixes #2283
This commit is contained in:
@@ -1651,6 +1651,7 @@ mod tests {
|
||||
force_end_merge: bool,
|
||||
) -> crate::Result<Index> {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let json_field = schema_builder.add_json_field("json", FAST | TEXT | STORED);
|
||||
let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED);
|
||||
let ips_field = schema_builder
|
||||
.add_ip_addr_field("ips", IpAddrOptions::default().set_fast().set_indexed());
|
||||
@@ -1729,7 +1730,9 @@ mod tests {
|
||||
id_field=>id,
|
||||
))?;
|
||||
} else {
|
||||
let json = json!({"date1": format!("2022-{id}-01T00:00:01Z"), "date2": format!("{id}-05-01T00:00:01Z"), "id": id, "ip": ip.to_string()});
|
||||
index_writer.add_document(doc!(id_field=>id,
|
||||
json_field=>json,
|
||||
bytes_field => id.to_le_bytes().as_slice(),
|
||||
id_opt_field => id,
|
||||
ip_field => ip,
|
||||
|
||||
@@ -605,6 +605,10 @@ impl IndexMerger {
|
||||
segment_postings.positions(&mut positions_buffer);
|
||||
segment_postings.term_freq()
|
||||
} else {
|
||||
// The positions_buffer may contain positions from the previous term
|
||||
// Existence of positions depend on the value type in JSON fields.
|
||||
// https://github.com/quickwit-oss/tantivy/issues/2283
|
||||
positions_buffer.clear();
|
||||
0u32
|
||||
};
|
||||
|
||||
|
||||
@@ -879,6 +879,31 @@ mod tests {
|
||||
assert_eq!(searcher.search(&phrase_query, &Count).unwrap(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_term_with_numeric_merge_panic_regression_bug_2283() {
|
||||
// https://github.com/quickwit-oss/tantivy/issues/2283
|
||||
let mut schema_builder = Schema::builder();
|
||||
let json = schema_builder.add_json_field("json", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut writer = index.writer_for_tests().unwrap();
|
||||
let doc = json!({"field": "a"});
|
||||
writer.add_document(doc!(json=>doc)).unwrap();
|
||||
writer.commit().unwrap();
|
||||
let doc = json!({"field": "a", "id": 1});
|
||||
writer.add_document(doc!(json=>doc.clone())).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// Force Merge
|
||||
writer.wait_merging_threads().unwrap();
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
|
||||
let segment_ids = index
|
||||
.searchable_segment_ids()
|
||||
.expect("Searchable segments failed.");
|
||||
index_writer.merge(&segment_ids).wait().unwrap();
|
||||
assert!(index_writer.wait_merging_threads().is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bug_regression_1629_position_when_array_with_a_field_value_that_does_not_contain_any_token(
|
||||
) {
|
||||
|
||||
Reference in New Issue
Block a user