mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
@@ -1004,7 +1004,22 @@ impl IndexMerger {
|
||||
} else {
|
||||
for reader in &self.readers {
|
||||
let store_reader = reader.get_store_reader()?;
|
||||
if reader.num_deleted_docs() > 0 {
|
||||
if reader.num_deleted_docs() > 0
|
||||
// If there is not enough data in the store, we avoid stacking in order to
|
||||
// avoid creating many small blocks in the doc store. Once we have 5 full blocks,
|
||||
// we start stacking. In the worst case 2/7 of the blocks would be very small.
|
||||
// [segment 1 - {1 doc}][segment 2 - {fullblock * 5}{1doc}]
|
||||
// => 5 * full blocks, 2 * 1 document blocks
|
||||
//
|
||||
// In a more realistic scenario the segments are of the same size, so 1/6 of
|
||||
// the doc stores would be on average half full, given total randomness (which
|
||||
// is not the case here, but not sure how it behaves exactly).
|
||||
//
|
||||
// https://github.com/tantivy-search/tantivy/issues/1053
|
||||
//
|
||||
// take 7 in order to not walk over all checkpoints.
|
||||
|| store_reader.block_checkpoints().take(7).count() < 6
|
||||
{
|
||||
for doc_bytes_res in store_reader.iter_raw(reader.delete_bitset()) {
|
||||
let doc_bytes = doc_bytes_res?;
|
||||
store_writer.store_bytes(&doc_bytes)?;
|
||||
|
||||
@@ -97,8 +97,10 @@ use self::compression_snap::{compress, decompress};
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
|
||||
use futures::executor::block_on;
|
||||
|
||||
use super::*;
|
||||
use crate::schema::{self, FieldValue, TextFieldIndexing};
|
||||
use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
|
||||
use crate::schema::{Document, TextOptions};
|
||||
use crate::{
|
||||
directory::{Directory, RamDirectory, WritePtr},
|
||||
@@ -214,6 +216,47 @@ pub mod tests {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
/// Regression test for merging many tiny segments.
///
/// Creates five segments holding a single stored document each, merges
/// them, and checks that the doc store of the merged segment consists of a
/// single block (one checkpoint) rather than one small block per source
/// segment.
///
/// Errors from the index operations are propagated with `?` so that a
/// failure reports the underlying error instead of a bare assertion failure.
#[test]
fn test_merge_of_small_segments() -> crate::Result<()> {
    let mut schema_builder = schema::Schema::builder();
    let text_field = schema_builder.add_text_field("text_field", TEXT | STORED);
    let schema = schema_builder.build();
    let index = Index::builder().schema(schema).create_in_ram()?;

    // One commit per document, so that every document lands in its own segment.
    {
        let mut index_writer = index.writer_for_tests()?;
        for text in ["1", "2", "3", "4", "5"].iter().copied() {
            index_writer.add_document(doc!(text_field => text));
            index_writer.commit()?;
        }
    }

    // Merge all of the segments into a single one.
    {
        let segment_ids = index.searchable_segment_ids()?;
        let mut index_writer = index.writer_for_tests()?;
        block_on(index_writer.merge(&segment_ids))?;
        index_writer.wait_merging_threads()?;
    }

    let searcher = index.reader()?.searcher();
    assert_eq!(searcher.segment_readers().len(), 1);
    let reader = searcher
        .segment_readers()
        .first()
        .expect("exactly one segment reader was asserted just above");
    let store = reader.get_store_reader()?;
    // The five one-document stores must have been rewritten into one block,
    // not stacked as five separate tiny blocks.
    assert_eq!(store.block_checkpoints().count(), 1);
    Ok(())
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
|
||||
Reference in New Issue
Block a user