From 8f647b817f78edf002c64b36014ae4e46fa4133a Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 28 Sep 2022 17:43:58 +0800 Subject: [PATCH 1/2] fix docstore settings for temp docstore fixes #1565 --- src/indexer/segment_serializer.rs | 28 +++++++++++++++++----------- src/indexer/segment_writer.rs | 3 ++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index 7cb5bf3f5..dee306bb3 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -24,12 +24,25 @@ impl SegmentSerializer { // In the merge case this is not necessary because we can kmerge the already sorted // segments let remapping_required = segment.index().settings().sort_by_field.is_some() && !is_in_merge; - let store_component = if remapping_required { - SegmentComponent::TempStore + let settings = segment.index().settings().clone(); + let store_writer = if remapping_required { + let store_write = segment.open_write(SegmentComponent::TempStore)?; + StoreWriter::new( + store_write, + crate::store::Compressor::None, + 0, // we want random access on the docs, so we choose a minimal block size. Every + // doc will be flushed + settings.docstore_compress_dedicated_thread, + )? } else { - SegmentComponent::Store + let store_write = segment.open_write(SegmentComponent::Store)?; + StoreWriter::new( + store_write, + settings.docstore_compression, + settings.docstore_blocksize, + settings.docstore_compress_dedicated_thread, + )? }; - let store_write = segment.open_write(store_component)?; let fast_field_write = segment.open_write(SegmentComponent::FastFields)?; let fast_field_serializer = CompositeFastFieldSerializer::from_write(fast_field_write)?; @@ -38,13 +51,6 @@ impl SegmentSerializer { let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?; let postings_serializer = InvertedIndexSerializer::open(&mut segment)?; - let settings = segment.index().settings(); - let store_writer = StoreWriter::new( - store_write, - settings.docstore_compression, - settings.docstore_blocksize, - settings.docstore_compress_dedicated_thread, - )?; Ok(SegmentSerializer { segment, store_writer, diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 3ac64bc6c..9859beaee 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -393,7 +393,8 @@ fn remap_and_write( serializer .segment() .open_read(SegmentComponent::TempStore)?, - 50, + 1, /* The docstore is configured for tiny docs for fast random access, we don't need + * the the cache */ )?; for old_doc_id in doc_id_map.iter_old_doc_ids() { let doc_bytes = store_read.get_document_bytes(old_doc_id)?; From f8686ab1ec9cc456441aa185d20039e49f615a1c Mon Sep 17 00:00:00 2001 From: PSeitz Date: Fri, 30 Sep 2022 04:00:47 +0200 Subject: [PATCH 2/2] improve comments Co-authored-by: Paul Masurel --- src/indexer/segment_serializer.rs | 2 +- src/indexer/segment_writer.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index dee306bb3..dab5095df 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -31,7 +31,7 @@ impl SegmentSerializer { store_write, crate::store::Compressor::None, 0, // we want random access on the docs, so we choose a minimal block size. Every - // doc will be flushed + // doc will get its own block. settings.docstore_compress_dedicated_thread, )? } else { diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 9859beaee..91347d0d1 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -393,8 +393,8 @@ fn remap_and_write( serializer .segment() .open_read(SegmentComponent::TempStore)?, - 1, /* The docstore is configured for tiny docs for fast random access, we don't need - * the the cache */ + 1, /* The docstore is configured to have one doc per block, and each doc is accessed + * only once: we don't need caching. */ )?; for old_doc_id in doc_id_map.iter_old_doc_ids() { let doc_bytes = store_read.get_document_bytes(old_doc_id)?;