From 0ae94baef580d8780d3a9775bfdaafaff3422dd4 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 27 Jan 2026 09:22:11 +0100 Subject: [PATCH] Remove temp file (#2815) Co-authored-by: Paul Masurel --- src/directory/mmap_directory/mod.rs | 2 +- src/index/index_meta.rs | 40 +++-------------------------- src/index/segment_component.rs | 5 +--- src/indexer/index_writer.rs | 2 +- src/space_usage/mod.rs | 1 - 5 files changed, 6 insertions(+), 44 deletions(-) diff --git a/src/directory/mmap_directory/mod.rs b/src/directory/mmap_directory/mod.rs index 60ef82b30..ca91cb4b1 100644 --- a/src/directory/mmap_directory/mod.rs +++ b/src/directory/mmap_directory/mod.rs @@ -676,7 +676,7 @@ mod tests { let num_segments = reader.searcher().segment_readers().len(); assert!(num_segments <= 4); let num_components_except_deletes_and_tempstore = - crate::index::SegmentComponent::iterator().len() - 2; + crate::index::SegmentComponent::iterator().len() - 1; let max_num_mmapped = num_components_except_deletes_and_tempstore * num_segments; assert_eventually(|| { let num_mmapped = mmap_directory.get_cache_info().mmapped.len(); diff --git a/src/index/index_meta.rs b/src/index/index_meta.rs index d06d706c4..8c7983116 100644 --- a/src/index/index_meta.rs +++ b/src/index/index_meta.rs @@ -1,8 +1,6 @@ use std::collections::HashSet; use std::fmt; use std::path::PathBuf; -use std::sync::atomic::AtomicBool; -use std::sync::Arc; use serde::{Deserialize, Serialize}; @@ -37,7 +35,6 @@ impl SegmentMetaInventory { let inner = InnerSegmentMeta { segment_id, max_doc, - include_temp_doc_store: Arc::new(AtomicBool::new(true)), deletes: None, }; SegmentMeta::from(self.inventory.track(inner)) @@ -85,15 +82,6 @@ impl SegmentMeta { self.tracked.segment_id } - /// Removes the Component::TempStore from the alive list and - /// therefore marks the temp docstore file to be deleted by - /// the garbage collection. 
- pub fn untrack_temp_docstore(&self) { - self.tracked - .include_temp_doc_store - .store(false, std::sync::atomic::Ordering::Relaxed); - } - /// Returns the number of deleted documents. pub fn num_deleted_docs(&self) -> u32 { self.tracked @@ -111,20 +99,9 @@ impl SegmentMeta { /// is by removing all files that have been created by tantivy /// and are not used by any segment anymore. pub fn list_files(&self) -> HashSet<PathBuf> { - if self - .tracked - .include_temp_doc_store - .load(std::sync::atomic::Ordering::Relaxed) - { - SegmentComponent::iterator() - .map(|component| self.relative_path(*component)) - .collect::<HashSet<PathBuf>>() - } else { - SegmentComponent::iterator() - .filter(|comp| *comp != &SegmentComponent::TempStore) - .map(|component| self.relative_path(*component)) - .collect::<HashSet<PathBuf>>() - } + SegmentComponent::iterator() + .map(|component| self.relative_path(*component)) + .collect::<HashSet<PathBuf>>() } /// Returns the relative path of a component of our segment. @@ -138,7 +115,6 @@ impl SegmentMeta { SegmentComponent::Positions => ".pos".to_string(), SegmentComponent::Terms => ".term".to_string(), SegmentComponent::Store => ".store".to_string(), - SegmentComponent::TempStore => ".store.temp".to_string(), SegmentComponent::FastFields => ".fast".to_string(), SegmentComponent::FieldNorms => ".fieldnorm".to_string(), SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)), @@ -183,7 +159,6 @@ impl SegmentMeta { segment_id: inner_meta.segment_id, max_doc, deletes: None, - include_temp_doc_store: Arc::new(AtomicBool::new(true)), }); SegmentMeta { tracked } } @@ -202,7 +177,6 @@ impl SegmentMeta { let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta { segment_id: inner_meta.segment_id, max_doc: inner_meta.max_doc, - include_temp_doc_store: Arc::new(AtomicBool::new(true)), deletes: Some(delete_meta), }); SegmentMeta { tracked } @@ -214,14 +188,6 @@ struct InnerSegmentMeta { segment_id: SegmentId, max_doc: u32, pub deletes: Option<DeleteMeta>, - /// If you want to avoid 
- the SegmentComponent::TempStore file to be covered by - /// garbage collection and deleted, set this to true. This is used during merge. - #[serde(skip)] - #[serde(default = "default_temp_store")] - pub(crate) include_temp_doc_store: Arc<AtomicBool>, -} -fn default_temp_store() -> Arc<AtomicBool> { - Arc::new(AtomicBool::new(false)) } impl InnerSegmentMeta { diff --git a/src/index/segment_component.rs b/src/index/segment_component.rs index 42ac1d178..290f36e83 100644 --- a/src/index/segment_component.rs +++ b/src/index/segment_component.rs @@ -23,8 +23,6 @@ pub enum SegmentComponent { /// Accessing a document from the store is relatively slow, as it /// requires to decompress the entire block it belongs to. Store, - /// Temporary storage of the documents, before streamed to `Store`. - TempStore, /// Bitset describing which document of the segment is alive. /// (It was representing deleted docs but changed to represent alive docs from v0.17) Delete, @@ -33,14 +31,13 @@ pub enum SegmentComponent { impl SegmentComponent { /// Iterates through the components. 
pub fn iterator() -> slice::Iter<'static, SegmentComponent> { - static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [ + static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [ SegmentComponent::Postings, SegmentComponent::Positions, SegmentComponent::FastFields, SegmentComponent::FieldNorms, SegmentComponent::Terms, SegmentComponent::Store, - SegmentComponent::TempStore, SegmentComponent::Delete, ]; SEGMENT_COMPONENTS.iter() diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 1e07dd210..7ffc38615 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -218,7 +218,7 @@ fn index_documents( let alive_bitset_opt = apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?; let meta = segment_with_max_doc.meta().clone(); - meta.untrack_temp_docstore(); + // update segment_updater inventory to remove tempstore let segment_entry = SegmentEntry::new(meta, delete_cursor, alive_bitset_opt); segment_updater.schedule_add_segment(segment_entry).wait()?; diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs index 1dc413156..4c1f9c76a 100644 --- a/src/space_usage/mod.rs +++ b/src/space_usage/mod.rs @@ -124,7 +124,6 @@ impl SegmentSpaceUsage { FieldNorms => PerField(self.fieldnorms().clone()), Terms => PerField(self.termdict().clone()), SegmentComponent::Store => ComponentSpaceUsage::Store(self.store().clone()), - SegmentComponent::TempStore => ComponentSpaceUsage::Store(self.store().clone()), Delete => Basic(self.deletes()), } }