diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index f26cf2366..97fea8ffa 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -57,8 +57,8 @@ For a better idea of how indexing works, you may read the [following blog post]( Deletes happen by deleting a "term". Tantivy does not offer any notion of primary id, so it is up to the user to use a field in their schema as if it was a primary id, and delete the associated term if they want to delete only one specific document. -On commit, tantivy will find all of the segments with documents matching this existing term and create a [tombstone file](src/fastfield/delete.rs) that represents the bitset of the document that are deleted. -Like all segment files, this file is immutable. Because it is possible to have more than one tombstone file at a given instant, the tombstone filename has the format ``` segment_id . commit_opstamp . del```. +On commit, tantivy will find all of the segments with documents matching this existing term and create an [alive bitset file](src/fastfield/alive_bitset.rs) that represents the bitset of the alive document ids, with the matching documents removed. +Like all segment files, this file is immutable. Because it is possible to have more than one alive bitset file at a given instant, the alive bitset filename has the format ``` segment_id . commit_opstamp . del```. An opstamp is simply an incremental id that identifies any operation applied to the index. For instance, performing a commit or adding a document. diff --git a/src/core/segment_component.rs b/src/core/segment_component.rs index c5e07255b..1855bb853 100644 --- a/src/core/segment_component.rs +++ b/src/core/segment_component.rs @@ -24,7 +24,8 @@ pub enum SegmentComponent { Store, /// Temporary storage of the documents, before streamed to `Store`. TempStore, - /// Bitset describing which document of the segment is deleted. + /// Bitset describing which document of the segment is alive. 
+ /// (Before v0.17, this bitset represented the deleted docs instead.) Delete, } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index f41dac701..f41a7caa1 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -175,9 +175,9 @@ impl SegmentReader { let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; let original_bitset = if segment.meta().has_deletes() { - let delete_file_slice = segment.open_read(SegmentComponent::Delete)?; - let delete_data = delete_file_slice.read_bytes()?; - Some(AliveBitSet::open(delete_data)) + let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?; + let alive_doc_data = alive_doc_file_slice.read_bytes()?; + Some(AliveBitSet::open(alive_doc_data)) } else { None }; @@ -295,8 +295,7 @@ impl SegmentReader { self.delete_opstamp } - /// Returns the bitset representing - /// the documents that have been deleted. + /// Returns the bitset representing the alive `DocId`s. pub fn alive_bitset(&self) -> Option<&AliveBitSet> { self.alive_bitset_opt.as_ref() } @@ -305,7 +304,7 @@ impl SegmentReader { /// as deleted. pub fn is_deleted(&self, doc: DocId) -> bool { self.alive_bitset() - .map(|delete_set| delete_set.is_deleted(doc)) + .map(|alive_bitset| alive_bitset.is_deleted(doc)) .unwrap_or(false) } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index ff541b00c..9d5a32989 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -158,9 +158,9 @@ pub(crate) fn advance_deletes( if num_deleted_docs > num_deleted_docs_before { // There are new deletes. We need to write a new delete file. 
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp); - let mut delete_file = segment.open_write(SegmentComponent::Delete)?; - write_alive_bitset(&alive_bitset, &mut delete_file)?; - delete_file.terminate()?; + let mut alive_doc_file = segment.open_write(SegmentComponent::Delete)?; + write_alive_bitset(&alive_bitset, &mut alive_doc_file)?; + alive_doc_file.terminate()?; } segment_entry.set_meta(segment.meta().clone());