Fix outdated variable naming and comments to alive bitset (#1387)

* Fix outdated variables and comments for alive bitset

* Fix expired link to delete bitset
This commit is contained in:
Kanji Yomoda
2022-06-14 15:59:15 +09:00
committed by GitHub
parent 88054aa333
commit 83d0c13fb0
4 changed files with 12 additions and 12 deletions

View File

@@ -57,8 +57,8 @@ For a better idea of how indexing works, you may read the [following blog post](
Deletes happen by deleting a "term". Tantivy does not offer any notion of primary id, so it is up to the user to use a field in their schema as if it was a primary id, and delete the associated term if they want to delete only one specific document.
On commit, tantivy will find all of the segments with documents matching this existing term and create a [tombstone file](src/fastfield/delete.rs) that represents the bitset of the document that are deleted.
Like all segment files, this file is immutable. Because it is possible to have more than one tombstone file at a given instant, the tombstone filename has the format ``` segment_id . commit_opstamp . del```.
On commit, tantivy will find all of the segments with documents matching this existing term and remove them from the [alive bitset file](src/fastfield/alive_bitset.rs), which represents the bitset of the alive document ids.
Like all segment files, this file is immutable. Because it is possible to have more than one alive bitset file at a given instant, the alive bitset filename has the format ``` segment_id . commit_opstamp . del```.
An opstamp is simply an incremental id that identifies any operation applied to the index. For instance, performing a commit or adding a document.

View File

@@ -24,7 +24,8 @@ pub enum SegmentComponent {
Store,
/// Temporary storage of the documents, before streamed to `Store`.
TempStore,
/// Bitset describing which document of the segment is deleted.
/// Bitset describing which document of the segment is alive.
/// (Before v0.17 it represented the deleted docs; since v0.17 it represents the alive docs.)
Delete,
}

View File

@@ -175,9 +175,9 @@ impl SegmentReader {
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let original_bitset = if segment.meta().has_deletes() {
let delete_file_slice = segment.open_read(SegmentComponent::Delete)?;
let delete_data = delete_file_slice.read_bytes()?;
Some(AliveBitSet::open(delete_data))
let alive_doc_file_slice = segment.open_read(SegmentComponent::Delete)?;
let alive_doc_data = alive_doc_file_slice.read_bytes()?;
Some(AliveBitSet::open(alive_doc_data))
} else {
None
};
@@ -295,8 +295,7 @@ impl SegmentReader {
self.delete_opstamp
}
/// Returns the bitset representing
/// the documents that have been deleted.
/// Returns the bitset representing the alive `DocId`s.
pub fn alive_bitset(&self) -> Option<&AliveBitSet> {
self.alive_bitset_opt.as_ref()
}
@@ -305,7 +304,7 @@ impl SegmentReader {
/// as deleted.
pub fn is_deleted(&self, doc: DocId) -> bool {
self.alive_bitset()
.map(|delete_set| delete_set.is_deleted(doc))
.map(|alive_bitset| alive_bitset.is_deleted(doc))
.unwrap_or(false)
}

View File

@@ -158,9 +158,9 @@ pub(crate) fn advance_deletes(
if num_deleted_docs > num_deleted_docs_before {
// There are new deletes. We need to write a new alive bitset file.
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp);
let mut delete_file = segment.open_write(SegmentComponent::Delete)?;
write_alive_bitset(&alive_bitset, &mut delete_file)?;
delete_file.terminate()?;
let mut alive_doc_file = segment.open_write(SegmentComponent::Delete)?;
write_alive_bitset(&alive_bitset, &mut alive_doc_file)?;
alive_doc_file.terminate()?;
}
segment_entry.set_meta(segment.meta().clone());