AliveBitSet instead of DeleteBitSet

This commit is contained in:
Pascal Seitz
2021-09-23 20:03:57 +08:00
parent 4583fa270b
commit a1f5cead96
13 changed files with 177 additions and 82 deletions

View File

@@ -31,7 +31,7 @@ impl IntoIterator for TinySet {
}
impl TinySet {
pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.0.to_le_bytes().as_ref())
}
@@ -42,17 +42,24 @@ impl TinySet {
}
/// Returns an empty `TinySet`.
#[inline]
pub fn empty() -> TinySet {
TinySet(0u64)
}
/// Returns a full `TinySet`.
#[inline]
pub fn full() -> TinySet {
TinySet::empty().complement()
}
pub fn clear(&mut self) {
self.0 = 0u64;
}
#[inline]
/// Returns the complement of the set in `[0, 64[`.
fn complement(self) -> TinySet {
pub fn complement(self) -> TinySet {
TinySet(!self.0)
}
@@ -68,6 +75,12 @@ impl TinySet {
self.0.count_ones()
}
#[inline]
/// Returns the number of elements of `[0, 64)` that are NOT in the TinySet,
/// i.e. the number of zero bits in the underlying `u64`.
pub fn num_unset(self) -> u32 {
self.0.count_zeros()
}
#[inline]
/// Returns the intersection of `self` and `other`
pub fn intersect(self, other: TinySet) -> TinySet {
@@ -81,13 +94,21 @@ impl TinySet {
TinySet(1u64 << u64::from(el))
}
/// Insert a new element within [0..64[
/// Insert a new element within [0..64)
#[inline]
pub fn insert(self, el: u32) -> TinySet {
self.union(TinySet::singleton(el))
}
/// Insert a new element within [0..64[
/// Returns a copy of the set without `el`, where `el` is within [0..64).
#[inline]
pub fn remove(self, el: u32) -> TinySet {
    // Mask containing every element except `el`.
    let everything_but_el = TinySet::singleton(el).complement();
    self.intersect(everything_but_el)
}
/// Insert a new element within [0..64)
///
/// returns true if the bit changed
#[inline]
pub fn insert_mut(&mut self, el: u32) -> bool {
let old = *self;
@@ -95,6 +116,16 @@ impl TinySet {
old != *self
}
/// Removes an element within [0..64) in place.
///
/// Returns true if the set changed as a result of the call.
#[inline]
pub fn remove_mut(&mut self, el: u32) -> bool {
    let before = *self;
    *self = before.remove(el);
    *self != before
}
/// Returns the union of two tinysets
#[inline]
pub fn union(self, other: TinySet) -> TinySet {
@@ -151,7 +182,7 @@ fn num_buckets(max_val: u32) -> u32 {
impl BitSet {
/// serialize a `BitSet`.
///
pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.max_value.to_le_bytes().as_ref())?;
for tinyset in self.tinysets.iter() {
@@ -163,6 +194,7 @@ impl BitSet {
/// Deserialize a `BitSet`.
///
#[cfg(test)]
pub fn deserialize(mut data: &[u8]) -> io::Result<Self> {
let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap());
data = &data[4..];
@@ -181,10 +213,19 @@ impl BitSet {
})
}
/// Count the number of unset bits from serialized data.
///
#[inline]
pub fn count_unset_from_bytes<'a>(data: &'a [u8]) -> usize {
BitSet::iter_tinysets_from_bytes(data)
.map(|tinyset| tinyset.num_unset() as usize)
.sum()
}
/// Iterate the tinyset on the fly from serialized data.
///
#[inline]
pub fn iter_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = TinySet> + 'a {
fn iter_tinysets_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = TinySet> + 'a {
assert!((data.len() - 4) % 8 == 0);
data[4..].chunks_exact(8).map(move |chunk| {
let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap();
@@ -198,8 +239,7 @@ impl BitSet {
#[inline]
pub fn iter_unset_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = u32> + 'a {
let max_val: u32 = u32::from_le_bytes(data[..4].try_into().unwrap());
Self::iter_from_bytes(data)
.map(|tinyset| tinyset.complement())
Self::iter_tinysets_from_bytes(data)
.enumerate()
.flat_map(move |(chunk_num, tinyset)| {
let chunk_base_val = chunk_num as u32 * 64;
@@ -211,7 +251,7 @@ impl BitSet {
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val[`.
/// within `[0, max_val)`.
pub fn with_max_value(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
@@ -222,6 +262,18 @@ impl BitSet {
}
}
/// Create a new `BitSet` over `[0, max_val)` in which every element
/// is initially present.
///
/// Every bucket starts as `TinySet::full()`, so the bits of the last
/// bucket that lie at or beyond `max_value` are set as well; the element
/// count is nevertheless initialised to `max_value`.
pub fn with_max_value_and_filled(max_value: u32) -> BitSet {
    let bucket_count = num_buckets(max_value) as usize;
    let tinysets = vec![TinySet::full(); bucket_count].into_boxed_slice();
    BitSet {
        tinysets,
        len: u64::from(max_value),
        max_value,
    }
}
/// Removes all elements from the `BitSet`.
pub fn clear(&mut self) {
for tinyset in self.tinysets.iter_mut() {
@@ -230,7 +282,7 @@ impl BitSet {
}
/// Returns the number of elements in the `BitSet`.
pub fn len(&self) -> usize {
pub fn num_set_bits(&self) -> usize {
self.len as usize
}
@@ -246,6 +298,18 @@ impl BitSet {
};
}
/// Removes an element from the `BitSet`.
///
/// The element count is decremented only when the bit actually changed.
pub fn remove(&mut self, el: u32) {
    // we do not check saturated els.
    let bucket = (el / 64u32) as usize;
    let bit = el % 64u32;
    if self.tinysets[bucket].remove_mut(bit) {
        self.len -= 1;
    }
}
/// Returns true iff the element is in the `BitSet`.
#[inline]
pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool {
@@ -296,6 +360,33 @@ mod tests {
use std::collections::HashSet;
use std::convert::TryInto;
#[test]
fn test_tiny_set_remove() {
{
let mut u = TinySet::empty().insert(63u32).insert(5).remove(63u32);
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty()
.insert(63u32)
.insert(1)
.insert(5)
.remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(1u32);
assert!(u.pop_lowest().is_none());
}
}
#[test]
fn test_tiny_set() {
assert!(TinySet::empty().is_empty());
@@ -361,7 +452,7 @@ mod tests {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
assert_eq!(bitset.len(), els.len());
assert_eq!(bitset.num_set_bits(), els.len());
};
test_against_hashset(&[], 0);
@@ -415,17 +506,25 @@ mod tests {
#[test]
fn test_bitset_len() {
let mut bitset = BitSet::with_max_value(1_000);
assert_eq!(bitset.len(), 0);
assert_eq!(bitset.num_set_bits(), 0);
bitset.insert(3u32);
assert_eq!(bitset.len(), 1);
assert_eq!(bitset.num_set_bits(), 1);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(3u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(104u32);
assert_eq!(bitset.len(), 3);
assert_eq!(bitset.num_set_bits(), 3);
bitset.remove(105u32);
assert_eq!(bitset.num_set_bits(), 3);
bitset.remove(104u32);
assert_eq!(bitset.num_set_bits(), 2);
bitset.remove(3u32);
assert_eq!(bitset.num_set_bits(), 1);
bitset.remove(103u32);
assert_eq!(bitset.num_set_bits(), 0);
}
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {

View File

@@ -5,7 +5,7 @@ use crate::core::SegmentId;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::error::DataCorruption;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::fastfield::FacetReader;
use crate::fastfield::FastFieldReaders;
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
@@ -47,7 +47,7 @@ pub struct SegmentReader {
fieldnorm_readers: FieldNormReaders,
store_file: FileSlice,
delete_bitset_opt: Option<DeleteBitSet>,
delete_bitset_opt: Option<AliveBitSet>,
schema: Schema,
}
@@ -172,7 +172,7 @@ impl SegmentReader {
let delete_bitset_opt = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::Delete)?;
let delete_bitset = DeleteBitSet::open(delete_data)?;
let delete_bitset = AliveBitSet::open(delete_data)?;
Some(delete_bitset)
} else {
None
@@ -274,7 +274,7 @@ impl SegmentReader {
/// Returns the bitset representing
/// the documents that are still alive (a deleted document has its bit unset).
pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
pub fn delete_bitset(&self) -> Option<&AliveBitSet> {
self.delete_bitset_opt.as_ref()
}
@@ -307,7 +307,7 @@ impl SegmentReader {
self.get_store_reader()?.space_usage(),
self.delete_bitset_opt
.as_ref()
.map(DeleteBitSet::space_usage)
.map(AliveBitSet::space_usage)
.unwrap_or(0),
))
}

View File

@@ -1,4 +1,4 @@
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::DocId;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
@@ -85,7 +85,7 @@ pub trait DocSet: Send {
/// Returns the number of documents matching.
/// Calling this method consumes the `DocSet`.
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 {
let mut count = 0u32;
let mut doc = self.doc();
while doc != TERMINATED {
@@ -130,7 +130,7 @@ impl<'a> DocSet for &'a mut dyn DocSet {
(**self).size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 {
(**self).count(delete_bitset)
}
@@ -160,7 +160,7 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
unboxed.size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.count(delete_bitset)
}

View File

@@ -11,27 +11,27 @@ use std::io::Write;
/// where `delete_bitset` is the set of alive `DocId`s (a deleted document has its bit unset).
/// Warning: this function does not call terminate. The caller is in charge of
/// closing the writer properly.
pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut dyn Write) -> io::Result<()> {
pub fn write_delete_bitset<T: Write>(delete_bitset: &BitSet, writer: &mut T) -> io::Result<()> {
delete_bitset.serialize(writer)?;
Ok(())
}
/// Set of alive `DocId`s: a set bit means the document is alive, an unset bit means it is deleted.
#[derive(Clone)]
pub struct DeleteBitSet {
pub struct AliveBitSet {
data: OwnedBytes,
num_deleted: usize,
}
impl DeleteBitSet {
impl AliveBitSet {
#[cfg(test)]
pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet {
pub(crate) fn for_test(not_alive_docs: &[DocId], max_doc: u32) -> AliveBitSet {
use crate::directory::{Directory, RamDirectory, TerminatingWrite};
use std::path::Path;
assert!(docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value(max_doc);
for &doc in docs {
bitset.insert(doc);
assert!(not_alive_docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value_and_filled(max_doc);
for &doc in not_alive_docs {
bitset.remove(doc);
}
let directory = RamDirectory::create();
let path = Path::new("dummydeletebitset");
@@ -43,13 +43,11 @@ impl DeleteBitSet {
}
/// Opens an alive bitset given its file.
pub fn open(file: FileSlice) -> crate::Result<DeleteBitSet> {
pub fn open(file: FileSlice) -> crate::Result<AliveBitSet> {
let bytes = file.read_bytes()?;
let num_deleted = BitSet::iter_from_bytes(bytes.as_slice())
.map(|tinyset| tinyset.len() as usize)
.sum();
let num_deleted = BitSet::count_unset_from_bytes(bytes.as_slice());
Ok(DeleteBitSet {
Ok(AliveBitSet {
data: bytes,
num_deleted,
})
@@ -65,7 +63,7 @@ impl DeleteBitSet {
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
let data = self.data.as_slice();
BitSet::contains_from_bytes(doc, data)
!BitSet::contains_from_bytes(doc, data)
}
/// Iterate over the positions of the set elements
@@ -88,11 +86,11 @@ impl DeleteBitSet {
#[cfg(test)]
mod tests {
use super::DeleteBitSet;
use super::AliveBitSet;
#[test]
fn test_delete_bitset_empty() {
let delete_bitset = DeleteBitSet::for_test(&[], 10);
let delete_bitset = AliveBitSet::for_test(&[], 10);
for doc in 0..10 {
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc));
}
@@ -101,7 +99,7 @@ mod tests {
#[test]
fn test_delete_bitset() {
let delete_bitset = DeleteBitSet::for_test(&[1, 9], 10);
let delete_bitset = AliveBitSet::for_test(&[1, 9], 10);
assert!(delete_bitset.is_alive(0));
assert!(delete_bitset.is_deleted(1));
assert!(delete_bitset.is_alive(2));
@@ -121,7 +119,7 @@ mod tests {
#[test]
fn test_delete_bitset_iter_minimal() {
let delete_bitset = DeleteBitSet::for_test(&[7], 8);
let delete_bitset = AliveBitSet::for_test(&[7], 8);
let data: Vec<_> = delete_bitset.iter_unset().collect();
assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]);
@@ -129,14 +127,14 @@ mod tests {
#[test]
fn test_delete_bitset_iter_small() {
let delete_bitset = DeleteBitSet::for_test(&[0, 2, 3, 6], 7);
let delete_bitset = AliveBitSet::for_test(&[0, 2, 3, 6], 7);
let data: Vec<_> = delete_bitset.iter_unset().collect();
assert_eq!(data, vec![1, 4, 5]);
}
#[test]
fn test_delete_bitset_iter() {
let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000], 1001);
let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000], 1001);
let data: Vec<_> = delete_bitset.iter_unset().collect();
assert_eq!(data, (2..=999).collect::<Vec<_>>());
@@ -146,16 +144,14 @@ mod tests {
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::DeleteBitSet;
use common::BitSet;
use super::AliveBitSet;
use rand::prelude::IteratorRandom;
use rand::prelude::SliceRandom;
use rand::thread_rng;
use test::Bencher;
fn get_many_deleted() -> Vec<u32> {
fn get_alive() -> Vec<u32> {
let mut data = (0..1_000_000_u32).collect::<Vec<u32>>();
for _ in 0..(1_000_000) * 7 / 8 {
for _ in 0..(1_000_000) * 1 / 8 {
remove_rand(&mut data);
}
data
@@ -168,14 +164,14 @@ mod bench {
#[bench]
fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) {
let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000);
let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| delete_bitset.iter_unset().collect::<Vec<_>>());
}
#[bench]
fn bench_deletebitset_access(bench: &mut Bencher) {
let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000);
let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)
@@ -186,14 +182,14 @@ mod bench {
#[bench]
fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) {
let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000);
let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000);
bench.iter(|| delete_bitset.iter_unset().collect::<Vec<_>>());
}
#[bench]
fn bench_deletebitset_access_1_8_alive(bench: &mut Bencher) {
let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000);
let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)

View File

@@ -23,9 +23,9 @@ values stored.
Read access performance is comparable to that of an array lookup.
*/
pub use self::alive_bitset::write_delete_bitset;
pub use self::alive_bitset::AliveBitSet;
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
@@ -46,8 +46,8 @@ use crate::{
schema::Type,
};
mod alive_bitset;
mod bytes;
mod delete;
mod error;
mod facet_reader;
mod multivalued;

View File

@@ -114,7 +114,7 @@ fn compute_deleted_bitset(
let mut doc_matching_deleted_term = docset.doc();
while doc_matching_deleted_term != TERMINATED {
if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) {
delete_bitset.insert(doc_matching_deleted_term);
delete_bitset.remove(doc_matching_deleted_term);
might_have_changed = true;
}
doc_matching_deleted_term = docset.advance();
@@ -151,7 +151,7 @@ pub(crate) fn advance_deletes(
let max_doc = segment_reader.max_doc();
let mut delete_bitset: BitSet = match segment_entry.delete_bitset() {
Some(previous_delete_bitset) => (*previous_delete_bitset).clone(),
None => BitSet::with_max_value(max_doc),
None => BitSet::with_max_value_and_filled(max_doc),
};
let num_deleted_docs_before = segment.meta().num_deleted_docs();
@@ -170,12 +170,13 @@ pub(crate) fn advance_deletes(
if let Some(seg_delete_bitset) = segment_reader.delete_bitset() {
for doc in 0u32..max_doc {
if seg_delete_bitset.is_deleted(doc) {
delete_bitset.insert(doc);
delete_bitset.remove(doc);
}
}
}
let num_deleted_docs: u32 = delete_bitset.len() as u32;
let num_alive_docs: u32 = delete_bitset.num_set_bits() as u32;
let num_deleted_docs = max_doc - num_alive_docs;
if num_deleted_docs > num_deleted_docs_before {
// There are new deletes. We need to write a new delete file.
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp);
@@ -259,7 +260,7 @@ fn apply_deletes(
let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps);
let max_doc = segment.meta().max_doc();
let mut deleted_bitset = BitSet::with_max_value(max_doc);
let mut deleted_bitset = BitSet::with_max_value_and_filled(max_doc);
let may_have_deletes = compute_deleted_bitset(
&mut deleted_bitset,
&segment_reader,

View File

@@ -1,6 +1,5 @@
use crate::error::DataCorruption;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::DynamicFastFieldReader;
use crate::fastfield::FastFieldDataAccess;
use crate::fastfield::FastFieldReader;

View File

@@ -1,6 +1,6 @@
#[cfg(test)]
mod tests {
use crate::fastfield::{DeleteBitSet, FastFieldReader};
use crate::fastfield::{AliveBitSet, FastFieldReader};
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
@@ -257,7 +257,7 @@ mod tests {
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
let fallback_bitset = AliveBitSet::for_test(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.delete_bitset().unwrap_or(&fallback_bitset)
@@ -336,7 +336,7 @@ mod tests {
.unwrap()
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
let fallback_bitset = AliveBitSet::for_test(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.delete_bitset().unwrap_or(&fallback_bitset)
@@ -446,7 +446,7 @@ mod tests {
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
let fallback_bitset = AliveBitSet::for_test(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.delete_bitset().unwrap_or(&fallback_bitset)

View File

@@ -1,5 +1,5 @@
use crate::docset::DocSet;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::positions::PositionReader;
use crate::postings::branchless_binary_search;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -34,7 +34,7 @@ impl SegmentPostings {
///
/// This method will clone and scan through the posting lists.
/// (this is a rather expensive operation).
pub fn doc_freq_given_deletes(&self, delete_bitset: &DeleteBitSet) -> u32 {
pub fn doc_freq_given_deletes(&self, delete_bitset: &AliveBitSet) -> u32 {
let mut docset = self.clone();
let mut doc_freq = 0;
loop {
@@ -268,7 +268,7 @@ mod tests {
use common::HasLen;
use crate::docset::{DocSet, TERMINATED};
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::postings::postings::Postings;
#[test]
@@ -296,9 +296,9 @@ mod tests {
fn test_doc_freq() {
let docs = SegmentPostings::create_from_docs(&[0, 2, 10]);
assert_eq!(docs.doc_freq(), 3);
let delete_bitset = DeleteBitSet::for_test(&[2], 12);
let delete_bitset = AliveBitSet::for_test(&[2], 12);
assert_eq!(docs.doc_freq_given_deletes(&delete_bitset), 2);
let all_deleted = DeleteBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
let all_deleted = AliveBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0);
}
}

View File

@@ -90,7 +90,7 @@ impl DocSet for BitSetDocSet {
/// but we don't have access to any better
/// value.
fn size_hint(&self) -> u32 {
self.docs.len() as u32
self.docs.num_set_bits() as u32
}
}
@@ -124,7 +124,7 @@ mod tests {
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
assert_eq!(btreeset.len(), bitset.num_set_bits());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {

View File

@@ -1,4 +1,4 @@
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term};
@@ -118,7 +118,7 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
self.underlying.size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 {
self.underlying.count(delete_bitset)
}

View File

@@ -57,7 +57,7 @@ pub mod tests {
use futures::executor::block_on;
use super::*;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
use crate::schema::{Document, TextOptions};
use crate::{
@@ -113,7 +113,7 @@ pub mod tests {
fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> {
// this will cover deletion of the first element in a checkpoint
let deleted_docids = (200..300).collect::<Vec<_>>();
let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
let delete_bitset = AliveBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
let path = Path::new("store");
let directory = RamDirectory::create();

View File

@@ -5,7 +5,7 @@ use crate::schema::Document;
use crate::space_usage::StoreSpaceUsage;
use crate::store::index::Checkpoint;
use crate::DocId;
use crate::{error::DataCorruption, fastfield::DeleteBitSet};
use crate::{error::DataCorruption, fastfield::AliveBitSet};
use common::{BinarySerializable, HasLen, VInt};
use lru::LruCache;
use std::io;
@@ -136,7 +136,7 @@ impl StoreReader {
/// The delete_bitset has to be forwarded from the `SegmentReader` or the results may be wrong.
pub fn iter<'a: 'b, 'b>(
&'b self,
delete_bitset: Option<&'a DeleteBitSet>,
delete_bitset: Option<&'a AliveBitSet>,
) -> impl Iterator<Item = crate::Result<Document>> + 'b {
self.iter_raw(delete_bitset).map(|doc_bytes_res| {
let mut doc_bytes = doc_bytes_res?;
@@ -149,7 +149,7 @@ impl StoreReader {
/// The delete_bitset has to be forwarded from the `SegmentReader` or the results may be wrong.
pub(crate) fn iter_raw<'a: 'b, 'b>(
&'b self,
delete_bitset: Option<&'a DeleteBitSet>,
delete_bitset: Option<&'a AliveBitSet>,
) -> impl Iterator<Item = crate::Result<OwnedBytes>> + 'b {
let last_docid = self
.block_checkpoints()