move bitset to common crate, move composite file to directory

This commit is contained in:
Pascal Seitz
2021-08-19 17:45:09 +01:00
parent 483e0336b6
commit ee0881712a
21 changed files with 75 additions and 57 deletions

View File

@@ -10,3 +10,6 @@ description = "common traits and utility functions used by multiple tantivy subc
[dependencies]
byteorder = "1.4.3"
[dev-dependencies]
rand = "0.8.4"

View File

@@ -2,7 +2,7 @@ use std::fmt;
use std::u64;
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct TinySet(u64);
pub struct TinySet(u64);
impl fmt::Debug for TinySet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -178,7 +178,7 @@ impl BitSet {
///
/// Reminder: the tiny set with the bucket `bucket`, represents the
/// elements from `bucket * 64` to `(bucket+1) * 64`.
pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
self.tinysets[bucket as usize..]
.iter()
.cloned()
@@ -193,7 +193,7 @@ impl BitSet {
/// Returns the tiny bitset representing the
/// the set restricted to the number range from
/// `bucket * 64` to `(bucket + 1) * 64`.
pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
pub fn tinyset(&self, bucket: u32) -> TinySet {
self.tinysets[bucket as usize]
}
}
@@ -203,11 +203,9 @@ mod tests {
use super::BitSet;
use super::TinySet;
use crate::docset::{DocSet, TERMINATED};
use crate::query::BitSetDocSet;
use crate::tests;
use crate::tests::generate_nonunique_unsorted;
use std::collections::BTreeSet;
use rand::distributions::Bernoulli;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::collections::HashSet;
#[test]
@@ -263,29 +261,6 @@ mod tests {
test_against_hashset(&[62u32, 63u32], 64);
}
#[test]
fn test_bitset_large() {
let arr = generate_nonunique_unsorted(100_000, 5_000);
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
let mut bitset = BitSet::with_max_value(100_000);
for el in arr {
btreeset.insert(el);
bitset.insert(el);
}
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {
assert!(remaining);
assert_eq!(bitset_docset.doc(), el);
remaining = bitset_docset.advance() != TERMINATED;
}
assert!(!remaining);
}
#[test]
fn test_bitset_num_buckets() {
use super::num_buckets;
@@ -340,10 +315,23 @@ mod tests {
assert_eq!(bitset.len(), 3);
}
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
StdRng::from_seed([seed_val; 32])
.sample_iter(&Bernoulli::new(ratio).unwrap())
.take(n as usize)
.enumerate()
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
.collect()
}
pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
sample_with_seed(n, ratio, 4)
}
#[test]
fn test_bitset_clear() {
let mut bitset = BitSet::with_max_value(1_000);
let els = tests::sample(1_000, 0.01f64);
let els = sample(1_000, 0.01f64);
for &el in &els {
bitset.insert(el);
}

View File

@@ -1,9 +1,11 @@
pub use byteorder::LittleEndian as Endianness;
mod bitset;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};

View File

@@ -1,9 +1,3 @@
mod bitset;
mod composite_file;
pub use self::bitset::BitSet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use byteorder::LittleEndian as Endianness;
pub use common::CountingWriter;
pub use common::{

View File

@@ -2,7 +2,9 @@ use crate::core::InvertedIndexReader;
use crate::core::Segment;
use crate::core::SegmentComponent;
use crate::core::SegmentId;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::error::DataCorruption;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::FacetReader;
use crate::fastfield::FastFieldReaders;
@@ -14,7 +16,6 @@ use crate::space_usage::SegmentSpaceUsage;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::DocId;
use crate::{common::CompositeFile, error::DataCorruption};
use fail::fail_point;
use std::fmt;
use std::sync::Arc;

View File

@@ -1,5 +1,6 @@
use crate::common::BinarySerializable;
use crate::common::CountingWriter;
use crate::common::HasLen;
use crate::common::VInt;
use crate::directory::FileSlice;
use crate::directory::{TerminatingWrite, WritePtr};
@@ -11,8 +12,6 @@ use std::io::{self, Read, Write};
use std::iter::ExactSizeIterator;
use std::ops::Range;
use super::HasLen;
#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
pub struct FileAddr {
field: Field,

View File

@@ -20,6 +20,9 @@ mod watch_event_router;
/// Errors specific to the directory module.
pub mod error;
mod composite_file;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::DirectoryLock;
pub use self::directory::{Directory, DirectoryClone};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};

View File

@@ -1,4 +1,5 @@
use crate::common::{BitSet, HasLen};
use crate::common::HasLen;
use common::BitSet;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::WritePtr;

View File

@@ -213,7 +213,7 @@ fn value_to_u64(value: &Value) -> u64 {
mod tests {
use super::*;
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::common::HasLen;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
@@ -588,7 +588,7 @@ mod bench {
use super::tests::FIELD;
use super::tests::{generate_permutation, SCHEMA};
use super::*;
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::FastFieldReader;
use std::collections::HashMap;

View File

@@ -1,6 +1,6 @@
use super::FastValue;
use crate::common::BinarySerializable;
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::{Directory, RamDirectory, WritePtr};

View File

@@ -1,4 +1,4 @@
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError};

View File

@@ -1,5 +1,5 @@
use crate::common::BinarySerializable;
use crate::common::CompositeWrite;
use crate::directory::CompositeWrite;
use crate::common::CountingWriter;
use crate::directory::WritePtr;
use crate::schema::Field;

View File

@@ -1,5 +1,5 @@
use super::{fieldnorm_to_id, id_to_fieldnorm};
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::schema::Field;

View File

@@ -1,4 +1,4 @@
use crate::common::CompositeWrite;
use crate::directory::CompositeWrite;
use crate::directory::WritePtr;
use crate::schema::Field;
use std::io;

View File

@@ -1,7 +1,7 @@
use super::operation::{AddOperation, UserOperation};
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use crate::common::BitSet;
use common::BitSet;
use crate::core::Index;
use crate::core::Segment;
use crate::core::SegmentComponent;

View File

@@ -1,7 +1,7 @@
use crate::common::BitSet;
use crate::core::SegmentId;
use crate::core::SegmentMeta;
use crate::indexer::delete_queue::DeleteCursor;
use common::BitSet;
use std::fmt;
/// A segment entry describes the state of

View File

@@ -1,7 +1,8 @@
use super::TermInfo;
use crate::common::CountingWriter;
use crate::common::{BinarySerializable, VInt};
use crate::common::{CompositeWrite, CountingWriter};
use crate::core::Segment;
use crate::directory::CompositeWrite;
use crate::directory::WritePtr;
use crate::fieldnorm::FieldNormReader;
use crate::positions::PositionSerializer;

View File

@@ -1,4 +1,4 @@
use crate::common::BitSet;
use common::BitSet;
use crate::core::SegmentReader;
use crate::query::ConstScorer;
use crate::query::{BitSetDocSet, Explanation};

View File

@@ -1,6 +1,6 @@
use crate::common::{BitSet, TinySet};
use crate::docset::{DocSet, TERMINATED};
use crate::DocId;
use common::{BitSet, TinySet};
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
///
@@ -96,10 +96,13 @@ impl DocSet for BitSetDocSet {
#[cfg(test)]
mod tests {
use std::collections::BTreeSet;
use super::BitSetDocSet;
use crate::common::BitSet;
use crate::docset::{DocSet, TERMINATED};
use crate::tests::generate_nonunique_unsorted;
use crate::DocId;
use common::BitSet;
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
let mut docset = BitSet::with_max_value(max_doc);
@@ -109,6 +112,29 @@ mod tests {
BitSetDocSet::from(docset)
}
#[test]
fn test_bitset_large() {
let arr = generate_nonunique_unsorted(100_000, 5_000);
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
let mut bitset = BitSet::with_max_value(100_000);
for el in arr {
btreeset.insert(el);
bitset.insert(el);
}
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {
assert!(remaining);
assert_eq!(bitset_docset.doc(), el);
remaining = bitset_docset.advance() != TERMINATED;
}
assert!(!remaining);
}
#[test]
fn test_empty() {
let bitset = BitSet::with_max_value(1000);

View File

@@ -1,4 +1,3 @@
use crate::common::BitSet;
use crate::core::Searcher;
use crate::core::SegmentReader;
use crate::error::TantivyError;
@@ -10,6 +9,7 @@ use crate::schema::Type;
use crate::schema::{Field, IndexRecordOption, Term};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::{DocId, Score};
use common::BitSet;
use std::io;
use std::ops::{Bound, Range};

View File

@@ -1,9 +1,9 @@
use crate::common::TinySet;
use crate::docset::{DocSet, TERMINATED};
use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::Scorer;
use crate::DocId;
use crate::Score;
use common::TinySet;
const HORIZON_NUM_TINYBITSETS: usize = 64;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;