mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
move bitset to common crate, move composite file to directory
This commit is contained in:
@@ -10,3 +10,6 @@ description = "common traits and utility functions used by multiple tantivy subc
|
||||
|
||||
[dependencies]
|
||||
byteorder = "1.4.3"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.8.4"
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::fmt;
|
||||
use std::u64;
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
pub(crate) struct TinySet(u64);
|
||||
pub struct TinySet(u64);
|
||||
|
||||
impl fmt::Debug for TinySet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
@@ -178,7 +178,7 @@ impl BitSet {
|
||||
///
|
||||
/// Reminder: the tiny set with the bucket `bucket`, represents the
|
||||
/// elements from `bucket * 64` to `(bucket+1) * 64`.
|
||||
pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
|
||||
pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
|
||||
self.tinysets[bucket as usize..]
|
||||
.iter()
|
||||
.cloned()
|
||||
@@ -193,7 +193,7 @@ impl BitSet {
|
||||
/// Returns the tiny bitset representing the
|
||||
/// the set restricted to the number range from
|
||||
/// `bucket * 64` to `(bucket + 1) * 64`.
|
||||
pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
|
||||
pub fn tinyset(&self, bucket: u32) -> TinySet {
|
||||
self.tinysets[bucket as usize]
|
||||
}
|
||||
}
|
||||
@@ -203,11 +203,9 @@ mod tests {
|
||||
|
||||
use super::BitSet;
|
||||
use super::TinySet;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::query::BitSetDocSet;
|
||||
use crate::tests;
|
||||
use crate::tests::generate_nonunique_unsorted;
|
||||
use std::collections::BTreeSet;
|
||||
use rand::distributions::Bernoulli;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use std::collections::HashSet;
|
||||
|
||||
#[test]
|
||||
@@ -263,29 +261,6 @@ mod tests {
|
||||
test_against_hashset(&[62u32, 63u32], 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bitset_large() {
|
||||
let arr = generate_nonunique_unsorted(100_000, 5_000);
|
||||
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
|
||||
let mut bitset = BitSet::with_max_value(100_000);
|
||||
for el in arr {
|
||||
btreeset.insert(el);
|
||||
bitset.insert(el);
|
||||
}
|
||||
for i in 0..100_000 {
|
||||
assert_eq!(btreeset.contains(&i), bitset.contains(i));
|
||||
}
|
||||
assert_eq!(btreeset.len(), bitset.len());
|
||||
let mut bitset_docset = BitSetDocSet::from(bitset);
|
||||
let mut remaining = true;
|
||||
for el in btreeset.into_iter() {
|
||||
assert!(remaining);
|
||||
assert_eq!(bitset_docset.doc(), el);
|
||||
remaining = bitset_docset.advance() != TERMINATED;
|
||||
}
|
||||
assert!(!remaining);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bitset_num_buckets() {
|
||||
use super::num_buckets;
|
||||
@@ -340,10 +315,23 @@ mod tests {
|
||||
assert_eq!(bitset.len(), 3);
|
||||
}
|
||||
|
||||
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||
StdRng::from_seed([seed_val; 32])
|
||||
.sample_iter(&Bernoulli::new(ratio).unwrap())
|
||||
.take(n as usize)
|
||||
.enumerate()
|
||||
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
|
||||
sample_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bitset_clear() {
|
||||
let mut bitset = BitSet::with_max_value(1_000);
|
||||
let els = tests::sample(1_000, 0.01f64);
|
||||
let els = sample(1_000, 0.01f64);
|
||||
for &el in &els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
@@ -1,9 +1,11 @@
|
||||
pub use byteorder::LittleEndian as Endianness;
|
||||
|
||||
mod bitset;
|
||||
mod serialize;
|
||||
mod vint;
|
||||
mod writer;
|
||||
|
||||
pub use bitset::*;
|
||||
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
||||
pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
|
||||
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
mod bitset;
|
||||
mod composite_file;
|
||||
|
||||
pub use self::bitset::BitSet;
|
||||
pub(crate) use self::bitset::TinySet;
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use byteorder::LittleEndian as Endianness;
|
||||
pub use common::CountingWriter;
|
||||
pub use common::{
|
||||
|
||||
@@ -2,7 +2,9 @@ use crate::core::InvertedIndexReader;
|
||||
use crate::core::Segment;
|
||||
use crate::core::SegmentComponent;
|
||||
use crate::core::SegmentId;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::error::DataCorruption;
|
||||
use crate::fastfield::DeleteBitSet;
|
||||
use crate::fastfield::FacetReader;
|
||||
use crate::fastfield::FastFieldReaders;
|
||||
@@ -14,7 +16,6 @@ use crate::space_usage::SegmentSpaceUsage;
|
||||
use crate::store::StoreReader;
|
||||
use crate::termdict::TermDictionary;
|
||||
use crate::DocId;
|
||||
use crate::{common::CompositeFile, error::DataCorruption};
|
||||
use fail::fail_point;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::common::BinarySerializable;
|
||||
use crate::common::CountingWriter;
|
||||
use crate::common::HasLen;
|
||||
use crate::common::VInt;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::directory::{TerminatingWrite, WritePtr};
|
||||
@@ -11,8 +12,6 @@ use std::io::{self, Read, Write};
|
||||
use std::iter::ExactSizeIterator;
|
||||
use std::ops::Range;
|
||||
|
||||
use super::HasLen;
|
||||
|
||||
#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
|
||||
pub struct FileAddr {
|
||||
field: Field,
|
||||
@@ -20,6 +20,9 @@ mod watch_event_router;
|
||||
/// Errors specific to the directory module.
|
||||
pub mod error;
|
||||
|
||||
mod composite_file;
|
||||
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use self::directory::DirectoryLock;
|
||||
pub use self::directory::{Directory, DirectoryClone};
|
||||
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::common::{BitSet, HasLen};
|
||||
use crate::common::HasLen;
|
||||
use common::BitSet;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::directory::WritePtr;
|
||||
|
||||
@@ -213,7 +213,7 @@ fn value_to_u64(value: &Value) -> u64 {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::common::CompositeFile;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::common::HasLen;
|
||||
use crate::directory::{Directory, RamDirectory, WritePtr};
|
||||
use crate::merge_policy::NoMergePolicy;
|
||||
@@ -588,7 +588,7 @@ mod bench {
|
||||
use super::tests::FIELD;
|
||||
use super::tests::{generate_permutation, SCHEMA};
|
||||
use super::*;
|
||||
use crate::common::CompositeFile;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::{Directory, RamDirectory, WritePtr};
|
||||
use crate::fastfield::FastFieldReader;
|
||||
use std::collections::HashMap;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use super::FastValue;
|
||||
use crate::common::BinarySerializable;
|
||||
use crate::common::CompositeFile;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::directory::{Directory, RamDirectory, WritePtr};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::common::CompositeFile;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::fastfield::MultiValuedFastFieldReader;
|
||||
use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::common::BinarySerializable;
|
||||
use crate::common::CompositeWrite;
|
||||
use crate::directory::CompositeWrite;
|
||||
use crate::common::CountingWriter;
|
||||
use crate::directory::WritePtr;
|
||||
use crate::schema::Field;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use super::{fieldnorm_to_id, id_to_fieldnorm};
|
||||
use crate::common::CompositeFile;
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::FileSlice;
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::schema::Field;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::common::CompositeWrite;
|
||||
use crate::directory::CompositeWrite;
|
||||
use crate::directory::WritePtr;
|
||||
use crate::schema::Field;
|
||||
use std::io;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use super::operation::{AddOperation, UserOperation};
|
||||
use super::segment_updater::SegmentUpdater;
|
||||
use super::PreparedCommit;
|
||||
use crate::common::BitSet;
|
||||
use common::BitSet;
|
||||
use crate::core::Index;
|
||||
use crate::core::Segment;
|
||||
use crate::core::SegmentComponent;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::common::BitSet;
|
||||
use crate::core::SegmentId;
|
||||
use crate::core::SegmentMeta;
|
||||
use crate::indexer::delete_queue::DeleteCursor;
|
||||
use common::BitSet;
|
||||
use std::fmt;
|
||||
|
||||
/// A segment entry describes the state of
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use super::TermInfo;
|
||||
use crate::common::CountingWriter;
|
||||
use crate::common::{BinarySerializable, VInt};
|
||||
use crate::common::{CompositeWrite, CountingWriter};
|
||||
use crate::core::Segment;
|
||||
use crate::directory::CompositeWrite;
|
||||
use crate::directory::WritePtr;
|
||||
use crate::fieldnorm::FieldNormReader;
|
||||
use crate::positions::PositionSerializer;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::common::BitSet;
|
||||
use common::BitSet;
|
||||
use crate::core::SegmentReader;
|
||||
use crate::query::ConstScorer;
|
||||
use crate::query::{BitSetDocSet, Explanation};
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::common::{BitSet, TinySet};
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::DocId;
|
||||
use common::{BitSet, TinySet};
|
||||
|
||||
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
|
||||
///
|
||||
@@ -96,10 +96,13 @@ impl DocSet for BitSetDocSet {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use super::BitSetDocSet;
|
||||
use crate::common::BitSet;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::tests::generate_nonunique_unsorted;
|
||||
use crate::DocId;
|
||||
use common::BitSet;
|
||||
|
||||
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
|
||||
let mut docset = BitSet::with_max_value(max_doc);
|
||||
@@ -109,6 +112,29 @@ mod tests {
|
||||
BitSetDocSet::from(docset)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bitset_large() {
|
||||
let arr = generate_nonunique_unsorted(100_000, 5_000);
|
||||
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
|
||||
let mut bitset = BitSet::with_max_value(100_000);
|
||||
for el in arr {
|
||||
btreeset.insert(el);
|
||||
bitset.insert(el);
|
||||
}
|
||||
for i in 0..100_000 {
|
||||
assert_eq!(btreeset.contains(&i), bitset.contains(i));
|
||||
}
|
||||
assert_eq!(btreeset.len(), bitset.len());
|
||||
let mut bitset_docset = BitSetDocSet::from(bitset);
|
||||
let mut remaining = true;
|
||||
for el in btreeset.into_iter() {
|
||||
assert!(remaining);
|
||||
assert_eq!(bitset_docset.doc(), el);
|
||||
remaining = bitset_docset.advance() != TERMINATED;
|
||||
}
|
||||
assert!(!remaining);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty() {
|
||||
let bitset = BitSet::with_max_value(1000);
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::common::BitSet;
|
||||
use crate::core::Searcher;
|
||||
use crate::core::SegmentReader;
|
||||
use crate::error::TantivyError;
|
||||
@@ -10,6 +9,7 @@ use crate::schema::Type;
|
||||
use crate::schema::{Field, IndexRecordOption, Term};
|
||||
use crate::termdict::{TermDictionary, TermStreamer};
|
||||
use crate::{DocId, Score};
|
||||
use common::BitSet;
|
||||
use std::io;
|
||||
use std::ops::{Bound, Range};
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::common::TinySet;
|
||||
use crate::docset::{DocSet, TERMINATED};
|
||||
use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
|
||||
use crate::query::Scorer;
|
||||
use crate::DocId;
|
||||
use crate::Score;
|
||||
use common::TinySet;
|
||||
|
||||
const HORIZON_NUM_TINYBITSETS: usize = 64;
|
||||
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
|
||||
|
||||
Reference in New Issue
Block a user