diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs index 1697a8488..141fe66a5 100644 --- a/bitpacker/src/lib.rs +++ b/bitpacker/src/lib.rs @@ -50,3 +50,32 @@ where } None } + +#[test] +fn test_compute_num_bits() { + assert_eq!(compute_num_bits(1), 1u8); + assert_eq!(compute_num_bits(0), 0u8); + assert_eq!(compute_num_bits(2), 2u8); + assert_eq!(compute_num_bits(3), 2u8); + assert_eq!(compute_num_bits(4), 3u8); + assert_eq!(compute_num_bits(255), 8u8); + assert_eq!(compute_num_bits(256), 9u8); + assert_eq!(compute_num_bits(5_000_000_000), 33u8); +} + +#[test] +fn test_minmax_empty() { + let vals: Vec = vec![]; + assert_eq!(minmax(vals.into_iter()), None); +} + +#[test] +fn test_minmax_one() { + assert_eq!(minmax(vec![1].into_iter()), Some((1, 1))); +} + +#[test] +fn test_minmax_two() { + assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2))); + assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2))); +} diff --git a/common/Cargo.toml b/common/Cargo.toml index d262da05e..94b40a459 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -10,3 +10,7 @@ description = "common traits and utility functions used by multiple tantivy subc [dependencies] byteorder = "1.4.3" + +[dev-dependencies] +proptest = "1.0.0" +rand = "0.8.4" diff --git a/src/common/bitset.rs b/common/src/bitset.rs similarity index 89% rename from src/common/bitset.rs rename to common/src/bitset.rs index 0eb4d8da6..942a94269 100644 --- a/src/common/bitset.rs +++ b/common/src/bitset.rs @@ -2,7 +2,7 @@ use std::fmt; use std::u64; #[derive(Clone, Copy, Eq, PartialEq)] -pub(crate) struct TinySet(u64); +pub struct TinySet(u64); impl fmt::Debug for TinySet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -178,7 +178,7 @@ impl BitSet { /// /// Reminder: the tiny set with the bucket `bucket`, represents the /// elements from `bucket * 64` to `(bucket+1) * 64`. 
- pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option { + pub fn first_non_empty_bucket(&self, bucket: u32) -> Option { self.tinysets[bucket as usize..] .iter() .cloned() @@ -193,7 +193,7 @@ impl BitSet { /// Returns the tiny bitset representing the /// the set restricted to the number range from /// `bucket * 64` to `(bucket + 1) * 64`. - pub(crate) fn tinyset(&self, bucket: u32) -> TinySet { + pub fn tinyset(&self, bucket: u32) -> TinySet { self.tinysets[bucket as usize] } } @@ -203,11 +203,9 @@ mod tests { use super::BitSet; use super::TinySet; - use crate::docset::{DocSet, TERMINATED}; - use crate::query::BitSetDocSet; - use crate::tests; - use crate::tests::generate_nonunique_unsorted; - use std::collections::BTreeSet; + use rand::distributions::Bernoulli; + use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; use std::collections::HashSet; #[test] @@ -263,29 +261,6 @@ mod tests { test_against_hashset(&[62u32, 63u32], 64); } - #[test] - fn test_bitset_large() { - let arr = generate_nonunique_unsorted(100_000, 5_000); - let mut btreeset: BTreeSet = BTreeSet::new(); - let mut bitset = BitSet::with_max_value(100_000); - for el in arr { - btreeset.insert(el); - bitset.insert(el); - } - for i in 0..100_000 { - assert_eq!(btreeset.contains(&i), bitset.contains(i)); - } - assert_eq!(btreeset.len(), bitset.len()); - let mut bitset_docset = BitSetDocSet::from(bitset); - let mut remaining = true; - for el in btreeset.into_iter() { - assert!(remaining); - assert_eq!(bitset_docset.doc(), el); - remaining = bitset_docset.advance() != TERMINATED; - } - assert!(!remaining); - } - #[test] fn test_bitset_num_buckets() { use super::num_buckets; @@ -340,10 +315,23 @@ mod tests { assert_eq!(bitset.len(), 3); } + pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec { + StdRng::from_seed([seed_val; 32]) + .sample_iter(&Bernoulli::new(ratio).unwrap()) + .take(n as usize) + .enumerate() + .filter_map(|(val, keep)| if keep { Some(val as u32) } else { None 
}) + .collect() + } + + pub fn sample(n: u32, ratio: f64) -> Vec { + sample_with_seed(n, ratio, 4) + } + #[test] fn test_bitset_clear() { let mut bitset = BitSet::with_max_value(1_000); - let els = tests::sample(1_000, 0.01f64); + let els = sample(1_000, 0.01f64); for &el in &els { bitset.insert(el); } diff --git a/common/src/lib.rs b/common/src/lib.rs index b3c24163b..ef95ce659 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,9 +1,167 @@ +use std::ops::Deref; + pub use byteorder::LittleEndian as Endianness; +mod bitset; mod serialize; mod vint; mod writer; +pub use bitset::*; pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt}; pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite}; + +/// Has length trait +pub trait HasLen { + /// Return length + fn len(&self) -> usize; + + /// Returns true iff empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl> HasLen for T { + fn len(&self) -> usize { + self.deref().len() + } +} + +const HIGHEST_BIT: u64 = 1 << 63; + +/// Maps a `i64` to `u64` +/// +/// For simplicity, tantivy internally handles `i64` as `u64`. +/// The mapping is defined by this function. +/// +/// Maps `i64` to `u64` so that +/// `-2^63 .. 2^63-1` is mapped +/// to +/// `0 .. 2^64-1` +/// in that order. +/// +/// This is more suited than simply casting (`val as u64`) +/// because of bitpacking. +/// +/// Imagine a list of `i64` ranging from -10 to 10. +/// When casting negative values, the negative values are projected +/// to values over 2^63, and all values end up requiring 64 bits. +/// +/// # See also +/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html). +#[inline] +pub fn i64_to_u64(val: i64) -> u64 { + (val as u64) ^ HIGHEST_BIT +} + +/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html). 
+#[inline] +pub fn u64_to_i64(val: u64) -> i64 { + (val ^ HIGHEST_BIT) as i64 +} + +/// Maps a `f64` to `u64` +/// +/// For simplicity, tantivy internally handles `f64` as `u64`. +/// The mapping is defined by this function. +/// +/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved. +/// +/// This is more suited than simply casting (`val as u64`) +/// which would truncate the result +/// +/// # Reference +/// +/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/) +/// explains the mapping in a clear manner. +/// +/// # See also +/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html). +#[inline] +pub fn f64_to_u64(val: f64) -> u64 { + let bits = val.to_bits(); + if val.is_sign_positive() { + bits ^ HIGHEST_BIT + } else { + !bits + } +} + +/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html). +#[inline] +pub fn u64_to_f64(val: u64) -> f64 { + f64::from_bits(if val & HIGHEST_BIT != 0 { + val ^ HIGHEST_BIT + } else { + !val + }) +} + +#[cfg(test)] +pub mod test { + + use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; + use super::{BinarySerializable, FixedSize}; + use proptest::prelude::*; + use std::f64; + + fn test_i64_converter_helper(val: i64) { + assert_eq!(u64_to_i64(i64_to_u64(val)), val); + } + + fn test_f64_converter_helper(val: f64) { + assert_eq!(u64_to_f64(f64_to_u64(val)), val); + } + + pub fn fixed_size_test() { + let mut buffer = Vec::new(); + O::default().serialize(&mut buffer).unwrap(); + assert_eq!(buffer.len(), O::SIZE_IN_BYTES); + } + + proptest! 
{ + #[test] + fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) { + let left_u64 = f64_to_u64(left); + let right_u64 = f64_to_u64(right); + assert_eq!(left_u64 < right_u64, left < right); + } + } + + #[test] + fn test_i64_converter() { + assert_eq!(i64_to_u64(i64::min_value()), u64::min_value()); + assert_eq!(i64_to_u64(i64::max_value()), u64::max_value()); + test_i64_converter_helper(0i64); + test_i64_converter_helper(i64::min_value()); + test_i64_converter_helper(i64::max_value()); + for i in -1000i64..1000i64 { + test_i64_converter_helper(i); + } + } + + #[test] + fn test_f64_converter() { + test_f64_converter_helper(f64::INFINITY); + test_f64_converter_helper(f64::NEG_INFINITY); + test_f64_converter_helper(0.0); + test_f64_converter_helper(-0.0); + test_f64_converter_helper(1.0); + test_f64_converter_helper(-1.0); + } + + #[test] + fn test_f64_order() { + assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)) + .contains(&f64_to_u64(f64::NAN))); //nan is not a number + assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa + assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent + assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa + assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg + assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5)); + } +} diff --git a/src/common/mod.rs b/src/common/mod.rs deleted file mode 100644 index b82b352f8..000000000 --- a/src/common/mod.rs +++ /dev/null @@ -1,203 +0,0 @@ -mod bitset; -mod composite_file; - -pub use self::bitset::BitSet; -pub(crate) use self::bitset::TinySet; -pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; -pub use byteorder::LittleEndian as Endianness; -pub use common::CountingWriter; -pub use common::{ - read_u32_vint, 
read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, -}; -pub use common::{BinarySerializable, DeserializeFrom, FixedSize}; - -/// Segment's max doc must be `< MAX_DOC_LIMIT`. -/// -/// We do not allow segments with more than -pub const MAX_DOC_LIMIT: u32 = 1 << 31; - -/// Has length trait -pub trait HasLen { - /// Return length - fn len(&self) -> usize; - - /// Returns true iff empty. - fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -const HIGHEST_BIT: u64 = 1 << 63; - -/// Maps a `i64` to `u64` -/// -/// For simplicity, tantivy internally handles `i64` as `u64`. -/// The mapping is defined by this function. -/// -/// Maps `i64` to `u64` so that -/// `-2^63 .. 2^63-1` is mapped -/// to -/// `0 .. 2^64-1` -/// in that order. -/// -/// This is more suited than simply casting (`val as u64`) -/// because of bitpacking. -/// -/// Imagine a list of `i64` ranging from -10 to 10. -/// When casting negative values, the negative values are projected -/// to values over 2^63, and all values end up requiring 64 bits. -/// -/// # See also -/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html). -#[inline] -pub fn i64_to_u64(val: i64) -> u64 { - (val as u64) ^ HIGHEST_BIT -} - -/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html). -#[inline] -pub fn u64_to_i64(val: u64) -> i64 { - (val ^ HIGHEST_BIT) as i64 -} - -/// Maps a `f64` to `u64` -/// -/// For simplicity, tantivy internally handles `f64` as `u64`. -/// The mapping is defined by this function. -/// -/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved. -/// -/// This is more suited than simply casting (`val as u64`) -/// which would truncate the result -/// -/// # Reference -/// -/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/) -/// explains the mapping in a clear manner. 
-/// -/// # See also -/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html). -#[inline] -pub fn f64_to_u64(val: f64) -> u64 { - let bits = val.to_bits(); - if val.is_sign_positive() { - bits ^ HIGHEST_BIT - } else { - !bits - } -} - -/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html). -#[inline] -pub fn u64_to_f64(val: u64) -> f64 { - f64::from_bits(if val & HIGHEST_BIT != 0 { - val ^ HIGHEST_BIT - } else { - !val - }) -} - -#[cfg(test)] -pub(crate) mod test { - - use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; - use common::{BinarySerializable, FixedSize}; - use proptest::prelude::*; - use std::f64; - use tantivy_bitpacker::compute_num_bits; - pub use tantivy_bitpacker::minmax; - - fn test_i64_converter_helper(val: i64) { - assert_eq!(u64_to_i64(i64_to_u64(val)), val); - } - - fn test_f64_converter_helper(val: f64) { - assert_eq!(u64_to_f64(f64_to_u64(val)), val); - } - - pub fn fixed_size_test() { - let mut buffer = Vec::new(); - O::default().serialize(&mut buffer).unwrap(); - assert_eq!(buffer.len(), O::SIZE_IN_BYTES); - } - - proptest! 
{ - #[test] - fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) { - let left_u64 = f64_to_u64(left); - let right_u64 = f64_to_u64(right); - assert_eq!(left_u64 < right_u64, left < right); - } - } - - #[test] - fn test_i64_converter() { - assert_eq!(i64_to_u64(i64::min_value()), u64::min_value()); - assert_eq!(i64_to_u64(i64::max_value()), u64::max_value()); - test_i64_converter_helper(0i64); - test_i64_converter_helper(i64::min_value()); - test_i64_converter_helper(i64::max_value()); - for i in -1000i64..1000i64 { - test_i64_converter_helper(i); - } - } - - #[test] - fn test_f64_converter() { - test_f64_converter_helper(f64::INFINITY); - test_f64_converter_helper(f64::NEG_INFINITY); - test_f64_converter_helper(0.0); - test_f64_converter_helper(-0.0); - test_f64_converter_helper(1.0); - test_f64_converter_helper(-1.0); - } - - #[test] - fn test_f64_order() { - assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)) - .contains(&f64_to_u64(f64::NAN))); //nan is not a number - assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa - assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent - assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa - assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg - assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0)); - assert!(f64_to_u64(-2.0) < f64_to_u64(1.0)); - assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5)); - } - - #[test] - fn test_compute_num_bits() { - assert_eq!(compute_num_bits(1), 1u8); - assert_eq!(compute_num_bits(0), 0u8); - assert_eq!(compute_num_bits(2), 2u8); - assert_eq!(compute_num_bits(3), 2u8); - assert_eq!(compute_num_bits(4), 3u8); - assert_eq!(compute_num_bits(255), 8u8); - assert_eq!(compute_num_bits(256), 9u8); - assert_eq!(compute_num_bits(5_000_000_000), 33u8); - } - - #[test] - fn test_max_doc() { - // this is the first time I write a unit test for a 
constant. - assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0); - assert!((super::MAX_DOC_LIMIT as i32) < 0); - } - - #[test] - fn test_minmax_empty() { - let vals: Vec = vec![]; - assert_eq!(minmax(vals.into_iter()), None); - } - - #[test] - fn test_minmax_one() { - assert_eq!(minmax(vec![1].into_iter()), Some((1, 1))); - } - - #[test] - fn test_minmax_two() { - assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2))); - assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2))); - } -} diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 3371de1c3..e710e8ff9 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,6 +1,5 @@ use std::io; -use crate::common::BinarySerializable; use crate::directory::FileSlice; use crate::positions::PositionReader; use crate::postings::TermInfo; @@ -8,6 +7,7 @@ use crate::postings::{BlockSegmentPostings, SegmentPostings}; use crate::schema::IndexRecordOption; use crate::schema::Term; use crate::termdict::TermDictionary; +use common::BinarySerializable; /// The inverted index reader is in charge of accessing /// the inverted index associated to a specific field. 
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 27110ce31..73de5fb4c 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -2,7 +2,9 @@ use crate::core::InvertedIndexReader; use crate::core::Segment; use crate::core::SegmentComponent; use crate::core::SegmentId; +use crate::directory::CompositeFile; use crate::directory::FileSlice; +use crate::error::DataCorruption; use crate::fastfield::DeleteBitSet; use crate::fastfield::FacetReader; use crate::fastfield::FastFieldReaders; @@ -14,7 +16,6 @@ use crate::space_usage::SegmentSpaceUsage; use crate::store::StoreReader; use crate::termdict::TermDictionary; use crate::DocId; -use crate::{common::CompositeFile, error::DataCorruption}; use fail::fail_point; use std::fmt; use std::sync::Arc; diff --git a/src/common/composite_file.rs b/src/directory/composite_file.rs similarity index 97% rename from src/common/composite_file.rs rename to src/directory/composite_file.rs index babee87d1..6d542609b 100644 --- a/src/common/composite_file.rs +++ b/src/directory/composite_file.rs @@ -1,18 +1,17 @@ -use crate::common::BinarySerializable; -use crate::common::CountingWriter; -use crate::common::VInt; use crate::directory::FileSlice; use crate::directory::{TerminatingWrite, WritePtr}; use crate::schema::Field; use crate::space_usage::FieldUsage; use crate::space_usage::PerFieldSpaceUsage; +use common::BinarySerializable; +use common::CountingWriter; +use common::HasLen; +use common::VInt; use std::collections::HashMap; use std::io::{self, Read, Write}; use std::iter::ExactSizeIterator; use std::ops::Range; -use super::HasLen; - #[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)] pub struct FileAddr { field: Field, @@ -188,10 +187,10 @@ impl CompositeFile { mod test { use super::{CompositeFile, CompositeWrite}; - use crate::common::BinarySerializable; - use crate::common::VInt; use crate::directory::{Directory, RamDirectory}; use crate::schema::Field; + use 
common::BinarySerializable; + use common::VInt; use std::io::Write; use std::path::Path; diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs index 1a159ebfa..cd267da16 100644 --- a/src/directory/file_slice.rs +++ b/src/directory/file_slice.rs @@ -1,7 +1,7 @@ use stable_deref_trait::StableDeref; -use crate::common::HasLen; use crate::directory::OwnedBytes; +use common::HasLen; use std::fmt; use std::ops::Range; use std::sync::{Arc, Weak}; @@ -32,12 +32,6 @@ impl FileHandle for &'static [u8] { } } -impl> HasLen for T { - fn len(&self) -> usize { - self.deref().len() - } -} - impl From for FileSlice where B: StableDeref + Deref + 'static + Send + Sync, @@ -178,7 +172,7 @@ impl HasLen for FileSlice { #[cfg(test)] mod tests { use super::{FileHandle, FileSlice}; - use crate::common::HasLen; + use common::HasLen; use std::io; #[test] diff --git a/src/directory/footer.rs b/src/directory/footer.rs index 79eaa53e9..590088791 100644 --- a/src/directory/footer.rs +++ b/src/directory/footer.rs @@ -1,10 +1,10 @@ use crate::directory::error::Incompatibility; use crate::directory::FileSlice; use crate::{ - common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen}, directory::{AntiCallToken, TerminatingWrite}, Version, INDEX_FORMAT_VERSION, }; +use common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen}; use crc32fast::Hasher; use serde::{Deserialize, Serialize}; use std::io; @@ -156,10 +156,8 @@ mod tests { use crate::directory::footer::Footer; use crate::directory::OwnedBytes; - use crate::{ - common::BinarySerializable, - directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice}, - }; + use crate::directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice}; + use common::BinarySerializable; use std::io; #[test] diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 4ec0eda9b..397a050aa 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -485,13 +485,14 @@ mod 
tests { // The following tests are specific to the MmapDirectory use super::*; + use crate::indexer::LogMergePolicy; use crate::Index; use crate::ReloadPolicy; - use crate::{common::HasLen, indexer::LogMergePolicy}; use crate::{ schema::{Schema, SchemaBuilder, TEXT}, IndexSettings, }; + use common::HasLen; #[test] fn test_open_non_existent_path() { diff --git a/src/directory/mod.rs b/src/directory/mod.rs index fcfe90342..fcf6fdd35 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -20,6 +20,9 @@ mod watch_event_router; /// Errors specific to the directory module. pub mod error; +mod composite_file; + +pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; pub use self::directory::DirectoryLock; pub use self::directory::{Directory, DirectoryClone}; pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK}; diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 3a3f38e06..29b9042fd 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -1,9 +1,10 @@ +use crate::core::META_FILEPATH; use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError}; use crate::directory::AntiCallToken; use crate::directory::WatchCallbackList; use crate::directory::{Directory, FileSlice, WatchCallback, WatchHandle}; use crate::directory::{TerminatingWrite, WritePtr}; -use crate::{common::HasLen, core::META_FILEPATH}; +use common::HasLen; use fail::fail_point; use std::collections::HashMap; use std::fmt; diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index a2f14aa7c..421761d63 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -1,9 +1,10 @@ -use crate::common::{BitSet, HasLen}; use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::directory::WritePtr; use crate::space_usage::ByteCount; use crate::DocId; +use common::BitSet; +use common::HasLen; use std::io; use std::io::Write; @@ -110,7 +111,7 @@ impl HasLen for DeleteBitSet { 
#[cfg(test)] mod tests { use super::DeleteBitSet; - use crate::common::HasLen; + use common::HasLen; #[test] fn test_delete_bitset_empty() { diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index bc46e5bec..a3dc8c17f 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -40,11 +40,11 @@ pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use crate::schema::Cardinality; use crate::schema::FieldType; use crate::schema::Value; +use crate::DocId; use crate::{ chrono::{NaiveDateTime, Utc}, schema::Type, }; -use crate::{common, DocId}; mod bytes; mod delete; @@ -213,8 +213,7 @@ fn value_to_u64(value: &Value) -> u64 { mod tests { use super::*; - use crate::common::CompositeFile; - use crate::common::HasLen; + use crate::directory::CompositeFile; use crate::directory::{Directory, RamDirectory, WritePtr}; use crate::merge_policy::NoMergePolicy; use crate::schema::Field; @@ -222,6 +221,7 @@ mod tests { use crate::schema::FAST; use crate::schema::{Document, IntOptions}; use crate::{Index, SegmentId, SegmentReader}; + use common::HasLen; use once_cell::sync::Lazy; use rand::prelude::SliceRandom; use rand::rngs::StdRng; @@ -588,7 +588,7 @@ mod bench { use super::tests::FIELD; use super::tests::{generate_permutation, SCHEMA}; use super::*; - use crate::common::CompositeFile; + use crate::directory::CompositeFile; use crate::directory::{Directory, RamDirectory, WritePtr}; use crate::fastfield::FastFieldReader; use std::collections::HashMap; diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 47adcddfb..0fbefb7d6 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -1,6 +1,5 @@ use super::FastValue; -use crate::common::BinarySerializable; -use crate::common::CompositeFile; +use crate::directory::CompositeFile; use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::directory::{Directory, RamDirectory, WritePtr}; @@ -8,6 +7,7 @@ use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter}; 
use crate::schema::Schema; use crate::schema::FAST; use crate::DocId; +use common::BinarySerializable; use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader; use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader; diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index f202bbb35..b85754641 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -1,4 +1,4 @@ -use crate::common::CompositeFile; +use crate::directory::CompositeFile; use crate::directory::FileSlice; use crate::fastfield::MultiValuedFastFieldReader; use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError}; diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 2279ce1b5..7d8ea8fe5 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -1,8 +1,8 @@ -use crate::common::BinarySerializable; -use crate::common::CompositeWrite; -use crate::common::CountingWriter; +use crate::directory::CompositeWrite; use crate::directory::WritePtr; use crate::schema::Field; +use common::BinarySerializable; +use common::CountingWriter; pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer; pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializerLegacy; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 9e9893454..f7d4110ff 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,12 +1,12 @@ use super::multivalued::MultiValuedFastFieldWriter; use super::serializer::FastFieldStats; use super::FastFieldDataAccess; -use crate::common; use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema}; use 
crate::termdict::TermOrdinal; +use common; use fnv::FnvHashMap; use std::collections::HashMap; use std::io; diff --git a/src/fieldnorm/reader.rs b/src/fieldnorm/reader.rs index 71535e3f9..ea264b2ff 100644 --- a/src/fieldnorm/reader.rs +++ b/src/fieldnorm/reader.rs @@ -1,5 +1,5 @@ use super::{fieldnorm_to_id, id_to_fieldnorm}; -use crate::common::CompositeFile; +use crate::directory::CompositeFile; use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::schema::Field; diff --git a/src/fieldnorm/serializer.rs b/src/fieldnorm/serializer.rs index 057626fcc..54043b1e9 100644 --- a/src/fieldnorm/serializer.rs +++ b/src/fieldnorm/serializer.rs @@ -1,4 +1,4 @@ -use crate::common::CompositeWrite; +use crate::directory::CompositeWrite; use crate::directory::WritePtr; use crate::schema::Field; use std::io; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 183741a20..f498099b9 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1,7 +1,6 @@ use super::operation::{AddOperation, UserOperation}; use super::segment_updater::SegmentUpdater; use super::PreparedCommit; -use crate::common::BitSet; use crate::core::Index; use crate::core::Segment; use crate::core::SegmentComponent; @@ -24,6 +23,7 @@ use crate::schema::Document; use crate::schema::IndexRecordOption; use crate::schema::Term; use crate::Opstamp; +use common::BitSet; use crossbeam::channel; use futures::executor::block_on; use futures::future::Future; diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index b737f533c..8d07506cc 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -5,6 +5,7 @@ use crate::fastfield::DynamicFastFieldReader; use crate::fastfield::FastFieldDataAccess; use crate::fastfield::FastFieldReader; use crate::fastfield::FastFieldStats; +use crate::fastfield::MultiValueLength; use crate::fastfield::MultiValuedFastFieldReader; use crate::fieldnorm::FieldNormsSerializer; use crate::fieldnorm::FieldNormsWriter; 
@@ -19,9 +20,8 @@ use crate::schema::{Field, Schema}; use crate::store::StoreWriter; use crate::termdict::TermMerger; use crate::termdict::TermOrdinal; +use crate::IndexSettings; use crate::IndexSortByField; -use crate::{common::HasLen, fastfield::MultiValueLength}; -use crate::{common::MAX_DOC_LIMIT, IndexSettings}; use crate::{core::Segment, indexer::doc_id_mapping::expect_field_id_for_sort_field}; use crate::{core::SegmentReader, Order}; use crate::{ @@ -29,6 +29,7 @@ use crate::{ SegmentOrdinal, }; use crate::{DocId, InvertedIndexReader, SegmentComponent}; +use common::HasLen; use itertools::Itertools; use measure_time::debug_time; use std::cmp; @@ -36,6 +37,11 @@ use std::collections::HashMap; use std::sync::Arc; use tantivy_bitpacker::minmax; +/// Segment's max doc must be `< MAX_DOC_LIMIT`. +/// +/// We do not allow segments with more than `MAX_DOC_LIMIT - 1` (i.e. 2^31 - 1) documents. +pub const MAX_DOC_LIMIT: u32 = 1 << 31; + fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result { let mut total_tokens = 0u64; let mut count: [usize; 256] = [0; 256]; @@ -2075,4 +2081,11 @@ mod tests { Ok(()) } + + #[test] + fn test_max_doc() { + // this is the first time I write a unit test for a constant.
+ assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0); + assert!((super::MAX_DOC_LIMIT as i32) < 0); + } } diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs index 1808fd1da..4ac352e50 100644 --- a/src/indexer/segment_entry.rs +++ b/src/indexer/segment_entry.rs @@ -1,7 +1,7 @@ -use crate::common::BitSet; use crate::core::SegmentId; use crate::core::SegmentMeta; use crate::indexer::delete_queue::DeleteCursor; +use common::BitSet; use std::fmt; /// A segment entry describes the state of diff --git a/src/lib.rs b/src/lib.rs index bc1beb88d..314ebb93a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,7 +135,6 @@ pub type Result = std::result::Result; /// Tantivy DateTime pub type DateTime = chrono::DateTime; -mod common; mod core; mod indexer; @@ -163,8 +162,6 @@ pub use self::snippet::{Snippet, SnippetGenerator}; mod docset; pub use self::docset::{DocSet, TERMINATED}; -pub use crate::common::HasLen; -pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; pub use crate::core::{Executor, SegmentComponent}; pub use crate::core::{ Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, Order, Searcher, Segment, @@ -178,6 +175,8 @@ pub use crate::indexer::IndexWriter; pub use crate::postings::Postings; pub use crate::reader::LeasedItem; pub use crate::schema::{Document, Term}; +pub use common::HasLen; +pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; use std::fmt; use once_cell::sync::Lazy; @@ -293,7 +292,7 @@ pub struct DocAddress { } #[cfg(test)] -mod tests { +pub mod tests { use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE; use crate::core::SegmentReader; use crate::docset::{DocSet, TERMINATED}; @@ -304,11 +303,18 @@ mod tests { use crate::Index; use crate::Postings; use crate::ReloadPolicy; + use common::{BinarySerializable, FixedSize}; use rand::distributions::Bernoulli; use rand::distributions::Uniform; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; + pub fn fixed_size_test() { + let mut 
buffer = Vec::new(); + O::default().serialize(&mut buffer).unwrap(); + assert_eq!(buffer.len(), O::SIZE_IN_BYTES); + } + /// Checks if left and right are close one to each other. /// Panics if the two values are more than 0.5% apart. #[macro_export] diff --git a/src/positions/reader.rs b/src/positions/reader.rs index 5a046ad1d..25f857cc1 100644 --- a/src/positions/reader.rs +++ b/src/positions/reader.rs @@ -1,9 +1,9 @@ use std::io; -use crate::common::{BinarySerializable, VInt}; use crate::directory::OwnedBytes; use crate::positions::COMPRESSION_BLOCK_SIZE; use crate::postings::compression::{BlockDecoder, VIntDecoder}; +use common::{BinarySerializable, VInt}; /// When accessing the position of a term, we get a positions_idx from the `Terminfo`. /// This means we need to skip to the `nth` positions efficiently. diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs index 45a06c5c8..23f242335 100644 --- a/src/positions/serializer.rs +++ b/src/positions/serializer.rs @@ -1,7 +1,7 @@ -use crate::common::{BinarySerializable, CountingWriter, VInt}; use crate::positions::COMPRESSION_BLOCK_SIZE; use crate::postings::compression::BlockEncoder; use crate::postings::compression::VIntEncoder; +use common::{BinarySerializable, CountingWriter, VInt}; use std::io::{self, Write}; /// The PositionSerializer is in charge of serializing all of the positions diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index f3877b84f..d12e8e994 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -1,6 +1,5 @@ use std::io; -use crate::common::{BinarySerializable, VInt}; use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::fieldnorm::FieldNormReader; @@ -9,6 +8,7 @@ use crate::postings::{BlockInfo, FreqReadingOption, SkipReader}; use crate::query::Bm25Weight; use crate::schema::IndexRecordOption; use crate::{DocId, Score, TERMINATED}; +use common::{BinarySerializable, 
VInt}; fn max_score>(mut it: I) -> Option { it.next().map(|first| it.fold(first, Score::max)) @@ -347,7 +347,6 @@ impl BlockSegmentPostings { #[cfg(test)] mod tests { use super::BlockSegmentPostings; - use crate::common::HasLen; use crate::core::Index; use crate::docset::{DocSet, TERMINATED}; use crate::postings::compression::COMPRESSION_BLOCK_SIZE; @@ -358,6 +357,7 @@ mod tests { use crate::schema::Term; use crate::schema::INDEXED; use crate::DocId; + use common::HasLen; #[test] fn test_empty_segment_postings() { diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 138ebcdf1..84a250b65 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -1,5 +1,5 @@ -use crate::common::FixedSize; use bitpacking::{BitPacker, BitPacker4x}; +use common::FixedSize; pub const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN; const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES; diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index a77d5327b..11e8447fd 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -1,10 +1,8 @@ use super::stacker::{ExpUnrolledLinkedList, MemoryArena}; +use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::FieldSerializer; use crate::DocId; -use crate::{ - common::{read_u32_vint, write_u32_vint}, - indexer::doc_id_mapping::DocIdMapping, -}; +use common::{read_u32_vint, write_u32_vint}; const POSITION_END: u32 = 0; diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index eaf36440f..aa470d99f 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,4 +1,3 @@ -use crate::common::HasLen; use crate::docset::DocSet; use crate::fastfield::DeleteBitSet; use crate::positions::PositionReader; @@ -7,6 +6,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE; use crate::postings::BlockSegmentPostings; use crate::postings::Postings; use crate::{DocId, 
TERMINATED}; +use common::HasLen; /// `SegmentPostings` represents the inverted list or postings associated to /// a term in a `Segment`. @@ -265,7 +265,7 @@ impl Postings for SegmentPostings { mod tests { use super::SegmentPostings; - use crate::common::HasLen; + use common::HasLen; use crate::docset::{DocSet, TERMINATED}; use crate::fastfield::DeleteBitSet; diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 8631586c2..40e3ca2ac 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -1,7 +1,6 @@ use super::TermInfo; -use crate::common::{BinarySerializable, VInt}; -use crate::common::{CompositeWrite, CountingWriter}; use crate::core::Segment; +use crate::directory::CompositeWrite; use crate::directory::WritePtr; use crate::fieldnorm::FieldNormReader; use crate::positions::PositionSerializer; @@ -12,6 +11,8 @@ use crate::schema::{Field, FieldEntry, FieldType}; use crate::schema::{IndexRecordOption, Schema}; use crate::termdict::{TermDictionaryBuilder, TermOrdinal}; use crate::{DocId, Score}; +use common::CountingWriter; +use common::{BinarySerializable, VInt}; use std::cmp::Ordering; use std::io::{self, Write}; diff --git a/src/postings/term_info.rs b/src/postings/term_info.rs index b86e56b2f..8703b5589 100644 --- a/src/postings/term_info.rs +++ b/src/postings/term_info.rs @@ -1,4 +1,4 @@ -use crate::common::{BinarySerializable, FixedSize}; +use common::{BinarySerializable, FixedSize}; use std::io; use std::iter::ExactSizeIterator; use std::ops::Range; @@ -67,7 +67,7 @@ impl BinarySerializable for TermInfo { mod tests { use super::TermInfo; - use crate::common::test::fixed_size_test; + use crate::tests::fixed_size_test; // TODO add serialize/deserialize test for terminfo diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 2ffa4309a..ae9bbc45e 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -1,4 +1,3 @@ -use crate::common::BitSet; use 
crate::core::SegmentReader; use crate::query::ConstScorer; use crate::query::{BitSetDocSet, Explanation}; @@ -7,6 +6,7 @@ use crate::schema::{Field, IndexRecordOption}; use crate::termdict::{TermDictionary, TermStreamer}; use crate::TantivyError; use crate::{DocId, Score}; +use common::BitSet; use std::io; use std::sync::Arc; use tantivy_fst::Automaton; diff --git a/src/query/bitset/mod.rs b/src/query/bitset/mod.rs index b74ccd2e8..030fdeae7 100644 --- a/src/query/bitset/mod.rs +++ b/src/query/bitset/mod.rs @@ -1,6 +1,6 @@ -use crate::common::{BitSet, TinySet}; use crate::docset::{DocSet, TERMINATED}; use crate::DocId; +use common::{BitSet, TinySet}; /// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`. /// @@ -96,10 +96,13 @@ impl DocSet for BitSetDocSet { #[cfg(test)] mod tests { + use std::collections::BTreeSet; + use super::BitSetDocSet; - use crate::common::BitSet; use crate::docset::{DocSet, TERMINATED}; + use crate::tests::generate_nonunique_unsorted; use crate::DocId; + use common::BitSet; fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet { let mut docset = BitSet::with_max_value(max_doc); @@ -109,6 +112,29 @@ mod tests { BitSetDocSet::from(docset) } + #[test] + fn test_bitset_large() { + let arr = generate_nonunique_unsorted(100_000, 5_000); + let mut btreeset: BTreeSet = BTreeSet::new(); + let mut bitset = BitSet::with_max_value(100_000); + for el in arr { + btreeset.insert(el); + bitset.insert(el); + } + for i in 0..100_000 { + assert_eq!(btreeset.contains(&i), bitset.contains(i)); + } + assert_eq!(btreeset.len(), bitset.len()); + let mut bitset_docset = BitSetDocSet::from(bitset); + let mut remaining = true; + for el in btreeset.into_iter() { + assert!(remaining); + assert_eq!(bitset_docset.doc(), el); + remaining = bitset_docset.advance() != TERMINATED; + } + assert!(!remaining); + } + #[test] fn test_empty() { let bitset = BitSet::with_max_value(1000); diff --git a/src/query/range_query.rs 
b/src/query/range_query.rs index fa230d015..a625a3354 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -1,4 +1,3 @@ -use crate::common::BitSet; use crate::core::Searcher; use crate::core::SegmentReader; use crate::error::TantivyError; @@ -10,6 +9,7 @@ use crate::schema::Type; use crate::schema::{Field, IndexRecordOption, Term}; use crate::termdict::{TermDictionary, TermStreamer}; use crate::{DocId, Score}; +use common::BitSet; use std::io; use std::ops::{Bound, Range}; diff --git a/src/query/union.rs b/src/query/union.rs index 8185f7c6c..cf7b4d956 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -1,9 +1,9 @@ -use crate::common::TinySet; use crate::docset::{DocSet, TERMINATED}; use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::Scorer; use crate::DocId; use crate::Score; +use common::TinySet; const HORIZON_NUM_TINYBITSETS: usize = 64; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; diff --git a/src/query/vec_docset.rs b/src/query/vec_docset.rs index 89f32bd7f..3f765ef58 100644 --- a/src/query/vec_docset.rs +++ b/src/query/vec_docset.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] -use crate::common::HasLen; use crate::docset::{DocSet, TERMINATED}; use crate::DocId; +use common::HasLen; /// Simulate a `Postings` objects from a `VecPostings`. /// `VecPostings` only exist for testing purposes. 
diff --git a/src/schema/document.rs b/src/schema/document.rs index 1887821f2..dc9fe4ba9 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -1,8 +1,8 @@ use super::*; -use crate::common::BinarySerializable; -use crate::common::VInt; use crate::tokenizer::PreTokenizedString; use crate::DateTime; +use common::BinarySerializable; +use common::VInt; use std::io::{self, Read, Write}; use std::mem; diff --git a/src/schema/facet.rs b/src/schema/facet.rs index 37dbec983..8cbb3b020 100644 --- a/src/schema/facet.rs +++ b/src/schema/facet.rs @@ -1,4 +1,4 @@ -use crate::common::BinarySerializable; +use common::BinarySerializable; use once_cell::sync::Lazy; use regex::Regex; use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/src/schema/field.rs b/src/schema/field.rs index 13ec3d131..ee8348e3c 100644 --- a/src/schema/field.rs +++ b/src/schema/field.rs @@ -1,4 +1,4 @@ -use crate::common::BinarySerializable; +use common::BinarySerializable; use std::io; use std::io::Read; use std::io::Write; diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index 1d1b7ec7f..4dbe15afb 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -1,6 +1,6 @@ -use crate::common::BinarySerializable; use crate::schema::Field; use crate::schema::Value; +use common::BinarySerializable; use std::io::{self, Read, Write}; /// `FieldValue` holds together a `Field` and its `Value`. diff --git a/src/schema/term.rs b/src/schema/term.rs index 0662e5230..149eab9ee 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -1,9 +1,9 @@ use std::fmt; use super::Field; -use crate::common; use crate::schema::Facet; use crate::DateTime; +use common; use std::str; /// Size (in bytes) of the buffer of a int field. 
diff --git a/src/schema/value.rs b/src/schema/value.rs index f34b1fb82..b3b49a8eb 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -276,10 +276,10 @@ impl From for Value { mod binary_serialize { use super::Value; - use crate::common::{f64_to_u64, u64_to_f64, BinarySerializable}; use crate::schema::Facet; use crate::tokenizer::PreTokenizedString; use chrono::{TimeZone, Utc}; + use common::{f64_to_u64, u64_to_f64, BinarySerializable}; use std::io::{self, Read, Write}; const TEXT_CODE: u8 = 0; diff --git a/src/store/footer.rs b/src/store/footer.rs index 1c5f2817b..6f63f8170 100644 --- a/src/store/footer.rs +++ b/src/store/footer.rs @@ -1,8 +1,5 @@ -use crate::{ - common::{BinarySerializable, FixedSize, HasLen}, - directory::FileSlice, - store::Compressor, -}; +use crate::{directory::FileSlice, store::Compressor}; +use common::{BinarySerializable, FixedSize, HasLen}; use std::io; #[derive(Debug, Clone, PartialEq)] diff --git a/src/store/index/block.rs b/src/store/index/block.rs index 3b49905b5..5915f1e13 100644 --- a/src/store/index/block.rs +++ b/src/store/index/block.rs @@ -1,6 +1,6 @@ -use crate::common::VInt; use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD}; use crate::DocId; +use common::VInt; use std::io; use std::ops::Range; diff --git a/src/store/index/skip_index.rs b/src/store/index/skip_index.rs index 306eb7ca1..b69df319a 100644 --- a/src/store/index/skip_index.rs +++ b/src/store/index/skip_index.rs @@ -1,8 +1,8 @@ -use crate::common::{BinarySerializable, VInt}; use crate::directory::OwnedBytes; use crate::store::index::block::CheckpointBlock; use crate::store::index::Checkpoint; use crate::DocId; +use common::{BinarySerializable, VInt}; pub struct LayerCursor<'a> { remaining: &'a [u8], diff --git a/src/store/index/skip_index_builder.rs b/src/store/index/skip_index_builder.rs index 416f7bfa0..c9e311b92 100644 --- a/src/store/index/skip_index_builder.rs +++ b/src/store/index/skip_index_builder.rs @@ -1,6 +1,6 @@ -use 
crate::common::{BinarySerializable, VInt}; use crate::store::index::block::CheckpointBlock; use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD}; +use common::{BinarySerializable, VInt}; use std::io; use std::io::Write; diff --git a/src/store/reader.rs b/src/store/reader.rs index 64cef7339..3ff04f691 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -5,11 +5,8 @@ use crate::schema::Document; use crate::space_usage::StoreSpaceUsage; use crate::store::index::Checkpoint; use crate::DocId; -use crate::{ - common::{BinarySerializable, HasLen, VInt}, - error::DataCorruption, - fastfield::DeleteBitSet, -}; +use crate::{error::DataCorruption, fastfield::DeleteBitSet}; +use common::{BinarySerializable, HasLen, VInt}; use lru::LruCache; use std::io; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/src/store/writer.rs b/src/store/writer.rs index d208920ca..d7004c0f6 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -1,13 +1,13 @@ use super::index::SkipIndexBuilder; use super::StoreReader; use super::{compressors::Compressor, footer::DocStoreFooter}; -use crate::common::CountingWriter; -use crate::common::{BinarySerializable, VInt}; use crate::directory::TerminatingWrite; use crate::directory::WritePtr; use crate::schema::Document; use crate::store::index::Checkpoint; use crate::DocId; +use common::CountingWriter; +use common::{BinarySerializable, VInt}; use std::io::{self, Write}; const BLOCK_SIZE: usize = 16_384; diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs index e78d7f2cd..28d463226 100644 --- a/src/termdict/fst_termdict/term_info_store.rs +++ b/src/termdict/fst_termdict/term_info_store.rs @@ -1,8 +1,8 @@ -use crate::common::{BinarySerializable, FixedSize}; use crate::directory::{FileSlice, OwnedBytes}; use crate::postings::TermInfo; use crate::termdict::TermOrdinal; use byteorder::{ByteOrder, LittleEndian}; +use common::{BinarySerializable, FixedSize}; use std::cmp; use 
std::io::{self, Read, Write}; use tantivy_bitpacker::compute_num_bits; @@ -290,16 +290,16 @@ mod tests { use super::extract_bits; use super::TermInfoBlockMeta; use super::{TermInfoStore, TermInfoStoreWriter}; - use crate::common; - use crate::common::BinarySerializable; use crate::directory::FileSlice; use crate::postings::TermInfo; + use common; + use common::BinarySerializable; use tantivy_bitpacker::compute_num_bits; use tantivy_bitpacker::BitPacker; #[test] fn test_term_info_block() { - common::test::fixed_size_test::(); + crate::tests::fixed_size_test::(); } #[test] diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs index ff0d4ec5f..078e12054 100644 --- a/src/termdict/fst_termdict/termdict.rs +++ b/src/termdict/fst_termdict/termdict.rs @@ -1,10 +1,10 @@ use super::term_info_store::{TermInfoStore, TermInfoStoreWriter}; use super::{TermStreamer, TermStreamerBuilder}; -use crate::common::{BinarySerializable, CountingWriter}; use crate::directory::{FileSlice, OwnedBytes}; use crate::error::DataCorruption; use crate::postings::TermInfo; use crate::termdict::TermOrdinal; +use common::{BinarySerializable, CountingWriter}; use once_cell::sync::Lazy; use std::io::{self, Write}; use tantivy_fst::raw::Fst;