diff --git a/Cargo.toml b/Cargo.toml index baf593511..731045b9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,6 @@ fs2 = { version = "0.4.3", optional = true } levenshtein_automata = "0.2.1" uuid = { version = "1.0.0", features = ["v4", "serde"] } crossbeam-channel = "0.5.4" -stable_deref_trait = "1.2.0" rust-stemmers = "1.2.0" downcast-rs = "1.2.0" bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] } @@ -60,9 +59,8 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" } tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" } tantivy-bitpacker = { version= "0.3", path="./bitpacker" } -common = { version= "0.4", path = "./common/", package = "tantivy-common" } +common = { version= "0.5", path = "./common/", package = "tantivy-common" } fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false } -ownedbytes = { version= "0.4", path="./ownedbytes" } [target.'cfg(windows)'.dependencies] winapi = "0.3.9" diff --git a/common/Cargo.toml b/common/Cargo.toml index e579a9aab..9d16079e8 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-common" -version = "0.4.0" +version = "0.5.0" authors = ["Paul Masurel ", "Pascal Seitz "] license = "MIT" edition = "2021" @@ -14,7 +14,8 @@ repository = "https://github.com/quickwit-oss/tantivy" [dependencies] byteorder = "1.4.3" -ownedbytes = { version= "0.4", path="../ownedbytes" } +ownedbytes = { version= "0.5", path="../ownedbytes" } +async-trait = "0.1" [dev-dependencies] proptest = "1.0.0" diff --git a/src/directory/file_slice.rs b/common/src/file_slice.rs similarity index 80% rename from src/directory/file_slice.rs rename to common/src/file_slice.rs index 3ec28e9d7..2b748d39a 100644 --- a/src/directory/file_slice.rs +++ b/common/src/file_slice.rs @@ -1,19 +1,18 @@ -use std::ops::{Deref, Range}; +use 
std::ops::{Deref, Range, RangeBounds}; use std::sync::Arc; use std::{fmt, io}; use async_trait::async_trait; -use common::HasLen; -use stable_deref_trait::StableDeref; +use ownedbytes::{OwnedBytes, StableDeref}; -use crate::directory::OwnedBytes; +use crate::HasLen; /// Objects that represents files sections in tantivy. /// /// By contract, whatever happens to the directory file, as long as a FileHandle /// is alive, the data associated with it cannot be altered or destroyed. /// -/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that +/// The underlying behavior is therefore specific to the `Directory` that /// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file /// on the filesystem. @@ -68,6 +67,34 @@ impl fmt::Debug for FileSlice { } } +/// Takes a range, a `RangeBounds` object, and returns +/// a `Range` that corresponds to the relative application of the +/// `RangeBounds` object to the original `Range`. +/// +/// For instance, combine_ranges(`[2..11)`, `[5..7]`) returns `[7..10)`: +/// the sub-range that starts at offset 5 within `[2..11)` and ends at +/// offset 7 included, i.e. absolute positions 7 through 9. +/// +/// This function panics if the result would suggest something outside +/// of the bounds of the original range. 
+fn combine_ranges>(orig_range: Range, rel_range: R) -> Range { + let start: usize = orig_range.start + + match rel_range.start_bound().cloned() { + std::ops::Bound::Included(rel_start) => rel_start, + std::ops::Bound::Excluded(rel_start) => rel_start + 1, + std::ops::Bound::Unbounded => 0, + }; + assert!(start <= orig_range.end); + let end: usize = match rel_range.end_bound().cloned() { + std::ops::Bound::Included(rel_end) => orig_range.start + rel_end + 1, + std::ops::Bound::Excluded(rel_end) => orig_range.start + rel_end, + std::ops::Bound::Unbounded => orig_range.end, + }; + assert!(end >= start); + assert!(end <= orig_range.end); + start..end +} + impl FileSlice { /// Wraps a FileHandle. pub fn new(file_handle: Arc) -> Self { @@ -91,11 +118,11 @@ impl FileSlice { /// /// Panics if `byte_range.end` exceeds the filesize. #[must_use] - pub fn slice(&self, byte_range: Range) -> FileSlice { - assert!(byte_range.end <= self.len()); + #[inline] + pub fn slice>(&self, byte_range: R) -> FileSlice { FileSlice { data: self.data.clone(), - range: self.range.start + byte_range.start..self.range.start + byte_range.end, + range: combine_ranges(self.range.clone(), byte_range), } } @@ -134,7 +161,6 @@ impl FileSlice { .read_bytes(self.range.start + range.start..self.range.start + range.end) } - #[cfg(feature = "quickwit")] #[doc(hidden)] pub async fn read_bytes_slice_async(&self, byte_range: Range) -> io::Result { assert!( @@ -225,11 +251,12 @@ impl FileHandle for OwnedBytes { #[cfg(test)] mod tests { use std::io; + use std::ops::Bound; use std::sync::Arc; - use common::HasLen; - use super::{FileHandle, FileSlice}; + use crate::file_slice::combine_ranges; + use crate::HasLen; #[test] fn test_file_slice() -> io::Result<()> { @@ -300,4 +327,23 @@ mod tests { b"bcd" ); } + + #[test] + fn test_combine_range() { + assert_eq!(combine_ranges(1..3, 0..1), 1..2); + assert_eq!(combine_ranges(1..3, 1..), 2..3); + assert_eq!(combine_ranges(1..4, ..2), 1..3); + 
assert_eq!(combine_ranges(3..10, 2..5), 5..8); + assert_eq!(combine_ranges(2..11, 5..=7), 7..10); + assert_eq!( + combine_ranges(2..11, (Bound::Excluded(5), Bound::Unbounded)), + 8..11 + ); + } + + #[test] + #[should_panic] + fn test_combine_range_panics() { + let _ = combine_ranges(3..5, 1..4); + } } diff --git a/common/src/lib.rs b/common/src/lib.rs index 9dac16de1..92ea19def 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -5,11 +5,12 @@ use std::ops::Deref; pub use byteorder::LittleEndian as Endianness; mod bitset; +pub mod file_slice; mod serialize; mod vint; mod writer; - pub use bitset::*; +pub use ownedbytes::{OwnedBytes, StableDeref}; pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; pub use vint::{ deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128, diff --git a/fastfield_codecs/Cargo.toml b/fastfield_codecs/Cargo.toml index a56d0f983..b3c86b920 100644 --- a/fastfield_codecs/Cargo.toml +++ b/fastfield_codecs/Cargo.toml @@ -12,9 +12,8 @@ repository = "https://github.com/quickwit-oss/tantivy" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -common = { version = "0.4", path = "../common/", package = "tantivy-common" } +common = { version = "0.5", path = "../common/", package = "tantivy-common" } tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" } -ownedbytes = { version = "0.4.0", path = "../ownedbytes" } prettytable-rs = {version="0.9.0", optional= true} rand = {version="0.8.3", optional= true} fastdivide = "0.4" diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index 8f89f3ecf..c42466646 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -7,8 +7,8 @@ mod tests { use std::iter; use std::sync::Arc; + use common::OwnedBytes; use fastfield_codecs::*; - use ownedbytes::OwnedBytes; use rand::prelude::*; use test::Bencher; diff --git 
a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 044debb96..a5f324666 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -1,6 +1,6 @@ use std::io::{self, Write}; -use ownedbytes::OwnedBytes; +use common::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::serialize::NormalizedHeader; diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index f4ba6c728..dbe70a099 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -1,8 +1,7 @@ use std::sync::Arc; use std::{io, iter}; -use common::{BinarySerializable, CountingWriter, DeserializeFrom}; -use ownedbytes::OwnedBytes; +use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes}; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::line::Line; @@ -47,7 +46,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { type Reader = BlockwiseLinearReader; fn open_from_bytes( - bytes: ownedbytes::OwnedBytes, + bytes: common::OwnedBytes, normalized_header: NormalizedHeader, ) -> io::Result { let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?; diff --git a/fastfield_codecs/src/compact_space/mod.rs b/fastfield_codecs/src/compact_space/mod.rs index 9129452cc..ed2294710 100644 --- a/fastfield_codecs/src/compact_space/mod.rs +++ b/fastfield_codecs/src/compact_space/mod.rs @@ -17,8 +17,7 @@ use std::{ ops::{Range, RangeInclusive}, }; -use common::{BinarySerializable, CountingWriter, VInt, VIntU128}; -use ownedbytes::OwnedBytes; +use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128}; use tantivy_bitpacker::{self, BitPacker, BitUnpacker}; use crate::compact_space::build_compact_space::get_compact_space; diff --git a/fastfield_codecs/src/format_version.rs b/fastfield_codecs/src/format_version.rs index 7eaa342a3..bdcb80785 100644 --- 
a/fastfield_codecs/src/format_version.rs +++ b/fastfield_codecs/src/format_version.rs @@ -1,7 +1,6 @@ use std::io; -use common::BinarySerializable; -use ownedbytes::OwnedBytes; +use common::{BinarySerializable, OwnedBytes}; const MAGIC_NUMBER: u16 = 4335u16; const FASTFIELD_FORMAT_VERSION: u8 = 1; diff --git a/fastfield_codecs/src/gcd.rs b/fastfield_codecs/src/gcd.rs index 7917d7ca4..a82085649 100644 --- a/fastfield_codecs/src/gcd.rs +++ b/fastfield_codecs/src/gcd.rs @@ -45,7 +45,7 @@ mod tests { use std::io; use std::num::NonZeroU64; - use ownedbytes::OwnedBytes; + use common::OwnedBytes; use crate::gcd::{compute_gcd, find_gcd}; use crate::{FastFieldCodecType, VecColumn}; diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index f4b7e1f4f..79763a153 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -18,7 +18,7 @@ use std::io; use std::io::Write; use std::sync::Arc; -use common::BinarySerializable; +use common::{BinarySerializable, OwnedBytes}; use compact_space::CompactSpaceDecompressor; use format_version::read_format_version; use monotonic_mapping::{ @@ -26,7 +26,6 @@ use monotonic_mapping::{ StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval, }; use null_index_footer::read_null_index_footer; -use ownedbytes::OwnedBytes; use serialize::{Header, U128Header}; mod bitpacked; @@ -436,7 +435,7 @@ mod tests { mod bench { use std::sync::Arc; - use ownedbytes::OwnedBytes; + use common::OwnedBytes; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use test::{self, Bencher}; diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index d75eeea80..89851c7d9 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -1,7 +1,6 @@ use std::io::{self, Write}; -use common::BinarySerializable; -use ownedbytes::OwnedBytes; +use common::{BinarySerializable, OwnedBytes}; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use 
crate::line::Line; diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 1c26bbe02..aad421866 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -6,10 +6,10 @@ use std::io::BufRead; use std::net::{IpAddr, Ipv6Addr}; use std::str::FromStr; +use common::OwnedBytes; use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn}; use itertools::Itertools; use measure_time::print_time; -use ownedbytes::OwnedBytes; use prettytable::{Cell, Row, Table}; fn print_set_stats(ip_addrs: &[u128]) { diff --git a/fastfield_codecs/src/null_index/dense.rs b/fastfield_codecs/src/null_index/dense.rs index 2b5c7c9da..690453f4e 100644 --- a/fastfield_codecs/src/null_index/dense.rs +++ b/fastfield_codecs/src/null_index/dense.rs @@ -1,9 +1,8 @@ use std::convert::TryInto; use std::io::{self, Write}; -use common::BinarySerializable; +use common::{BinarySerializable, OwnedBytes}; use itertools::Itertools; -use ownedbytes::OwnedBytes; use super::{get_bit_at, set_bit_at}; diff --git a/fastfield_codecs/src/null_index/sparse.rs b/fastfield_codecs/src/null_index/sparse.rs index 591d78078..fe08a07a7 100644 --- a/fastfield_codecs/src/null_index/sparse.rs +++ b/fastfield_codecs/src/null_index/sparse.rs @@ -1,7 +1,6 @@ use std::io::{self, Write}; -use common::BitSet; -use ownedbytes::OwnedBytes; +use common::{BitSet, OwnedBytes}; use super::{serialize_dense_codec, DenseCodec}; diff --git a/fastfield_codecs/src/null_index_footer.rs b/fastfield_codecs/src/null_index_footer.rs index 272ddbc3f..8a4b4cd59 100644 --- a/fastfield_codecs/src/null_index_footer.rs +++ b/fastfield_codecs/src/null_index_footer.rs @@ -1,8 +1,7 @@ use std::io::{self, Write}; use std::ops::Range; -use common::{BinarySerializable, CountingWriter, VInt}; -use ownedbytes::OwnedBytes; +use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt}; #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub(crate) enum FastFieldCardinality { diff --git 
a/fastfield_codecs/src/serialize.rs b/fastfield_codecs/src/serialize.rs index 2f1cbc28b..a2c6cd378 100644 --- a/fastfield_codecs/src/serialize.rs +++ b/fastfield_codecs/src/serialize.rs @@ -21,9 +21,8 @@ use std::io; use std::num::NonZeroU64; use std::sync::Arc; -use common::{BinarySerializable, VInt}; +use common::{BinarySerializable, OwnedBytes, VInt}; use log::warn; -use ownedbytes::OwnedBytes; use crate::bitpacked::BitpackedCodec; use crate::blockwise_linear::BlockwiseLinearCodec; diff --git a/ownedbytes/Cargo.toml b/ownedbytes/Cargo.toml index 4bd3206ef..c7cf89301 100644 --- a/ownedbytes/Cargo.toml +++ b/ownedbytes/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Paul Masurel ", "Pascal Seitz "] name = "ownedbytes" -version = "0.4.0" +version = "0.5.0" edition = "2021" description = "Expose data as static slice" license = "MIT" diff --git a/ownedbytes/src/lib.rs b/ownedbytes/src/lib.rs index 622f9e66e..ef0ab72ac 100644 --- a/ownedbytes/src/lib.rs +++ b/ownedbytes/src/lib.rs @@ -3,7 +3,7 @@ use std::ops::{Deref, Range}; use std::sync::Arc; use std::{fmt, io, mem}; -use stable_deref_trait::StableDeref; +pub use stable_deref_trait::StableDeref; /// An OwnedBytes simply wraps an object that owns a slice of data and exposes /// this data as a slice. 
diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index f87a19a39..33bdea328 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -6,10 +6,10 @@ use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock, Weak}; use std::{fmt, result}; +use common::StableDeref; use fs2::FileExt; use memmap2::Mmap; use serde::{Deserialize, Serialize}; -use stable_deref_trait::StableDeref; use tempfile::TempDir; use crate::core::META_FILEPATH; diff --git a/src/directory/mod.rs b/src/directory/mod.rs index 6397ea6b0..1dda8579e 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -5,7 +5,6 @@ mod mmap_directory; mod directory; mod directory_lock; -mod file_slice; mod file_watcher; mod footer; mod managed_directory; @@ -20,13 +19,12 @@ mod composite_file; use std::io::BufWriter; use std::path::PathBuf; -pub use common::{AntiCallToken, TerminatingWrite}; -pub use ownedbytes::OwnedBytes; +pub use common::file_slice::{FileHandle, FileSlice}; +pub use common::{AntiCallToken, OwnedBytes, TerminatingWrite}; pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; pub use self::directory::{Directory, DirectoryClone, DirectoryLock}; pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK}; -pub use self::file_slice::{FileHandle, FileSlice}; pub use self::ram_directory::RamDirectory; pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle}; diff --git a/src/fastfield/alive_bitset.rs b/src/fastfield/alive_bitset.rs index 508508bad..526d71c20 100644 --- a/src/fastfield/alive_bitset.rs +++ b/src/fastfield/alive_bitset.rs @@ -1,8 +1,7 @@ use std::io; use std::io::Write; -use common::{intersect_bitsets, BitSet, ReadOnlyBitSet}; -use ownedbytes::OwnedBytes; +use common::{intersect_bitsets, BitSet, OwnedBytes, ReadOnlyBitSet}; use crate::space_usage::ByteCount; use crate::DocId; diff --git a/src/fastfield/multivalued/index.rs b/src/fastfield/multivalued/index.rs index 
fb7cfb7f1..8eb0b4639 100644 --- a/src/fastfield/multivalued/index.rs +++ b/src/fastfield/multivalued/index.rs @@ -80,6 +80,7 @@ impl MultiValueIndex { /// /// TODO: Instead of a linear scan we can employ a exponential search into binary search to /// match a docid to its value position. + #[allow(clippy::bool_to_int_with_if)] pub(crate) fn positions_to_docids(&self, doc_id_range: Range, positions: &mut Vec) { if positions.is_empty() { return; diff --git a/src/indexer/stamper.rs b/src/indexer/stamper.rs index a0094edb1..8287d841c 100644 --- a/src/indexer/stamper.rs +++ b/src/indexer/stamper.rs @@ -20,7 +20,7 @@ mod atomic_impl { } pub fn fetch_add(&self, val: u64, order: Ordering) -> u64 { - self.0.fetch_add(val, order) as u64 + self.0.fetch_add(val, order) } pub fn revert(&self, val: u64, order: Ordering) -> u64 { diff --git a/src/store/reader.rs b/src/store/reader.rs index b0a3b9259..cc20de184 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -4,9 +4,8 @@ use std::ops::{AddAssign, Range}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -use common::{BinarySerializable, HasLen}; +use common::{BinarySerializable, HasLen, OwnedBytes}; use lru::LruCache; -use ownedbytes::OwnedBytes; use super::footer::DocStoreFooter; use super::index::SkipIndex; diff --git a/stacker/Cargo.toml b/stacker/Cargo.toml index f683ff5ec..933eb0737 100644 --- a/stacker/Cargo.toml +++ b/stacker/Cargo.toml @@ -6,4 +6,4 @@ edition = "2021" [dependencies] murmurhash32 = "0.2" byteorder = "1" -common = { version = "0.4", path = "../common/", package = "tantivy-common" } +common = { version = "0.5", path = "../common/", package = "tantivy-common" }