mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 00:02:55 +00:00
Moving FileSlice to tantivy-common (#1729)
This commit is contained in:
@@ -36,7 +36,6 @@ fs2 = { version = "0.4.3", optional = true }
|
||||
levenshtein_automata = "0.2.1"
|
||||
uuid = { version = "1.0.0", features = ["v4", "serde"] }
|
||||
crossbeam-channel = "0.5.4"
|
||||
stable_deref_trait = "1.2.0"
|
||||
rust-stemmers = "1.2.0"
|
||||
downcast-rs = "1.2.0"
|
||||
bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
|
||||
@@ -60,9 +59,8 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona
|
||||
stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
|
||||
tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
|
||||
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
|
||||
common = { version= "0.4", path = "./common/", package = "tantivy-common" }
|
||||
common = { version= "0.5", path = "./common/", package = "tantivy-common" }
|
||||
fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
|
||||
ownedbytes = { version= "0.4", path="./ownedbytes" }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = "0.3.9"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-common"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
@@ -14,7 +14,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
[dependencies]
|
||||
byteorder = "1.4.3"
|
||||
ownedbytes = { version= "0.4", path="../ownedbytes" }
|
||||
ownedbytes = { version= "0.5", path="../ownedbytes" }
|
||||
async-trait = "0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
proptest = "1.0.0"
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
use std::ops::{Deref, Range};
|
||||
use std::ops::{Deref, Range, RangeBounds};
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common::HasLen;
|
||||
use stable_deref_trait::StableDeref;
|
||||
use ownedbytes::{OwnedBytes, StableDeref};
|
||||
|
||||
use crate::directory::OwnedBytes;
|
||||
use crate::HasLen;
|
||||
|
||||
/// Objects that represents files sections in tantivy.
|
||||
///
|
||||
/// By contract, whatever happens to the directory file, as long as a FileHandle
|
||||
/// is alive, the data associated with it cannot be altered or destroyed.
|
||||
///
|
||||
/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
|
||||
/// The underlying behavior is therefore specific to the `Directory` that
|
||||
/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
|
||||
/// on the filesystem.
|
||||
|
||||
@@ -68,6 +67,34 @@ impl fmt::Debug for FileSlice {
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies a relative `RangeBounds` to an absolute `Range` and returns the
/// resulting absolute, half-open `Range`.
///
/// The bounds of `rel_range` are interpreted as offsets from
/// `orig_range.start`. An unbounded start maps to `orig_range.start` and an
/// unbounded end maps to `orig_range.end`.
///
/// For instance, `combine_ranges(2..11, 5..=7)` returns `7..10`: the
/// sub-range starts at relative offset 5 (absolute position 7) and ends at
/// relative offset 7 included (absolute position 9), hence an exclusive end
/// of 10.
///
/// # Panics
///
/// Panics if the resulting range would not be contained in `orig_range`,
/// if its end would precede its start, or if computing a bound overflows
/// `usize`.
fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
    // Checked arithmetic is used everywhere: with wrapping arithmetic (release
    // builds) an overflowing bound could wrap around to a small value and slip
    // through the bound assertions below.
    const OVERFLOW_MSG: &str = "relative range bound overflows usize";

    let rel_start: usize = match rel_range.start_bound() {
        std::ops::Bound::Included(&rel_start) => rel_start,
        std::ops::Bound::Excluded(&rel_start) => rel_start.checked_add(1).expect(OVERFLOW_MSG),
        std::ops::Bound::Unbounded => 0,
    };
    let start: usize = orig_range
        .start
        .checked_add(rel_start)
        .expect(OVERFLOW_MSG);
    assert!(start <= orig_range.end);

    let end: usize = match rel_range.end_bound() {
        // An inclusive end is converted to the equivalent exclusive end.
        std::ops::Bound::Included(&rel_end) => orig_range
            .start
            .checked_add(rel_end)
            .and_then(|inclusive_end| inclusive_end.checked_add(1))
            .expect(OVERFLOW_MSG),
        std::ops::Bound::Excluded(&rel_end) => orig_range
            .start
            .checked_add(rel_end)
            .expect(OVERFLOW_MSG),
        std::ops::Bound::Unbounded => orig_range.end,
    };
    assert!(end >= start);
    assert!(end <= orig_range.end);

    start..end
}
|
||||
|
||||
impl FileSlice {
|
||||
/// Wraps a FileHandle.
|
||||
pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
|
||||
@@ -91,11 +118,11 @@ impl FileSlice {
|
||||
///
|
||||
/// Panics if `byte_range.end` exceeds the filesize.
|
||||
#[must_use]
|
||||
pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
|
||||
assert!(byte_range.end <= self.len());
|
||||
#[inline]
|
||||
pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
|
||||
FileSlice {
|
||||
data: self.data.clone(),
|
||||
range: self.range.start + byte_range.start..self.range.start + byte_range.end,
|
||||
range: combine_ranges(self.range.clone(), byte_range),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +161,6 @@ impl FileSlice {
|
||||
.read_bytes(self.range.start + range.start..self.range.start + range.end)
|
||||
}
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[doc(hidden)]
|
||||
pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
|
||||
assert!(
|
||||
@@ -225,11 +251,12 @@ impl FileHandle for OwnedBytes {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io;
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::HasLen;
|
||||
|
||||
use super::{FileHandle, FileSlice};
|
||||
use crate::file_slice::combine_ranges;
|
||||
use crate::HasLen;
|
||||
|
||||
#[test]
|
||||
fn test_file_slice() -> io::Result<()> {
|
||||
@@ -300,4 +327,23 @@ mod tests {
|
||||
b"bcd"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_combine_range() {
    // Half-open relative ranges are shifted by the start of the original range.
    let shifted_prefix = combine_ranges(1..3, 0..1);
    assert_eq!(shifted_prefix, 1..2);
    let shifted_middle = combine_ranges(3..10, 2..5);
    assert_eq!(shifted_middle, 5..8);

    // A missing bound falls back to the matching bound of the original range.
    let open_ended = combine_ranges(1..3, 1..);
    assert_eq!(open_ended, 2..3);
    let open_started = combine_ranges(1..4, ..2);
    assert_eq!(open_started, 1..3);

    // An inclusive end is turned into the equivalent exclusive end.
    let inclusive_end = combine_ranges(2..11, 5..=7);
    assert_eq!(inclusive_end, 7..10);

    // An excluded start begins one element after the given offset.
    let excluded_start = combine_ranges(2..11, (Bound::Excluded(5), Bound::Unbounded));
    assert_eq!(excluded_start, 8..11);
}
|
||||
|
||||
#[test]
#[should_panic]
fn test_combine_range_panics() {
    // Relative `1..4` applied to `3..5` would end at 7, past the original end.
    let _out_of_bounds = combine_ranges(3..5, 1..4);
}
|
||||
}
|
||||
@@ -5,11 +5,12 @@ use std::ops::Deref;
|
||||
pub use byteorder::LittleEndian as Endianness;
|
||||
|
||||
mod bitset;
|
||||
pub mod file_slice;
|
||||
mod serialize;
|
||||
mod vint;
|
||||
mod writer;
|
||||
|
||||
pub use bitset::*;
|
||||
pub use ownedbytes::{OwnedBytes, StableDeref};
|
||||
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
||||
pub use vint::{
|
||||
deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
|
||||
|
||||
@@ -12,9 +12,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
|
||||
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
|
||||
ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
|
||||
prettytable-rs = {version="0.9.0", optional= true}
|
||||
rand = {version="0.8.3", optional= true}
|
||||
fastdivide = "0.4"
|
||||
|
||||
@@ -7,8 +7,8 @@ mod tests {
|
||||
use std::iter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::OwnedBytes;
|
||||
use fastfield_codecs::*;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use rand::prelude::*;
|
||||
use test::Bencher;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::serialize::NormalizedHeader;
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
use std::{io, iter};
|
||||
|
||||
use common::{BinarySerializable, CountingWriter, DeserializeFrom};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::line::Line;
|
||||
@@ -47,7 +46,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
|
||||
type Reader = BlockwiseLinearReader;
|
||||
|
||||
fn open_from_bytes(
|
||||
bytes: ownedbytes::OwnedBytes,
|
||||
bytes: common::OwnedBytes,
|
||||
normalized_header: NormalizedHeader,
|
||||
) -> io::Result<Self::Reader> {
|
||||
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
|
||||
|
||||
@@ -17,8 +17,7 @@ use std::{
|
||||
ops::{Range, RangeInclusive},
|
||||
};
|
||||
|
||||
use common::{BinarySerializable, CountingWriter, VInt, VIntU128};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
||||
use tantivy_bitpacker::{self, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::compact_space::build_compact_space::get_compact_space;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::io;
|
||||
|
||||
use common::BinarySerializable;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
|
||||
const MAGIC_NUMBER: u16 = 4335u16;
|
||||
const FASTFIELD_FORMAT_VERSION: u8 = 1;
|
||||
|
||||
@@ -45,7 +45,7 @@ mod tests {
|
||||
use std::io;
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::OwnedBytes;
|
||||
|
||||
use crate::gcd::{compute_gcd, find_gcd};
|
||||
use crate::{FastFieldCodecType, VecColumn};
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::io;
|
||||
use std::io::Write;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::BinarySerializable;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use compact_space::CompactSpaceDecompressor;
|
||||
use format_version::read_format_version;
|
||||
use monotonic_mapping::{
|
||||
@@ -26,7 +26,6 @@ use monotonic_mapping::{
|
||||
StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
|
||||
};
|
||||
use null_index_footer::read_null_index_footer;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use serialize::{Header, U128Header};
|
||||
|
||||
mod bitpacked;
|
||||
@@ -436,7 +435,7 @@ mod tests {
|
||||
mod bench {
|
||||
use std::sync::Arc;
|
||||
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::OwnedBytes;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use test::{self, Bencher};
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::line::Line;
|
||||
|
||||
@@ -6,10 +6,10 @@ use std::io::BufRead;
|
||||
use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::str::FromStr;
|
||||
|
||||
use common::OwnedBytes;
|
||||
use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn};
|
||||
use itertools::Itertools;
|
||||
use measure_time::print_time;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use prettytable::{Cell, Row, Table};
|
||||
|
||||
fn print_set_stats(ip_addrs: &[u128]) {
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use std::convert::TryInto;
|
||||
use std::io::{self, Write};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use itertools::Itertools;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use super::{get_bit_at, set_bit_at};
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
use common::BitSet;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BitSet, OwnedBytes};
|
||||
|
||||
use super::{serialize_dense_codec, DenseCodec};
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use std::io::{self, Write};
|
||||
use std::ops::Range;
|
||||
|
||||
use common::{BinarySerializable, CountingWriter, VInt};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt};
|
||||
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||
pub(crate) enum FastFieldCardinality {
|
||||
|
||||
@@ -21,9 +21,8 @@ use std::io;
|
||||
use std::num::NonZeroU64;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::{BinarySerializable, VInt};
|
||||
use common::{BinarySerializable, OwnedBytes, VInt};
|
||||
use log::warn;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::bitpacked::BitpackedCodec;
|
||||
use crate::blockwise_linear::BlockwiseLinearCodec;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
name = "ownedbytes"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
description = "Expose data as static slice"
|
||||
license = "MIT"
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::ops::{Deref, Range};
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io, mem};
|
||||
|
||||
use stable_deref_trait::StableDeref;
|
||||
pub use stable_deref_trait::StableDeref;
|
||||
|
||||
/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
|
||||
/// this data as a slice.
|
||||
|
||||
@@ -6,10 +6,10 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock, Weak};
|
||||
use std::{fmt, result};
|
||||
|
||||
use common::StableDeref;
|
||||
use fs2::FileExt;
|
||||
use memmap2::Mmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use stable_deref_trait::StableDeref;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::core::META_FILEPATH;
|
||||
|
||||
@@ -5,7 +5,6 @@ mod mmap_directory;
|
||||
|
||||
mod directory;
|
||||
mod directory_lock;
|
||||
mod file_slice;
|
||||
mod file_watcher;
|
||||
mod footer;
|
||||
mod managed_directory;
|
||||
@@ -20,13 +19,12 @@ mod composite_file;
|
||||
use std::io::BufWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub use common::{AntiCallToken, TerminatingWrite};
|
||||
pub use ownedbytes::OwnedBytes;
|
||||
pub use common::file_slice::{FileHandle, FileSlice};
|
||||
pub use common::{AntiCallToken, OwnedBytes, TerminatingWrite};
|
||||
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
|
||||
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
|
||||
pub use self::file_slice::{FileHandle, FileSlice};
|
||||
pub use self::ram_directory::RamDirectory;
|
||||
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
|
||||
use common::{intersect_bitsets, BitSet, ReadOnlyBitSet};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use common::{intersect_bitsets, BitSet, OwnedBytes, ReadOnlyBitSet};
|
||||
|
||||
use crate::space_usage::ByteCount;
|
||||
use crate::DocId;
|
||||
|
||||
@@ -80,6 +80,7 @@ impl MultiValueIndex {
|
||||
///
|
||||
/// TODO: Instead of a linear scan we can employ an exponential search into binary search to
|
||||
/// match a docid to its value position.
|
||||
#[allow(clippy::bool_to_int_with_if)]
|
||||
pub(crate) fn positions_to_docids(&self, doc_id_range: Range<u32>, positions: &mut Vec<u32>) {
|
||||
if positions.is_empty() {
|
||||
return;
|
||||
|
||||
@@ -20,7 +20,7 @@ mod atomic_impl {
|
||||
}
|
||||
|
||||
pub fn fetch_add(&self, val: u64, order: Ordering) -> u64 {
|
||||
self.0.fetch_add(val, order) as u64
|
||||
self.0.fetch_add(val, order)
|
||||
}
|
||||
|
||||
pub fn revert(&self, val: u64, order: Ordering) -> u64 {
|
||||
|
||||
@@ -4,9 +4,8 @@ use std::ops::{AddAssign, Range};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common::{BinarySerializable, HasLen};
|
||||
use common::{BinarySerializable, HasLen, OwnedBytes};
|
||||
use lru::LruCache;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use super::footer::DocStoreFooter;
|
||||
use super::index::SkipIndex;
|
||||
|
||||
@@ -6,4 +6,4 @@ edition = "2021"
|
||||
[dependencies]
|
||||
murmurhash32 = "0.2"
|
||||
byteorder = "1"
|
||||
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
|
||||
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
|
||||
|
||||
Reference in New Issue
Block a user