Moving FileSlice to tantivy-common (#1729)

This commit is contained in:
Paul Masurel
2022-12-21 16:35:11 +09:00
committed by GitHub
parent 32cb1d22da
commit f39165e1e7
27 changed files with 89 additions and 56 deletions

View File

@@ -36,7 +36,6 @@ fs2 = { version = "0.4.3", optional = true }
levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
stable_deref_trait = "1.2.0"
rust-stemmers = "1.2.0"
downcast-rs = "1.2.0"
bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
@@ -60,9 +59,8 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona
stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
common = { version= "0.4", path = "./common/", package = "tantivy-common" }
common = { version= "0.5", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
ownedbytes = { version= "0.4", path="./ownedbytes" }
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-common"
version = "0.4.0"
version = "0.5.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT"
edition = "2021"
@@ -14,7 +14,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
[dependencies]
byteorder = "1.4.3"
ownedbytes = { version= "0.4", path="../ownedbytes" }
ownedbytes = { version= "0.5", path="../ownedbytes" }
async-trait = "0.1"
[dev-dependencies]
proptest = "1.0.0"

View File

@@ -1,19 +1,18 @@
use std::ops::{Deref, Range};
use std::ops::{Deref, Range, RangeBounds};
use std::sync::Arc;
use std::{fmt, io};
use async_trait::async_trait;
use common::HasLen;
use stable_deref_trait::StableDeref;
use ownedbytes::{OwnedBytes, StableDeref};
use crate::directory::OwnedBytes;
use crate::HasLen;
/// Objects that represents files sections in tantivy.
///
/// By contract, whatever happens to the directory file, as long as a FileHandle
/// is alive, the data associated with it cannot be altered or destroyed.
///
/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
/// The underlying behavior is therefore specific to the `Directory` that
/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
/// on the filesystem.
@@ -68,6 +67,34 @@ impl fmt::Debug for FileSlice {
}
}
/// Takes a range, a `RangeBounds` object, and returns
/// a `Range` that corresponds to the relative application of the
/// `RangeBounds` object to the original `Range`.
///
/// For instance, combine_ranges(`[2..11)`, `[5..7]`) returns `[7..10]`
/// as it reads, what is the sub-range that starts at the 5 element of
/// `[2..11)` and ends at the 9th element included.
///
/// This function panics, if the result would suggest something outside
/// of the bounds of the original range.
fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
let start: usize = orig_range.start
+ match rel_range.start_bound().cloned() {
std::ops::Bound::Included(rel_start) => rel_start,
std::ops::Bound::Excluded(rel_start) => rel_start + 1,
std::ops::Bound::Unbounded => 0,
};
assert!(start <= orig_range.end);
let end: usize = match rel_range.end_bound().cloned() {
std::ops::Bound::Included(rel_end) => orig_range.start + rel_end + 1,
std::ops::Bound::Excluded(rel_end) => orig_range.start + rel_end,
std::ops::Bound::Unbounded => orig_range.end,
};
assert!(end >= start);
assert!(end <= orig_range.end);
start..end
}
impl FileSlice {
/// Wraps a FileHandle.
pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
@@ -91,11 +118,11 @@ impl FileSlice {
///
/// Panics if `byte_range.end` exceeds the filesize.
#[must_use]
pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
assert!(byte_range.end <= self.len());
#[inline]
pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
FileSlice {
data: self.data.clone(),
range: self.range.start + byte_range.start..self.range.start + byte_range.end,
range: combine_ranges(self.range.clone(), byte_range),
}
}
@@ -134,7 +161,6 @@ impl FileSlice {
.read_bytes(self.range.start + range.start..self.range.start + range.end)
}
#[cfg(feature = "quickwit")]
#[doc(hidden)]
pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
assert!(
@@ -225,11 +251,12 @@ impl FileHandle for OwnedBytes {
#[cfg(test)]
mod tests {
use std::io;
use std::ops::Bound;
use std::sync::Arc;
use common::HasLen;
use super::{FileHandle, FileSlice};
use crate::file_slice::combine_ranges;
use crate::HasLen;
#[test]
fn test_file_slice() -> io::Result<()> {
@@ -300,4 +327,23 @@ mod tests {
b"bcd"
);
}
#[test]
fn test_combine_range() {
assert_eq!(combine_ranges(1..3, 0..1), 1..2);
assert_eq!(combine_ranges(1..3, 1..), 2..3);
assert_eq!(combine_ranges(1..4, ..2), 1..3);
assert_eq!(combine_ranges(3..10, 2..5), 5..8);
assert_eq!(combine_ranges(2..11, 5..=7), 7..10);
assert_eq!(
combine_ranges(2..11, (Bound::Excluded(5), Bound::Unbounded)),
8..11
);
}
#[test]
#[should_panic]
fn test_combine_range_panics() {
let _ = combine_ranges(3..5, 1..4);
}
}

View File

@@ -5,11 +5,12 @@ use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;
mod bitset;
pub mod file_slice;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{
deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,

View File

@@ -12,9 +12,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
prettytable-rs = {version="0.9.0", optional= true}
rand = {version="0.8.3", optional= true}
fastdivide = "0.4"

View File

@@ -7,8 +7,8 @@ mod tests {
use std::iter;
use std::sync::Arc;
use common::OwnedBytes;
use fastfield_codecs::*;
use ownedbytes::OwnedBytes;
use rand::prelude::*;
use test::Bencher;

View File

@@ -1,6 +1,6 @@
use std::io::{self, Write};
use ownedbytes::OwnedBytes;
use common::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::serialize::NormalizedHeader;

View File

@@ -1,8 +1,7 @@
use std::sync::Arc;
use std::{io, iter};
use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::line::Line;
@@ -47,7 +46,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
type Reader = BlockwiseLinearReader;
fn open_from_bytes(
bytes: ownedbytes::OwnedBytes,
bytes: common::OwnedBytes,
normalized_header: NormalizedHeader,
) -> io::Result<Self::Reader> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;

View File

@@ -17,8 +17,7 @@ use std::{
ops::{Range, RangeInclusive},
};
use common::{BinarySerializable, CountingWriter, VInt, VIntU128};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
use tantivy_bitpacker::{self, BitPacker, BitUnpacker};
use crate::compact_space::build_compact_space::get_compact_space;

View File

@@ -1,7 +1,6 @@
use std::io;
use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, OwnedBytes};
const MAGIC_NUMBER: u16 = 4335u16;
const FASTFIELD_FORMAT_VERSION: u8 = 1;

View File

@@ -45,7 +45,7 @@ mod tests {
use std::io;
use std::num::NonZeroU64;
use ownedbytes::OwnedBytes;
use common::OwnedBytes;
use crate::gcd::{compute_gcd, find_gcd};
use crate::{FastFieldCodecType, VecColumn};

View File

@@ -18,7 +18,7 @@ use std::io;
use std::io::Write;
use std::sync::Arc;
use common::BinarySerializable;
use common::{BinarySerializable, OwnedBytes};
use compact_space::CompactSpaceDecompressor;
use format_version::read_format_version;
use monotonic_mapping::{
@@ -26,7 +26,6 @@ use monotonic_mapping::{
StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
};
use null_index_footer::read_null_index_footer;
use ownedbytes::OwnedBytes;
use serialize::{Header, U128Header};
mod bitpacked;
@@ -436,7 +435,7 @@ mod tests {
mod bench {
use std::sync::Arc;
use ownedbytes::OwnedBytes;
use common::OwnedBytes;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use test::{self, Bencher};

View File

@@ -1,7 +1,6 @@
use std::io::{self, Write};
use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::line::Line;

View File

@@ -6,10 +6,10 @@ use std::io::BufRead;
use std::net::{IpAddr, Ipv6Addr};
use std::str::FromStr;
use common::OwnedBytes;
use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn};
use itertools::Itertools;
use measure_time::print_time;
use ownedbytes::OwnedBytes;
use prettytable::{Cell, Row, Table};
fn print_set_stats(ip_addrs: &[u128]) {

View File

@@ -1,9 +1,8 @@
use std::convert::TryInto;
use std::io::{self, Write};
use common::BinarySerializable;
use common::{BinarySerializable, OwnedBytes};
use itertools::Itertools;
use ownedbytes::OwnedBytes;
use super::{get_bit_at, set_bit_at};

View File

@@ -1,7 +1,6 @@
use std::io::{self, Write};
use common::BitSet;
use ownedbytes::OwnedBytes;
use common::{BitSet, OwnedBytes};
use super::{serialize_dense_codec, DenseCodec};

View File

@@ -1,8 +1,7 @@
use std::io::{self, Write};
use std::ops::Range;
use common::{BinarySerializable, CountingWriter, VInt};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt};
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum FastFieldCardinality {

View File

@@ -21,9 +21,8 @@ use std::io;
use std::num::NonZeroU64;
use std::sync::Arc;
use common::{BinarySerializable, VInt};
use common::{BinarySerializable, OwnedBytes, VInt};
use log::warn;
use ownedbytes::OwnedBytes;
use crate::bitpacked::BitpackedCodec;
use crate::blockwise_linear::BlockwiseLinearCodec;

View File

@@ -1,7 +1,7 @@
[package]
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
name = "ownedbytes"
version = "0.4.0"
version = "0.5.0"
edition = "2021"
description = "Expose data as static slice"
license = "MIT"

View File

@@ -3,7 +3,7 @@ use std::ops::{Deref, Range};
use std::sync::Arc;
use std::{fmt, io, mem};
use stable_deref_trait::StableDeref;
pub use stable_deref_trait::StableDeref;
/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
/// this data as a slice.

View File

@@ -6,10 +6,10 @@ use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock, Weak};
use std::{fmt, result};
use common::StableDeref;
use fs2::FileExt;
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use stable_deref_trait::StableDeref;
use tempfile::TempDir;
use crate::core::META_FILEPATH;

View File

@@ -5,7 +5,6 @@ mod mmap_directory;
mod directory;
mod directory_lock;
mod file_slice;
mod file_watcher;
mod footer;
mod managed_directory;
@@ -20,13 +19,12 @@ mod composite_file;
use std::io::BufWriter;
use std::path::PathBuf;
pub use common::{AntiCallToken, TerminatingWrite};
pub use ownedbytes::OwnedBytes;
pub use common::file_slice::{FileHandle, FileSlice};
pub use common::{AntiCallToken, OwnedBytes, TerminatingWrite};
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub use self::file_slice::{FileHandle, FileSlice};
pub use self::ram_directory::RamDirectory;
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};

View File

@@ -1,8 +1,7 @@
use std::io;
use std::io::Write;
use common::{intersect_bitsets, BitSet, ReadOnlyBitSet};
use ownedbytes::OwnedBytes;
use common::{intersect_bitsets, BitSet, OwnedBytes, ReadOnlyBitSet};
use crate::space_usage::ByteCount;
use crate::DocId;

View File

@@ -80,6 +80,7 @@ impl MultiValueIndex {
///
/// TODO: Instead of a linear scan we can employ a exponential search into binary search to
/// match a docid to its value position.
#[allow(clippy::bool_to_int_with_if)]
pub(crate) fn positions_to_docids(&self, doc_id_range: Range<u32>, positions: &mut Vec<u32>) {
if positions.is_empty() {
return;

View File

@@ -20,7 +20,7 @@ mod atomic_impl {
}
pub fn fetch_add(&self, val: u64, order: Ordering) -> u64 {
self.0.fetch_add(val, order) as u64
self.0.fetch_add(val, order)
}
pub fn revert(&self, val: u64, order: Ordering) -> u64 {

View File

@@ -4,9 +4,8 @@ use std::ops::{AddAssign, Range};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use common::{BinarySerializable, HasLen};
use common::{BinarySerializable, HasLen, OwnedBytes};
use lru::LruCache;
use ownedbytes::OwnedBytes;
use super::footer::DocStoreFooter;
use super::index::SkipIndex;

View File

@@ -6,4 +6,4 @@ edition = "2021"
[dependencies]
murmurhash32 = "0.2"
byteorder = "1"
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
common = { version = "0.5", path = "../common/", package = "tantivy-common" }