Merge pull request #921 from tantivy-search/more-pub-for-hot-directory

Exposing API for the hot directory
This commit is contained in:
Paul Masurel
2020-10-29 13:04:37 +09:00
committed by GitHub
6 changed files with 64 additions and 12 deletions

View File

@@ -16,6 +16,9 @@ pub type BoxedData = Box<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
/// Despite its name, a `FileSlice` may or may not directly map to an actual file
/// on the filesystem.
pub trait FileHandle: 'static + Send + Sync + HasLen {
/// Reads a slice of bytes.
///
/// `from` and `to` are byte offsets into the underlying data. Judging by how
/// `FileSlice` calls this method, the range is half-open (`to` is excluded)
/// -- implementors should confirm against the existing implementations.
///
/// This method may panic if the range requested is invalid.
fn read_bytes(&self, from: usize, to: usize) -> io::Result<OwnedBytes>;
}
@@ -53,7 +56,7 @@ pub struct FileSlice {
}
impl FileSlice {
/// Wraps a new `Deref<Target = [u8]>`
/// Wraps a FileHandle.
pub fn new<D>(data: D) -> Self
where
D: FileHandle,
@@ -69,6 +72,7 @@ impl FileSlice {
/// Creates a fileslice that is just a view over a slice of the data.
///
/// # Panics
///
/// Panics if `to < from` or if `to` exceeds the filesize.
pub fn slice(&self, from: usize, to: usize) -> FileSlice {
assert!(to <= self.len());
@@ -96,6 +100,18 @@ impl FileSlice {
self.data.read_bytes(self.start, self.stop)
}
/// Reads a specific slice of data.
///
/// `from` and `to` are offsets relative to the start of this `FileSlice`.
/// This is equivalent to running `file_slice.slice(from, to).read_bytes()`.
///
/// # Errors
///
/// Returns whatever error the underlying data handle reports from `read_bytes`.
///
/// # Panics
///
/// Panics if `to < from` or if `to` exceeds the length of this `FileSlice`.
pub fn read_bytes_slice(&self, from: usize, to: usize) -> io::Result<OwnedBytes> {
    assert!(from <= to);
    // Checked addition: in release builds a plain `self.start + to` wraps
    // silently on overflow, which would let an out-of-range `to` pass the check.
    let end_within_slice = self
        .start
        .checked_add(to)
        .map_or(false, |end| end <= self.stop);
    assert!(end_within_slice, "`to` exceeds the fileslice length");
    // `from <= to` and `self.start + to` did not overflow, so neither sum can overflow here.
    self.data.read_bytes(self.start + from, self.start + to)
}
/// Splits the FileSlice at the given offset and returns two file slices:
/// `file_slice[..split_offset]` and `file_slice[split_offset..]`.
///
@@ -187,11 +203,35 @@ mod tests {
}
#[test]
fn test_slice_deref() -> io::Result<()> {
let slice_deref = FileSlice::new(&b"abcdef"[..]);
assert_eq!(slice_deref.len(), 6);
assert_eq!(slice_deref.read_bytes()?.as_ref(), b"abcdef");
assert_eq!(slice_deref.slice(1, 4).read_bytes()?.as_ref(), b"bcd");
fn test_slice_simple_read() -> io::Result<()> {
    // A six-byte slice backed by a static byte string.
    let file_slice = FileSlice::new(&b"abcdef"[..]);
    assert_eq!(file_slice.len(), 6);

    // Reading the whole slice returns every byte.
    let everything = file_slice.read_bytes()?;
    assert_eq!(everything.as_ref(), b"abcdef");

    // Reading a sub-slice returns only the requested window.
    let middle = file_slice.slice(1, 4).read_bytes()?;
    assert_eq!(middle.as_ref(), b"bcd");
    Ok(())
}
#[test]
fn test_slice_read_slice() -> io::Result<()> {
    let file_slice = FileSlice::new(&b"abcdef"[..]);
    // Read the window between offsets 1 and 4 without building an intermediate slice.
    let middle = file_slice.read_bytes_slice(1, 4)?;
    assert_eq!(middle.as_ref(), b"bcd");
    Ok(())
}
#[test]
#[should_panic(expected = "assertion failed: from <= to")]
fn test_slice_read_slice_invalid_range() {
    let file_slice = FileSlice::new(&b"abcdef"[..]);
    // `from > to`: the call itself is expected to panic, so the comparison
    // below should never be reached.
    let bytes = file_slice.read_bytes_slice(1, 0).unwrap();
    assert_eq!(bytes.as_ref(), b"bcd");
}
#[test]
#[should_panic(expected = "`to` exceeds the fileslice length")]
fn test_slice_read_slice_invalid_range_exceeds() {
    let file_slice = FileSlice::new(&b"abcdef"[..]);
    // `to` lies past the end of the six-byte slice: the call itself is expected
    // to panic, so the comparison below should never be reached.
    let bytes = file_slice.read_bytes_slice(0, 10).unwrap();
    assert_eq!(bytes.as_ref(), b"bcd");
}
}

View File

@@ -23,12 +23,13 @@ pub use self::directory::DirectoryLock;
pub use self::directory::{Directory, DirectoryClone};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub(crate) use self::file_slice::BoxedData;
pub use self::file_slice::FileSlice;
pub use self::file_slice::{FileHandle, FileSlice};
pub use self::owned_bytes::OwnedBytes;
pub use self::ram_directory::RAMDirectory;
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
use std::io::{self, BufWriter, Write};
use std::path::PathBuf;
/// Outcome of the Garbage collection
pub struct GarbageCollectionResult {
/// List of files that were deleted in this cycle

View File

@@ -1,11 +1,10 @@
use crate::directory::FileHandle;
use stable_deref_trait::StableDeref;
use std::mem;
use std::ops::Deref;
use std::sync::Arc;
use std::{fmt, io};
use super::file_slice::FileHandle;
/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
/// this data as a static slice.
///

View File

@@ -29,6 +29,13 @@ impl WatchHandle {
pub fn new(watch_callback: Arc<WatchCallback>) -> WatchHandle {
WatchHandle(watch_callback)
}
/// Returns an empty watch handle.
///
/// This function is only useful when implementing a readonly directory.
pub fn empty() -> WatchHandle {
WatchHandle::new(Arc::new(Box::new(|| {})))
}
}
impl WatchCallbackList {

View File

@@ -13,12 +13,16 @@ use std::fmt;
use std::path::PathBuf;
use std::sync::PoisonError;
/// Represents a `DataCorruption` error.
///
/// When facing data corruption, tantivy either panics or returns this error.
pub struct DataCorruption {
// Path of the file concerned; `None` when the error is built with `comment_only`.
filepath: Option<PathBuf>,
// Description of the problem, as supplied by the caller.
comment: String,
}
impl DataCorruption {
/// Creates a `DataCorruption` Error.
pub fn new(filepath: PathBuf, comment: String) -> DataCorruption {
DataCorruption {
filepath: Some(filepath),
@@ -26,10 +30,11 @@ impl DataCorruption {
}
}
pub fn comment_only(comment: String) -> DataCorruption {
/// Creates a `DataCorruption` Error, when the filepath is irrelevant.
pub fn comment_only<TStr: ToString>(comment: TStr) -> DataCorruption {
DataCorruption {
filepath: None,
comment,
comment: comment.to_string(),
}
}
}

View File

@@ -134,7 +134,7 @@ mod core;
mod indexer;
#[allow(unused_doc_comments)]
mod error;
pub mod error;
pub mod tokenizer;
pub mod collector;