mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
236 lines
7.5 KiB
Rust
236 lines
7.5 KiB
Rust
use slog::{error, Logger};
|
|
|
|
use crate::directory::directory_lock::Lock;
|
|
use crate::directory::error::LockError;
|
|
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
|
|
use crate::directory::WatchCallback;
|
|
use crate::directory::WatchHandle;
|
|
use crate::directory::{ReadOnlySource, WritePtr};
|
|
use std::fmt;
|
|
use std::io;
|
|
use std::io::Write;
|
|
use std::marker::Send;
|
|
use std::marker::Sync;
|
|
use std::path::Path;
|
|
use std::path::PathBuf;
|
|
use std::result;
|
|
use std::thread;
|
|
use std::time::Duration;
|
|
|
|
/// Retry the logic of acquiring locks is pretty simple.
|
|
/// We just retry `n` times after a given `duratio`, both
|
|
/// depending on the type of lock.
|
|
struct RetryPolicy {
|
|
num_retries: usize,
|
|
wait_in_ms: u64,
|
|
}
|
|
|
|
impl RetryPolicy {
|
|
fn no_retry() -> RetryPolicy {
|
|
RetryPolicy {
|
|
num_retries: 0,
|
|
wait_in_ms: 0,
|
|
}
|
|
}
|
|
|
|
fn wait_and_retry(&mut self) -> bool {
|
|
if self.num_retries == 0 {
|
|
false
|
|
} else {
|
|
self.num_retries -= 1;
|
|
let wait_duration = Duration::from_millis(self.wait_in_ms);
|
|
thread::sleep(wait_duration);
|
|
true
|
|
}
|
|
}
|
|
}
|
|
|
|
/// The `DirectoryLock` is an object that represents a file lock.
|
|
/// See [`LockType`](struct.LockType.html)
|
|
///
|
|
/// It is transparently associated to a lock file, that gets deleted
|
|
/// on `Drop.` The lock is released automatically on `Drop`.
|
|
pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
|
|
|
|
struct DirectoryLockGuard {
|
|
directory: Box<dyn Directory>,
|
|
path: PathBuf,
|
|
}
|
|
|
|
impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
|
|
fn from(underlying: Box<T>) -> Self {
|
|
DirectoryLock(underlying)
|
|
}
|
|
}
|
|
|
|
impl Drop for DirectoryLockGuard {
|
|
fn drop(&mut self) {
|
|
if let Err(e) = self.directory.delete(&*self.path) {
|
|
error!(
|
|
self.directory.logger(),
|
|
"Failed to remove the lock file. {:?}", e
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
enum TryAcquireLockError {
|
|
FileExists,
|
|
IOError(io::Error),
|
|
}
|
|
|
|
fn try_acquire_lock(
|
|
filepath: &Path,
|
|
directory: &mut dyn Directory,
|
|
) -> Result<DirectoryLock, TryAcquireLockError> {
|
|
let mut write = directory.open_write(filepath).map_err(|e| match e {
|
|
OpenWriteError::FileAlreadyExists(_) => TryAcquireLockError::FileExists,
|
|
OpenWriteError::IOError { io_error, .. } => TryAcquireLockError::IOError(io_error),
|
|
})?;
|
|
write.flush().map_err(TryAcquireLockError::IOError)?;
|
|
Ok(DirectoryLock::from(Box::new(DirectoryLockGuard {
|
|
directory: directory.box_clone(),
|
|
path: filepath.to_owned(),
|
|
})))
|
|
}
|
|
|
|
fn retry_policy(is_blocking: bool) -> RetryPolicy {
|
|
if is_blocking {
|
|
RetryPolicy {
|
|
num_retries: 100,
|
|
wait_in_ms: 100,
|
|
}
|
|
} else {
|
|
RetryPolicy::no_retry()
|
|
}
|
|
}
|
|
|
|
/// Write-once read many (WORM) abstraction for where
|
|
/// tantivy's data should be stored.
|
|
///
|
|
/// There are currently two implementations of `Directory`
|
|
///
|
|
/// - The [`MMapDirectory`](struct.MmapDirectory.html), this
|
|
/// should be your default choice.
|
|
/// - The [`RAMDirectory`](struct.RAMDirectory.html), which
|
|
/// should be used mostly for tests.
|
|
///
|
|
pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
|
|
/// Opens a virtual file for read.
|
|
///
|
|
/// Once a virtual file is open, its data may not
|
|
/// change.
|
|
///
|
|
/// Specifically, subsequent writes or flushes should
|
|
/// have no effect on the returned `ReadOnlySource` object.
|
|
///
|
|
/// You should only use this to read files create with [Directory::open_write].
|
|
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError>;
|
|
|
|
/// Removes a file
|
|
///
|
|
/// Removing a file will not affect an eventual
|
|
/// existing ReadOnlySource pointing to it.
|
|
///
|
|
/// Removing a nonexistent file, yields a
|
|
/// `DeleteError::DoesNotExist`.
|
|
fn delete(&self, path: &Path) -> result::Result<(), DeleteError>;
|
|
|
|
/// Returns true iff the file exists
|
|
fn exists(&self, path: &Path) -> bool;
|
|
|
|
/// Opens a writer for the *virtual file* associated with
|
|
/// a Path.
|
|
///
|
|
/// Right after this call, the file should be created
|
|
/// and any subsequent call to `open_read` for the
|
|
/// same path should return a `ReadOnlySource`.
|
|
///
|
|
/// Write operations may be aggressively buffered.
|
|
/// The client of this trait is responsible for calling flush
|
|
/// to ensure that subsequent `read` operations
|
|
/// will take into account preceding `write` operations.
|
|
///
|
|
/// Flush operation should also be persistent.
|
|
///
|
|
/// The user shall not rely on `Drop` triggering `flush`.
|
|
/// Note that `RAMDirectory` will panic! if `flush`
|
|
/// was not called.
|
|
///
|
|
/// The file may not previously exist.
|
|
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError>;
|
|
|
|
/// Reads the full content file that has been written using
|
|
/// atomic_write.
|
|
///
|
|
/// This should only be used for small files.
|
|
///
|
|
/// You should only use this to read files create with [Directory::atomic_write].
|
|
fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;
|
|
|
|
/// Atomically replace the content of a file with data.
|
|
///
|
|
/// This calls ensure that reads can never *observe*
|
|
/// a partially written file.
|
|
///
|
|
/// The file may or may not previously exist.
|
|
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()>;
|
|
|
|
/// Acquire a lock in the given directory.
|
|
///
|
|
/// The method is blocking or not depending on the `Lock` object.
|
|
fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
|
|
let mut box_directory = self.box_clone();
|
|
let mut retry_policy = retry_policy(lock.is_blocking);
|
|
loop {
|
|
match try_acquire_lock(&lock.filepath, &mut *box_directory) {
|
|
Ok(result) => {
|
|
return Ok(result);
|
|
}
|
|
Err(TryAcquireLockError::FileExists) => {
|
|
if !retry_policy.wait_and_retry() {
|
|
return Err(LockError::LockBusy);
|
|
}
|
|
}
|
|
Err(TryAcquireLockError::IOError(io_error)) => {
|
|
return Err(LockError::IOError(io_error));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Registers a callback that will be called whenever a change on the `meta.json`
|
|
/// using the `atomic_write` API is detected.
|
|
///
|
|
/// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
|
|
/// hand, undefined.
|
|
///
|
|
/// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
|
|
/// required to keep it.
|
|
/// It does not override previous callbacks. When the file is modified, all callback that are
|
|
/// registered (and whose `WatchHandle` is still alive) are triggered.
|
|
///
|
|
/// Internally, tantivy only uses this API to detect new commits to implement the
|
|
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
|
|
/// `OnCommit` `ReloadPolicy` to work properly.
|
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
|
|
|
|
/// Returns the `slog::Logger` configured for the `Directory`.
|
|
fn logger(&self) -> &Logger;
|
|
}
|
|
|
|
/// DirectoryClone
|
|
pub trait DirectoryClone {
|
|
/// Clones the directory and boxes the clone
|
|
fn box_clone(&self) -> Box<dyn Directory>;
|
|
}
|
|
|
|
impl<T> DirectoryClone for T
|
|
where
|
|
T: 'static + Directory + Clone,
|
|
{
|
|
fn box_clone(&self) -> Box<dyn Directory> {
|
|
Box::new(self.clone())
|
|
}
|
|
}
|