mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-29 14:40:40 +00:00
Documented the Directory behavior.
Removed sync calls, and moved then on file creation and flush instead.
This commit is contained in:
@@ -150,16 +150,7 @@ impl Index {
|
||||
}
|
||||
self.save_metas()
|
||||
}
|
||||
|
||||
pub fn sync(&mut self, segment: &Segment) -> Result<()> {
|
||||
let directory = try!(self.ro_directory());
|
||||
for component in SegmentComponent::values() {
|
||||
let path = segment.relative_path(component);
|
||||
try!(directory.sync(&path));
|
||||
}
|
||||
try!(self.ro_directory()).sync_directory()
|
||||
}
|
||||
|
||||
|
||||
pub fn segments(&self,) -> Vec<Segment> {
|
||||
self.segment_ids()
|
||||
.into_iter()
|
||||
|
||||
@@ -80,7 +80,6 @@ impl IndexWriter {
|
||||
segment_writer.add_document(&doc, &schema_clone).unwrap();
|
||||
}
|
||||
segment_writer.finalize().unwrap();
|
||||
index_clone.sync(&segment).unwrap();
|
||||
index_clone.publish_segment(&segment).unwrap();
|
||||
}
|
||||
})
|
||||
@@ -100,7 +99,6 @@ impl IndexWriter {
|
||||
let merged_segment = self.index.new_segment();
|
||||
let segment_serializer = try!(SegmentSerializer::for_segment(&merged_segment));
|
||||
try!(merger.write(segment_serializer));
|
||||
try!(self.index.sync(&merged_segment));
|
||||
try!(self.index.publish_merge_segment(segments, &merged_segment));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -6,18 +6,50 @@ use directory::{ReadOnlySource, WritePtr, OpenError};
|
||||
use std::result;
|
||||
use Result;
|
||||
|
||||
|
||||
/// Abstraction for where tantivy's index should be stored.
|
||||
///
|
||||
/// There is currently two implementations of `Directory`
|
||||
/// - [RAMDirectory](index.html)
|
||||
///
|
||||
/// - The [MMapDirectory](struct.MmapDirectory.html), this
|
||||
/// should be your default choice.
|
||||
/// - The [RAMDirectory](struct.RAMDirectory.html), which
|
||||
/// should be used mostly for tests.
|
||||
///
|
||||
pub trait Directory: fmt::Debug + Send + Sync {
|
||||
|
||||
/// Opens a virtual file for read.
|
||||
///
|
||||
/// Once a virtualfile is open, its data may not
|
||||
/// change.
|
||||
///
|
||||
/// Specifically, subsequent write or flush should
|
||||
/// have no effect the returned `ReadOnlySource` object.
|
||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenError>;
|
||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr>;
|
||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>;
|
||||
|
||||
/// Syncs the file if it exists
|
||||
/// If it does not exists, just return Ok(())
|
||||
// TODO Change the API
|
||||
fn sync(&self, path: &Path) -> Result<()>;
|
||||
fn sync_directory(&self,) -> Result<()>;
|
||||
/// Opens a writer for the *virtual file* associated with
|
||||
/// a Path.
|
||||
///
|
||||
/// Right after this call, the file should be created
|
||||
/// and any subsequent call to `open_read` for the
|
||||
/// same path should return a `ReadOnlySource`.
|
||||
///
|
||||
/// Write operations may be aggressively buffered.
|
||||
/// The client of this trait is in charge to call flush
|
||||
/// to ensure that subsequent `read` operations
|
||||
/// will take in account preceding `write` operations.
|
||||
///
|
||||
/// Flush operation should also be persistent.
|
||||
///
|
||||
/// User shall not rely on `Drop` triggering `flush`.
|
||||
/// Note that `RAMDirectory` will panic! if `flush`
|
||||
/// was not called.
|
||||
///
|
||||
///
|
||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr>;
|
||||
|
||||
/// Atomically replace the content of a file by data.
|
||||
///
|
||||
/// This calls ensure that reads can never *observe*
|
||||
/// a partially written file.
|
||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ use std::sync::RwLock;
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use std::io;
|
||||
use std::io::{Seek, SeekFrom};
|
||||
use directory::Directory;
|
||||
use directory::ReadOnlySource;
|
||||
use directory::WritePtr;
|
||||
@@ -59,9 +60,47 @@ impl MmapDirectory {
|
||||
self.root_path.join(relative_path)
|
||||
}
|
||||
|
||||
fn sync_directory(&self,) -> Result<()> {
|
||||
let fd = try!(File::open(&self.root_path));
|
||||
try!(fd.sync_all());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// This Write wraps a File, but has the specificity of
|
||||
/// call sync_all on flush.
|
||||
struct SafeFileWriter {
|
||||
writer: BufWriter<File>,
|
||||
}
|
||||
|
||||
impl SafeFileWriter {
|
||||
fn new(file: File) -> SafeFileWriter {
|
||||
SafeFileWriter {
|
||||
writer: BufWriter::new(file),
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Write for SafeFileWriter {
|
||||
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.writer.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
try!(self.writer.flush());
|
||||
self.writer.get_ref().sync_all()
|
||||
}
|
||||
}
|
||||
|
||||
impl Seek for SafeFileWriter {
|
||||
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
|
||||
self.writer.seek(pos)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Directory for MmapDirectory {
|
||||
|
||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenError> {
|
||||
@@ -90,12 +129,17 @@ impl Directory for MmapDirectory {
|
||||
Ok(ReadOnlySource::Mmap(mmap))
|
||||
}
|
||||
|
||||
|
||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr> {
|
||||
let full_path = self.resolve_path(path);
|
||||
let file = try!(File::create(full_path));
|
||||
let buf_writer = BufWriter::new(file);
|
||||
Ok(Box::new(buf_writer))
|
||||
let mut file = try!(File::create(full_path));
|
||||
// making sure the file is created.
|
||||
try!(file.flush());
|
||||
// Apparetntly, on some filesystem syncing the parent
|
||||
// directory is required.
|
||||
try!(self.sync_directory());
|
||||
|
||||
let writer = SafeFileWriter::new(file);
|
||||
Ok(Box::new(writer))
|
||||
}
|
||||
|
||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()> {
|
||||
@@ -107,27 +151,4 @@ impl Directory for MmapDirectory {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sync(&self, path: &Path) -> Result<()> {
|
||||
let full_path = self.resolve_path(path);
|
||||
match File::open(&full_path) {
|
||||
Ok(fd) => {
|
||||
try!(fd.sync_all());
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => {
|
||||
// file does not exists.
|
||||
// this is not considered a failure as some of the file (postings) only exists if
|
||||
// a functionality is used
|
||||
//
|
||||
// TODO be fine-grained about this.
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sync_directory(&self,) -> Result<()> {
|
||||
let fd = try!(File::open(&self.root_path));
|
||||
try!(fd.sync_all());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,16 @@ use std::result;
|
||||
use super::SharedVecSlice;
|
||||
use Result;
|
||||
|
||||
|
||||
/// Writer associated to the `RAMDirectory`
|
||||
///
|
||||
/// The Writer just writes a buffer.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// On drop, if the writer was left in a *dirty* state.
|
||||
/// That is, if flush was not called after the last call
|
||||
/// to write.
|
||||
///
|
||||
struct VecWriter {
|
||||
path: PathBuf,
|
||||
shared_directory: InnerDirectory,
|
||||
@@ -19,7 +28,6 @@ struct VecWriter {
|
||||
is_flushed: bool,
|
||||
}
|
||||
|
||||
|
||||
impl Drop for VecWriter {
|
||||
fn drop(&mut self) {
|
||||
if !self.is_flushed {
|
||||
@@ -99,7 +107,11 @@ impl RAMDirectory {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Directory storing everything in anonymous memory.
|
||||
///
|
||||
/// It's main purpose is unit test.
|
||||
/// Writes are only made visible upon flushing.
|
||||
///
|
||||
pub struct RAMDirectory {
|
||||
fs: InnerDirectory,
|
||||
}
|
||||
@@ -128,11 +140,4 @@ impl Directory for RAMDirectory {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sync(&self, _: &Path) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sync_directory(&self,) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user