diff --git a/src/core/index.rs b/src/core/index.rs index cd2f84dd4..0ece29fd0 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -150,16 +150,7 @@ impl Index { } self.save_metas() } - - pub fn sync(&mut self, segment: &Segment) -> Result<()> { - let directory = try!(self.ro_directory()); - for component in SegmentComponent::values() { - let path = segment.relative_path(component); - try!(directory.sync(&path)); - } - try!(self.ro_directory()).sync_directory() - } - + pub fn segments(&self,) -> Vec { self.segment_ids() .into_iter() diff --git a/src/core/writer.rs b/src/core/writer.rs index 07356a35c..e154b2028 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -80,7 +80,6 @@ impl IndexWriter { segment_writer.add_document(&doc, &schema_clone).unwrap(); } segment_writer.finalize().unwrap(); - index_clone.sync(&segment).unwrap(); index_clone.publish_segment(&segment).unwrap(); } }) @@ -100,7 +99,6 @@ impl IndexWriter { let merged_segment = self.index.new_segment(); let segment_serializer = try!(SegmentSerializer::for_segment(&merged_segment)); try!(merger.write(segment_serializer)); - try!(self.index.sync(&merged_segment)); try!(self.index.publish_merge_segment(segments, &merged_segment)); Ok(()) } diff --git a/src/directory/directory.rs b/src/directory/directory.rs index fbd259883..6c0396ca3 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -6,18 +6,50 @@ use directory::{ReadOnlySource, WritePtr, OpenError}; use std::result; use Result; - +/// Abstraction for where tantivy's index should be stored. +/// /// There is currently two implementations of `Directory` -/// - [RAMDirectory](index.html) +/// +/// - The [MMapDirectory](struct.MmapDirectory.html), this +/// should be your default choice. +/// - The [RAMDirectory](struct.RAMDirectory.html), which +/// should be used mostly for tests. /// pub trait Directory: fmt::Debug + Send + Sync { + + /// Opens a virtual file for read. + /// + /// Once a virtualfile is open, its data may not + /// change. + /// + /// Specifically, subsequent write or flush should + /// have no effect the returned `ReadOnlySource` object. fn open_read(&self, path: &Path) -> result::Result; - fn open_write(&mut self, path: &Path) -> Result; - fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>; - /// Syncs the file if it exists - /// If it does not exists, just return Ok(()) - // TODO Change the API - fn sync(&self, path: &Path) -> Result<()>; - fn sync_directory(&self,) -> Result<()>; + /// Opens a writer for the *virtual file* associated with + /// a Path. + /// + /// Right after this call, the file should be created + /// and any subsequent call to `open_read` for the + /// same path should return a `ReadOnlySource`. + /// + /// Write operations may be aggressively buffered. + /// The client of this trait is in charge to call flush + /// to ensure that subsequent `read` operations + /// will take in account preceding `write` operations. + /// + /// Flush operation should also be persistent. + /// + /// User shall not rely on `Drop` triggering `flush`. + /// Note that `RAMDirectory` will panic! if `flush` + /// was not called. + /// + /// + fn open_write(&mut self, path: &Path) -> Result; + + /// Atomically replace the content of a file by data. + /// + /// This calls ensure that reads can never *observe* + /// a partially written file. + fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>; } diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index ce427082c..3cf8335ee 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -10,6 +10,7 @@ use std::sync::RwLock; use std::fmt; use std::io::Write; use std::io; +use std::io::{Seek, SeekFrom}; use directory::Directory; use directory::ReadOnlySource; use directory::WritePtr; @@ -59,9 +60,47 @@ impl MmapDirectory { self.root_path.join(relative_path) } + fn sync_directory(&self,) -> Result<()> { + let fd = try!(File::open(&self.root_path)); + try!(fd.sync_all()); + Ok(()) + } +} + +/// This Write wraps a File, but has the specificity of +/// call sync_all on flush. +struct SafeFileWriter { + writer: BufWriter, +} + +impl SafeFileWriter { + fn new(file: File) -> SafeFileWriter { + SafeFileWriter { + writer: BufWriter::new(file), + } + } } +impl Write for SafeFileWriter { + + fn write(&mut self, buf: &[u8]) -> io::Result { + self.writer.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + try!(self.writer.flush()); + self.writer.get_ref().sync_all() + } +} + +impl Seek for SafeFileWriter { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.writer.seek(pos) + } +} + + impl Directory for MmapDirectory { fn open_read(&self, path: &Path) -> result::Result { @@ -90,12 +129,17 @@ impl Directory for MmapDirectory { Ok(ReadOnlySource::Mmap(mmap)) } - fn open_write(&mut self, path: &Path) -> Result { let full_path = self.resolve_path(path); - let file = try!(File::create(full_path)); - let buf_writer = BufWriter::new(file); - Ok(Box::new(buf_writer)) + let mut file = try!(File::create(full_path)); + // making sure the file is created. + try!(file.flush()); + // Apparetntly, on some filesystem syncing the parent + // directory is required. + try!(self.sync_directory()); + + let writer = SafeFileWriter::new(file); + Ok(Box::new(writer)) } fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()> { @@ -107,27 +151,4 @@ impl Directory for MmapDirectory { Ok(()) } - fn sync(&self, path: &Path) -> Result<()> { - let full_path = self.resolve_path(path); - match File::open(&full_path) { - Ok(fd) => { - try!(fd.sync_all()); - Ok(()) - } - Err(_) => { - // file does not exists. - // this is not considered a failure as some of the file (postings) only exists if - // a functionality is used - // - // TODO be fine-grained about this. - Ok(()) - } - } - } - - fn sync_directory(&self,) -> Result<()> { - let fd = try!(File::open(&self.root_path)); - try!(fd.sync_all()); - Ok(()) - } } diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index faae203df..577048bc2 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -11,7 +11,16 @@ use std::result; use super::SharedVecSlice; use Result; - +/// Writer associated to the `RAMDirectory` +/// +/// The Writer just writes a buffer. +/// +/// # Panics +/// +/// On drop, if the writer was left in a *dirty* state. +/// That is, if flush was not called after the last call +/// to write. +/// struct VecWriter { path: PathBuf, shared_directory: InnerDirectory, @@ -19,7 +28,6 @@ struct VecWriter { is_flushed: bool, } - impl Drop for VecWriter { fn drop(&mut self) { if !self.is_flushed { @@ -99,7 +107,11 @@ impl RAMDirectory { } } - +/// Directory storing everything in anonymous memory. +/// +/// It's main purpose is unit test. +/// Writes are only made visible upon flushing. +/// pub struct RAMDirectory { fs: InnerDirectory, } @@ -128,11 +140,4 @@ impl Directory for RAMDirectory { Ok(()) } - fn sync(&self, _: &Path) -> Result<()> { - Ok(()) - } - - fn sync_directory(&self,) -> Result<()> { - Ok(()) - } }