Documented the Directory behavior.

Removed sync calls, and moved then on file creation
and flush instead.
This commit is contained in:
Paul Masurel
2016-08-18 23:41:58 +09:00
parent 0972a1c6a0
commit 04238d9e65
5 changed files with 105 additions and 58 deletions

View File

@@ -150,16 +150,7 @@ impl Index {
}
self.save_metas()
}
pub fn sync(&mut self, segment: &Segment) -> Result<()> {
let directory = try!(self.ro_directory());
for component in SegmentComponent::values() {
let path = segment.relative_path(component);
try!(directory.sync(&path));
}
try!(self.ro_directory()).sync_directory()
}
pub fn segments(&self,) -> Vec<Segment> {
self.segment_ids()
.into_iter()

View File

@@ -80,7 +80,6 @@ impl IndexWriter {
segment_writer.add_document(&doc, &schema_clone).unwrap();
}
segment_writer.finalize().unwrap();
index_clone.sync(&segment).unwrap();
index_clone.publish_segment(&segment).unwrap();
}
})
@@ -100,7 +99,6 @@ impl IndexWriter {
let merged_segment = self.index.new_segment();
let segment_serializer = try!(SegmentSerializer::for_segment(&merged_segment));
try!(merger.write(segment_serializer));
try!(self.index.sync(&merged_segment));
try!(self.index.publish_merge_segment(segments, &merged_segment));
Ok(())
}

View File

@@ -6,18 +6,50 @@ use directory::{ReadOnlySource, WritePtr, OpenError};
use std::result;
use Result;
/// Abstraction for where tantivy's index should be stored.
///
/// There is currently two implementations of `Directory`
/// - [RAMDirectory](index.html)
///
/// - The [MMapDirectory](struct.MmapDirectory.html), this
/// should be your default choice.
/// - The [RAMDirectory](struct.RAMDirectory.html), which
/// should be used mostly for tests.
///
pub trait Directory: fmt::Debug + Send + Sync {
/// Opens a virtual file for read.
///
/// Once a virtualfile is open, its data may not
/// change.
///
/// Specifically, subsequent write or flush should
/// have no effect the returned `ReadOnlySource` object.
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenError>;
fn open_write(&mut self, path: &Path) -> Result<WritePtr>;
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>;
/// Syncs the file if it exists
/// If it does not exists, just return Ok(())
// TODO Change the API
fn sync(&self, path: &Path) -> Result<()>;
fn sync_directory(&self,) -> Result<()>;
/// Opens a writer for the *virtual file* associated with
/// a Path.
///
/// Right after this call, the file should be created
/// and any subsequent call to `open_read` for the
/// same path should return a `ReadOnlySource`.
///
/// Write operations may be aggressively buffered.
/// The client of this trait is in charge to call flush
/// to ensure that subsequent `read` operations
/// will take in account preceding `write` operations.
///
/// Flush operation should also be persistent.
///
/// User shall not rely on `Drop` triggering `flush`.
/// Note that `RAMDirectory` will panic! if `flush`
/// was not called.
///
///
fn open_write(&mut self, path: &Path) -> Result<WritePtr>;
/// Atomically replace the content of a file by data.
///
/// This calls ensure that reads can never *observe*
/// a partially written file.
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()>;
}

View File

@@ -10,6 +10,7 @@ use std::sync::RwLock;
use std::fmt;
use std::io::Write;
use std::io;
use std::io::{Seek, SeekFrom};
use directory::Directory;
use directory::ReadOnlySource;
use directory::WritePtr;
@@ -59,9 +60,47 @@ impl MmapDirectory {
self.root_path.join(relative_path)
}
fn sync_directory(&self,) -> Result<()> {
let fd = try!(File::open(&self.root_path));
try!(fd.sync_all());
Ok(())
}
}
/// This Write wraps a File, but has the specificity of
/// call sync_all on flush.
struct SafeFileWriter {
writer: BufWriter<File>,
}
impl SafeFileWriter {
fn new(file: File) -> SafeFileWriter {
SafeFileWriter {
writer: BufWriter::new(file),
}
}
}
impl Write for SafeFileWriter {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.writer.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
try!(self.writer.flush());
self.writer.get_ref().sync_all()
}
}
impl Seek for SafeFileWriter {
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
self.writer.seek(pos)
}
}
impl Directory for MmapDirectory {
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenError> {
@@ -90,12 +129,17 @@ impl Directory for MmapDirectory {
Ok(ReadOnlySource::Mmap(mmap))
}
fn open_write(&mut self, path: &Path) -> Result<WritePtr> {
let full_path = self.resolve_path(path);
let file = try!(File::create(full_path));
let buf_writer = BufWriter::new(file);
Ok(Box::new(buf_writer))
let mut file = try!(File::create(full_path));
// making sure the file is created.
try!(file.flush());
// Apparetntly, on some filesystem syncing the parent
// directory is required.
try!(self.sync_directory());
let writer = SafeFileWriter::new(file);
Ok(Box::new(writer))
}
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> Result<()> {
@@ -107,27 +151,4 @@ impl Directory for MmapDirectory {
Ok(())
}
fn sync(&self, path: &Path) -> Result<()> {
let full_path = self.resolve_path(path);
match File::open(&full_path) {
Ok(fd) => {
try!(fd.sync_all());
Ok(())
}
Err(_) => {
// file does not exists.
// this is not considered a failure as some of the file (postings) only exists if
// a functionality is used
//
// TODO be fine-grained about this.
Ok(())
}
}
}
fn sync_directory(&self,) -> Result<()> {
let fd = try!(File::open(&self.root_path));
try!(fd.sync_all());
Ok(())
}
}

View File

@@ -11,7 +11,16 @@ use std::result;
use super::SharedVecSlice;
use Result;
/// Writer associated to the `RAMDirectory`
///
/// The Writer just writes a buffer.
///
/// # Panics
///
/// On drop, if the writer was left in a *dirty* state.
/// That is, if flush was not called after the last call
/// to write.
///
struct VecWriter {
path: PathBuf,
shared_directory: InnerDirectory,
@@ -19,7 +28,6 @@ struct VecWriter {
is_flushed: bool,
}
impl Drop for VecWriter {
fn drop(&mut self) {
if !self.is_flushed {
@@ -99,7 +107,11 @@ impl RAMDirectory {
}
}
/// Directory storing everything in anonymous memory.
///
/// It's main purpose is unit test.
/// Writes are only made visible upon flushing.
///
pub struct RAMDirectory {
fs: InnerDirectory,
}
@@ -128,11 +140,4 @@ impl Directory for RAMDirectory {
Ok(())
}
fn sync(&self, _: &Path) -> Result<()> {
Ok(())
}
fn sync_directory(&self,) -> Result<()> {
Ok(())
}
}