issue/96 Added unit test, documentation and various tiny improvements.

This commit is contained in:
Paul Masurel
2017-04-04 22:43:35 +09:00
parent 35203378ef
commit e0a39fb273
10 changed files with 149 additions and 51 deletions

View File

@@ -3,16 +3,15 @@ mod timer;
mod vint;
pub mod bitpacker;
pub use self::serialize::BinarySerializable;
pub use self::timer::Timing;
pub use self::timer::TimerTree;
pub use self::timer::OpenTimer;
pub use self::vint::VInt;
use std::io;
/// Builds an `io::Error` of kind `Other` that carries the given message.
pub fn make_io_err(msg: String) -> io::Error {
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, msg)
}
@@ -30,7 +29,11 @@ pub trait HasLen {
}
pub fn create_vec_with_len<T>(capacity: usize) -> Vec<T> {
/// Creates an uninitialized `Vec` of a given length.
///
/// `allocate_vec` does an unsafe call to `set_len`
/// as other solutions are extremely slow in debug mode.
pub fn allocate_vec<T>(capacity: usize) -> Vec<T> {
let mut v = Vec::with_capacity(capacity);
unsafe {
v.set_len(capacity);

View File

@@ -153,9 +153,8 @@ impl Index {
/// Returns the list of segments that are searchable
pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
let metas = load_metas(self.directory())?;
Ok(metas
.segments
Ok(self
.searchable_segment_metas()?
.into_iter()
.map(|segment_meta| self.segment(segment_meta))
.collect())
@@ -183,18 +182,17 @@ impl Index {
}
/// Reads the meta.json and returns the list of
/// segments in the last commit.
pub fn segments(&self) -> Result<Vec<SegmentMeta>> {
/// `SegmentMeta` from the last commit.
pub fn searchable_segment_metas(&self) -> Result<Vec<SegmentMeta>> {
Ok(load_metas(self.directory())?.segments)
}
/// Returns the list of segment ids that are searchable.
pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
Ok(load_metas(self.directory())?
.segments
.iter()
.map(|segment_meta| segment_meta.id())
.collect())
Ok(self.searchable_segment_metas()?
.iter()
.map(|segment_meta| segment_meta.id())
.collect())
}
/// Creates a new generation of searchers after

View File

@@ -25,7 +25,20 @@ pub use self::term_iterator::TermIterator;
use std::path::PathBuf;
lazy_static! {
/// The meta file contains all the information about the list of segments and the schema
/// of the index.
pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
/// The managed file contains the list of files that were created by tantivy
/// and will therefore be garbage collected when they are deemed useless by tantivy.
///
/// Removing this file is safe, but will prevent the garbage collection of all of the files
/// that are currently in the directory.
pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
/// Only one process should be able to write tantivy's index at a time.
/// This file, when present, is in charge of preventing other processes from opening
/// an `IndexWriter`.
///
/// If the process is killed and this file remains, it is safe to remove it manually.
pub static ref LOCKFILE_FILEPATH: PathBuf = PathBuf::from(".tantivy-indexer.lock");
}

View File

@@ -43,10 +43,6 @@ impl Segment {
self.index.schema()
}
pub fn index(&self,) -> &Index {
&self.index
}
/// Returns the segment meta-information
pub fn meta(&self) -> &SegmentMeta {
&self.meta
@@ -70,19 +66,25 @@ impl Segment {
self.meta.relative_path(component)
}
/// Shields the file of the given segment component from deletion.
///
/// The returned `FileProtection` object keeps the protection active:
/// the file is guaranteed not to be garbage collected for as long as
/// that object lives.
pub fn protect_from_delete(&self, component: SegmentComponent) -> FileProtection {
    let component_path = self.relative_path(component);
    let directory = self.index.directory();
    directory.protect_file_from_delete(&component_path)
}
/// Open one of the component file for read.
/// Open one of the component files for a *regular* read.
pub fn open_read(&self, component: SegmentComponent) -> result::Result<ReadOnlySource, FileError> {
    // Resolve the component to its relative path, then delegate to the
    // index directory. On failure `?` returns early, converting the error
    // with `From` exactly like `try!` does.
    let component_path = self.relative_path(component);
    let read_source = self.index.directory().open_read(&component_path)?;
    Ok(read_source)
}
/// Open one of the component file for write.
/// Open one of the component files for a *regular* write.
pub fn open_write(&mut self, component: SegmentComponent) -> result::Result<WritePtr, OpenWriteError> {
let path = self.relative_path(component);
let write = try!(self.index.directory_mut().open_write(&path));
@@ -102,4 +104,65 @@ pub trait SerializableSegment {
/// Information record about a segment.
///
/// It can be encoded and decoded through the rustc-serialize derives below.
#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
pub struct SegmentInfo {
/// NOTE(review): presumably the upper bound of the doc ids in the segment — confirm.
pub max_doc: DocId,
}
#[cfg(test)]
mod tests {
use core::SegmentComponent;
use std::path::Path;
use directory::Directory;
use schema::{SchemaBuilder, Document, FieldValue, TEXT, Term};
use Index;
/// Checks that a `FileProtection` obtained through
/// `Segment::protect_from_delete` keeps the DELETE component file in the
/// directory across a commit, and that the file is gone after a commit
/// performed once the protection has been dropped.
#[test]
fn test_segment_protect_component() {
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
{
// Two commits: the first one adds two documents,
// the second one carries one delete to create the DELETE file.
// A single segment is expected afterwards (see the assert below).
{
let doc1 = doc!(text_field=>"a");
index_writer.add_document(doc1);
let doc2 = doc!(text_field=>"b");
index_writer.add_document(doc2);
assert!(index_writer.commit().is_ok());
}
{
index_writer.delete_term(Term::from_field_text(text_field, "a"));
assert!(index_writer.commit().is_ok());
}
}
let segments = index.searchable_segments().unwrap();
let directory = index.directory().clone();
assert_eq!(segments.len(), 1);
// Both the delete file and the term file must exist at this point.
let delete_file_path = Path::new("00000000000000000000000000000000.4.del");
let idx_file_path = Path::new("00000000000000000000000000000000.term");
assert!(directory.exists(&*delete_file_path));
assert!(directory.exists(&*idx_file_path));
{
// The protection is held until the end of this scope.
let _file_protection = segments[0].protect_from_delete(SegmentComponent::DELETE);
index_writer.delete_term(Term::from_field_text(text_field, "b"));
index_writer.commit().unwrap();
// the delete file is protected, and should not be gc'ed.
assert!(directory.exists(&*delete_file_path));
}
index_writer.commit().unwrap();
// at this point the protection is released.
assert!(!directory.exists(&*delete_file_path));
}
}

View File

@@ -1,6 +1,6 @@
use std::cell::UnsafeCell;
use std::mem;
use common::create_vec_with_len;
use common::allocate_vec;
use std::ptr;
/// `BytesRef` refers to a slice in tantivy's custom `Heap`.
@@ -105,18 +105,11 @@ struct InnerHeap {
next_heap: Option<Box<InnerHeap>>,
}
/// initializing a long Vec<u8> is crazy slow in
/// debug mode.
/// We use this unsafe trick to make unit test
/// way faster.
fn allocate_fast(num_bytes: usize) -> Vec<u8> {
create_vec_with_len(num_bytes)
}
impl InnerHeap {
pub fn with_capacity(num_bytes: usize) -> InnerHeap {
let buffer: Vec<u8> = allocate_fast(num_bytes);
let buffer: Vec<u8> = allocate_vec(num_bytes);
InnerHeap {
buffer: buffer,
buffer_len: num_bytes as u32,

View File

@@ -35,11 +35,26 @@ struct MetaInformation {
protected_files: HashMap<PathBuf, usize>,
}
/// A `FileProtection` prevents the garbage collection of a file.
///
/// The protection is released when the `FileProtection` is dropped.
///
/// See `ManagedDirectory.protect_file_from_delete`.
pub struct FileProtection {
// Directory in which the file is protected; used on drop
// to release the protection.
directory: ManagedDirectory,
// Path of the protected file.
path: PathBuf,
}
/// Releases one protection previously taken on `path`.
///
/// Takes the write lock on the directory's meta information and, if `path`
/// has an entry in `protected_files`, decrements its counter by one.
/// A path without an entry is left untouched.
///
/// Panics with "Managed file lock poisoned" if the lock is poisoned.
fn unprotect_file_from_delete(directory: &ManagedDirectory, path: &Path) {
    let mut meta_wlock = directory
        .meta_informations
        .write()
        .expect("Managed file lock poisoned");
    let protection_count = meta_wlock.protected_files.get_mut(path);
    if let Some(count) = protection_count {
        *count -= 1;
    }
}
impl fmt::Debug for FileProtection {
fn fmt(&self, formatter: &mut fmt::Formatter) -> result::Result<(), fmt::Error> {
write!(formatter, "FileProtectionFor({:?})", self.path)
@@ -48,7 +63,7 @@ impl fmt::Debug for FileProtection {
impl Drop for FileProtection {
fn drop(&mut self) {
self.directory.unprotect_file_from_delete(&self.path);
unprotect_file_from_delete(&self.directory, &*self.path);
}
}
@@ -152,6 +167,12 @@ impl ManagedDirectory {
}
/// Protects a file from being garbage collected.
///
/// The method returns a `FileProtection` object.
/// The file will not be garbage collected as long as the
/// `FileProtection` object is kept alive.
pub fn protect_file_from_delete(&self, path: &Path) -> FileProtection {
let mut meta_informations_wlock = self.meta_informations
.write()
@@ -167,16 +188,6 @@ impl ManagedDirectory {
}
}
pub fn unprotect_file_from_delete(&self, path: &Path) {
let mut meta_informations_wlock = self.meta_informations
.write()
.expect("Managed file lock poisoned");
if let Some(counter_ref_mut) = meta_informations_wlock
.protected_files
.get_mut(path) {
(*counter_ref_mut) -= 1;
}
}
/// Saves the file containing the list of existing files
/// that were created by tantivy.
@@ -358,4 +369,28 @@ mod tests {
}
/// Checks that a file written through the `ManagedDirectory` survives
/// `garbage_collect` while a `FileProtection` on it is alive, and is
/// removed by `garbage_collect` once the protection has been dropped.
#[test]
fn test_managed_directory_protect() {
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
// Empty set: no file is declared as living, so only the protection
// can keep the file through garbage collection.
let living_files = HashSet::new();
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
managed_directory.atomic_write(*TEST_PATH1, &vec!(0u8,1u8)).unwrap();
assert!(managed_directory.exists(*TEST_PATH1));
{
// The protection is held until the end of this scope.
let _file_protection = managed_directory.protect_file_from_delete(*TEST_PATH1);
managed_directory.garbage_collect(living_files.clone());
assert!(managed_directory.exists(*TEST_PATH1));
}
// The protection has been dropped: the file is now collected.
managed_directory.garbage_collect(living_files.clone());
assert!(!managed_directory.exists(*TEST_PATH1));
}
}

View File

@@ -197,9 +197,6 @@ pub fn compute_deleted_bitset(
}
// TODO put delete bitset in segment entry
// rather than DocToOpstamp.
// TODO skip delete operation before the
// last delete opstamp

View File

@@ -17,9 +17,7 @@ use fastfield::FastFieldSerializer;
use store::StoreWriter;
use core::SegmentInfo;
use std::cmp::{min, max};
use std::iter;
use common::allocate_vec;
pub struct IndexMerger {
schema: Schema,
@@ -35,9 +33,7 @@ struct DeltaPositionComputer {
impl DeltaPositionComputer {
fn new() -> DeltaPositionComputer {
DeltaPositionComputer {
buffer: iter::repeat(0u32)
.take(512)
.collect::<Vec<u32>>()
buffer: allocate_vec(512)
}
}

View File

@@ -171,7 +171,7 @@ impl SegmentUpdater {
pub fn new(index: Index,
stamper: Stamper,
delete_cursor: DeleteCursor) -> Result<SegmentUpdater> {
let segments = index.segments()?;
let segments = index.searchable_segment_metas()?;
let segment_manager = SegmentManager::from_segments(segments, delete_cursor);
Ok(
SegmentUpdater(Arc::new(InnerSegmentUpdater {

View File

@@ -1,7 +1,7 @@
use std::fmt;
use common::BinarySerializable;
use common::create_vec_with_len;
use common::allocate_vec;
use byteorder::{BigEndian, ByteOrder};
use super::Field;
use std::str;
@@ -45,7 +45,7 @@ impl Term {
/// The first byte is `1`, and the 4 following bytes are that of the u32.
pub fn from_field_u32(field: Field, val: u32) -> Term {
const U32_TERM_LEN: usize = 1 + 4;
let mut buffer = create_vec_with_len(U32_TERM_LEN);
let mut buffer = allocate_vec(U32_TERM_LEN);
buffer[0] = field.0;
// we want BigEndian here to have lexicographic order
// match the natural order of vals.