This commit is contained in:
Paul Masurel
2016-01-18 20:45:15 +09:00
parent e244bd8403
commit f3d5fc257e
6 changed files with 69 additions and 20 deletions

View File

@@ -12,3 +12,4 @@ regex = "0.1"
fst = "0.1.26"
rand = "0.3.13"
atomicwrites = "0.0.14"
tempfile = "2.0.0"

View File

@@ -6,9 +6,12 @@ use core::error::*;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use core::directory::Segment;
use core::directory::SegmentComponent;
use core::reader::*;
/// Interface for serializing an index segment.
pub trait Codec {
// Disabled draft of a read-side API (kept commented out):
// type SearchableSegmentImpl: SearchableSegment;
// fn open(segment: &Segment) -> Self::SearchableSegmentImpl;
/// Writes `index` out to `segment`.
///
/// Both arguments are borrowed for the same lifetime `'a`. Failures are
/// reported through the `Result` alias imported from `core::error`.
/// NOTE(review): the meaning of the returned `usize` is not visible in this
/// hunk — presumably a size or count of what was written; confirm against
/// the implementors.
fn write<'a, I: SerializableSegment<'a>>(index: &'a I, segment: &'a Segment) -> Result<usize>;
}
@@ -39,9 +42,20 @@ impl SimpleCodec {
}
}
// TODO impl packed int
// TODO skip lists
impl Codec for SimpleCodec {
// type SearchableSegmentImpl = SimpleSearchableSegment;
//
// fn open(segment: &Segment) -> SimpleSearchableSegment {
// SimpleSearchableSegment::new(segment)
// }
fn write<'a, I: SerializableSegment<'a>>(index: &'a I, segment: &'a Segment) -> Result<usize> {
let mut term_write = try!(segment.open_writable(SegmentComponent::TERMS));
let term_write = try!(segment.open_writable(SegmentComponent::TERMS));
let mut postings_write = try!(segment.open_writable(SegmentComponent::POSTINGS));
let term_trie_builder_result = MapBuilder::new(term_write);
if term_trie_builder_result.is_err() {

View File

@@ -11,10 +11,10 @@ use std::io;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
use std::rc::Rc;
use std::sync::{Arc, Mutex, RwLock, MutexGuard};
use std::fmt;
use std::ops::Deref;
use std::cell::RefCell;
use std::sync::Arc;
use core::error::*;
use rand::{thread_rng, Rng};
@@ -30,10 +30,11 @@ pub fn generate_segment_name() -> SegmentId {
SegmentId( String::from("_") + &random_name)
}
/// Handle on an index directory.
///
/// Derives `Clone`. With the `Arc`-wrapped cache, a clone shares the same
/// underlying map rather than copying it.
#[derive(Clone)]
pub struct Directory {
// Base path of the index; relative paths are joined onto it.
index_path: PathBuf,
// Cache of opened memory maps, keyed by path.
// NOTE(review): this is a flattened diff — the two `mmap_cache` lines below are
// presumably the removed (`RefCell`) and the added (`Arc<Mutex<..>>`) versions
// of the same field, not two fields.
mmap_cache: RefCell<HashMap<PathBuf, SharedMmapMemory >>,
mmap_cache: Arc<Mutex<HashMap<PathBuf, SharedMmapMemory>>>,
}
impl fmt::Debug for Directory {
@@ -58,7 +59,7 @@ impl Directory {
/// Builds a `Directory` rooted at `filepath` with an empty mmap cache.
///
/// The path is only stored; nothing in this body touches the filesystem.
pub fn from(filepath: &str) -> Directory {
Directory {
index_path: PathBuf::from(filepath),
// NOTE(review): flattened diff — the first initializer is presumably the
// removed `RefCell` form, the second the added `Arc<Mutex<..>>` form.
mmap_cache: RefCell::new(HashMap::new()),
mmap_cache: Arc::new(Mutex::new(HashMap::new())),
}
}
@@ -66,19 +67,19 @@ impl Directory {
self.index_path.join(relative_path)
}
/// Returns a `Segment` handle for `segment_id` within this directory.
///
/// The segment id is cloned into the returned value.
// NOTE(review): flattened diff — two signatures and two `directory:` lines are
// shown. Presumably the first of each pair is the removed form (private,
// borrowing `self` for `'a`) and the second is the added form (public, storing
// a clone of the directory).
fn segment<'a>(&'a self, segment_id: &SegmentId) -> Segment<'a> {
pub fn segment(&self, segment_id: &SegmentId) -> Segment {
Segment {
directory: self,
directory: self.clone(),
segment_id: segment_id.clone()
}
}
/// Returns a `Segment` handle for a freshly generated segment name.
///
/// Delegates to `segment` with the id produced by `generate_segment_name()`.
// NOTE(review): flattened diff — the two signatures are presumably the removed
// (lifetime-carrying) and added (owning) forms of the same method.
pub fn new_segment<'a>(&'a self,) -> Segment<'a> {
pub fn new_segment(&self,) -> Segment {
// TODO: check that a segment with the generated name does not already exist.
self.segment(&generate_segment_name())
}
fn open_writable<'a>(&self, relative_path: &PathBuf) -> Result<File> {
fn open_writable(&self, relative_path: &PathBuf) -> Result<File> {
let full_path = self.resolve_path(relative_path);
match File::create(full_path.clone()) {
Ok(f) => Ok(f),
@@ -89,13 +90,20 @@ impl Directory {
}
}
/// Returns the memory map for `relative_path`, opening it and caching it on
/// first use.
///
/// The path is resolved with `resolve_path`, and the resolved path is the
/// cache key. The returned value is a clone of the cached entry.
///
/// # Errors
/// - Propagates any error from `open_mmap` (through `try!`).
/// - In the `Mutex`-based version, returns `Error::CannotAcquireLock` when
///   `lock()` on the cache mutex returns `Err`.
// NOTE(review): flattened diff — the old `RefCell`-based body and the new
// `Mutex`-based body are interleaved below; the inline comments mark which
// lines presumably belong to which version.
fn open_readable<'a>(&self, relative_path: &PathBuf) -> Result<SharedMmapMemory> {
fn open_readable(&self, relative_path: &PathBuf) -> Result<SharedMmapMemory> {
let full_path = self.resolve_path(relative_path);
// Presumably removed: borrow the `RefCell` cache mutably and fill it on a miss.
let mut cache = self.mmap_cache.borrow_mut();
if !cache.contains_key(&full_path) {
cache.insert(full_path.clone(), try!(open_mmap(&full_path)) );
// Presumably added: reach the `Mutex` through the `Arc` and lock it.
// NOTE(review): the `mut` on this binding looks unnecessary — only `lock()` is called on it.
let mut cache_mutex = self.mmap_cache.deref();
match cache_mutex.lock() {
Ok(mut cache) => {
// Map the file only when it is not cached yet. `try!` returns early on
// failure, so nothing is inserted in that case.
if !cache.contains_key(&full_path) {
cache.insert(full_path.clone(), try!(open_mmap(&full_path)) );
}
// The key is present at this point (already cached or just inserted),
// so the `unwrap` is not expected to fail.
return Ok(cache.get(&full_path).unwrap().clone())
},
// `Mutex::lock` returns `Err` when the mutex is poisoned; the original
// error is discarded and replaced by a fixed message.
Err(_) => {
return Err(Error::CannotAcquireLock(String::from("Cannot acquire mmap cache lock.")))
}
}
// Presumably removed: tail expression of the old `RefCell`-based body.
Ok(cache.get(&full_path).unwrap().clone())
}
}
@@ -109,12 +117,12 @@ pub enum SegmentComponent {
}
/// A segment, identified by `segment_id`, together with the `Directory` it
/// belongs to.
#[derive(Debug)]
// NOTE(review): flattened diff — the lifetime-parameterized header and the
// `&'a Directory` field are presumably the removed lines; the plain header and
// the owned `Directory` field are presumably their replacements.
pub struct Segment<'a> {
directory: &'a Directory,
pub struct Segment {
directory: Directory,
// Identifier of this segment.
segment_id: SegmentId,
}
impl<'a> Segment<'a> {
impl Segment {
fn path_suffix(component: SegmentComponent)-> &'static str {
match component {
SegmentComponent::POSTINGS => ".idx",

View File

@@ -7,6 +7,7 @@ pub enum Error {
IOError(io::ErrorKind, String),
FileNotFound(String),
ReadOnly(String),
CannotAcquireLock(String),
}
pub type Result<T> = result::Result<T, Error>;

View File

@@ -1,9 +1,34 @@
use core::directory::Directory;
use core::directory::Segment;
/// Reader type that holds its own `Directory` value.
///
/// NOTE(review): no constructor or methods are defined for it in the visible
/// code; the only `impl SegmentIndexReader` in view is commented out.
pub struct SegmentIndexReader {
// Directory this reader operates on (private field).
directory: Directory,
}
// pub trait SearchableSegment {
//
// }
//
// pub struct SimpleSearchableSegment {
// segment: Segment,
// }
//
// impl SimpleSearchableSegment {
//
// pub fn new(segment: &Segment) -> SimpleSearchableSegment {
// SimpleSearchableSegment {
// segment: segment.clone()
// }
// }
// }
//
// impl SearchableSegment for SimpleSearchableSegment {
//
//
// }
//
// impl SegmentIndexReader {
//

View File

@@ -36,7 +36,7 @@ fn test_tokenizer() {
#[test]
fn test_indexing() {
let directory = Directory::from("/home/paul/temp/idx");
let directory = Directory::from("/Users/pmasurel/temp/idx");
{
let mut index_writer = IndexWriter::open(&directory);
{
@@ -55,10 +55,10 @@ fn test_indexing() {
index_writer.add(doc);
}
let commit_result = index_writer.commit();
// println!("{:?}", commit_result.err());
assert!(commit_result.is_ok());
println!("{:?}", commit_result.err());
//debug_assert!(commit_result.is_ok(), commit_result);
// assert!(commit_result.is_ok());
assert!(false);
// SimpleCodec::write(closed_index_writer, output);
// let mut term_cursor = closed_index_writer.term_cursor();
// loop {