From 3d2d28d164d35e0cc41aae4bc3b1d24cef0356f9 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 20 Jan 2016 10:08:45 +0900 Subject: [PATCH] blip... SharedMmap -> fst::raw::MmapReadOnly --- src/core/directory.rs | 53 +++++++++++++------------------------------ src/core/error.rs | 1 + src/core/reader.rs | 38 ++++++++++++++++++------------- src/lib.rs | 2 +- 4 files changed, 40 insertions(+), 54 deletions(-) diff --git a/src/core/directory.rs b/src/core/directory.rs index d1cb19431..d3b698d83 100644 --- a/src/core/directory.rs +++ b/src/core/directory.rs @@ -1,6 +1,3 @@ -extern crate memmap; - -use self::memmap::{Mmap, Protection}; use std::path::PathBuf; use std::collections::HashMap; use std::collections::hash_map::Entry; @@ -17,6 +14,8 @@ use std::ops::Deref; use std::cell::RefCell; use core::error::*; use rand::{thread_rng, Rng}; +// use memmap::{Mmap, Protection}; +use fst::raw::MmapReadOnly; #[derive(Clone, Debug)] pub struct SegmentId(pub String); @@ -33,7 +32,7 @@ pub fn generate_segment_name() -> SegmentId { #[derive(Clone)] pub struct Directory { index_path: PathBuf, - mmap_cache: Arc>>, + mmap_cache: Arc>>, } impl fmt::Debug for Directory { @@ -42,9 +41,9 @@ impl fmt::Debug for Directory { } } -fn open_mmap(full_path: &PathBuf) -> Result { - match Mmap::open_path(full_path.clone(), Protection::Read) { - Ok(mmapped_file) => Ok(SharedMmapMemory::new(mmapped_file)), +fn open_mmap(full_path: &PathBuf) -> Result { + match MmapReadOnly::open_path(full_path.clone()) { + Ok(mmapped_file) => Ok(mmapped_file), Err(ioerr) => { // TODO add file let error_msg = format!("Read-Only MMap of {:?} failed", full_path); @@ -89,7 +88,7 @@ impl Directory { } } - fn open_readable(&self, relative_path: &PathBuf) -> Result { + fn mmap(&self, relative_path: &PathBuf) -> Result { let full_path = self.resolve_path(relative_path); let mut cache_mutex = self.mmap_cache.deref(); match cache_mutex.lock() { @@ -97,7 +96,10 @@ impl Directory { if !cache.contains_key(&full_path) { cache.insert(full_path.clone(), try!(open_mmap(&full_path)) ); } - return Ok(cache.get(&full_path).unwrap().clone()) + let mmap_readonly: &MmapReadOnly = cache.get(&full_path).unwrap(); + // TODO remove if a proper clone is available + let len = unsafe { mmap_readonly.as_slice().len() }; + return Ok(mmap_readonly.range(0, len)) }, Err(_) => { return Err(Error::CannotAcquireLock(String::from("Cannot acquire mmap cache lock."))) @@ -130,42 +132,19 @@ impl Segment { } } - fn get_full_path(&self, component: SegmentComponent) -> PathBuf { - let relative_path = self.get_relative_path(component); - self.directory.resolve_path(&relative_path) - } - - fn get_relative_path(&self, component: SegmentComponent) -> PathBuf { + fn relative_path(&self, component: SegmentComponent) -> PathBuf { let SegmentId(ref segment_id_str) = self.segment_id; let filename = String::new() + segment_id_str + Segment::path_suffix(component); PathBuf::from(filename) } - pub fn get_data(&self, component: SegmentComponent) -> Result { - let path = self.get_relative_path(component); - self.directory.open_readable(&path) + pub fn mmap(&self, component: SegmentComponent) -> Result { + let path = self.relative_path(component); + self.directory.mmap(&path) } pub fn open_writable(&self, component: SegmentComponent) -> Result { - let path = self.get_relative_path(component); + let path = self.relative_path(component); self.directory.open_writable(&path) } } - -#[derive(Clone)] -pub struct SharedMmapMemory(Arc); - -impl SharedMmapMemory { - pub fn new(mmap_memory: Mmap) -> SharedMmapMemory { - SharedMmapMemory(Arc::new(mmap_memory)) - } -} - -impl Borrow<[u8]> for SharedMmapMemory { - - fn borrow(&self) -> &[u8] { - let SharedMmapMemory(ref arc) = *self; - let mmap: &Mmap = arc.borrow(); - unsafe { mmap.as_slice() } - } -} diff --git a/src/core/error.rs b/src/core/error.rs index 626b5da51..849c62921 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -8,6 +8,7 @@ pub enum Error { FileNotFound(String), ReadOnly(String), CannotAcquireLock(String), + FSTFormat(String), } pub type Result = result::Result; diff --git a/src/core/reader.rs b/src/core/reader.rs index 2760fcab5..4c493a637 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -1,9 +1,9 @@ use core::directory::Directory; use core::directory::Segment; use core::schema::Term; -use core::directory::SharedMmapMemory; use fst::Streamer; use fst; +// use fst::raw::{Fst, FstData}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use std::borrow::Borrow; use std::io::Cursor; @@ -11,6 +11,11 @@ use core::global::DocId; use core::serial::{DocCursor, TermCursor}; use core::serial::SerializableSegment; use core::directory::SegmentComponent; +use fst::raw::MmapReadOnly; +use core::error::{Result, Error}; + + + pub struct SegmentDocCursor<'a> { postings_data: Cursor<&'a [u8]>, @@ -83,25 +88,26 @@ impl<'a> TermCursor for SegmentTermCur<'a> { pub struct SegmentIndexReader { segment: Segment, term_offsets: fst::Map, - postings_data: SharedMmapMemory, + postings_data: MmapReadOnly, } impl SegmentIndexReader { - // - // fn open(&self, ) -> Result { - // - // } // // pub fn open(segment: Segment) -> Result { - // let term_filepath = segment.get_full_path(SegmentComponent::TERMS); - // let term_shared_mmap = - // match fst::Map::from_path(term_filepath) { - // Some() - // } - // let postings_shared_mmap = try!(segment.get_data(SegmentComponent::POSTINGS)); - // SegmentIndexReader { - // - // } + // let term_shared_mmap = try!(segment.mmap(SegmentComponent::TERMS)); + // let term_offsets = match Fst::new(FstData::Mmap(term_shared_mmap)).map(fst::Map) { + // Ok(term_offsets) => term_offsets, + // Err(_) => { + // let filepath = segment.relative_path(SegmentComponent::TERMS); + // return Err(Error::FSTFormat(format!("The file {:?} does not seem to be a valid term to offset transducer.", filepath))); + // } + // }; + // let postings_shared_mmap = try!(segment.mmap(SegmentComponent::POSTINGS)); + // Ok(SegmentIndexReader { + // postings_data: postings_shared_mmap, + // term_offsets: term_offsets, + // segment: segment, + // }) // } } @@ -114,7 +120,7 @@ impl<'a> SerializableSegment<'a> for SegmentIndexReader { SegmentTermCur { segment: &self.segment, fst_streamer: self.term_offsets.stream(), - postings_data: self.postings_data.borrow(), + postings_data: unsafe { self.postings_data.borrow().as_slice() }, } } } diff --git a/src/lib.rs b/src/lib.rs index e129aaa35..267ebd555 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ extern crate lazy_static; extern crate fst; extern crate byteorder; - +extern crate memmap; extern crate rand; extern crate regex;