mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 23:50:41 +00:00
249 lines
7.2 KiB
Rust
249 lines
7.2 KiB
Rust
use std::path::{PathBuf, Path};
|
|
use std::collections::HashMap;
|
|
use std::fs;
|
|
use std::io;
|
|
use core::schema::Schema;
|
|
use std::io::Write;
|
|
use std::borrow::BorrowMut;
|
|
use std::sync::{Arc, RwLock, RwLockWriteGuard, RwLockReadGuard};
|
|
use std::fmt;
|
|
use std::cell::RefCell;
|
|
use rand::{thread_rng, Rng};
|
|
use rustc_serialize::json;
|
|
use std::error;
|
|
use std::io::Read;
|
|
use std::io::ErrorKind as IOErrorKind;
|
|
use core::directory::{Directory, MmapDirectory, RAMDirectory, ReadOnlySource, WritePtr};
|
|
|
|
/// Identifier of a segment, wrapping the segment's name as a `String`.
///
/// The wrapped name is the prefix of the file names built by
/// `Segment::relative_path`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct SegmentId(pub String);
|
|
|
|
|
|
pub fn generate_segment_name() -> SegmentId {
|
|
static CHARS: &'static [u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
|
|
let random_name: String = (0..8)
|
|
.map(|_| thread_rng().choose(CHARS).unwrap().clone() as char)
|
|
.collect();
|
|
SegmentId( String::from("_") + &random_name)
|
|
}
|
|
|
|
#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
|
|
pub struct IndexMeta {
|
|
segments: Vec<String>,
|
|
schema: Schema,
|
|
}
|
|
|
|
impl IndexMeta {
|
|
fn new() -> IndexMeta {
|
|
IndexMeta {
|
|
segments: Vec::new(),
|
|
schema: Schema::new(),
|
|
}
|
|
}
|
|
fn with_schema(schema: Schema) -> IndexMeta {
|
|
IndexMeta {
|
|
segments: Vec::new(),
|
|
schema: schema,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for Index {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(f, "Index({:?})", self.directory)
|
|
}
|
|
}
|
|
|
|
/// Boxed `Directory` trait object. `Index` and `Segment` go through it to
/// open, write and sync the files of the index.
type DirectoryPtr = Box<Directory>;
|
|
|
|
#[derive(Clone)]
|
|
pub struct Index {
|
|
metas: Arc<RwLock<IndexMeta>>,
|
|
directory: Arc<RwLock<DirectoryPtr>>,
|
|
}
|
|
|
|
/// Builds the `io::Error` (kind `Other`) reported when a lock on the
/// directory could not be acquired.
///
/// The helper is used for read locks as well as write locks, so the message
/// does not name either of them.
///
/// The argument is the error returned by the failed lock acquisition. It is
/// dropped: `E` carries no trait bound, hence it cannot be formatted into
/// the message.
fn could_not_acquire_lock<E>(_e: E) -> io::Error {
    io::Error::new(IOErrorKind::Other, "Could not acquire lock on directory")
}
|
|
|
|
|
|
// Unit struct with no field and no impl in this file.
// NOTE(review): nothing in this file references it -- confirm whether it is
// still needed or meant to become the error type of this module.
struct IndexError;
|
|
|
|
// Path of the metadata file (`meta.json`), as handed to the directory by
// `Index::load_metas` (read) and `Index::save_metas` (write).
lazy_static! {
    static ref META_FILEPATH: PathBuf = Path::new("meta.json").to_path_buf();
}
|
|
|
|
impl Index {
|
|
|
|
pub fn create(directory_path: &Path, schema: Schema) -> io::Result<Index> {
|
|
let directory = Box::new(try!(MmapDirectory::create(directory_path)));
|
|
Ok(Index::from_directory(directory, schema))
|
|
}
|
|
|
|
pub fn create_from_tempdir(schema: Schema) -> io::Result<Index> {
|
|
let directory = Box::new(try!(MmapDirectory::create_from_tempdir()));
|
|
Ok(Index::from_directory(directory, schema))
|
|
}
|
|
|
|
pub fn open(directory_path: &Path) -> io::Result<Index> {
|
|
let directory = try!(MmapDirectory::create(directory_path));
|
|
let directory_ptr = Box::new(directory);
|
|
let mut index = Index::from_directory(directory_ptr, Schema::new());
|
|
try!(index.load_metas()); //< does the directory already exists?
|
|
Ok(index)
|
|
}
|
|
|
|
fn from_directory(directory: DirectoryPtr, schema: Schema) -> Index {
|
|
Index {
|
|
metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema))),
|
|
directory: Arc::new(RwLock::new(directory)),
|
|
}
|
|
}
|
|
|
|
pub fn schema(&self,) -> Schema {
|
|
self.metas.read().unwrap().schema.clone()
|
|
}
|
|
|
|
fn get_write(&mut self) -> io::Result<RwLockWriteGuard<DirectoryPtr>> {
|
|
self.directory
|
|
.write()
|
|
.map_err(|e| io::Error::new(IOErrorKind::Other,
|
|
format!("Failed acquiring lock on directory.\n
|
|
It can happen if another thread panicked! Error was: {:?}", e) ))
|
|
}
|
|
|
|
fn get_read(&self) -> io::Result<RwLockReadGuard<DirectoryPtr>> {
|
|
self.directory
|
|
.read()
|
|
.map_err(|e| io::Error::new(IOErrorKind::Other,
|
|
format!("Failed acquiring lock on directory.\n
|
|
It can happen if another thread panicked! Error was: {:?}", e) ))
|
|
}
|
|
|
|
|
|
|
|
// TODO find a rusty way to hide that, while keeping
|
|
// it visible for IndexWriters.
|
|
pub fn publish_segment(&mut self, segment: Segment) -> io::Result<()> {
|
|
println!("publish segment {:?}", segment);
|
|
self.metas.write().unwrap().segments.push(segment.segment_id.0.clone());
|
|
// TODO use logs
|
|
self.save_metas()
|
|
}
|
|
|
|
pub fn sync(&mut self, segment: Segment) -> io::Result<()> {
|
|
for component in [SegmentComponent::POSTINGS, SegmentComponent::TERMS].iter() {
|
|
let path = segment.relative_path(component);
|
|
let directory = try!(self.directory
|
|
.read()
|
|
.map_err(could_not_acquire_lock));
|
|
try!(directory.sync(&path));
|
|
}
|
|
let directory = try!(self.directory
|
|
.read()
|
|
.map_err(could_not_acquire_lock));
|
|
directory.sync_directory()
|
|
}
|
|
|
|
pub fn segments(&self,) -> Vec<Segment> {
|
|
// TODO handle error
|
|
self.segment_ids()
|
|
.into_iter()
|
|
.map(|segment_id| self.segment(&segment_id))
|
|
.collect()
|
|
}
|
|
|
|
pub fn segment(&self, segment_id: &SegmentId) -> Segment {
|
|
Segment {
|
|
index: self.clone(),
|
|
segment_id: segment_id.clone()
|
|
}
|
|
}
|
|
|
|
fn segment_ids(&self,) -> Vec<SegmentId> {
|
|
self.metas
|
|
.read()
|
|
.unwrap()
|
|
.segments
|
|
.iter()
|
|
.cloned()
|
|
.map(SegmentId)
|
|
.collect()
|
|
}
|
|
|
|
pub fn new_segment(&self,) -> Segment {
|
|
// TODO check it does not exists
|
|
self.segment(&generate_segment_name())
|
|
}
|
|
|
|
pub fn load_metas(&mut self,) -> io::Result<()> {
|
|
let meta_file = try!(
|
|
self.directory
|
|
.read()
|
|
.map_err(could_not_acquire_lock)
|
|
.and_then(|d| d.open_read(&META_FILEPATH)));
|
|
let meta_content = String::from_utf8_lossy(meta_file.as_slice());
|
|
let loaded_meta: IndexMeta = json::decode(&meta_content).unwrap();
|
|
self.metas.write().unwrap().clone_from(&loaded_meta);
|
|
Ok(())
|
|
}
|
|
|
|
pub fn save_metas(&self,) -> io::Result<()> {
|
|
let metas_lock = self.metas.read().unwrap();
|
|
let encoded = json::encode(&*metas_lock).unwrap();
|
|
try!(self.directory
|
|
.write()
|
|
.map_err(could_not_acquire_lock)
|
|
).atomic_write(&META_FILEPATH, encoded.as_bytes())
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////
|
|
// Segment
|
|
|
|
/// Kind of file belonging to a segment.
///
/// Each variant maps to a file-name suffix in `Segment::path_suffix`.
/// The type is a plain field-less enum, so it derives the usual value-type
/// traits: it can be printed, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SegmentComponent {
    /// Postings file (suffix `.idx`).
    POSTINGS,
    // POSITIONS is disabled for now; so is its suffix in `Segment::path_suffix`.
    // POSITIONS,
    /// Terms file (suffix `.term`).
    TERMS,
    /// Store file (suffix `.store`).
    STORE,
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct Segment {
|
|
index: Index,
|
|
segment_id: SegmentId,
|
|
}
|
|
|
|
impl Segment {
|
|
|
|
pub fn id(&self,) -> SegmentId {
|
|
self.segment_id.clone()
|
|
}
|
|
|
|
fn path_suffix(component: &SegmentComponent)-> &'static str {
|
|
match *component {
|
|
// SegmentComponent::POSITIONS => ".pos",
|
|
SegmentComponent::POSTINGS => ".idx",
|
|
SegmentComponent::TERMS => ".term",
|
|
SegmentComponent::STORE => ".store",
|
|
}
|
|
}
|
|
|
|
pub fn relative_path(&self, component: &SegmentComponent) -> PathBuf {
|
|
let SegmentId(ref segment_id_str) = self.segment_id;
|
|
let filename = String::new() + segment_id_str + Segment::path_suffix(component);
|
|
PathBuf::from(filename)
|
|
}
|
|
|
|
pub fn open_read(&self, component: SegmentComponent) -> io::Result<ReadOnlySource> {
|
|
let path = self.relative_path(&component);
|
|
self.index.directory.read().unwrap().open_read(&path)
|
|
}
|
|
|
|
pub fn open_write(&self, component: SegmentComponent) -> io::Result<WritePtr> {
|
|
let path = self.relative_path(&component);
|
|
self.index.directory.write().unwrap().open_write(&path)
|
|
}
|
|
}
|