This commit is contained in:
Paul Masurel
2016-01-29 17:18:19 +09:00
7 changed files with 153 additions and 14 deletions

View File

@@ -15,4 +15,5 @@ atomicwrites = "0.0.14"
tempfile = "2.0.0"
rustc-serialize = "0.3.16"
log = "0.3.5"
combine = "1.2.0"
tempdir = "0.3.4"

View File

@@ -1,5 +1,5 @@
use std::path::PathBuf;
use std::path::Path;
use std::path::{PathBuf, Path};
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::fs::File;
@@ -18,6 +18,7 @@ use rand::{thread_rng, Rng};
use fst::raw::MmapReadOnly;
use rustc_serialize::json;
use atomicwrites;
use tempdir::TempDir;
/// Newtype wrapping the string name that uniquely identifies a segment.
#[derive(Clone, Debug)]
pub struct SegmentId(pub String);
@@ -44,12 +45,6 @@ impl DirectoryMeta {
}
}
/// Handle on an index directory on disk.
///
/// Cloning is cheap: the mmap cache is shared behind an `Arc<Mutex<..>>`.
/// NOTE(review): this is the pre-change version of the struct from the diff;
/// the post-change version below adds a `_temp_directory` field.
#[derive(Clone)]
pub struct Directory {
// Root path of the index on disk.
index_path: PathBuf,
// Cache of memory-mapped files, keyed by path, shared across clones.
mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
// Metadata describing the directory's segments.
metas: DirectoryMeta,
}
impl fmt::Debug for Directory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -80,6 +75,15 @@ fn sync_file(filepath: &PathBuf) -> Result<()> {
}
}
/// Handle on an index directory on disk.
///
/// Cloning is cheap: the mmap cache and the optional temp-dir guard are
/// shared behind `Arc`s.
#[derive(Clone)]
pub struct Directory {
// Root path of the index on disk.
index_path: PathBuf,
// Cache of memory-mapped files, keyed by path, shared across clones.
mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
// Metadata describing the directory's segments.
metas: DirectoryMeta,
// Keeps the temporary directory alive (and deleted on last drop) when the
// Directory was created via `from_tempdir`; `None` for on-disk indexes.
_temp_directory: Option<Arc<TempDir>>,
}
impl Directory {
// TODO find a rusty way to hide that, while keeping
@@ -90,16 +94,42 @@ impl Directory {
}
/// Opens (or validates) an index directory at `filepath`.
///
/// NOTE(review): the two `metas:` lines below are the before/after halves of
/// a diff hunk (the change adds a trailing comma and the `_temp_directory`
/// field); only one of them belongs in the real source.
pub fn open(filepath: &Path) -> Result<Directory> {
// TODO error management
let mut directory = Directory {
index_path: PathBuf::from(filepath),
mmap_cache: Arc::new(Mutex::new(HashMap::new())),
metas: DirectoryMeta::new()
metas: DirectoryMeta::new(),
// Not a temporary directory: nothing to keep alive or clean up.
_temp_directory: None,
};
try!(directory.load_metas()); //< does the directory already exist?
Ok(directory)
}
fn create_tempdir() -> Result<TempDir> {
let tempdir_res = TempDir::new("index");
match tempdir_res {
Ok(tempdir) => Ok(tempdir),
Err(_) => Err(Error::FileNotFound(String::from("Could not create temp directory")))
}
}
/// Creates a `Directory` backed by a brand-new temporary directory.
///
/// The `TempDir` guard is stored in the `Directory` behind an `Arc`, so the
/// on-disk directory lives as long as any clone of the `Directory` and is
/// removed when the last clone is dropped.
pub fn from_tempdir() -> Result<Directory> {
    let tempdir = try!(Directory::create_tempdir());
    // `tempdir.path()` already borrows from `tempdir`; copy it into an owned
    // PathBuf before moving `tempdir` into the struct below.
    let tempdir_path = PathBuf::from(tempdir.path());
    let mut directory = Directory {
        index_path: tempdir_path,
        mmap_cache: Arc::new(Mutex::new(HashMap::new())),
        metas: DirectoryMeta::new(),
        _temp_directory: Some(Arc::new(tempdir)),
    };
    //< does the directory already exist?
    try!(directory.load_metas());
    Ok(directory)
}
pub fn load_metas(&mut self,) -> Result<()> {
// TODO load segment info
Ok(())

View File

@@ -1,4 +1,4 @@
pub mod query;
pub mod postings;
pub mod global;
pub mod schema;

61
src/core/query.rs Normal file
View File

@@ -0,0 +1,61 @@
use combine;
use combine::{between, char, letter, spaces, space, many1, parser, sep_by1, Parser, ParseError, ParserExt, combinator};
use combine::primitives::{State, Stream, ParseResult};
/// A single query term: `(field name, field value)`, as in `field:value`.
#[derive(Debug, PartialEq)]
pub struct Term(pub String, pub String);
/// AST of a parsed boolean query.
#[derive(Debug, PartialEq)]
pub enum BoolExpr {
// Matches everything; NOTE(review): never produced by `grammar` as written.
AlwaysTrue,
// One or more terms, all of which must match.
Conjunction(Vec<Term>),
}
/// Query grammar: one or more `field:value` terms separated by single spaces,
/// collected into a `BoolExpr::Conjunction`.
///
/// Built against the combine 1.x API (`parse_state`, `ParserExt`), hence the
/// explicit `combinator::Many1<String, _>` type ascriptions needed to pin
/// `many1`'s output collection to `String`.
pub fn grammar<I>(input: State<I>)  -> ParseResult<BoolExpr, I>
where I: Stream<Item=char>
{
// Parser for a single term: letters, ':', letters -> Term(field, value).
// Wrapped in a closure so a fresh parser can be built per use site.
let make_term = || {
let term_field: combinator::Many1<String, _> = many1(letter());
let term_value: combinator::Many1<String, _> = many1(letter());
// t is the tuple (field, ':', value); the ':' at t.1 is discarded.
(term_field, char(':'), term_value).map(|t| Term(t.0.clone(), t.2.clone()))
};
// let term_seqs = (make_term(), space(), parser(grammar::<I>),).map(|t| BoolExpr::AlwaysTrue);
// `sep_by1` requires at least one term, so an empty query is a parse error.
sep_by1(make_term(), space())
.map(BoolExpr::Conjunction)
.parse_state(input)
// make_term().or(term_seqs).parse_state(input)
//make_term()
//
// let word = many1(letter());
//
// //Creates a parser which parses a char and skips any trailing whitespace
// let lex_char = |c| char(c).skip(spaces());
//
// let comma_list = sep_by(parser(expr::<I>), lex_char(','));
// let array = between(lex_char('['), lex_char(']'), comma_list);
//
// //We can use tuples to run several parsers in sequence
// //The resulting type is a tuple containing each parsers output
// let pair = (lex_char('('),
// parser(expr::<I>),
// lex_char(','),
// parser(expr::<I>),
// lex_char(')'))
// .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
//
// word.map(Expr::Id)
// .or(array.map(Expr::Array))
// .or(pair)
// .skip(spaces())
// .parse_state(input)
}
/// Parses `query_str` into a `BoolExpr`, returning the expression together
/// with the unconsumed remainder of the input.
pub fn parse_query(query_str: &str) -> Result<(BoolExpr, &str), ParseError<&str>> {
    let mut query_parser = parser(grammar);
    query_parser.parse(query_str)
}

View File

@@ -12,6 +12,8 @@ extern crate memmap;
extern crate rand;
extern crate regex;
extern crate rustc_serialize;
extern crate combine;
extern crate atomicwrites;
extern crate tempdir;
pub mod core;

View File

@@ -17,6 +17,20 @@ use std::io::{ BufWriter, Write};
use regex::Regex;
use std::convert::From;
use std::path::PathBuf;
use tantivy::core::query;
use tantivy::core::query::{parse_query, BoolExpr};
/// A two-term query must parse into a conjunction of both terms, in order.
#[test]
fn test_parse_query() {
    let (parsed_query, _remaining) = parse_query("toto:titi toto:tutu").unwrap();
    let expected = BoolExpr::Conjunction(vec!(
        query::Term(String::from("toto"), String::from("titi")),
        query::Term(String::from("toto"), String::from("tutu")),
    ));
    assert_eq!(parsed_query, expected);
}
#[test]
fn test_intersection() {
@@ -35,8 +49,7 @@ fn test_tokenizer() {
#[test]
fn test_indexing() {
let tmp_dir = tempdir::TempDir::new("test_indexing").unwrap();
let directory = Directory::open(tmp_dir.path()).unwrap();
let directory = Directory::from_tempdir().unwrap();
{
// writing the segment
let mut index_writer = IndexWriter::open(&directory);
@@ -55,13 +68,45 @@ fn test_indexing() {
doc.set(Field(1), "a b c d");
index_writer.add(doc);
}
let debug_serializer = DebugSegmentSerializer::new();
let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
assert!(index_writer.commit().is_ok());
let commit_result = index_writer.commit();
assert!(commit_result.is_ok());
let segment = commit_result.unwrap();
let index_reader = SegmentIndexReader::open(segment).unwrap();
let segment_str_after_reading = DebugSegmentSerializer::debug_string(&index_reader);
assert_eq!(segment_str_before_writing, segment_str_after_reading);
// =======
//
// let commit_result = index_writer.commit();
// println!("{:?}", commit_result);
// assert!(commit_result.is_ok());
// // reading the segment
// println!("------");
// {
// let segment = commit_result.unwrap();
// let index_reader = SegmentIndexReader::open(segment).unwrap();
// let mut term_cursor = index_reader.term_cursor();
// loop {
// match term_cursor.next() {
// Some((term, doc_cursor)) => {
// println!("{:?}", term);
// for doc in doc_cursor {
// println!(" Doc {}", doc);
// }
// },
// None => {
// break;
// },
// }
// }
// }
// }
// {
// // TODO add index opening stuff
// // let index_reader = IndexReader::open(&directory);
// >>>>>>> a515294b8df80a518a096830bfa2940b802117d8
}
}

0
tests/query.rs Normal file
View File