diff --git a/Cargo.toml b/Cargo.toml
index b12e04f41..341c58db0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,3 +14,5 @@ rand = "0.3.13"
 atomicwrites = "0.0.14"
 tempfile = "2.0.0"
 rustc-serialize = "0.3.16"
+combine = "1.2.0"
+tempdir = "0.3.4"
diff --git a/src/core/directory.rs b/src/core/directory.rs
index f2962389f..5dafd14dd 100644
--- a/src/core/directory.rs
+++ b/src/core/directory.rs
@@ -1,4 +1,4 @@
-use std::path::PathBuf;
+use std::path::{PathBuf, Path};
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use std::fs::File;
@@ -17,6 +17,7 @@ use rand::{thread_rng, Rng};
 use fst::raw::MmapReadOnly;
 use rustc_serialize::json;
 use atomicwrites;
+use tempdir::TempDir;
 
 #[derive(Clone, Debug)]
 pub struct SegmentId(pub String);
@@ -43,12 +44,6 @@ impl DirectoryMeta {
     }
 }
 
-#[derive(Clone)]
-pub struct Directory {
-    index_path: PathBuf,
-    mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
-    metas: DirectoryMeta,
-}
 
 impl fmt::Debug for Directory {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -79,6 +74,15 @@ fn sync_file(filepath: &PathBuf) -> Result<()> {
     }
 }
 
+
+#[derive(Clone)]
+pub struct Directory {
+    index_path: PathBuf,
+    mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
+    metas: DirectoryMeta,
+    _temp_directory: Option<Arc<TempDir>>,
+}
+
 impl Directory {
 
     // TODO find a rusty way to hide that, while keeping
@@ -89,16 +93,42 @@ impl Directory {
     }
 
     pub fn from(filepath: &str) -> Result<Directory> {
-        // TODO error management
        let mut directory = Directory {
            index_path: PathBuf::from(filepath),
            mmap_cache: Arc::new(Mutex::new(HashMap::new())),
-            metas: DirectoryMeta::new()
+            metas: DirectoryMeta::new(),
+            _temp_directory: None,
        };
        try!(directory.load_metas()); //< does the directory already exists?
        Ok(directory)
    }

+    fn create_tempdir() -> Result<TempDir> {
+        let tempdir_res = TempDir::new("index");
+        match tempdir_res {
+            Ok(tempdir) => Ok(tempdir),
+            Err(_) => Err(Error::FileNotFound(String::from("Could not create temp directory")))
+        }
+    }
+
+    pub fn from_tempdir() -> Result<Directory> {
+        let tempdir = try!(Directory::create_tempdir());
+        let tempdir_path: PathBuf;
+        {
+            tempdir_path = PathBuf::from(tempdir.path());
+        };
+        let tempdir_arc = Arc::new(tempdir);
+        let mut directory = Directory {
+            index_path: PathBuf::from(tempdir_path),
+            mmap_cache: Arc::new(Mutex::new(HashMap::new())),
+            metas: DirectoryMeta::new(),
+            _temp_directory: Some(tempdir_arc)
+        };
+        //< does the directory already exists?
+        try!(directory.load_metas());
+        Ok(directory)
+    }
+
    pub fn load_metas(&mut self,) -> Result<()> {
        // TODO load segment info
        Ok(())
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 59260d8c1..6aca6ad90 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -1,4 +1,4 @@
-
+pub mod query;
 pub mod postings;
 pub mod global;
 pub mod schema;
diff --git a/src/core/query.rs b/src/core/query.rs
new file mode 100644
index 000000000..b8972fd7a
--- /dev/null
+++ b/src/core/query.rs
@@ -0,0 +1,61 @@
+use combine;
+use combine::{between, char, letter, spaces, space, many1, parser, sep_by1, Parser, ParseError, ParserExt, combinator};
+use combine::primitives::{State, Stream, ParseResult};
+
+#[derive(Debug, PartialEq)]
+pub struct Term(pub String, pub String);
+
+
+#[derive(Debug, PartialEq)]
+pub enum BoolExpr {
+    AlwaysTrue,
+    Conjunction(Vec<Term>),
+}
+
+
+
+pub fn grammar<I>(input: State<I>) -> ParseResult<BoolExpr, I>
+    where I: Stream<Item=char>
+{
+    let make_term = || {
+        let term_field: combinator::Many1<String, _> = many1(letter());
+        let term_value: combinator::Many1<String, _> = many1(letter());
+        (term_field, char(':'), term_value).map(|t| Term(t.0.clone(), t.2.clone()))
+    };
+
+
+    // let term_seqs = (make_term(), space(), parser(grammar::<I>),).map(|t| BoolExpr::AlwaysTrue);
+    sep_by1(make_term(), space())
+        .map(BoolExpr::Conjunction)
+        .parse_state(input)
+    // make_term().or(term_seqs).parse_state(input)
+    //make_term()
+
+    //
+    // let word = many1(letter());
+    //
+    // //Creates a parser which parses a char and skips any trailing whitespace
+    // let lex_char = |c| char(c).skip(spaces());
+    //
+    // let comma_list = sep_by(parser(expr::<I>), lex_char(','));
+    // let array = between(lex_char('['), lex_char(']'), comma_list);
+    //
+    // //We can use tuples to run several parsers in sequence
+    // //The resulting type is a tuple containing each parsers output
+    // let pair = (lex_char('('),
+    //             parser(expr::<I>),
+    //             lex_char(','),
+    //             parser(expr::<I>),
+    //             lex_char(')'))
+    //     .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
+    //
+    // word.map(Expr::Id)
+    //     .or(array.map(Expr::Array))
+    //     .or(pair)
+    //     .skip(spaces())
+    //     .parse_state(input)
+}
+
+pub fn parse_query(query_str: &str) -> Result<(BoolExpr, &str), ParseError<&str>> {
+    parser(grammar).parse(query_str)
+}
diff --git a/src/lib.rs b/src/lib.rs
index d9bb16e2d..f53e46ae6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,7 +8,8 @@ extern crate memmap;
 extern crate rand;
 extern crate regex;
 extern crate rustc_serialize;
+extern crate combine;
 extern crate atomicwrites;
-
+extern crate tempdir;
 
 pub mod core;
diff --git a/tests/core.rs b/tests/core.rs
index 964ea72fa..8ce0b7fb5 100644
--- a/tests/core.rs
+++ b/tests/core.rs
@@ -20,6 +20,20 @@ use std::io::{ BufWriter, Write};
 use regex::Regex;
 use std::convert::From;
 use std::path::PathBuf;
+use tantivy::core::query;
+use tantivy::core::query::{parse_query, BoolExpr};
+#[test]
+fn test_parse_query() {
+    // let left = VecPostings::new(vec!(1, 3, 9));
+    // let right = VecPostings::new(vec!(3, 4, 9, 18));
+    // let inter = intersection(&left, &right);
+    // let vals: Vec<DocId> = inter.iter().collect();
+    // assert_eq!(vals, vec!(3, 9));
+    {
+        let (parsed_query, _) = parse_query("toto:titi toto:tutu").unwrap();
+        assert_eq!(parsed_query, BoolExpr::Conjunction(vec!(query::Term(String::from("toto"), String::from("titi")), query::Term(String::from("toto"), String::from("tutu")))));
+    }
+}
 
 #[test]
 fn test_intersection() {
@@ -38,7 +52,7 @@ fn test_tokenizer() {
 
 #[test]
 fn test_indexing() {
-    let directory = Directory::from("/Users/pmasurel/temp/idx").unwrap();
+    let directory = Directory::from_tempdir().unwrap();
     {
         // writing the segment
         let mut index_writer = IndexWriter::open(&directory);
@@ -59,6 +73,7 @@ fn test_indexing() {
         }
 
         let commit_result = index_writer.commit();
+        println!("{:?}", commit_result);
         assert!(commit_result.is_ok());
         // reading the segment
         println!("------");
@@ -68,7 +83,7 @@ fn test_indexing() {
         let mut term_cursor = index_reader.term_cursor();
         loop {
             match term_cursor.next() {
-                Some((term, mut doc_cursor)) => {
+                Some((term, doc_cursor)) => {
                     println!("{:?}", term);
                     for doc in doc_cursor {
                         println!(" Doc {}", doc);
@@ -80,7 +95,6 @@ fn test_indexing() {
                 }
             }
         }
-        assert!(false);
     }
     {
         // TODO add index opening stuff
diff --git a/tests/query.rs b/tests/query.rs
new file mode 100644
index 000000000..e69de29bb