mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-07)

Commit: blop
@@ -15,4 +15,5 @@ atomicwrites = "0.0.14"
 tempfile = "2.0.0"
 rustc-serialize = "0.3.16"
 log = "0.3.5"
 combine = "1.2.0"
+tempdir = "0.3.4"
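
The new `tempdir` dependency backs the temporary-directory support added to `Directory` below. As a reminder of the wiring this requires in pre-2018 Rust (a minimal sketch, not part of the commit): each Cargo dependency also needs a matching `extern crate` declaration in the crate root before its items can be imported — the `src/lib.rs` hunk further down does exactly that.

// Rust 2015 crate-root wiring for the dependency declared above (sketch).
extern crate tempdir;        // pairs with `tempdir = "0.3.4"` in Cargo.toml

use tempdir::TempDir;        // afterwards, items import as usual

fn main() {
    let dir = TempDir::new("example").expect("could not create temp dir");
    println!("scratch space at {:?}", dir.path());
}   // `dir` is dropped here; the directory is deleted from disk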
@@ -1,5 +1,5 @@
-use std::path::PathBuf;
-use std::path::Path;
+use std::path::{PathBuf, Path};
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use std::fs::File;
@@ -18,6 +18,7 @@ use rand::{thread_rng, Rng};
 use fst::raw::MmapReadOnly;
 use rustc_serialize::json;
 use atomicwrites;
+use tempdir::TempDir;
 
 #[derive(Clone, Debug)]
 pub struct SegmentId(pub String);
@@ -44,12 +45,6 @@ impl DirectoryMeta {
     }
 }
 
-#[derive(Clone)]
-pub struct Directory {
-    index_path: PathBuf,
-    mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
-    metas: DirectoryMeta,
-}
-
 impl fmt::Debug for Directory {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -80,6 +75,15 @@ fn sync_file(filepath: &PathBuf) -> Result<()> {
     }
 }
 
+
+#[derive(Clone)]
+pub struct Directory {
+    index_path: PathBuf,
+    mmap_cache: Arc<Mutex<HashMap<PathBuf, MmapReadOnly>>>,
+    metas: DirectoryMeta,
+    _temp_directory: Option<Arc<TempDir>>,
+}
+
 impl Directory {
 
     // TODO find a rusty way to hide that, while keeping
@@ -90,16 +94,42 @@ impl Directory {
     }
 
     pub fn open(filepath: &Path) -> Result<Directory> {
         // TODO error management
         let mut directory = Directory {
             index_path: PathBuf::from(filepath),
             mmap_cache: Arc::new(Mutex::new(HashMap::new())),
-            metas: DirectoryMeta::new()
+            metas: DirectoryMeta::new(),
+            _temp_directory: None,
         };
         try!(directory.load_metas()); //< does the directory already exist?
         Ok(directory)
     }
 
+    fn create_tempdir() -> Result<TempDir> {
+        let tempdir_res = TempDir::new("index");
+        match tempdir_res {
+            Ok(tempdir) => Ok(tempdir),
+            Err(_) => Err(Error::FileNotFound(String::from("Could not create temp directory"))),
+        }
+    }
+
+    pub fn from_tempdir() -> Result<Directory> {
+        let tempdir = try!(Directory::create_tempdir());
+        let tempdir_path = PathBuf::from(tempdir.path());
+        let tempdir_arc = Arc::new(tempdir);
+        let mut directory = Directory {
+            index_path: tempdir_path,
+            mmap_cache: Arc::new(Mutex::new(HashMap::new())),
+            metas: DirectoryMeta::new(),
+            _temp_directory: Some(tempdir_arc),
+        };
+        //< does the directory already exist?
+        try!(directory.load_metas());
+        Ok(directory)
+    }
+
     pub fn load_metas(&mut self) -> Result<()> {
         // TODO load segment info
         Ok(())
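
The `_temp_directory: Option<Arc<TempDir>>` field exists for its `Drop` side effect: `tempdir`'s `TempDir` deletes its directory from disk when dropped, and since `Directory` derives `Clone`, the `Arc` keeps the scratch directory alive until the last clone goes away. A minimal standalone sketch of that ownership pattern (illustration only, not code from the commit):

extern crate tempdir;

use std::sync::Arc;
use tempdir::TempDir;

fn main() {
    let tempdir = TempDir::new("index").expect("could not create temp dir");
    let path = tempdir.path().to_path_buf();

    // Two handles sharing ownership, as two cloned `Directory` values would.
    let first = Arc::new(tempdir);
    let second = Arc::clone(&first);

    drop(first);
    assert!(path.exists());      // `second` still keeps the directory alive

    drop(second);
    assert!(!path.exists());     // last owner gone, directory deleted
}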
@@ -1,4 +1,4 @@
-
+pub mod query;
 pub mod postings;
 pub mod global;
 pub mod schema;
61  src/core/query.rs  (new file)

@@ -0,0 +1,61 @@
+use combine;
+use combine::{between, char, letter, spaces, space, many1, parser, sep_by1, Parser, ParseError, ParserExt, combinator};
+use combine::primitives::{State, Stream, ParseResult};
+
+#[derive(Debug, PartialEq)]
+pub struct Term(pub String, pub String);
+
+#[derive(Debug, PartialEq)]
+pub enum BoolExpr {
+    AlwaysTrue,
+    Conjunction(Vec<Term>),
+}
+
+pub fn grammar<I>(input: State<I>) -> ParseResult<BoolExpr, I>
+    where I: Stream<Item=char>
+{
+    let make_term = || {
+        let term_field: combinator::Many1<String, _> = many1(letter());
+        let term_value: combinator::Many1<String, _> = many1(letter());
+        (term_field, char(':'), term_value).map(|t| Term(t.0.clone(), t.2.clone()))
+    };
+
+    // let term_seqs = (make_term(), space(), parser(grammar::<I>),).map(|t| BoolExpr::AlwaysTrue);
+    sep_by1(make_term(), space())
+        .map(BoolExpr::Conjunction)
+        .parse_state(input)
+    // make_term().or(term_seqs).parse_state(input)
+    // make_term()
+
+    // Commented-out draft kept from the combine documentation example:
+    // let word = many1(letter());
+    //
+    // // Creates a parser which parses a char and skips any trailing whitespace
+    // let lex_char = |c| char(c).skip(spaces());
+    //
+    // let comma_list = sep_by(parser(expr::<I>), lex_char(','));
+    // let array = between(lex_char('['), lex_char(']'), comma_list);
+    //
+    // // We can use tuples to run several parsers in sequence
+    // // The resulting type is a tuple containing each parser's output
+    // let pair = (lex_char('('),
+    //             parser(expr::<I>),
+    //             lex_char(','),
+    //             parser(expr::<I>),
+    //             lex_char(')'))
+    //     .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
+    //
+    // word.map(Expr::Id)
+    //     .or(array.map(Expr::Array))
+    //     .or(pair)
+    //     .skip(spaces())
+    //     .parse_state(input)
+}
+
+pub fn parse_query(query_str: &str) -> Result<(BoolExpr, &str), ParseError<&str>> {
+    parser(grammar).parse(query_str)
+}
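
For orientation: `sep_by1(make_term(), space())` parses one or more `field:value` terms separated by single spaces and collects them into a `Vec<Term>`, which `map(BoolExpr::Conjunction)` then wraps. A usage sketch mirroring the test added further below (the query string here is illustrative, not from the commit):

use tantivy::core::query::{parse_query, BoolExpr, Term};

fn main() {
    // Each `field:value` pair becomes a Term(field, value); the pairs are
    // implicitly AND-ed together into a conjunction.
    let (expr, remaining) = parse_query("title:hello body:world").unwrap();
    assert_eq!(remaining, "");
    assert_eq!(expr, BoolExpr::Conjunction(vec![
        Term(String::from("title"), String::from("hello")),
        Term(String::from("body"), String::from("world")),
    ]));
}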
@@ -12,6 +12,8 @@ extern crate memmap;
 extern crate rand;
 extern crate regex;
 extern crate rustc_serialize;
+extern crate combine;
 extern crate atomicwrites;
+extern crate tempdir;
 
 pub mod core;
@@ -17,6 +17,20 @@ use std::io::{ BufWriter, Write};
 use regex::Regex;
 use std::convert::From;
 use std::path::PathBuf;
+use tantivy::core::query;
+use tantivy::core::query::{parse_query, BoolExpr};
+#[test]
+fn test_parse_query() {
+    // let left = VecPostings::new(vec!(1, 3, 9));
+    // let right = VecPostings::new(vec!(3, 4, 9, 18));
+    // let inter = intersection(&left, &right);
+    // let vals: Vec<DocId> = inter.iter().collect();
+    // assert_eq!(vals, vec!(3, 9));
+    {
+        let (parsed_query, _) = parse_query("toto:titi toto:tutu").unwrap();
+        assert_eq!(parsed_query, BoolExpr::Conjunction(vec!(query::Term(String::from("toto"), String::from("titi")), query::Term(String::from("toto"), String::from("tutu")))));
+    }
+}
 
 #[test]
 fn test_intersection() {
@@ -35,8 +49,7 @@ fn test_tokenizer() {
 
 #[test]
 fn test_indexing() {
-    let tmp_dir = tempdir::TempDir::new("test_indexing").unwrap();
-    let directory = Directory::open(tmp_dir.path()).unwrap();
+    let directory = Directory::from_tempdir().unwrap();
     {
         // writing the segment
         let mut index_writer = IndexWriter::open(&directory);
@@ -55,13 +68,45 @@ fn test_indexing() {
             doc.set(Field(1), "a b c d");
             index_writer.add(doc);
         }
 
         let debug_serializer = DebugSegmentSerializer::new();
         let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
-        assert!(index_writer.commit().is_ok());
+        let commit_result = index_writer.commit();
+        assert!(commit_result.is_ok());
+        let segment = commit_result.unwrap();
+        let index_reader = SegmentIndexReader::open(segment).unwrap();
+        let segment_str_after_reading = DebugSegmentSerializer::debug_string(&index_reader);
+        assert_eq!(segment_str_before_writing, segment_str_after_reading);
+        // =======
+        //
+        // let commit_result = index_writer.commit();
+        // println!("{:?}", commit_result);
+        // assert!(commit_result.is_ok());
+        // // reading the segment
+        // println!("------");
+        // {
+        //     let segment = commit_result.unwrap();
+        //     let index_reader = SegmentIndexReader::open(segment).unwrap();
+        //     let mut term_cursor = index_reader.term_cursor();
+        //     loop {
+        //         match term_cursor.next() {
+        //             Some((term, doc_cursor)) => {
+        //                 println!("{:?}", term);
+        //                 for doc in doc_cursor {
+        //                     println!("  Doc {}", doc);
+        //                 }
+        //             },
+        //             None => {
+        //                 break;
+        //             },
+        //         }
+        //     }
+        // }
+        // }
+        // {
+        //     // TODO add index opening stuff
+        //     // let index_reader = IndexReader::open(&directory);
+        // >>>>>>> a515294b8df80a518a096830bfa2940b802117d8
     }
 }
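
The reworked assertion keeps `commit_result` because the returned segment is needed to open a `SegmentIndexReader`; the test then compares debug serializations before and after the write/read round trip. The general shape of that check, with hypothetical stand-in types (only the assertion pattern comes from the commit):

use std::fmt::Debug;

// Hypothetical stand-in for a segment's contents.
#[derive(Debug)]
struct Segment(Vec<(u32, String)>);

// Canonical text form used on both sides of the comparison.
fn debug_string<T: Debug>(value: &T) -> String {
    format!("{:?}", value)
}

fn main() {
    let written = Segment(vec![(0, String::from("a b c d"))]);
    let before = debug_string(&written);

    // Stand-in for committing to disk and re-opening through the reader path.
    let read_back = Segment(vec![(0, String::from("a b c d"))]);
    let after = debug_string(&read_back);

    // A segment must read back exactly as it was written.
    assert_eq!(before, after);
}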
0  tests/query.rs  (new file)