diff --git a/src/core/directory.rs b/src/core/directory.rs index d555497b6..699c9d330 100644 --- a/src/core/directory.rs +++ b/src/core/directory.rs @@ -277,4 +277,7 @@ mod tests { assert_eq!(data[4], 5); } + + + } diff --git a/src/core/index.rs b/src/core/index.rs index f0c14593e..303457cdc 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -14,6 +14,8 @@ use std::error; use std::io::Read; use std::io::ErrorKind as IOErrorKind; use core::directory::{Directory, MmapDirectory, RAMDirectory, ReadOnlySource, WritePtr}; +use core::writer::IndexWriter; +use core::searcher::Searcher; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct SegmentId(pub String); @@ -93,6 +95,14 @@ impl Index { Ok(index) } + pub fn writer(&self,) -> IndexWriter { + IndexWriter::open(self,) + } + + pub fn searcher(&self,) -> Searcher { + Searcher::for_index(self.clone()) + } + fn from_directory(directory: DirectoryPtr, schema: Schema) -> Index { Index { metas: Arc::new(RwLock::new(IndexMeta::with_schema(schema))), @@ -246,3 +256,19 @@ impl Segment { self.index.directory.write().unwrap().open_write(&path) } } + + + +mod test { + + use super::*; + use regex::Regex; + + #[test] + fn test_new_segment() { + let SegmentId(segment_name) = generate_segment_name(); + let segment_ptn = Regex::new(r"^_[a-z0-9]{8}$").unwrap(); + assert!(segment_ptn.is_match(&segment_name)); + } + +} diff --git a/src/lib.rs b/src/lib.rs index 3315eb8e4..b231e6d4f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,161 @@ extern crate lz4; #[cfg(test)] extern crate test; -mod core; +pub mod core; +pub use core::schema::DocId; pub use core::index::Index; +pub use core::schema::Schema; +pub use core::schema::FieldOptions; +pub use core::schema::Document; + + +mod tests { + + use super::*; + use core::serial::DebugSegmentSerializer; + use core::reader::SegmentReader; + use core::schema::Term; + use core::collector::Collector; + + // only make sense for a single segment + struct TestCollector { + docs: Vec, + } + + impl TestCollector { + pub fn new() -> TestCollector { + TestCollector { + docs: Vec::new(), + } + } + + pub fn docs(self,) -> Vec { + self.docs + } + } + + impl Collector for TestCollector { + + fn set_segment(&mut self, segment: &SegmentReader) {} + + fn collect(&mut self, doc_id: DocId) { + self.docs.push(doc_id); + } + } + + + #[test] + fn test_indexing() { + let mut schema = Schema::new(); + let text_fieldtype = FieldOptions::new().set_tokenized_indexed(); + let text_field = schema.add_field("text", &text_fieldtype); + + let index = Index::create_from_tempdir(schema).unwrap(); + + { + // writing the segment + let mut index_writer = index.writer(); + { + let mut doc = Document::new(); + doc.set(&text_field, "af b"); + index_writer.add(doc); + } + { + let mut doc = Document::new(); + doc.set(&text_field, "a b c"); + index_writer.add(doc); + } + { + let mut doc = Document::new(); + doc.set(&text_field, "a b c d"); + index_writer.add(doc); + } + + let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer()); + println!("{:?}", segment_str_before_writing); + + + let commit_result = index_writer.commit(); + assert!(commit_result.is_ok()); + let segment = commit_result.unwrap(); + let segment_reader = SegmentReader::open(segment).unwrap(); + // TODO ENABLE TEST + + // let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader); + // assert_eq!(segment_str_before_writing, segment_str_after_reading); + } + + } + + + #[test] + fn test_searcher() { + let mut schema = Schema::new(); + let text_fieldtype = FieldOptions::new().set_tokenized_indexed(); + let text_field = schema.add_field("text", &text_fieldtype); + let index = Index::create_from_tempdir(schema).unwrap(); + + { + // writing the segment + let mut index_writer = index.writer(); + { + let mut doc = Document::new(); + doc.set(&text_field, "af b"); + index_writer.add(doc); + } + { + let mut doc = Document::new(); + doc.set(&text_field, "a b c"); + index_writer.add(doc); + } + { + let mut doc = Document::new(); + doc.set(&text_field, "a b c d"); + index_writer.add(doc); + } + let commit_result = index_writer.commit(); + let segment = commit_result.unwrap(); + } + println!("index {:?}", index.schema()); + { + + let searcher = index.searcher(); + let get_doc_ids = |terms: Vec| { + let mut collector = TestCollector::new(); + searcher.search(&terms, &mut collector); + collector.docs() + }; + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "a"))), + vec!(1, 2)); + } + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "af"))), + vec!(0)); + } + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "b"))), + vec!(0, 1, 2)); + } + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "c"))), + vec!(1, 2)); + } + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "d"))), + vec!(2)); + } + { + assert_eq!( + get_doc_ids(vec!(Term::from_field_text(&text_field, "b"), Term::from_field_text(&text_field, "a"), )), + vec!(1, 2)); + } + } + } +} diff --git a/tests/core.rs b/tests/core.rs deleted file mode 100644 index 7ad844107..000000000 --- a/tests/core.rs +++ /dev/null @@ -1,165 +0,0 @@ -#![feature(test)] - -extern crate tantivy; -extern crate regex; -extern crate tempdir; - -use tantivy::core::schema::*; -use tantivy::core::writer::IndexWriter; -use tantivy::core::collector::Collector; -use tantivy::core::searcher::Searcher; -use tantivy::core::index::{Index, generate_segment_name, SegmentId}; -use tantivy::core::reader::SegmentReader; -use regex::Regex; -use tantivy::core::serial::DebugSegmentSerializer; - - -// only make sense for a single segment -struct TestCollector { - docs: Vec, -} - -impl TestCollector { - pub fn new() -> TestCollector { - TestCollector { - docs: Vec::new(), - } - } - - pub fn docs(self,) -> Vec { - self.docs - } -} - -impl Collector for TestCollector { - - fn set_segment(&mut self, segment: &SegmentReader) {} - - fn collect(&mut self, doc_id: DocId) { - self.docs.push(doc_id); - } -} - - - - -#[test] -fn test_indexing() { - let mut schema = Schema::new(); - let text_fieldtype = FieldOptions::new().set_tokenized_indexed(); - let text_field = schema.add_field("text", &text_fieldtype); - - let directory = Index::create_from_tempdir(schema).unwrap(); - - { - // writing the segment - let mut index_writer = IndexWriter::open(&directory); - { - let mut doc = Document::new(); - doc.set(&text_field, "af b"); - index_writer.add(doc); - } - { - let mut doc = Document::new(); - doc.set(&text_field, "a b c"); - index_writer.add(doc); - } - { - let mut doc = Document::new(); - doc.set(&text_field, "a b c d"); - index_writer.add(doc); - } - - let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer()); - println!("{:?}", segment_str_before_writing); - - - let commit_result = index_writer.commit(); - assert!(commit_result.is_ok()); - let segment = commit_result.unwrap(); - let segment_reader = SegmentReader::open(segment).unwrap(); - // TODO ENABLE TEST - - // let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader); - // assert_eq!(segment_str_before_writing, segment_str_after_reading); - } - -} - - -#[test] -fn test_searcher() { - let mut schema = Schema::new(); - let text_fieldtype = FieldOptions::new().set_tokenized_indexed(); - let text_field = schema.add_field("text", &text_fieldtype); - let index = Index::create_from_tempdir(schema).unwrap(); - - { - // writing the segment - let mut index_writer = IndexWriter::open(&index); - { - let mut doc = Document::new(); - doc.set(&text_field, "af b"); - index_writer.add(doc); - } - { - let mut doc = Document::new(); - doc.set(&text_field, "a b c"); - index_writer.add(doc); - } - { - let mut doc = Document::new(); - doc.set(&text_field, "a b c d"); - index_writer.add(doc); - } - let commit_result = index_writer.commit(); - let segment = commit_result.unwrap(); - } - println!("index {:?}", index.schema()); - { - - let searcher = Searcher::for_index(index); - let get_doc_ids = |terms: Vec| { - let mut collector = TestCollector::new(); - searcher.search(&terms, &mut collector); - collector.docs() - }; - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "a"))), - vec!(1, 2)); - } - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "af"))), - vec!(0)); - } - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "b"))), - vec!(0, 1, 2)); - } - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "c"))), - vec!(1, 2)); - } - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "d"))), - vec!(2)); - } - { - assert_eq!( - get_doc_ids(vec!(Term::from_field_text(&text_field, "b"), Term::from_field_text(&text_field, "a"), )), - vec!(1, 2)); - } - } -} - -#[test] -fn test_new_segment() { - let SegmentId(segment_name) = generate_segment_name(); - let segment_ptn = Regex::new(r"^_[a-z0-9]{8}$").unwrap(); - assert!(segment_ptn.is_match(&segment_name)); -}