diff --git a/src/core/analyzer.rs b/src/core/analyzer.rs index d70bad598..09bb9eff5 100644 --- a/src/core/analyzer.rs +++ b/src/core/analyzer.rs @@ -17,7 +17,6 @@ pub trait StreamingIterator<'a, T> { fn next(&'a mut self) -> Option; } - impl<'a, 'b> TokenIter<'b> { fn consume_token(&'a mut self) -> Option<&'a str> { loop { @@ -79,7 +78,6 @@ impl SimpleTokenizer { #[test] fn test_tokenizer() { let simple_tokenizer = SimpleTokenizer::new(); - let mut term_buffer = String::new(); let mut term_reader = simple_tokenizer.tokenize("hello, happy tax payer!"); assert_eq!(term_reader.next().unwrap(), "hello"); assert_eq!(term_reader.next().unwrap(), "happy"); @@ -87,3 +85,11 @@ fn test_tokenizer() { assert_eq!(term_reader.next().unwrap(), "payer"); assert_eq!(term_reader.next(), None); } + + +#[test] +fn test_tokenizer_empty() { + let simple_tokenizer = SimpleTokenizer::new(); + let mut term_reader = simple_tokenizer.tokenize(""); + assert_eq!(term_reader.next(), None); +} diff --git a/src/core/codec.rs b/src/core/codec.rs index e80d894cd..da4989556 100644 --- a/src/core/codec.rs +++ b/src/core/codec.rs @@ -1,12 +1,10 @@ -use std::io; use core::serial::*; use std::io::Write; use fst::MapBuilder; use core::error::*; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use byteorder::{BigEndian, WriteBytesExt}; use core::directory::Segment; use core::directory::SegmentComponent; -use core::reader::*; use core::schema::Term; use core::DocId; use core::store::StoreWriter; diff --git a/src/core/directory.rs b/src/core/directory.rs index 79fb288a3..0c30a8f57 100644 --- a/src/core/directory.rs +++ b/src/core/directory.rs @@ -1,19 +1,13 @@ use std::path::{PathBuf, Path}; use std::collections::HashMap; -use std::collections::hash_map::Entry; use std::fs::File; use std::fs; use core::schema::Schema; use std::io::Write; -use std::io::BufWriter; -use std::io; -use std::borrow::Borrow; use std::borrow::BorrowMut; -use std::rc::Rc; -use std::sync::{Arc, Mutex, RwLock, MutexGuard, RwLockWriteGuard, RwLockReadGuard}; +use std::sync::{Arc, RwLock, RwLockWriteGuard, RwLockReadGuard}; use std::fmt; -use std::ops::Deref; use std::cell::RefCell; use core::error::*; use rand::{thread_rng, Rng}; @@ -145,19 +139,14 @@ impl Directory { } pub fn segments(&self,) -> Vec { - match self.inner_directory.read() { - Ok(inner) => inner - .segment_ids() - .into_iter() - .map(|segment_id| self.segment(&segment_id)) - .collect(), - Err(e) => { - //Err(Error::LockError(format!("Could not obtain read lock for {:?}", self))) - // TODO make it return a result - panic!("Could not work"); - } - } - + // TODO handle error + self.inner_directory + .read() + .unwrap() + .segment_ids() + .into_iter() + .map(|segment_id| self.segment(&segment_id)) + .collect() } pub fn segment(&self, segment_id: &SegmentId) -> Segment { diff --git a/src/core/global.rs b/src/core/global.rs index e50a97e00..52b2f0ed1 100644 --- a/src/core/global.rs +++ b/src/core/global.rs @@ -1,4 +1 @@ -use std::io::{BufWriter, Write}; -use std::io; - pub type DocId = u32; diff --git a/src/core/mod.rs b/src/core/mod.rs index 6c6ffec2f..7bbe61ed9 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,5 +1,3 @@ - -pub mod query; pub mod postings; pub mod global; pub mod schema; diff --git a/src/core/postings.rs b/src/core/postings.rs index edc3e00b7..0b0925bdf 100644 --- a/src/core/postings.rs +++ b/src/core/postings.rs @@ -1,9 +1,4 @@ -use std::fmt; -use std::fmt::{Debug, Formatter}; -use std::io::prelude::Read; use core::global::DocId; -use std::cmp::Ordering; -use std::vec; use std::ptr; @@ -79,7 +74,7 @@ pub struct IntersectionPostings { } impl IntersectionPostings { - pub fn from_postings(mut postings: Vec) -> IntersectionPostings { + pub fn from_postings(postings: Vec) -> IntersectionPostings { IntersectionPostings { postings: postings, } @@ -124,3 +119,23 @@ impl Iterator for IntersectionPostings { } } + + +#[test] +fn test_intersection() { + { + let left = VecPostings::new(vec!(1, 3, 9)); + let right = VecPostings::new(vec!(3, 4, 9, 18)); + let inter = IntersectionPostings::from_postings(vec!(left, right)); + let vals: Vec = inter.collect(); + assert_eq!(vals, vec!(3, 9)); + } + { + let a = VecPostings::new(vec!(1, 3, 9)); + let b = VecPostings::new(vec!(3, 4, 9, 18)); + let c = VecPostings::new(vec!(1, 5, 9, 111)); + let inter = IntersectionPostings::from_postings(vec!(a, b, c)); + let vals: Vec = inter.collect(); + assert_eq!(vals, vec!(9)); + } +} diff --git a/src/core/query.rs b/src/core/query.rs index 4c481b538..a4a44d4f6 100644 --- a/src/core/query.rs +++ b/src/core/query.rs @@ -58,3 +58,12 @@ pub fn grammar(input: State) -> ParseResult, I> pub fn parse_query(query_str: &str) -> Result<(Vec, &str), ParseError<&str>> { parser(grammar).parse(query_str) } + + +#[test] +fn test_parse_query() { + { + let (parsed_query, _) = parse_query("toto:titi toto:tutu").unwrap(); + assert_eq!(parsed_query, vec!(query::Term(String::from("toto"), String::from("titi")), query::Term(String::from("toto"), String::from("tutu")))); + } +} diff --git a/src/core/reader.rs b/src/core/reader.rs index ed3f0869c..8cdd3e279 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -1,20 +1,12 @@ -use core::directory::Directory; use core::directory::{Segment, SegmentId}; -use std::collections::BinaryHeap; use core::schema::Term; use core::store::StoreReader; use core::schema::Document; -use fst::Streamer; use fst; -use std::io; use core::postings::IntersectionPostings; -use fst::raw::Fst; -use std::cmp::{Eq,PartialEq,Ord,PartialOrd,Ordering}; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; -use std::borrow::Borrow; +use byteorder::{BigEndian, ReadBytesExt}; use std::io::Cursor; use core::global::DocId; -use core::serial::*; use core::directory::SegmentComponent; use fst::raw::MmapReadOnly; use core::error::{Result, Error}; @@ -143,7 +135,6 @@ impl SegmentReader { for term in terms.iter() { match self.get_term(term) { Some(segment_posting) => { - println!("term found {:?}", term); segment_postings.push(segment_posting); } None => { diff --git a/src/core/schema.rs b/src/core/schema.rs index 8031b71fb..a3a30f83c 100644 --- a/src/core/schema.rs +++ b/src/core/schema.rs @@ -1,18 +1,12 @@ -use core::global::*; use core::error; use std::io::Write; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use std::string::FromUtf8Error; use std::collections::HashMap; use std::str; -use std::iter; use std::slice; use std::fmt; use std::io::Read; use core::serialize::BinarySerializable; - - #[derive(Clone,Debug,PartialEq,Eq)] pub struct FieldOptions { // untokenized_indexed: bool, diff --git a/src/core/searcher.rs b/src/core/searcher.rs index facccc213..d82d6b1b4 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -7,7 +7,6 @@ use core::directory::Segment; use core::collector::Collector; use std::collections::HashMap; use core::schema::Term; -use core::postings::Postings; use core::error::Result; pub struct Searcher { diff --git a/src/core/serial.rs b/src/core/serial.rs index 117efc8a5..2266a5162 100644 --- a/src/core/serial.rs +++ b/src/core/serial.rs @@ -1,9 +1,8 @@ use core::global::*; use core::schema::*; -use core::error::{Result, Error}; +use core::error::Result; use std::fmt; - pub trait SegmentSerializer { fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()>; fn write_docs(&mut self, docs: &[DocId]) -> Result<()>; // TODO add size @@ -42,7 +41,6 @@ impl DebugSegmentSerializer { } } - impl SegmentSerializer for DebugSegmentSerializer { fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()> { self.text.push_str(&format!("{:?}\n", term)); diff --git a/src/core/serialize.rs b/src/core/serialize.rs index fc60a1805..a1417af8d 100644 --- a/src/core/serialize.rs +++ b/src/core/serialize.rs @@ -1,10 +1,8 @@ -use byteorder; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use std::fmt; use std::io::Write; use core::error; use core::error::Error; -use std::io::Cursor; use std::io::Read; pub trait BinarySerializable : fmt::Debug + Sized { @@ -14,10 +12,10 @@ pub trait BinarySerializable : fmt::Debug + Sized { } impl BinarySerializable for () { - fn serialize(&self, writer: &mut Write) -> error::Result { + fn serialize(&self, _: &mut Write) -> error::Result { Ok(0) } - fn deserialize(reader: &mut Read) -> error::Result { + fn deserialize(_: &mut Read) -> error::Result { Ok(()) } } @@ -46,7 +44,7 @@ impl BinarySerializable for Vec { impl BinarySerializable for u32 { fn serialize(&self, writer: &mut Write) -> error::Result { writer.write_u32::(self.clone()) - .map(|x| 4) + .map(|_| 4) .map_err(Error::BinaryReadError) } fn deserialize(reader: &mut Read) -> error::Result { @@ -58,7 +56,7 @@ impl BinarySerializable for u32 { impl BinarySerializable for u64 { fn serialize(&self, writer: &mut Write) -> error::Result { writer.write_u64::(self.clone()) - .map(|x| 4) + .map(|_| 8) .map_err(Error::BinaryReadError) } fn deserialize(reader: &mut Read) -> error::Result { @@ -99,79 +97,87 @@ impl BinarySerializable for String { } } +#[cfg(test)] +mod test { -#[test] -fn test_serialize_u8() { - let mut buffer: Vec = Vec::new(); - { - let x: u8 = 3; - x.serialize(&mut buffer); - assert_eq!(buffer.len(), 1); + use core::serialize::BinarySerializable; + use std::io::Cursor; + + #[test] + fn test_serialize_u8() { + let mut buffer: Vec = Vec::new(); + { + let x: u8 = 3; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 1); + } + { + let x: u8 = 5; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 2); + } + let mut cursor = Cursor::new(&buffer[..]); + assert_eq!(3, u8::deserialize(&mut cursor).unwrap()); + assert_eq!(5, u8::deserialize(&mut cursor).unwrap()); + assert!(u8::deserialize(&mut cursor).is_err()); } - { - let x: u8 = 5; - x.serialize(&mut buffer); - assert_eq!(buffer.len(), 2); - } - let mut cursor = Cursor::new(&buffer[..]); - assert_eq!(3, u8::deserialize(&mut cursor).unwrap()); - assert_eq!(5, u8::deserialize(&mut cursor).unwrap()); - assert!(u8::deserialize(&mut cursor).is_err()); -} - - -#[test] -fn test_serialize_u32() { - let mut buffer: Vec = Vec::new(); - { - let x: u32 = 3; - x.serialize(&mut buffer); - assert_eq!(buffer.len(), 4); - } - { - let x: u32 = 5; - x.serialize(&mut buffer); - assert_eq!(buffer.len(), 8); - } - let mut cursor = Cursor::new(&buffer[..]); - assert_eq!(3, u32::deserialize(&mut cursor).unwrap()); - assert_eq!(5, u32::deserialize(&mut cursor).unwrap()); - assert!(u32::deserialize(&mut cursor).is_err()); -} - -#[test] -fn test_serialize_string() { - let mut buffer: Vec = Vec::new(); - let first_length = 4 + 3 * 4; - let second_length = 4 + 3 * 8; - { - let x: String = String::from("ぽよぽよ"); - assert_eq!(x.serialize(&mut buffer).unwrap(), first_length); - assert_eq!(buffer.len(), first_length); - } - { - let x: String = String::from("富士さん見える。"); - assert_eq!(x.serialize(&mut buffer).unwrap(), second_length); - assert_eq!(buffer.len(), first_length + second_length); - } - let mut cursor = Cursor::new(&buffer[..]); - assert_eq!("ぽよぽよ", String::deserialize(&mut cursor).unwrap()); - assert_eq!("富士さん見える。", String::deserialize(&mut cursor).unwrap()); - assert!(u32::deserialize(&mut cursor).is_err()); -} - -#[test] -fn test_serialize_vec() { - let mut buffer: Vec = Vec::new(); - let first_length = 4 + 3 * 4; - let second_length = 4 + 3 * 8; - let vec = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。")); - assert_eq!(vec.serialize(&mut buffer).unwrap(), first_length + second_length + 4); - let mut cursor = Cursor::new(&buffer[..]); - { - let deser: Vec = Vec::deserialize(&mut cursor).unwrap(); - assert_eq!(deser.len(), 2); - assert_eq!("ぽよぽよ", deser[0]); - assert_eq!("富士さん見える。", deser[1]); + + + #[test] + fn test_serialize_u32() { + let mut buffer: Vec = Vec::new(); + { + let x: u32 = 3; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 4); + } + { + let x: u32 = 5; + x.serialize(&mut buffer); + assert_eq!(buffer.len(), 8); + } + let mut cursor = Cursor::new(&buffer[..]); + assert_eq!(3, u32::deserialize(&mut cursor).unwrap()); + assert_eq!(5, u32::deserialize(&mut cursor).unwrap()); + assert!(u32::deserialize(&mut cursor).is_err()); } + + #[test] + fn test_serialize_string() { + let mut buffer: Vec = Vec::new(); + let first_length = 4 + 3 * 4; + let second_length = 4 + 3 * 8; + { + let x: String = String::from("ぽよぽよ"); + assert_eq!(x.serialize(&mut buffer).unwrap(), first_length); + assert_eq!(buffer.len(), first_length); + } + { + let x: String = String::from("富士さん見える。"); + assert_eq!(x.serialize(&mut buffer).unwrap(), second_length); + assert_eq!(buffer.len(), first_length + second_length); + } + let mut cursor = Cursor::new(&buffer[..]); + assert_eq!("ぽよぽよ", String::deserialize(&mut cursor).unwrap()); + assert_eq!("富士さん見える。", String::deserialize(&mut cursor).unwrap()); + assert!(u32::deserialize(&mut cursor).is_err()); + } + + #[test] + fn test_serialize_vec() { + let mut buffer: Vec = Vec::new(); + let first_length = 4 + 3 * 4; + let second_length = 4 + 3 * 8; + let vec = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。")); + assert_eq!(vec.serialize(&mut buffer).unwrap(), first_length + second_length + 4); + let mut cursor = Cursor::new(&buffer[..]); + { + let deser: Vec = Vec::deserialize(&mut cursor).unwrap(); + assert_eq!(deser.len(), 2); + assert_eq!("ぽよぽよ", deser[0]); + assert_eq!("富士さん見える。", deser[1]); + } + } + + } diff --git a/src/core/simdcompression.rs b/src/core/simdcompression.rs index b605380f7..8d63565df 100644 --- a/src/core/simdcompression.rs +++ b/src/core/simdcompression.rs @@ -46,7 +46,6 @@ impl Encoder { - pub struct Decoder; impl Decoder { diff --git a/src/core/skip.rs b/src/core/skip.rs index a143fa6fe..eb3414f28 100644 --- a/src/core/skip.rs +++ b/src/core/skip.rs @@ -1,16 +1,13 @@ use std::io::Write; -use std::io::BufWriter; use std::io::Read; use std::io::Cursor; use std::io::SeekFrom; use std::io::Seek; use std::marker::PhantomData; use core::DocId; -use std::ops::DerefMut; use core::error; use byteorder; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; -use std::fmt; use core::serialize::*; struct LayerBuilder { @@ -31,11 +28,7 @@ impl LayerBuilder { try!(output.write_all(&self.buffer)); Ok(()) } - - fn len(&self,) -> usize { - self.len - } - + fn with_period(period: usize) -> LayerBuilder { LayerBuilder { period: period, @@ -64,23 +57,6 @@ impl LayerBuilder { } } - -// -// fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) { -// for it in layer { -// println!(" - {:?}", it); -// } -// } -// -// pub fn display_skip_list(skiplist: &mut SkipList) { -// println!("DataLayer"); -// display_layer(&mut skiplist.data_layer); -// println!("SkipLayer"); -// for mut layer in skiplist.skip_layers.iter_mut() { -// display_layer(&mut layer); -// } -// } - pub struct SkipListBuilder { period: usize, data_layer: LayerBuilder, @@ -293,3 +269,137 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> { } } } + + + + +#[test] +fn test_skip_list_builder() { + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); + skip_list_builder.insert(2, &3); + skip_list_builder.write::>(&mut output); + assert_eq!(output.len(), 16); + assert_eq!(output[0], 0); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); + for i in 0..9 { + skip_list_builder.insert(i, &i); + } + skip_list_builder.write::>(&mut output); + assert_eq!(output.len(), 120); + assert_eq!(output[0], 0); + } + { + // checking that void gets serialized to nothing. + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); + for i in 0..9 { + skip_list_builder.insert(i, &()); + } + skip_list_builder.write::>(&mut output); + assert_eq!(output.len(), 84); + assert_eq!(output[0], 0); + } +} + +#[test] +fn test_skip_list_reader() { + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); + skip_list_builder.insert(2, &3); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList = SkipList::read(&mut output); + assert_eq!(skip_list.next(), Some((2, 3))); + } + { + let mut output: Vec = Vec::new(); + let skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList = SkipList::read(&mut output); + assert_eq!(skip_list.next(), None); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(7, &()); + skip_list_builder.insert(9, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + assert_eq!(skip_list.next().unwrap(), (3, ())); + assert_eq!(skip_list.next().unwrap(), (5, ())); + assert_eq!(skip_list.next().unwrap(), (7, ())); + assert_eq!(skip_list.next().unwrap(), (9, ())); + assert_eq!(skip_list.next(), None); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(7, &()); + skip_list_builder.insert(9, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(5); + assert_eq!(skip_list.next().unwrap(), (5, ())); + assert_eq!(skip_list.next().unwrap(), (7, ())); + assert_eq!(skip_list.next().unwrap(), (9, ())); + assert_eq!(skip_list.next(), None); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(6, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(6); + assert_eq!(skip_list.next().unwrap(), (6, ())); + assert_eq!(skip_list.next(), None); + + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()); + skip_list_builder.insert(3, &()); + skip_list_builder.insert(5, &()); + skip_list_builder.insert(7, &()); + skip_list_builder.insert(9, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(10); + assert_eq!(skip_list.next(), None); + } + { + let mut output: Vec = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); + for i in 0..1000 { + skip_list_builder.insert(i, &()); + } + skip_list_builder.insert(1004, &()); + skip_list_builder.write::>(&mut output); + let mut skip_list: SkipList<()> = SkipList::read(&mut output); + assert_eq!(skip_list.next().unwrap(), (0, ())); + skip_list.seek(431); + assert_eq!(skip_list.next().unwrap(), (431,()) ); + skip_list.seek(1003); + assert_eq!(skip_list.next().unwrap(), (1004,()) ); + assert_eq!(skip_list.next(), None); + } +} diff --git a/src/core/store.rs b/src/core/store.rs index 2bdb57b0f..848b78f79 100644 --- a/src/core/store.rs +++ b/src/core/store.rs @@ -1,14 +1,9 @@ -use time::PreciseTime; use std::io::BufWriter; use std::fs::File; -use std::fmt; use std::cell::RefCell; use core::global::DocId; use core::schema::Document; -use core::schema::Field; use core::schema::FieldValue; -use core::schema::FieldOptions; -use core::schema::Schema; use core::error; use core::serialize::BinarySerializable; use std::io::Write; @@ -18,7 +13,6 @@ use std::io::SeekFrom; use fst::raw::MmapReadOnly; use std::io::Seek; use lz4; -use tempfile; // TODO cache uncompressed pages @@ -176,43 +170,56 @@ impl StoreReader { } -#[test] -fn test_store() { - let offsets; - let store_file = tempfile::NamedTempFile::new().unwrap(); - let mut schema = Schema::new(); - let field_body = schema.add_field("body", &FieldOptions::new().set_stored()); - let field_title = schema.add_field("title", &FieldOptions::new().set_stored()); - let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."); - { - let mut store_writer = StoreWriter::new(store_file.reopen().unwrap()); - for i in 0..10000 { - let mut fields: Vec = Vec::new(); - { - let field_value = FieldValue { - field: field_body.clone(), - text: lorem.clone(), - }; - fields.push(field_value); +#[cfg(test)] +mod tests { + + use tempfile; + use core::schema::Schema; + use core::schema::FieldOptions; + use core::schema::FieldValue; + use fst::raw::MmapReadOnly; + use core::store::StoreWriter; + use core::store::StoreReader; + + #[test] + fn test_store() { + let offsets; + let store_file = tempfile::NamedTempFile::new().unwrap(); + let mut schema = Schema::new(); + let field_body = schema.add_field("body", &FieldOptions::new().set_stored()); + let field_title = schema.add_field("title", &FieldOptions::new().set_stored()); + let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."); + { + let mut store_writer = StoreWriter::new(store_file.reopen().unwrap()); + for i in 0..10000 { + let mut fields: Vec = Vec::new(); + { + let field_value = FieldValue { + field: field_body.clone(), + text: lorem.clone(), + }; + fields.push(field_value); + } + { + let title_text = format!("Doc {}", i); + let field_value = FieldValue { + field: field_title.clone(), + text: title_text, + }; + fields.push(field_value); + } + let fields_refs: Vec<&FieldValue> = fields.iter().collect(); + store_writer.store(&fields_refs); } - { - let title_text = format!("Doc {}", i); - let field_value = FieldValue { - field: field_title.clone(), - text: title_text, - }; - fields.push(field_value); - } - let fields_refs: Vec<&FieldValue> = fields.iter().collect(); - store_writer.store(&fields_refs); + store_writer.close(); + offsets = store_writer.offsets.clone(); + } + let store_mmap = MmapReadOnly::open(&store_file).unwrap(); + let store = StoreReader::new(store_mmap); + assert_eq!(offsets, store.offsets); + for i in 0..1000 { + assert_eq!(*store.get(&i).get_one(&field_title).unwrap(), format!("Doc {}", i)); } - store_writer.close(); - offsets = store_writer.offsets.clone(); - } - let store_mmap = MmapReadOnly::open(&store_file).unwrap(); - let store = StoreReader::new(store_mmap); - assert_eq!(offsets, store.offsets); - for i in 0..10000 { - assert_eq!(*store.get(&i).get_one(&field_title).unwrap(), format!("Doc {}", i)); } + } diff --git a/src/core/writer.rs b/src/core/writer.rs index eefbfb822..e1a76d268 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -1,24 +1,13 @@ - -use std::io; -use std::slice; -use core::global::*; +use core::global::DocId; use core::schema::*; use core::codec::*; use std::rc::Rc; use core::directory::Directory; use core::analyzer::SimpleTokenizer; -use std::collections::{HashMap, BTreeMap}; -use std::collections::{hash_map, btree_map}; -use std::io::{Write}; -use std::sync::Arc; -use std::mem; -use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; -use std::iter::Peekable; +use std::collections::BTreeMap; use core::analyzer::StreamingIterator; use core::serial::*; use core::error::*; -use std::cell::RefCell; -use std::borrow::BorrowMut; use core::directory::Segment; @@ -75,7 +64,7 @@ impl IndexWriter { pub fn commit(&mut self,) -> Result { // TODO error handling - let mut segment_writer_rc = self.segment_writer.clone(); + let segment_writer_rc = self.segment_writer.clone(); self.segment_writer = Rc::new(new_segment_writer(&self.directory)); let segment_writer_res = Rc::try_unwrap(segment_writer_rc); match segment_writer_res { @@ -146,7 +135,6 @@ impl SegmentWriter { } pub fn add(&mut self, doc: Document, schema: &Schema) { - let mut term_buffer = String::new(); let doc_id = self.max_doc; for field_value in doc.fields() { let field_options = schema.get_field(&field_value.field); diff --git a/src/lib.rs b/src/lib.rs index 7145e4acf..00e6eb154 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,5 @@ #[allow(unused_imports)] - - #[macro_use] extern crate lazy_static; diff --git a/tests/core.rs b/tests/core.rs index 61f00645e..a650ea5a4 100644 --- a/tests/core.rs +++ b/tests/core.rs @@ -2,54 +2,14 @@ extern crate tantivy; extern crate regex; extern crate tempdir; -use tantivy::core::postings::VecPostings; -use tantivy::core::postings::Postings; -use tantivy::core::analyzer::SimpleTokenizer; use tantivy::core::collector::TestCollector; -use tantivy::core::serial::*; use tantivy::core::schema::*; -use tantivy::core::codec::SimpleCodec; use tantivy::core::global::*; -use tantivy::core::postings::IntersectionPostings; use tantivy::core::writer::IndexWriter; use tantivy::core::searcher::Searcher; use tantivy::core::directory::{Directory, generate_segment_name, SegmentId}; -use std::ops::DerefMut; use tantivy::core::reader::SegmentReader; -use std::io::{ BufWriter, Write}; use regex::Regex; -use std::convert::From; -use std::path::PathBuf; -use tantivy::core::query; -use tantivy::core::query::parse_query; - - -#[test] -fn test_parse_query() { - { - let (parsed_query, _) = parse_query("toto:titi toto:tutu").unwrap(); - assert_eq!(parsed_query, vec!(query::Term(String::from("toto"), String::from("titi")), query::Term(String::from("toto"), String::from("tutu")))); - } -} - -#[test] -fn test_intersection() { - { - let left = VecPostings::new(vec!(1, 3, 9)); - let right = VecPostings::new(vec!(3, 4, 9, 18)); - let inter = IntersectionPostings::from_postings(vec!(left, right)); - let vals: Vec = inter.collect(); - assert_eq!(vals, vec!(3, 9)); - } - { - let a = VecPostings::new(vec!(1, 3, 9)); - let b = VecPostings::new(vec!(3, 4, 9, 18)); - let c = VecPostings::new(vec!(1, 5, 9, 111)); - let inter = IntersectionPostings::from_postings(vec!(a, b, c)); - let vals: Vec = inter.collect(); - assert_eq!(vals, vec!(9)); - } -} #[test] @@ -85,7 +45,7 @@ fn test_indexing() { let commit_result = index_writer.commit(); assert!(commit_result.is_ok()); let segment = commit_result.unwrap(); - let segment_reader = SegmentReader::open(segment).unwrap(); + SegmentReader::open(segment).unwrap(); // TODO ENABLE TEST //let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader); //assert_eq!(segment_str_before_writing, segment_str_after_reading); @@ -134,8 +94,6 @@ fn test_searcher() { } } - - #[test] fn test_new_segment() { let SegmentId(segment_name) = generate_segment_name(); diff --git a/tests/skip.rs b/tests/skip.rs deleted file mode 100644 index 4fdbb755e..000000000 --- a/tests/skip.rs +++ /dev/null @@ -1,138 +0,0 @@ -extern crate tantivy; -extern crate byteorder; -use std::io::{Write, Seek}; -use std::io::SeekFrom; -use tantivy::core::skip::*; -use std::io::Cursor; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; - -#[test] -fn test_skip_list_builder() { - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); - skip_list_builder.insert(2, &3); - skip_list_builder.write::>(&mut output); - assert_eq!(output.len(), 16); - assert_eq!(output[0], 0); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(3); - for i in (0..9) { - skip_list_builder.insert(i, &i); - } - skip_list_builder.write::>(&mut output); - assert_eq!(output.len(), 120); - assert_eq!(output[0], 0); - } - { - // checking that void gets serialized to nothing. - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); - for i in (0..9) { - skip_list_builder.insert(i, &()); - } - skip_list_builder.write::>(&mut output); - assert_eq!(output.len(), 84); - assert_eq!(output[0], 0); - } -} - -#[test] -fn test_skip_list_reader() { - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); - skip_list_builder.insert(2, &3); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList = SkipList::read(&mut output); - assert_eq!(skip_list.next(), Some((2, 3))); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder = SkipListBuilder::new(10); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList = SkipList::read(&mut output); - assert_eq!(skip_list.next(), None); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); - skip_list_builder.insert(2, &()); - skip_list_builder.insert(3, &()); - skip_list_builder.insert(5, &()); - skip_list_builder.insert(7, &()); - skip_list_builder.insert(9, &()); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList<()> = SkipList::read(&mut output); - assert_eq!(skip_list.next().unwrap(), (2, ())); - assert_eq!(skip_list.next().unwrap(), (3, ())); - assert_eq!(skip_list.next().unwrap(), (5, ())); - assert_eq!(skip_list.next().unwrap(), (7, ())); - assert_eq!(skip_list.next().unwrap(), (9, ())); - assert_eq!(skip_list.next(), None); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); - skip_list_builder.insert(2, &()); - skip_list_builder.insert(3, &()); - skip_list_builder.insert(5, &()); - skip_list_builder.insert(7, &()); - skip_list_builder.insert(9, &()); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList<()> = SkipList::read(&mut output); - assert_eq!(skip_list.next().unwrap(), (2, ())); - skip_list.seek(5); - assert_eq!(skip_list.next().unwrap(), (5, ())); - assert_eq!(skip_list.next().unwrap(), (7, ())); - assert_eq!(skip_list.next().unwrap(), (9, ())); - assert_eq!(skip_list.next(), None); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); - skip_list_builder.insert(2, &()); - skip_list_builder.insert(3, &()); - skip_list_builder.insert(5, &()); - skip_list_builder.insert(6, &()); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList<()> = SkipList::read(&mut output); - assert_eq!(skip_list.next().unwrap(), (2, ())); - skip_list.seek(6); - assert_eq!(skip_list.next().unwrap(), (6, ())); - assert_eq!(skip_list.next(), None); - - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); - skip_list_builder.insert(2, &()); - skip_list_builder.insert(3, &()); - skip_list_builder.insert(5, &()); - skip_list_builder.insert(7, &()); - skip_list_builder.insert(9, &()); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList<()> = SkipList::read(&mut output); - assert_eq!(skip_list.next().unwrap(), (2, ())); - skip_list.seek(10); - assert_eq!(skip_list.next(), None); - } - { - let mut output: Vec = Vec::new(); - let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3); - for i in (0..1000) { - skip_list_builder.insert(i, &()); - } - skip_list_builder.insert(1004, &()); - skip_list_builder.write::>(&mut output); - let mut skip_list: SkipList<()> = SkipList::read(&mut output); - assert_eq!(skip_list.next().unwrap(), (0, ())); - skip_list.seek(431); - assert_eq!(skip_list.next().unwrap(), (431,()) ); - skip_list.seek(1003); - assert_eq!(skip_list.next().unwrap(), (1004,()) ); - assert_eq!(skip_list.next(), None); - } -}