mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
cleaning up imports
This commit is contained in:
@@ -17,7 +17,6 @@ pub trait StreamingIterator<'a, T> {
|
||||
fn next(&'a mut self) -> Option<T>;
|
||||
}
|
||||
|
||||
|
||||
impl<'a, 'b> TokenIter<'b> {
|
||||
fn consume_token(&'a mut self) -> Option<&'a str> {
|
||||
loop {
|
||||
@@ -79,7 +78,6 @@ impl SimpleTokenizer {
|
||||
#[test]
|
||||
fn test_tokenizer() {
|
||||
let simple_tokenizer = SimpleTokenizer::new();
|
||||
let mut term_buffer = String::new();
|
||||
let mut term_reader = simple_tokenizer.tokenize("hello, happy tax payer!");
|
||||
assert_eq!(term_reader.next().unwrap(), "hello");
|
||||
assert_eq!(term_reader.next().unwrap(), "happy");
|
||||
@@ -87,3 +85,11 @@ fn test_tokenizer() {
|
||||
assert_eq!(term_reader.next().unwrap(), "payer");
|
||||
assert_eq!(term_reader.next(), None);
|
||||
}
|
||||
|
||||
|
||||
/// Tokenizing the empty string must not produce any token: the very first
/// call to `next` is expected to return `None`.
#[test]
fn test_tokenizer_empty() {
    let tokenizer = SimpleTokenizer::new();
    let empty_input = "";
    let mut tokens = tokenizer.tokenize(empty_input);
    let first_token = tokens.next();
    assert_eq!(first_token, None);
}
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
use std::io;
|
||||
use core::serial::*;
|
||||
use std::io::Write;
|
||||
use fst::MapBuilder;
|
||||
use core::error::*;
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use byteorder::{BigEndian, WriteBytesExt};
|
||||
use core::directory::Segment;
|
||||
use core::directory::SegmentComponent;
|
||||
use core::reader::*;
|
||||
use core::schema::Term;
|
||||
use core::DocId;
|
||||
use core::store::StoreWriter;
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
|
||||
use std::path::{PathBuf, Path};
|
||||
use std::collections::HashMap;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::fs::File;
|
||||
use std::fs;
|
||||
use core::schema::Schema;
|
||||
use std::io::Write;
|
||||
use std::io::BufWriter;
|
||||
use std::io;
|
||||
use std::borrow::Borrow;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::rc::Rc;
|
||||
use std::sync::{Arc, Mutex, RwLock, MutexGuard, RwLockWriteGuard, RwLockReadGuard};
|
||||
use std::sync::{Arc, RwLock, RwLockWriteGuard, RwLockReadGuard};
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use std::cell::RefCell;
|
||||
use core::error::*;
|
||||
use rand::{thread_rng, Rng};
|
||||
@@ -145,19 +139,14 @@ impl Directory {
|
||||
}
|
||||
|
||||
pub fn segments(&self,) -> Vec<Segment> {
|
||||
match self.inner_directory.read() {
|
||||
Ok(inner) => inner
|
||||
.segment_ids()
|
||||
.into_iter()
|
||||
.map(|segment_id| self.segment(&segment_id))
|
||||
.collect(),
|
||||
Err(e) => {
|
||||
//Err(Error::LockError(format!("Could not obtain read lock for {:?}", self)))
|
||||
// TODO make it return a result
|
||||
panic!("Could not work");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO handle error
|
||||
self.inner_directory
|
||||
.read()
|
||||
.unwrap()
|
||||
.segment_ids()
|
||||
.into_iter()
|
||||
.map(|segment_id| self.segment(&segment_id))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn segment(&self, segment_id: &SegmentId) -> Segment {
|
||||
|
||||
@@ -1,4 +1 @@
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::io;
|
||||
|
||||
pub type DocId = u32;
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
|
||||
pub mod query;
|
||||
pub mod postings;
|
||||
pub mod global;
|
||||
pub mod schema;
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
use std::fmt;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::io::prelude::Read;
|
||||
use core::global::DocId;
|
||||
use std::cmp::Ordering;
|
||||
use std::vec;
|
||||
use std::ptr;
|
||||
|
||||
|
||||
@@ -79,7 +74,7 @@ pub struct IntersectionPostings<T: Postings> {
|
||||
}
|
||||
|
||||
impl<T: Postings> IntersectionPostings<T> {
|
||||
pub fn from_postings(mut postings: Vec<T>) -> IntersectionPostings<T> {
|
||||
pub fn from_postings(postings: Vec<T>) -> IntersectionPostings<T> {
|
||||
IntersectionPostings {
|
||||
postings: postings,
|
||||
}
|
||||
@@ -124,3 +119,23 @@ impl<T: Postings> Iterator for IntersectionPostings<T> {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Checks that `IntersectionPostings` yields exactly the doc ids shared by
/// all of its inputs, first with two posting lists and then with three.
#[test]
fn test_intersection() {
    // Two lists: 3 and 9 are the only ids present in both.
    let two_way = IntersectionPostings::from_postings(vec![
        VecPostings::new(vec![1, 3, 9]),
        VecPostings::new(vec![3, 4, 9, 18]),
    ]);
    let two_way_docs: Vec<DocId> = two_way.collect();
    assert_eq!(two_way_docs, vec![3, 9]);

    // Three lists: 9 is the only id present in every one of them.
    let three_way = IntersectionPostings::from_postings(vec![
        VecPostings::new(vec![1, 3, 9]),
        VecPostings::new(vec![3, 4, 9, 18]),
        VecPostings::new(vec![1, 5, 9, 111]),
    ]);
    let three_way_docs: Vec<DocId> = three_way.collect();
    assert_eq!(three_way_docs, vec![9]);
}
|
||||
|
||||
@@ -58,3 +58,12 @@ pub fn grammar<I>(input: State<I>) -> ParseResult<Vec<Term>, I>
|
||||
pub fn parse_query(query_str: &str) -> Result<(Vec<Term>, &str), ParseError<&str>> {
|
||||
parser(grammar).parse(query_str)
|
||||
}
|
||||
|
||||
|
||||
/// Parses a query made of two `field:value` clauses and checks the terms
/// that come back. The `&str` half of the parse result is ignored.
#[test]
fn test_parse_query() {
    let expected_terms = vec![
        query::Term(String::from("toto"), String::from("titi")),
        query::Term(String::from("toto"), String::from("tutu")),
    ];
    let (parsed_terms, _) = parse_query("toto:titi toto:tutu").unwrap();
    assert_eq!(parsed_terms, expected_terms);
}
|
||||
|
||||
@@ -1,20 +1,12 @@
|
||||
use core::directory::Directory;
|
||||
use core::directory::{Segment, SegmentId};
|
||||
use std::collections::BinaryHeap;
|
||||
use core::schema::Term;
|
||||
use core::store::StoreReader;
|
||||
use core::schema::Document;
|
||||
use fst::Streamer;
|
||||
use fst;
|
||||
use std::io;
|
||||
use core::postings::IntersectionPostings;
|
||||
use fst::raw::Fst;
|
||||
use std::cmp::{Eq,PartialEq,Ord,PartialOrd,Ordering};
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::borrow::Borrow;
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use std::io::Cursor;
|
||||
use core::global::DocId;
|
||||
use core::serial::*;
|
||||
use core::directory::SegmentComponent;
|
||||
use fst::raw::MmapReadOnly;
|
||||
use core::error::{Result, Error};
|
||||
@@ -143,7 +135,6 @@ impl SegmentReader {
|
||||
for term in terms.iter() {
|
||||
match self.get_term(term) {
|
||||
Some(segment_posting) => {
|
||||
println!("term found {:?}", term);
|
||||
segment_postings.push(segment_posting);
|
||||
}
|
||||
None => {
|
||||
|
||||
@@ -1,18 +1,12 @@
|
||||
use core::global::*;
|
||||
use core::error;
|
||||
use std::io::Write;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::string::FromUtf8Error;
|
||||
use std::collections::HashMap;
|
||||
use std::str;
|
||||
use std::iter;
|
||||
use std::slice;
|
||||
use std::fmt;
|
||||
use std::io::Read;
|
||||
use core::serialize::BinarySerializable;
|
||||
|
||||
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,Eq)]
|
||||
pub struct FieldOptions {
|
||||
// untokenized_indexed: bool,
|
||||
|
||||
@@ -7,7 +7,6 @@ use core::directory::Segment;
|
||||
use core::collector::Collector;
|
||||
use std::collections::HashMap;
|
||||
use core::schema::Term;
|
||||
use core::postings::Postings;
|
||||
use core::error::Result;
|
||||
|
||||
pub struct Searcher {
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use core::global::*;
|
||||
use core::schema::*;
|
||||
use core::error::{Result, Error};
|
||||
use core::error::Result;
|
||||
use std::fmt;
|
||||
|
||||
|
||||
pub trait SegmentSerializer<Output> {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<()>; // TODO add size
|
||||
@@ -42,7 +41,6 @@ impl DebugSegmentSerializer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<()> {
|
||||
self.text.push_str(&format!("{:?}\n", term));
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
use byteorder;
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use core::error;
|
||||
use core::error::Error;
|
||||
use std::io::Cursor;
|
||||
use std::io::Read;
|
||||
|
||||
pub trait BinarySerializable : fmt::Debug + Sized {
|
||||
@@ -14,10 +12,10 @@ pub trait BinarySerializable : fmt::Debug + Sized {
|
||||
}
|
||||
|
||||
impl BinarySerializable for () {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
fn serialize(&self, _: &mut Write) -> error::Result<usize> {
|
||||
Ok(0)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<Self> {
|
||||
fn deserialize(_: &mut Read) -> error::Result<Self> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -46,7 +44,7 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
impl BinarySerializable for u32 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
writer.write_u32::<BigEndian>(self.clone())
|
||||
.map(|x| 4)
|
||||
.map(|_| 4)
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<u32> {
|
||||
@@ -58,7 +56,7 @@ impl BinarySerializable for u32 {
|
||||
impl BinarySerializable for u64 {
|
||||
fn serialize(&self, writer: &mut Write) -> error::Result<usize> {
|
||||
writer.write_u64::<BigEndian>(self.clone())
|
||||
.map(|x| 4)
|
||||
.map(|_| 8)
|
||||
.map_err(Error::BinaryReadError)
|
||||
}
|
||||
fn deserialize(reader: &mut Read) -> error::Result<u64> {
|
||||
@@ -99,79 +97,87 @@ impl BinarySerializable for String {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_u8() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u8 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 1);
|
||||
use core::serialize::BinarySerializable;
|
||||
use std::io::Cursor;
|
||||
|
||||
#[test]
|
||||
fn test_serialize_u8() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u8 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 1);
|
||||
}
|
||||
{
|
||||
let x: u8 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 2);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!(3, u8::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!(5, u8::deserialize(&mut cursor).unwrap());
|
||||
assert!(u8::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
{
|
||||
let x: u8 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 2);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!(3, u8::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!(5, u8::deserialize(&mut cursor).unwrap());
|
||||
assert!(u8::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_serialize_u32() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u32 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 4);
|
||||
}
|
||||
{
|
||||
let x: u32 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 8);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!(3, u32::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!(5, u32::deserialize(&mut cursor).unwrap());
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
let first_length = 4 + 3 * 4;
|
||||
let second_length = 4 + 3 * 8;
|
||||
{
|
||||
let x: String = String::from("ぽよぽよ");
|
||||
assert_eq!(x.serialize(&mut buffer).unwrap(), first_length);
|
||||
assert_eq!(buffer.len(), first_length);
|
||||
}
|
||||
{
|
||||
let x: String = String::from("富士さん見える。");
|
||||
assert_eq!(x.serialize(&mut buffer).unwrap(), second_length);
|
||||
assert_eq!(buffer.len(), first_length + second_length);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!("ぽよぽよ", String::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!("富士さん見える。", String::deserialize(&mut cursor).unwrap());
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_vec() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
let first_length = 4 + 3 * 4;
|
||||
let second_length = 4 + 3 * 8;
|
||||
let vec = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。"));
|
||||
assert_eq!(vec.serialize(&mut buffer).unwrap(), first_length + second_length + 4);
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
{
|
||||
let deser: Vec<String> = Vec::deserialize(&mut cursor).unwrap();
|
||||
assert_eq!(deser.len(), 2);
|
||||
assert_eq!("ぽよぽよ", deser[0]);
|
||||
assert_eq!("富士さん見える。", deser[1]);
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_serialize_u32() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
{
|
||||
let x: u32 = 3;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 4);
|
||||
}
|
||||
{
|
||||
let x: u32 = 5;
|
||||
x.serialize(&mut buffer);
|
||||
assert_eq!(buffer.len(), 8);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!(3, u32::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!(5, u32::deserialize(&mut cursor).unwrap());
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
let first_length = 4 + 3 * 4;
|
||||
let second_length = 4 + 3 * 8;
|
||||
{
|
||||
let x: String = String::from("ぽよぽよ");
|
||||
assert_eq!(x.serialize(&mut buffer).unwrap(), first_length);
|
||||
assert_eq!(buffer.len(), first_length);
|
||||
}
|
||||
{
|
||||
let x: String = String::from("富士さん見える。");
|
||||
assert_eq!(x.serialize(&mut buffer).unwrap(), second_length);
|
||||
assert_eq!(buffer.len(), first_length + second_length);
|
||||
}
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
assert_eq!("ぽよぽよ", String::deserialize(&mut cursor).unwrap());
|
||||
assert_eq!("富士さん見える。", String::deserialize(&mut cursor).unwrap());
|
||||
assert!(u32::deserialize(&mut cursor).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_vec() {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
let first_length = 4 + 3 * 4;
|
||||
let second_length = 4 + 3 * 8;
|
||||
let vec = vec!(String::from("ぽよぽよ"), String::from("富士さん見える。"));
|
||||
assert_eq!(vec.serialize(&mut buffer).unwrap(), first_length + second_length + 4);
|
||||
let mut cursor = Cursor::new(&buffer[..]);
|
||||
{
|
||||
let deser: Vec<String> = Vec::deserialize(&mut cursor).unwrap();
|
||||
assert_eq!(deser.len(), 2);
|
||||
assert_eq!("ぽよぽよ", deser[0]);
|
||||
assert_eq!("富士さん見える。", deser[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@ impl Encoder {
|
||||
|
||||
|
||||
|
||||
|
||||
pub struct Decoder;
|
||||
|
||||
impl Decoder {
|
||||
|
||||
158
src/core/skip.rs
158
src/core/skip.rs
@@ -1,16 +1,13 @@
|
||||
use std::io::Write;
|
||||
use std::io::BufWriter;
|
||||
use std::io::Read;
|
||||
use std::io::Cursor;
|
||||
use std::io::SeekFrom;
|
||||
use std::io::Seek;
|
||||
use std::marker::PhantomData;
|
||||
use core::DocId;
|
||||
use std::ops::DerefMut;
|
||||
use core::error;
|
||||
use byteorder;
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::fmt;
|
||||
use core::serialize::*;
|
||||
|
||||
struct LayerBuilder<T: BinarySerializable> {
|
||||
@@ -32,10 +29,6 @@ impl<T: BinarySerializable> LayerBuilder<T> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn len(&self,) -> usize {
|
||||
self.len
|
||||
}
|
||||
|
||||
fn with_period(period: usize) -> LayerBuilder<T> {
|
||||
LayerBuilder {
|
||||
period: period,
|
||||
@@ -64,23 +57,6 @@ impl<T: BinarySerializable> LayerBuilder<T> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// fn display_layer<'a, T: BinarySerializable>(layer: &mut Layer<'a, T>) {
|
||||
// for it in layer {
|
||||
// println!(" - {:?}", it);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// pub fn display_skip_list<T: BinarySerializable>(skiplist: &mut SkipList<T>) {
|
||||
// println!("DataLayer");
|
||||
// display_layer(&mut skiplist.data_layer);
|
||||
// println!("SkipLayer");
|
||||
// for mut layer in skiplist.skip_layers.iter_mut() {
|
||||
// display_layer(&mut layer);
|
||||
// }
|
||||
// }
|
||||
|
||||
pub struct SkipListBuilder<T: BinarySerializable> {
|
||||
period: usize,
|
||||
data_layer: LayerBuilder<T>,
|
||||
@@ -293,3 +269,137 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/// Serializes three small skip lists and checks, for each of them, the
/// exact length of the produced buffer and the value of its first byte.
#[test]
fn test_skip_list_builder() {
    // Expectations shared by every scenario: exact byte length first, then
    // a leading zero byte.
    fn assert_serialized(bytes: &[u8], expected_len: usize) {
        assert_eq!(bytes.len(), expected_len);
        assert_eq!(bytes[0], 0);
    }

    // A single `u32` entry, built with a period of 10.
    let mut single_entry = Vec::<u8>::new();
    let mut single_builder = SkipListBuilder::<u32>::new(10);
    single_builder.insert(2, &3);
    single_builder.write::<Vec<u8>>(&mut single_entry);
    assert_serialized(&single_entry, 16);

    // Nine `u32` entries, built with a period of 3.
    let mut nine_entries = Vec::<u8>::new();
    let mut nine_builder = SkipListBuilder::<u32>::new(3);
    for doc_id in 0..9 {
        nine_builder.insert(doc_id, &doc_id);
    }
    nine_builder.write::<Vec<u8>>(&mut nine_entries);
    assert_serialized(&nine_entries, 120);

    // Same nine keys with `()` payloads: checking that void gets
    // serialized to nothing.
    let mut unit_entries = Vec::<u8>::new();
    let mut unit_builder = SkipListBuilder::<()>::new(3);
    for doc_id in 0..9 {
        unit_builder.insert(doc_id, &());
    }
    unit_builder.write::<Vec<u8>>(&mut unit_entries);
    assert_serialized(&unit_entries, 84);
}
|
||||
|
||||
/// Round-trips several skip lists through `SkipListBuilder::write` and
/// `SkipList::read`, then checks what `next` and `seek` give back.
///
/// The `seek` scenarios below expect the following `next` to return the
/// first entry whose key is greater than or equal to the target, or `None`
/// when the target is past the last key.
#[test]
fn test_skip_list_reader() {
    // One `u32` entry: the first `next` returns it.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<u32>::new(10);
        builder.insert(2, &3);
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<u32> = SkipList::read(&mut bytes);
        assert_eq!(reader.next(), Some((2, 3)));
    }
    // No entry at all: `next` returns `None` right away.
    {
        let mut bytes = Vec::<u8>::new();
        let builder = SkipListBuilder::<u32>::new(10);
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<u32> = SkipList::read(&mut bytes);
        assert_eq!(reader.next(), None);
    }
    // Plain iteration over five unit-valued entries.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<()>::new(2);
        for &doc_id in [2, 3, 5, 7, 9].iter() {
            builder.insert(doc_id, &());
        }
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<()> = SkipList::read(&mut bytes);
        for &expected in [2, 3, 5, 7, 9].iter() {
            assert_eq!(reader.next().unwrap(), (expected, ()));
        }
        assert_eq!(reader.next(), None);
    }
    // Seeking to a key that exists (5) after having consumed the first entry.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<()>::new(2);
        for &doc_id in [2, 3, 5, 7, 9].iter() {
            builder.insert(doc_id, &());
        }
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<()> = SkipList::read(&mut bytes);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(5);
        for &expected in [5, 7, 9].iter() {
            assert_eq!(reader.next().unwrap(), (expected, ()));
        }
        assert_eq!(reader.next(), None);
    }
    // Seeking to the last key (6) of a list built with a period of 3.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<()>::new(3);
        for &doc_id in [2, 3, 5, 6].iter() {
            builder.insert(doc_id, &());
        }
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<()> = SkipList::read(&mut bytes);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(6);
        assert_eq!(reader.next().unwrap(), (6, ()));
        assert_eq!(reader.next(), None);
    }
    // Seeking past the last key (10 > 9) leaves nothing to read.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<()>::new(2);
        for &doc_id in [2, 3, 5, 7, 9].iter() {
            builder.insert(doc_id, &());
        }
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<()> = SkipList::read(&mut bytes);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(10);
        assert_eq!(reader.next(), None);
    }
    // A larger list: keys 0..1000 followed by 1004. Seeking to the missing
    // key 1003 is expected to land on 1004.
    {
        let mut bytes = Vec::<u8>::new();
        let mut builder = SkipListBuilder::<()>::new(3);
        for doc_id in 0..1000 {
            builder.insert(doc_id, &());
        }
        builder.insert(1004, &());
        builder.write::<Vec<u8>>(&mut bytes);
        let mut reader: SkipList<()> = SkipList::read(&mut bytes);
        assert_eq!(reader.next().unwrap(), (0, ()));
        reader.seek(431);
        assert_eq!(reader.next().unwrap(), (431, ()));
        reader.seek(1003);
        assert_eq!(reader.next().unwrap(), (1004, ()));
        assert_eq!(reader.next(), None);
    }
}
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
use time::PreciseTime;
|
||||
use std::io::BufWriter;
|
||||
use std::fs::File;
|
||||
use std::fmt;
|
||||
use std::cell::RefCell;
|
||||
use core::global::DocId;
|
||||
use core::schema::Document;
|
||||
use core::schema::Field;
|
||||
use core::schema::FieldValue;
|
||||
use core::schema::FieldOptions;
|
||||
use core::schema::Schema;
|
||||
use core::error;
|
||||
use core::serialize::BinarySerializable;
|
||||
use std::io::Write;
|
||||
@@ -18,7 +13,6 @@ use std::io::SeekFrom;
|
||||
use fst::raw::MmapReadOnly;
|
||||
use std::io::Seek;
|
||||
use lz4;
|
||||
use tempfile;
|
||||
|
||||
// TODO cache uncompressed pages
|
||||
|
||||
@@ -176,43 +170,56 @@ impl StoreReader {
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_store() {
|
||||
let offsets;
|
||||
let store_file = tempfile::NamedTempFile::new().unwrap();
|
||||
let mut schema = Schema::new();
|
||||
let field_body = schema.add_field("body", &FieldOptions::new().set_stored());
|
||||
let field_title = schema.add_field("title", &FieldOptions::new().set_stored());
|
||||
let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
|
||||
{
|
||||
let mut store_writer = StoreWriter::new(store_file.reopen().unwrap());
|
||||
for i in 0..10000 {
|
||||
let mut fields: Vec<FieldValue> = Vec::new();
|
||||
{
|
||||
let field_value = FieldValue {
|
||||
field: field_body.clone(),
|
||||
text: lorem.clone(),
|
||||
};
|
||||
fields.push(field_value);
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use tempfile;
|
||||
use core::schema::Schema;
|
||||
use core::schema::FieldOptions;
|
||||
use core::schema::FieldValue;
|
||||
use fst::raw::MmapReadOnly;
|
||||
use core::store::StoreWriter;
|
||||
use core::store::StoreReader;
|
||||
|
||||
#[test]
|
||||
fn test_store() {
|
||||
let offsets;
|
||||
let store_file = tempfile::NamedTempFile::new().unwrap();
|
||||
let mut schema = Schema::new();
|
||||
let field_body = schema.add_field("body", &FieldOptions::new().set_stored());
|
||||
let field_title = schema.add_field("title", &FieldOptions::new().set_stored());
|
||||
let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
|
||||
{
|
||||
let mut store_writer = StoreWriter::new(store_file.reopen().unwrap());
|
||||
for i in 0..10000 {
|
||||
let mut fields: Vec<FieldValue> = Vec::new();
|
||||
{
|
||||
let field_value = FieldValue {
|
||||
field: field_body.clone(),
|
||||
text: lorem.clone(),
|
||||
};
|
||||
fields.push(field_value);
|
||||
}
|
||||
{
|
||||
let title_text = format!("Doc {}", i);
|
||||
let field_value = FieldValue {
|
||||
field: field_title.clone(),
|
||||
text: title_text,
|
||||
};
|
||||
fields.push(field_value);
|
||||
}
|
||||
let fields_refs: Vec<&FieldValue> = fields.iter().collect();
|
||||
store_writer.store(&fields_refs);
|
||||
}
|
||||
{
|
||||
let title_text = format!("Doc {}", i);
|
||||
let field_value = FieldValue {
|
||||
field: field_title.clone(),
|
||||
text: title_text,
|
||||
};
|
||||
fields.push(field_value);
|
||||
}
|
||||
let fields_refs: Vec<&FieldValue> = fields.iter().collect();
|
||||
store_writer.store(&fields_refs);
|
||||
store_writer.close();
|
||||
offsets = store_writer.offsets.clone();
|
||||
}
|
||||
let store_mmap = MmapReadOnly::open(&store_file).unwrap();
|
||||
let store = StoreReader::new(store_mmap);
|
||||
assert_eq!(offsets, store.offsets);
|
||||
for i in 0..1000 {
|
||||
assert_eq!(*store.get(&i).get_one(&field_title).unwrap(), format!("Doc {}", i));
|
||||
}
|
||||
store_writer.close();
|
||||
offsets = store_writer.offsets.clone();
|
||||
}
|
||||
let store_mmap = MmapReadOnly::open(&store_file).unwrap();
|
||||
let store = StoreReader::new(store_mmap);
|
||||
assert_eq!(offsets, store.offsets);
|
||||
for i in 0..10000 {
|
||||
assert_eq!(*store.get(&i).get_one(&field_title).unwrap(), format!("Doc {}", i));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,24 +1,13 @@
|
||||
|
||||
use std::io;
|
||||
use std::slice;
|
||||
use core::global::*;
|
||||
use core::global::DocId;
|
||||
use core::schema::*;
|
||||
use core::codec::*;
|
||||
use std::rc::Rc;
|
||||
use core::directory::Directory;
|
||||
use core::analyzer::SimpleTokenizer;
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
use std::collections::{hash_map, btree_map};
|
||||
use std::io::{Write};
|
||||
use std::sync::Arc;
|
||||
use std::mem;
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::iter::Peekable;
|
||||
use std::collections::BTreeMap;
|
||||
use core::analyzer::StreamingIterator;
|
||||
use core::serial::*;
|
||||
use core::error::*;
|
||||
use std::cell::RefCell;
|
||||
use std::borrow::BorrowMut;
|
||||
use core::directory::Segment;
|
||||
|
||||
|
||||
@@ -75,7 +64,7 @@ impl IndexWriter {
|
||||
|
||||
pub fn commit(&mut self,) -> Result<Segment> {
|
||||
// TODO error handling
|
||||
let mut segment_writer_rc = self.segment_writer.clone();
|
||||
let segment_writer_rc = self.segment_writer.clone();
|
||||
self.segment_writer = Rc::new(new_segment_writer(&self.directory));
|
||||
let segment_writer_res = Rc::try_unwrap(segment_writer_rc);
|
||||
match segment_writer_res {
|
||||
@@ -146,7 +135,6 @@ impl SegmentWriter {
|
||||
}
|
||||
|
||||
pub fn add(&mut self, doc: Document, schema: &Schema) {
|
||||
let mut term_buffer = String::new();
|
||||
let doc_id = self.max_doc;
|
||||
for field_value in doc.fields() {
|
||||
let field_options = schema.get_field(&field_value.field);
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#[allow(unused_imports)]
|
||||
|
||||
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
|
||||
@@ -2,54 +2,14 @@ extern crate tantivy;
|
||||
extern crate regex;
|
||||
extern crate tempdir;
|
||||
|
||||
use tantivy::core::postings::VecPostings;
|
||||
use tantivy::core::postings::Postings;
|
||||
use tantivy::core::analyzer::SimpleTokenizer;
|
||||
use tantivy::core::collector::TestCollector;
|
||||
use tantivy::core::serial::*;
|
||||
use tantivy::core::schema::*;
|
||||
use tantivy::core::codec::SimpleCodec;
|
||||
use tantivy::core::global::*;
|
||||
use tantivy::core::postings::IntersectionPostings;
|
||||
use tantivy::core::writer::IndexWriter;
|
||||
use tantivy::core::searcher::Searcher;
|
||||
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
|
||||
use std::ops::DerefMut;
|
||||
use tantivy::core::reader::SegmentReader;
|
||||
use std::io::{ BufWriter, Write};
|
||||
use regex::Regex;
|
||||
use std::convert::From;
|
||||
use std::path::PathBuf;
|
||||
use tantivy::core::query;
|
||||
use tantivy::core::query::parse_query;
|
||||
|
||||
|
||||
/// Parses a two-clause `field:value` query and compares the resulting
/// terms with the expected ones. The second element of the parse result
/// (a `&str`) is not inspected.
#[test]
fn test_parse_query() {
    let query_str = "toto:titi toto:tutu";
    let (parsed_terms, _) = parse_query(query_str).unwrap();
    let expected_terms = vec![
        query::Term(String::from("toto"), String::from("titi")),
        query::Term(String::from("toto"), String::from("tutu")),
    ];
    assert_eq!(parsed_terms, expected_terms);
}
|
||||
|
||||
/// Intersects two, then three, `VecPostings` and checks that only the doc
/// ids common to every input are produced.
#[test]
fn test_intersection() {
    // Pairwise intersection: {1, 3, 9} and {3, 4, 9, 18} share 3 and 9.
    let pair = vec![
        VecPostings::new(vec![1, 3, 9]),
        VecPostings::new(vec![3, 4, 9, 18]),
    ];
    let pair_docs: Vec<DocId> = IntersectionPostings::from_postings(pair).collect();
    assert_eq!(pair_docs, vec![3, 9]);

    // Adding {1, 5, 9, 111} leaves 9 as the only common doc id.
    let triple = vec![
        VecPostings::new(vec![1, 3, 9]),
        VecPostings::new(vec![3, 4, 9, 18]),
        VecPostings::new(vec![1, 5, 9, 111]),
    ];
    let triple_docs: Vec<DocId> = IntersectionPostings::from_postings(triple).collect();
    assert_eq!(triple_docs, vec![9]);
}
|
||||
|
||||
|
||||
#[test]
|
||||
@@ -85,7 +45,7 @@ fn test_indexing() {
|
||||
let commit_result = index_writer.commit();
|
||||
assert!(commit_result.is_ok());
|
||||
let segment = commit_result.unwrap();
|
||||
let segment_reader = SegmentReader::open(segment).unwrap();
|
||||
SegmentReader::open(segment).unwrap();
|
||||
// TODO ENABLE TEST
|
||||
//let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader);
|
||||
//assert_eq!(segment_str_before_writing, segment_str_after_reading);
|
||||
@@ -134,8 +94,6 @@ fn test_searcher() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_new_segment() {
|
||||
let SegmentId(segment_name) = generate_segment_name();
|
||||
|
||||
138
tests/skip.rs
138
tests/skip.rs
@@ -1,138 +0,0 @@
|
||||
extern crate tantivy;
|
||||
extern crate byteorder;
|
||||
use std::io::{Write, Seek};
|
||||
use std::io::SeekFrom;
|
||||
use tantivy::core::skip::*;
|
||||
use std::io::Cursor;
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
|
||||
/// Serializes three small skip lists and checks the exact length of each
/// output buffer together with its first byte.
#[test]
fn test_skip_list_builder() {
    {
        // A single `u32` entry, built with a period of 10.
        let mut output: Vec<u8> = Vec::new();
        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
        skip_list_builder.insert(2, &3);
        skip_list_builder.write::<Vec<u8>>(&mut output);
        assert_eq!(output.len(), 16);
        assert_eq!(output[0], 0);
    }
    {
        // Nine `u32` entries, built with a period of 3.
        let mut output: Vec<u8> = Vec::new();
        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(3);
        // Ranges need no surrounding parentheses in a `for` header; the
        // extra pair only triggers the `unused_parens` lint.
        for i in 0..9 {
            skip_list_builder.insert(i, &i);
        }
        skip_list_builder.write::<Vec<u8>>(&mut output);
        assert_eq!(output.len(), 120);
        assert_eq!(output[0], 0);
    }
    {
        // checking that void gets serialized to nothing.
        let mut output: Vec<u8> = Vec::new();
        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3);
        for i in 0..9 {
            skip_list_builder.insert(i, &());
        }
        skip_list_builder.write::<Vec<u8>>(&mut output);
        assert_eq!(output.len(), 84);
        assert_eq!(output[0], 0);
    }
}
|
||||
|
||||
#[test]
fn test_skip_list_reader() {
    // Round-trips skip lists through `SkipListBuilder::write` and
    // `SkipList::read`, then checks iteration (`next`) and `seek`.

    // A single `u32` entry is read back as-is.
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
        builder.insert(2, &3);
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<u32> = SkipList::read(&mut buffer);
        assert_eq!(reader.next(), Some((2, 3)));
    }

    // A builder without any entry produces a list that yields nothing.
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<u32> = SkipList::read(&mut buffer);
        assert_eq!(reader.next(), None);
    }

    // Plain iteration returns every key in insertion order, then `None`.
    {
        let keys = [2, 3, 5, 7, 9];
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<()> = SkipListBuilder::new(2);
        for &key in keys.iter() {
            builder.insert(key, &());
        }
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<()> = SkipList::read(&mut buffer);
        for &key in keys.iter() {
            assert_eq!(reader.next().unwrap(), (key, ()));
        }
        assert_eq!(reader.next(), None);
    }

    // Seeking to an existing key (5): iteration resumes at that key.
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<()> = SkipListBuilder::new(2);
        for &key in [2, 3, 5, 7, 9].iter() {
            builder.insert(key, &());
        }
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<()> = SkipList::read(&mut buffer);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(5);
        for &key in [5, 7, 9].iter() {
            assert_eq!(reader.next().unwrap(), (key, ()));
        }
        assert_eq!(reader.next(), None);
    }

    // Seeking to the last key (6) of a list built with `new(3)`.
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<()> = SkipListBuilder::new(3);
        for &key in [2, 3, 5, 6].iter() {
            builder.insert(key, &());
        }
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<()> = SkipList::read(&mut buffer);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(6);
        assert_eq!(reader.next().unwrap(), (6, ()));
        assert_eq!(reader.next(), None);
    }

    // Seeking past the largest key (10 > 9) exhausts the list.
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<()> = SkipListBuilder::new(2);
        for &key in [2, 3, 5, 7, 9].iter() {
            builder.insert(key, &());
        }
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<()> = SkipList::read(&mut buffer);
        assert_eq!(reader.next().unwrap(), (2, ()));
        reader.seek(10);
        assert_eq!(reader.next(), None);
    }

    // Larger list: keys 0..1000 followed by 1004. Seeking to a missing key
    // (1003) lands on the next key that is present (1004).
    {
        let mut buffer: Vec<u8> = Vec::new();
        let mut builder: SkipListBuilder<()> = SkipListBuilder::new(3);
        for key in 0..1000 {
            builder.insert(key, &());
        }
        builder.insert(1004, &());
        builder.write::<Vec<u8>>(&mut buffer);
        let mut reader: SkipList<()> = SkipList::read(&mut buffer);
        assert_eq!(reader.next().unwrap(), (0, ()));
        reader.seek(431);
        assert_eq!(reader.next().unwrap(), (431, ()));
        reader.seek(1003);
        assert_eq!(reader.next().unwrap(), (1004, ()));
        assert_eq!(reader.next(), None);
    }
}
|
||||
Reference in New Issue
Block a user