Switched to unboxed associated types for the index reader

This commit is contained in:
Paul Masurel
2016-01-14 14:48:29 +09:00
parent c5775330b9
commit de48ebe740
3 changed files with 52 additions and 56 deletions

View File

@@ -3,39 +3,26 @@ use core::global::DocId;
use core::schema::Field;
//
// pub trait DocCursor {
// fn get(&self) -> DocId;
// fn next(&self) -> bool;
// }
// pub trait TermCursor {
// // fn doc_cursor<'a>(&'a self) -> Box<'a, DocEnum>;
// fn get(&self) -> &str;
// fn next(&self) -> bool;
// }
// term is not empty
// field
pub trait DocCursor<'a>: Iterator<Item=DocId> {
pub trait DocCursor: Iterator<Item=DocId> {
fn doc(&self) -> DocId;
}
pub trait TermCursor<'a>: Iterator<Item=&'a String> {
type TDocCur: DocCursor;
fn get_term(&self) -> &'a String;
fn doc_cursor<'b>(&'b self) -> Box<DocCursor<Item=DocId> + 'b>;
fn doc_cursor(&self) -> Self::TDocCur;
}
pub trait FieldCursor<'a>: Iterator<Item=&'a Field> {
type TTermCur: TermCursor<'a>;
fn get_field(&self) -> Option<&'a Field>;
fn term_cursor<'b>(&'b self) -> Box<TermCursor<Item=&'b String> + 'b>;
fn term_cursor(&'a self) -> Self::TTermCur;
}
pub trait IndexFlushable {
fn field_cursor<'a>(&'a self) -> Box<FieldCursor<Item=&'a Field> + 'a>;
pub trait IndexFlushable<'a> {
type TFieldCur: FieldCursor<'a>;
fn field_cursor(&'a self) -> Self::TFieldCur;
}
pub struct SegmentIndexReader {

View File

@@ -15,6 +15,8 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use core::reader::IndexFlushable;
use std::iter::Peekable;
use core::reader::{FieldCursor, TermCursor, DocCursor};
//use core::reader::FieldCursor;
// use core::reader::TermCursor;
pub struct SimplePostingsWriter {
doc_ids: Vec<DocId>,
@@ -148,7 +150,7 @@ pub struct ClosedIndexWriter {
// fn next(&self) -> bool;
// }
struct CIWFieldCursor<'a> {
pub struct CIWFieldCursor<'a> {
field_it: hash_map::Iter<'a, Field, FieldWriter>,
current: Option<(&'a Field, &'a FieldWriter)>
}
@@ -169,39 +171,43 @@ impl<'a> Iterator for CIWFieldCursor<'a> {
}
impl<'a> FieldCursor<'a> for CIWFieldCursor<'a> {
type TTermCur = CIWTermCursor<'a>;
fn get_field(&self) -> Option<&'a Field> {
self.current.map(|(first, _)| first)
}
fn term_cursor<'b>(&'b self) -> Box<TermCursor<Item=&'b String> + 'b> {
fn term_cursor<'b>(&'b self) -> CIWTermCursor<'b> {
let field_writer = self.get_field_writer();
Box::new(CIWTermCursor {
CIWTermCursor {
postings: &field_writer.postings,
term_it: field_writer.term_index.iter(),
current: None
})
}
}
}
// TODO use a Term type
impl IndexFlushable for ClosedIndexWriter {
fn field_cursor<'a>(&'a self) -> Box<FieldCursor<Item=&'a Field> + 'a> {
impl<'a> IndexFlushable<'a> for ClosedIndexWriter {
type TFieldCur = CIWFieldCursor<'a>;
fn field_cursor(&'a self) -> CIWFieldCursor<'a> {
let mut field_it: hash_map::Iter<'a, Field, FieldWriter> = self.index_writer.term_writers.iter();
let current: Option<(&'a Field, &'a FieldWriter)> = None;
Box::new(
CIWFieldCursor {
CIWFieldCursor {
current: current,
field_it: field_it
}
)
}
}
}
//////////////////////////////////
// CIWTermCursor
//
struct CIWTermCursor<'a> {
pub struct CIWTermCursor<'a> {
postings: &'a Vec<SimplePostingsWriter>,
term_it: btree_map::Iter<'a, String, usize>,
current: Option<(&'a String, &'a usize)>
@@ -224,17 +230,17 @@ impl<'a> Iterator for CIWTermCursor<'a> {
}
impl<'a> TermCursor<'a> for CIWTermCursor<'a> {
fn doc_cursor<'b>(&'b self) -> Box<DocCursor<Item=DocId> + 'b> {
type TDocCur = CIWDocCursor<'a>;
fn doc_cursor(&self) -> CIWDocCursor<'a> {
let (_, &postings_id) = self.current.unwrap();
unsafe {
let postings_writer = self.postings.get_unchecked(postings_id);
let docs_it = postings_writer.doc_ids.iter();
return Box::new(
CIWDocCursor {
docs_it: Box::new(docs_it),
current: None,
}
)
CIWDocCursor {
docs_it: Box::new(docs_it),
current: None,
}
}
}
@@ -249,8 +255,8 @@ impl<'a> TermCursor<'a> for CIWTermCursor<'a> {
//
// TODO add positions
struct CIWDocCursor<'a> {
//
pub struct CIWDocCursor<'a> {
docs_it: Box<Iterator<Item=&'a DocId> + 'a>,
current: Option<DocId>,
}
@@ -264,7 +270,7 @@ impl<'a> Iterator for CIWDocCursor<'a> {
}
}
impl<'a> DocCursor<'a> for CIWDocCursor<'a> {
impl<'a> DocCursor for CIWDocCursor<'a> {
fn doc(&self,) -> DocId {
self.current.unwrap()
}

View File

@@ -8,15 +8,15 @@ use tantivy::core::postings::{VecPostings, intersection};
use tantivy::core::postings::Postings;
use tantivy::core::analyzer::tokenize;
use tantivy::core::reader::IndexFlushable;
use tantivy::core::reader::DocCursor;
use tantivy::core::writer::{IndexWriter, ClosedIndexWriter};
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
use tantivy::core::schema::{Field, Document};
// use tantivy::core::reader::IndexReader;
use std::ops::DerefMut;
use tantivy::core::writer::SimplePostingsWriter;
use tantivy::core::postings::PostingsWriter;
use tantivy::core::global::Flushable;
use tantivy::core::reader::DocCursor;
use tantivy::core::reader::FieldCursor;
use tantivy::core::reader::TermCursor;
use std::io::{ BufWriter, Write};
use regex::Regex;
@@ -42,17 +42,23 @@ fn test_indexing() {
let directory = Directory::in_mem();
{
let mut index_writer = IndexWriter::open(&directory);
let mut doc = Document::new();
doc.set(Field("text"), "toto");
index_writer.add(doc);
{
let mut doc = Document::new();
doc.set(Field("text"), "toto titi");
index_writer.add(doc);
}
{
let mut doc = Document::new();
doc.set(Field("text"), "titi tata");
index_writer.add(doc);
}
let closed_index_writer: ClosedIndexWriter = index_writer.close();
let mut field_cursor = closed_index_writer.field_cursor();
println!("--{}---", 3);
loop {
match field_cursor.next() {
Some(field) => {
println!(" {:?}", field);
show_term_cursor(&mut *field_cursor.term_cursor());
show_term_cursor(field_cursor.term_cursor());
},
None => { break; },
}
@@ -67,15 +73,12 @@ fn test_indexing() {
}
fn show_term_cursor<'a>(term_cursor: &mut TermCursor<Item=&'a String>) {
fn show_term_cursor<'a, T: TermCursor<'a>>(mut term_cursor: T) {
loop {
match term_cursor.next() {
Some(term) => {
println!(" {:?}", term);
// let () = term_cursor.doc_cursor();
// let () = term_cursor.doc_cursor();
// let a = term_cursor.doc_cursor();
// show_doc_cursor(&mut *a);
println!(" term: {:?}", term);
show_doc_cursor(term_cursor.doc_cursor());
},
None => {
break;
@@ -84,7 +87,7 @@ fn show_term_cursor<'a>(term_cursor: &mut TermCursor<Item=&'a String>) {
}
}
fn show_doc_cursor<'a>(doc_cursor: &'a mut DocCursor<Item=DocId>) {
fn show_doc_cursor<'a, D: DocCursor>(mut doc_cursor: D) {
loop {
match doc_cursor.next() {
Some(doc) => {