From de48ebe7400673a3c1091b84128efede657dc783 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 14 Jan 2016 14:48:29 +0900 Subject: [PATCH] Switched for unboxed subtype for index reader --- src/core/reader.rs | 29 ++++++++-------------------- src/core/writer.rs | 48 ++++++++++++++++++++++++++-------------------- tests/core.rs | 31 ++++++++++++++++-------------- 3 files changed, 52 insertions(+), 56 deletions(-) diff --git a/src/core/reader.rs b/src/core/reader.rs index 55f375e9f..1605f06d5 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -3,39 +3,26 @@ use core::global::DocId; use core::schema::Field; -// -// pub trait DocCursor { -// fn get(&self) -> DocId; -// fn next(&self) -> bool; -// } -// pub trait TermCursor { -// // fn doc_cursor<'a>(&'a self) -> Box<'a, DocEnum>; -// fn get(&self) -> &str; -// fn next(&self) -> bool; -// } - - -// term is not empty -// field - - -pub trait DocCursor<'a>: Iterator { +pub trait DocCursor: Iterator { fn doc(&self) -> DocId; } pub trait TermCursor<'a>: Iterator { + type TDocCur: DocCursor; fn get_term(&self) -> &'a String; - fn doc_cursor<'b>(&'b self) -> Box + 'b>; + fn doc_cursor(&self) -> Self::TDocCur; } pub trait FieldCursor<'a>: Iterator { + type TTermCur: TermCursor<'a>; fn get_field(&self) -> Option<&'a Field>; - fn term_cursor<'b>(&'b self) -> Box + 'b>; + fn term_cursor(&'a self) -> Self::TTermCur; } -pub trait IndexFlushable { - fn field_cursor<'a>(&'a self) -> Box + 'a>; +pub trait IndexFlushable<'a> { + type TFieldCur: FieldCursor<'a>; + fn field_cursor(&'a self) -> Self::TFieldCur; } pub struct SegmentIndexReader { diff --git a/src/core/writer.rs b/src/core/writer.rs index 29e9c45de..18f22df49 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -15,6 +15,8 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; use core::reader::IndexFlushable; use std::iter::Peekable; use core::reader::{FieldCursor, TermCursor, DocCursor}; +//use core::reader::FieldCursor; +// use core::reader::TermCursor; pub struct SimplePostingsWriter { doc_ids: Vec, @@ -148,7 +150,7 @@ pub struct ClosedIndexWriter { // fn next(&self) -> bool; // } -struct CIWFieldCursor<'a> { +pub struct CIWFieldCursor<'a> { field_it: hash_map::Iter<'a, Field, FieldWriter>, current: Option<(&'a Field, &'a FieldWriter)> } @@ -169,39 +171,43 @@ impl<'a> Iterator for CIWFieldCursor<'a> { } impl<'a> FieldCursor<'a> for CIWFieldCursor<'a> { + + type TTermCur = CIWTermCursor<'a>; + fn get_field(&self) -> Option<&'a Field> { self.current.map(|(first, _)| first) } - fn term_cursor<'b>(&'b self) -> Box + 'b> { + fn term_cursor<'b>(&'b self) -> CIWTermCursor<'b> { let field_writer = self.get_field_writer(); - Box::new(CIWTermCursor { + CIWTermCursor { postings: &field_writer.postings, term_it: field_writer.term_index.iter(), current: None - }) + } } } // TODO use a Term type -impl IndexFlushable for ClosedIndexWriter { - fn field_cursor<'a>(&'a self) -> Box + 'a> { +impl<'a> IndexFlushable<'a> for ClosedIndexWriter { + + type TFieldCur = CIWFieldCursor<'a>; + + fn field_cursor(&'a self) -> CIWFieldCursor<'a> { let mut field_it: hash_map::Iter<'a, Field, FieldWriter> = self.index_writer.term_writers.iter(); let current: Option<(&'a Field, &'a FieldWriter)> = None; - Box::new( - CIWFieldCursor { + CIWFieldCursor { current: current, field_it: field_it - } - ) + } } } ////////////////////////////////// // CIWTermCursor // -struct CIWTermCursor<'a> { +pub struct CIWTermCursor<'a> { postings: &'a Vec, term_it: btree_map::Iter<'a, String, usize>, current: Option<(&'a String, &'a usize)> @@ -224,17 +230,17 @@ impl<'a> Iterator for CIWTermCursor<'a> { } impl<'a> TermCursor<'a> for CIWTermCursor<'a> { - fn doc_cursor<'b>(&'b self) -> Box + 'b> { + type TDocCur = CIWDocCursor<'a>; + + fn doc_cursor(&self) -> CIWDocCursor<'a> { let (_, &postings_id) = self.current.unwrap(); unsafe { let postings_writer = self.postings.get_unchecked(postings_id); let docs_it = postings_writer.doc_ids.iter(); - return Box::new( - CIWDocCursor { - docs_it: Box::new(docs_it), - current: None, - } - ) + CIWDocCursor { + docs_it: Box::new(docs_it), + current: None, + } } } @@ -249,8 +255,8 @@ impl<'a> TermCursor<'a> for CIWTermCursor<'a> { // // TODO add positions - -struct CIWDocCursor<'a> { +// +pub struct CIWDocCursor<'a> { docs_it: Box + 'a>, current: Option, } @@ -264,7 +270,7 @@ impl<'a> Iterator for CIWDocCursor<'a> { } } -impl<'a> DocCursor<'a> for CIWDocCursor<'a> { +impl<'a> DocCursor for CIWDocCursor<'a> { fn doc(&self,) -> DocId { self.current.unwrap() } diff --git a/tests/core.rs b/tests/core.rs index 2e4156e1b..38e2a4c38 100644 --- a/tests/core.rs +++ b/tests/core.rs @@ -8,15 +8,15 @@ use tantivy::core::postings::{VecPostings, intersection}; use tantivy::core::postings::Postings; use tantivy::core::analyzer::tokenize; use tantivy::core::reader::IndexFlushable; -use tantivy::core::reader::DocCursor; use tantivy::core::writer::{IndexWriter, ClosedIndexWriter}; use tantivy::core::directory::{Directory, generate_segment_name, SegmentId}; use tantivy::core::schema::{Field, Document}; -// use tantivy::core::reader::IndexReader; use std::ops::DerefMut; use tantivy::core::writer::SimplePostingsWriter; use tantivy::core::postings::PostingsWriter; use tantivy::core::global::Flushable; +use tantivy::core::reader::DocCursor; +use tantivy::core::reader::FieldCursor; use tantivy::core::reader::TermCursor; use std::io::{ BufWriter, Write}; use regex::Regex; @@ -42,17 +42,23 @@ fn test_indexing() { let directory = Directory::in_mem(); { let mut index_writer = IndexWriter::open(&directory); - let mut doc = Document::new(); - doc.set(Field("text"), "toto"); - index_writer.add(doc); + { + let mut doc = Document::new(); + doc.set(Field("text"), "toto titi"); + index_writer.add(doc); + } + { + let mut doc = Document::new(); + doc.set(Field("text"), "titi tata"); + index_writer.add(doc); + } let closed_index_writer: ClosedIndexWriter = index_writer.close(); let mut field_cursor = closed_index_writer.field_cursor(); - println!("--{}---", 3); loop { match field_cursor.next() { Some(field) => { println!(" {:?}", field); - show_term_cursor(&mut *field_cursor.term_cursor()); + show_term_cursor(field_cursor.term_cursor()); }, None => { break; }, } @@ -67,15 +73,12 @@ fn test_indexing() { } -fn show_term_cursor<'a>(term_cursor: &mut TermCursor) { +fn show_term_cursor<'a, T: TermCursor<'a>>(mut term_cursor: T) { loop { match term_cursor.next() { Some(term) => { - println!(" {:?}", term); - // let () = term_cursor.doc_cursor(); - // let () = term_cursor.doc_cursor(); - // let a = term_cursor.doc_cursor(); - // show_doc_cursor(&mut *a); + println!(" term: {:?}", term); + show_doc_cursor(term_cursor.doc_cursor()); }, None => { break; @@ -84,7 +87,7 @@ fn show_term_cursor<'a>(term_cursor: &mut TermCursor) { } } -fn show_doc_cursor<'a>(doc_cursor: &'a mut DocCursor) { +fn show_doc_cursor<'a, D: DocCursor>(mut doc_cursor: D) { loop { match doc_cursor.next() { Some(doc) => {