diff --git a/src/core/codec.rs b/src/core/codec.rs index 68c7fa674..3da1c90e8 100644 --- a/src/core/codec.rs +++ b/src/core/codec.rs @@ -62,7 +62,7 @@ impl Codec for SimpleCodec { // TODO include cause somehow return Err(Error::WriteError(String::from("Failed creating the term builder"))); } - let mut term_buffer: String = String::new(); + let mut term_buffer: Vec = Vec::new(); let mut term_trie_builder = term_trie_builder_result.unwrap(); let mut term_cursor = index.term_cursor(); let mut offset: usize = 0; diff --git a/src/core/global.rs b/src/core/global.rs index 299230b03..84bdc0f57 100644 --- a/src/core/global.rs +++ b/src/core/global.rs @@ -2,7 +2,7 @@ use std::io::{BufWriter, Write}; use std::io; pub type DocId = usize; -pub type FieldId = u32; +pub type FieldId = u8; #[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)] pub struct Field(pub FieldId); diff --git a/src/core/reader.rs b/src/core/reader.rs index 6265967cd..a2280a739 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -1,40 +1,46 @@ use core::directory::Directory; use core::directory::Segment; +use core::schema::Term; +// use fst::stream::Streamer; +use fst; pub struct SegmentIndexReader { - directory: Directory, + segment: Segment, + term_offsets: fst::Map, } - - -// pub trait SearchableSegment { +// +// pub struct SegmentDocCursor<'a> { +// data: &'a [u8], +// } +// +// struct SegmentTermCur<'a> { +// segment: &'a Segment, +// fst_streamer: fst::map::Stream<'a>, +// term: Term<'a>, // // } // -// pub struct SimpleSearchableSegment { -// segment: Segment, -// } +// impl<'a> SegmentTermCur<'a> { // -// impl SimpleSearchableSegment { -// -// pub fn new(segment: &Segment) -> SimpleSearchableSegment { -// SimpleSearchableSegment { -// segment: segment.clone() +// fn next(&mut self,) -> Option<(Term<'a>, SegmentDocCursor<'a>)> { +// match self.fst_streamer.next() { +// Some(_) => None, +// None => None // } // } // } // -// impl SearchableSegment for SimpleSearchableSegment { -// -// -// } - - // // impl SegmentIndexReader { // -// pub fn open(directory: &Directory) -> IndexReader { -// IndexReader { -// directory: (*directory).clone(), +// fn term_cursor<'a>(&'a self) -> SegmentTermCur<'a> { +// let term: Term<'a> { +// self. +// }; +// SegmentTermCur { +// segment: &self.segment, +// fst_streamer: self.term_offsets.stream(), +// term: // } // } // diff --git a/src/core/schema.rs b/src/core/schema.rs index 668b974b0..dfb8673a5 100644 --- a/src/core/schema.rs +++ b/src/core/schema.rs @@ -1,5 +1,6 @@ use core::global::*; use std::fmt::Write; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; #[derive(Clone,Debug,PartialEq,PartialOrd,Eq)] pub struct FieldValue { @@ -8,18 +9,31 @@ pub struct FieldValue { } -#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)] -pub struct Term<'a> { - pub field: Field, - pub text: &'a str, +#[derive(Clone,PartialEq,PartialOrd,Eq,Hash)] +pub struct Term { + pub data: Vec, // avoid copies + // pub field: Field, + // pub text: &'a [u8], } -impl<'a> Term<'a> { - pub fn write_into(&self, term_str: &mut String) { - term_str.clear(); - let Field(field_idx) = self.field; - // TODO avoid writing the field idx. - term_str.write_fmt(format_args!("{}:{}", field_idx, self.text)); +impl Term { + + // TODO avoid all these copies. + + pub fn from_field_text(field: Field, text: &str) -> Term { + let mut buffer = Vec::with_capacity(1 + text.len()); + let Field(field_idx) = field; + buffer.clear(); + buffer.push(field_idx); + buffer.extend(text.as_bytes()); + Term { + data: buffer, + } + } + + pub fn write_into(&self, buf: &mut Vec) { + buf.clear(); + buf.extend(&self.data); } } diff --git a/src/core/serial.rs b/src/core/serial.rs index 7163452e0..f40387768 100644 --- a/src/core/serial.rs +++ b/src/core/serial.rs @@ -3,7 +3,7 @@ use core::schema::*; // Trait sufficient to serialize a segment. pub trait SerializableSegment<'a> { - type TermCur: TermCursor<'a>; // TODO rename TermCursorImpl + type TermCur: TermCursor; // TODO rename TermCursorImpl fn term_cursor(&'a self) -> Self::TermCur; } @@ -14,7 +14,8 @@ pub trait DocCursor: Iterator { // TODO make iteration over Fields somehow sorted -pub trait TermCursor<'a> { +pub trait TermCursor { type DocCur: DocCursor; - fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>; + // fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>; + fn next(&mut self,) -> Option<(Term, Self::DocCur)>; } diff --git a/src/core/writer.rs b/src/core/writer.rs index 0f88a56d0..40a8282ff 100644 --- a/src/core/writer.rs +++ b/src/core/writer.rs @@ -15,6 +15,8 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; use std::iter::Peekable; use core::serial::*; use core::error::*; +use std::cell::RefCell; +use std::borrow::BorrowMut; pub struct SimplePostingsWriter { doc_ids: Vec, @@ -166,11 +168,12 @@ impl<'a> CIWTermCursor<'a> { } } - fn get_term(&self) -> Term<'a> { - Term { - field: self.field.clone(), - text: self.current_form_postings.as_ref().unwrap().form, - } + fn get_term(&self) -> Term { + Term::from_field_text(self.field.clone(), self.current_form_postings.as_ref().unwrap().form) + // Term { + // field: self.field.clone(), + // text: self.current_form_postings.as_ref().unwrap().form, + // } } fn doc_cursor(&self,) -> CIWDocCursor<'a> { @@ -218,11 +221,11 @@ impl<'a> CIWTermCursor<'a> { } -impl<'a> TermCursor<'a> for CIWTermCursor<'a> { +impl<'a> TermCursor for CIWTermCursor<'a> { type DocCur = CIWDocCursor<'a>; - fn next(&mut self,) -> Option<(Term<'a>, CIWDocCursor<'a>)> { + fn next(&mut self,) -> Option<(Term, CIWDocCursor<'a>)> { if self.advance() { Some((self.get_term(), self.doc_cursor())) } @@ -242,7 +245,7 @@ impl<'a> SerializableSegment<'a> for IndexWriter { fn term_cursor(&'a self) -> CIWTermCursor<'a> { let mut field_it: hash_map::Iter<'a, Field, FieldWriter> = self.term_writers.iter(); let (field, field_writer) = field_it.next().unwrap(); // TODO handle no field - let term_cursor = CIWTermCursor { + CIWTermCursor { field_it: field_it, form_it: CIWFormCursor { term_it: field_writer.term_index.iter(), @@ -250,9 +253,8 @@ impl<'a> SerializableSegment<'a> for IndexWriter { }, field: field, current_form_postings: None, - }; + } // TODO handle having no fields at all - term_cursor } }