Terms own their own value

This commit is contained in:
Paul Masurel
2016-01-19 18:01:30 +09:00
parent f3d5fc257e
commit 994a68741e
6 changed files with 69 additions and 46 deletions

View File

@@ -62,7 +62,7 @@ impl Codec for SimpleCodec {
// TODO include cause somehow
return Err(Error::WriteError(String::from("Failed creating the term builder")));
}
let mut term_buffer: String = String::new();
let mut term_buffer: Vec<u8> = Vec::new();
let mut term_trie_builder = term_trie_builder_result.unwrap();
let mut term_cursor = index.term_cursor();
let mut offset: usize = 0;

View File

@@ -2,7 +2,7 @@ use std::io::{BufWriter, Write};
use std::io;
pub type DocId = usize;
pub type FieldId = u32;
pub type FieldId = u8;
/// Identifies a field of a document by wrapping its numeric `FieldId`.
///
/// The inner id is public, so it can be read by destructuring: `let Field(id) = field;`.
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
pub struct Field(pub FieldId);

View File

@@ -1,40 +1,46 @@
use core::directory::Directory;
use core::directory::Segment;
use core::schema::Term;
// use fst::stream::Streamer;
use fst;
/// Groups a `Directory`, one of its `Segment`s and an `fst::Map` built over terms.
///
/// NOTE(review): no methods are implemented on this type yet; the intended
/// read path is only sketched in commented-out code nearby — confirm the
/// design before relying on this description.
pub struct SegmentIndexReader {
// Directory value held by the reader.
directory: Directory,
// Segment value held by the reader.
segment: Segment,
// FST map; presumably keyed by serialized term and valued by an offset into
// the postings data — TODO confirm once the reader is implemented.
term_offsets: fst::Map,
}
// pub trait SearchableSegment {
//
// pub struct SegmentDocCursor<'a> {
// data: &'a [u8],
// }
//
// struct SegmentTermCur<'a> {
// segment: &'a Segment,
// fst_streamer: fst::map::Stream<'a>,
// term: Term<'a>,
//
// }
//
// pub struct SimpleSearchableSegment {
// segment: Segment,
// }
// impl<'a> SegmentTermCur<'a> {
//
// impl SimpleSearchableSegment {
//
// pub fn new(segment: &Segment) -> SimpleSearchableSegment {
// SimpleSearchableSegment {
// segment: segment.clone()
// fn next(&mut self,) -> Option<(Term<'a>, SegmentDocCursor<'a>)> {
// match self.fst_streamer.next() {
// Some(_) => None,
// None => None
// }
// }
// }
//
// impl SearchableSegment for SimpleSearchableSegment {
//
//
// }
//
// impl SegmentIndexReader {
//
// pub fn open(directory: &Directory) -> IndexReader {
// IndexReader {
// directory: (*directory).clone(),
// fn term_cursor<'a>(&'a self) -> SegmentTermCur<'a> {
// let term: Term<'a> {
// self.
// };
// SegmentTermCur {
// segment: &self.segment,
// fst_streamer: self.term_offsets.stream(),
// term:
// }
// }
//

View File

@@ -1,5 +1,6 @@
use core::global::*;
use std::fmt::Write;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq)]
pub struct FieldValue {
@@ -8,18 +9,31 @@ pub struct FieldValue {
}
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
pub struct Term<'a> {
pub field: Field,
pub text: &'a str,
/// A term that owns its serialized bytes.
///
/// `Term::from_field_text` fills `data` with the field id (one byte) followed
/// by the UTF-8 bytes of the text, so the field and the text are no longer
/// stored as separate members.
#[derive(Clone,PartialEq,PartialOrd,Eq,Hash)]
pub struct Term {
pub data: Vec<u8>, // owned buffer: field id byte, then the text bytes
// Previous borrowed representation, kept for reference:
// pub field: Field,
// pub text: &'a [u8],
}
impl<'a> Term<'a> {
pub fn write_into(&self, term_str: &mut String) {
term_str.clear();
let Field(field_idx) = self.field;
// TODO avoid writing the field idx.
term_str.write_fmt(format_args!("{}:{}", field_idx, self.text));
impl Term {
    /// Builds a term from a field and a text value.
    ///
    /// The resulting `data` buffer holds the field id as its first byte,
    /// followed by the UTF-8 bytes of `text`.
    // TODO avoid all these copies.
    pub fn from_field_text(field: Field, text: &str) -> Term {
        let Field(field_idx) = field;
        // Exact-size allocation: one byte for the field id plus the text bytes.
        // The buffer is freshly created, so there is nothing to clear first.
        let mut buffer = Vec::with_capacity(1 + text.len());
        buffer.push(field_idx);
        buffer.extend(text.as_bytes());
        Term { data: buffer }
    }

    /// Overwrites `buf` with this term's bytes.
    ///
    /// Whatever `buf` contained before the call is discarded; afterwards it
    /// holds exactly a copy of `self.data`.
    pub fn write_into(&self, buf: &mut Vec<u8>) {
        buf.clear();
        buf.extend(&self.data);
    }
}

View File

@@ -3,7 +3,7 @@ use core::schema::*;
// Trait sufficient to serialize a segment.
pub trait SerializableSegment<'a> {
type TermCur: TermCursor<'a>; // TODO rename TermCursorImpl
type TermCur: TermCursor; // TODO rename TermCursorImpl
fn term_cursor(&'a self) -> Self::TermCur;
}
@@ -14,7 +14,8 @@ pub trait DocCursor: Iterator<Item=DocId> {
// TODO make iteration over Fields somehow sorted
pub trait TermCursor<'a> {
pub trait TermCursor {
type DocCur: DocCursor;
fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>;
// fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>;
fn next(&mut self,) -> Option<(Term, Self::DocCur)>;
}

View File

@@ -15,6 +15,8 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use std::iter::Peekable;
use core::serial::*;
use core::error::*;
use std::cell::RefCell;
use std::borrow::BorrowMut;
pub struct SimplePostingsWriter {
doc_ids: Vec<DocId>,
@@ -166,11 +168,12 @@ impl<'a> CIWTermCursor<'a> {
}
}
fn get_term(&self) -> Term<'a> {
Term {
field: self.field.clone(),
text: self.current_form_postings.as_ref().unwrap().form,
}
/// Builds an owned `Term` from the cursor's field and the form of the
/// current postings entry.
///
/// Panics if `current_form_postings` is `None`.
fn get_term(&self) -> Term {
    let field = self.field.clone();
    let form_postings = self.current_form_postings.as_ref().unwrap();
    Term::from_field_text(field, form_postings.form)
}
fn doc_cursor(&self,) -> CIWDocCursor<'a> {
@@ -218,11 +221,11 @@ impl<'a> CIWTermCursor<'a> {
}
impl<'a> TermCursor<'a> for CIWTermCursor<'a> {
impl<'a> TermCursor for CIWTermCursor<'a> {
type DocCur = CIWDocCursor<'a>;
fn next(&mut self,) -> Option<(Term<'a>, CIWDocCursor<'a>)> {
fn next(&mut self,) -> Option<(Term, CIWDocCursor<'a>)> {
if self.advance() {
Some((self.get_term(), self.doc_cursor()))
}
@@ -242,7 +245,7 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
fn term_cursor(&'a self) -> CIWTermCursor<'a> {
let mut field_it: hash_map::Iter<'a, Field, FieldWriter> = self.term_writers.iter();
let (field, field_writer) = field_it.next().unwrap(); // TODO handle no field
let term_cursor = CIWTermCursor {
CIWTermCursor {
field_it: field_it,
form_it: CIWFormCursor {
term_it: field_writer.term_index.iter(),
@@ -250,9 +253,8 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
},
field: field,
current_form_postings: None,
};
}
// TODO handle having no fields at all
term_cursor
}
}