mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Terms own their own value
This commit is contained in:
@@ -62,7 +62,7 @@ impl Codec for SimpleCodec {
|
||||
// TODO include cause somehow
|
||||
return Err(Error::WriteError(String::from("Failed creating the term builder")));
|
||||
}
|
||||
let mut term_buffer: String = String::new();
|
||||
let mut term_buffer: Vec<u8> = Vec::new();
|
||||
let mut term_trie_builder = term_trie_builder_result.unwrap();
|
||||
let mut term_cursor = index.term_cursor();
|
||||
let mut offset: usize = 0;
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::io::{BufWriter, Write};
|
||||
use std::io;
|
||||
|
||||
pub type DocId = usize;
|
||||
pub type FieldId = u32;
|
||||
pub type FieldId = u8;
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
|
||||
pub struct Field(pub FieldId);
|
||||
|
||||
@@ -1,40 +1,46 @@
|
||||
use core::directory::Directory;
|
||||
use core::directory::Segment;
|
||||
use core::schema::Term;
|
||||
// use fst::stream::Streamer;
|
||||
use fst;
|
||||
|
||||
pub struct SegmentIndexReader {
|
||||
directory: Directory,
|
||||
segment: Segment,
|
||||
term_offsets: fst::Map,
|
||||
}
|
||||
|
||||
|
||||
// pub trait SearchableSegment {
|
||||
//
|
||||
// pub struct SegmentDocCursor<'a> {
|
||||
// data: &'a [u8],
|
||||
// }
|
||||
//
|
||||
// struct SegmentTermCur<'a> {
|
||||
// segment: &'a Segment,
|
||||
// fst_streamer: fst::map::Stream<'a>,
|
||||
// term: Term<'a>,
|
||||
//
|
||||
// }
|
||||
//
|
||||
// pub struct SimpleSearchableSegment {
|
||||
// segment: Segment,
|
||||
// }
|
||||
// impl<'a> SegmentTermCur<'a> {
|
||||
//
|
||||
// impl SimpleSearchableSegment {
|
||||
//
|
||||
// pub fn new(segment: &Segment) -> SimpleSearchableSegment {
|
||||
// SimpleSearchableSegment {
|
||||
// segment: segment.clone()
|
||||
// fn next(&mut self,) -> Option<(Term<'a>, SegmentDocCursor<'a>)> {
|
||||
// match self.fst_streamer.next() {
|
||||
// Some(_) => None,
|
||||
// None => None
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// impl SearchableSegment for SimpleSearchableSegment {
|
||||
//
|
||||
//
|
||||
// }
|
||||
|
||||
|
||||
//
|
||||
// impl SegmentIndexReader {
|
||||
//
|
||||
// pub fn open(directory: &Directory) -> IndexReader {
|
||||
// IndexReader {
|
||||
// directory: (*directory).clone(),
|
||||
// fn term_cursor<'a>(&'a self) -> SegmentTermCur<'a> {
|
||||
// let term: Term<'a> {
|
||||
// self.
|
||||
// };
|
||||
// SegmentTermCur {
|
||||
// segment: &self.segment,
|
||||
// fst_streamer: self.term_offsets.stream(),
|
||||
// term:
|
||||
// }
|
||||
// }
|
||||
//
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use core::global::*;
|
||||
use std::fmt::Write;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq)]
|
||||
pub struct FieldValue {
|
||||
@@ -8,18 +9,31 @@ pub struct FieldValue {
|
||||
}
|
||||
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
|
||||
pub struct Term<'a> {
|
||||
pub field: Field,
|
||||
pub text: &'a str,
|
||||
/// An owned, serialized term.
///
/// The term's bytes live in a buffer owned by the value itself, so `Term`
/// carries no lifetime parameter and does not borrow from the text it was
/// built from.
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
pub struct Term {
    /// Serialized bytes of the term, owned by this value.
    pub data: Vec<u8>,
}
|
||||
|
||||
impl<'a> Term<'a> {
|
||||
pub fn write_into(&self, term_str: &mut String) {
|
||||
term_str.clear();
|
||||
let Field(field_idx) = self.field;
|
||||
// TODO avoid writing the field idx.
|
||||
term_str.write_fmt(format_args!("{}:{}", field_idx, self.text));
|
||||
impl Term {
|
||||
|
||||
// TODO avoid all these copies.
|
||||
|
||||
pub fn from_field_text(field: Field, text: &str) -> Term {
|
||||
let mut buffer = Vec::with_capacity(1 + text.len());
|
||||
let Field(field_idx) = field;
|
||||
buffer.clear();
|
||||
buffer.push(field_idx);
|
||||
buffer.extend(text.as_bytes());
|
||||
Term {
|
||||
data: buffer,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_into(&self, buf: &mut Vec<u8>) {
|
||||
buf.clear();
|
||||
buf.extend(&self.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ use core::schema::*;
|
||||
|
||||
// Trait sufficient to serialize a segment.
|
||||
pub trait SerializableSegment<'a> {
|
||||
type TermCur: TermCursor<'a>; // TODO rename TermCursorImpl
|
||||
type TermCur: TermCursor; // TODO rename TermCursorImpl
|
||||
fn term_cursor(&'a self) -> Self::TermCur;
|
||||
}
|
||||
|
||||
@@ -14,7 +14,8 @@ pub trait DocCursor: Iterator<Item=DocId> {
|
||||
|
||||
// TODO make iteration over Fields somehow sorted
|
||||
|
||||
pub trait TermCursor<'a> {
|
||||
pub trait TermCursor {
|
||||
type DocCur: DocCursor;
|
||||
fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>;
|
||||
// fn next(&mut self,) -> Option<(Term<'a>, Self::DocCur)>;
|
||||
fn next(&mut self,) -> Option<(Term, Self::DocCur)>;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,8 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::iter::Peekable;
|
||||
use core::serial::*;
|
||||
use core::error::*;
|
||||
use std::cell::RefCell;
|
||||
use std::borrow::BorrowMut;
|
||||
|
||||
pub struct SimplePostingsWriter {
|
||||
doc_ids: Vec<DocId>,
|
||||
@@ -166,11 +168,12 @@ impl<'a> CIWTermCursor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_term(&self) -> Term<'a> {
|
||||
Term {
|
||||
field: self.field.clone(),
|
||||
text: self.current_form_postings.as_ref().unwrap().form,
|
||||
}
|
||||
fn get_term(&self) -> Term {
|
||||
Term::from_field_text(self.field.clone(), self.current_form_postings.as_ref().unwrap().form)
|
||||
// Term {
|
||||
// field: self.field.clone(),
|
||||
// text: self.current_form_postings.as_ref().unwrap().form,
|
||||
// }
|
||||
}
|
||||
|
||||
fn doc_cursor(&self,) -> CIWDocCursor<'a> {
|
||||
@@ -218,11 +221,11 @@ impl<'a> CIWTermCursor<'a> {
|
||||
}
|
||||
|
||||
|
||||
impl<'a> TermCursor<'a> for CIWTermCursor<'a> {
|
||||
impl<'a> TermCursor for CIWTermCursor<'a> {
|
||||
|
||||
type DocCur = CIWDocCursor<'a>;
|
||||
|
||||
fn next(&mut self,) -> Option<(Term<'a>, CIWDocCursor<'a>)> {
|
||||
fn next(&mut self,) -> Option<(Term, CIWDocCursor<'a>)> {
|
||||
if self.advance() {
|
||||
Some((self.get_term(), self.doc_cursor()))
|
||||
}
|
||||
@@ -242,7 +245,7 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
|
||||
fn term_cursor(&'a self) -> CIWTermCursor<'a> {
|
||||
let mut field_it: hash_map::Iter<'a, Field, FieldWriter> = self.term_writers.iter();
|
||||
let (field, field_writer) = field_it.next().unwrap(); // TODO handle no field
|
||||
let term_cursor = CIWTermCursor {
|
||||
CIWTermCursor {
|
||||
field_it: field_it,
|
||||
form_it: CIWFormCursor {
|
||||
term_it: field_writer.term_index.iter(),
|
||||
@@ -250,9 +253,8 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
|
||||
},
|
||||
field: field,
|
||||
current_form_postings: None,
|
||||
};
|
||||
}
|
||||
// TODO handle having no fields at all
|
||||
term_cursor
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user