mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-14 04:52:54 +00:00
blip
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::io;
|
||||
|
||||
/// Type used for document identifiers (alias of `u32`).
pub type DocId = u32;

/// Type used for field identifiers (alias of `u8`).
pub type FieldId = u8;
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
|
||||
|
||||
@@ -57,7 +57,7 @@ impl<R: Read> Iterator for SimplePostingsIterator<R> {
|
||||
Ok(num_bytes) => {
|
||||
if num_bytes == 8 {
|
||||
unsafe {
|
||||
let val = *(*buf.as_ptr() as *const usize);
|
||||
let val = *(*buf.as_ptr() as *const u32);
|
||||
return Some(val)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,11 @@ use core::schema::Term;
|
||||
use core::directory::SharedMmapMemory;
|
||||
use fst::Streamer;
|
||||
use fst;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::borrow::Borrow;
|
||||
use std::io::Cursor;
|
||||
use core::global::DocId;
|
||||
use core::serial::DocCursor;
|
||||
|
||||
pub struct SegmentIndexReader {
|
||||
segment: Segment,
|
||||
@@ -12,9 +16,7 @@ pub struct SegmentIndexReader {
|
||||
postings_data: SharedMmapMemory,
|
||||
}
|
||||
|
||||
|
||||
impl SegmentIndexReader {
|
||||
|
||||
fn term_cursor<'a>(&'a self) -> SegmentTermCur<'a> {
|
||||
SegmentTermCur {
|
||||
segment: &self.segment,
|
||||
@@ -22,13 +24,37 @@ impl SegmentIndexReader {
|
||||
postings_data: self.postings_data.borrow(),
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
pub struct SegmentDocCursor<'a> {
|
||||
postings_data: &'a [u8],
|
||||
offset: usize,
|
||||
postings_data: Cursor<&'a [u8]>,
|
||||
num_docs: DocId,
|
||||
current_doc: DocId,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for SegmentDocCursor<'a> {
|
||||
type Item = DocId;
|
||||
|
||||
fn next(&mut self) -> Option<DocId> {
|
||||
if self.num_docs == 0 {
|
||||
None
|
||||
}
|
||||
else {
|
||||
self.num_docs -= 1;
|
||||
self.current_doc = self.postings_data.read_u32::<LittleEndian>().unwrap();
|
||||
Some(self.current_doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> DocCursor for SegmentDocCursor<'a> {
|
||||
fn doc(&self) -> DocId{
|
||||
self.current_doc
|
||||
}
|
||||
|
||||
fn len(&self) -> DocId {
|
||||
self.num_docs
|
||||
}
|
||||
}
|
||||
|
||||
struct SegmentTermCur<'a> {
|
||||
@@ -41,11 +67,16 @@ impl<'a> SegmentTermCur<'a> {
|
||||
|
||||
fn next(&mut self,) -> Option<(Term, SegmentDocCursor<'a>)> {
|
||||
match self.fst_streamer.next() {
|
||||
Some((k, offset)) => {
|
||||
Some((k, offset_u64)) => {
|
||||
let term = Term::from(k);
|
||||
let offset = offset_u64 as usize;
|
||||
let data = &self.postings_data[offset..];
|
||||
let mut cursor = Cursor::new(data);
|
||||
let num_docs = cursor.read_u32::<LittleEndian>().unwrap();
|
||||
let doc_cursor = SegmentDocCursor {
|
||||
postings_data: self.postings_data,
|
||||
offset: offset as usize,
|
||||
postings_data: cursor,
|
||||
num_docs: num_docs,
|
||||
current_doc: 0,
|
||||
};
|
||||
Some((term, doc_cursor))
|
||||
},
|
||||
|
||||
@@ -9,7 +9,7 @@ pub trait SerializableSegment<'a> {
|
||||
|
||||
/// An iterator over doc ids that also exposes the doc id it is currently
/// positioned on and a doc count.
pub trait DocCursor: Iterator<Item=DocId> {
    /// Returns the doc id the cursor is currently positioned on.
    fn doc(&self) -> DocId;

    /// Returns a number of documents, expressed as a `DocId` so that it
    /// shares its width with the ids themselves.
    fn len(&self) -> DocId;
}
|
||||
|
||||
// TODO make iteration over Fields somehow sorted
|
||||
|
||||
@@ -70,7 +70,7 @@ impl FieldWriter {
|
||||
}
|
||||
|
||||
pub struct IndexWriter {
|
||||
max_doc: usize,
|
||||
max_doc: DocId,
|
||||
term_writers: HashMap<Field, FieldWriter>,
|
||||
directory: Directory,
|
||||
}
|
||||
@@ -169,11 +169,9 @@ impl<'a> CIWTermCursor<'a> {
|
||||
}
|
||||
|
||||
/// Builds the `Term` for the form the cursor is currently positioned on,
/// from a clone of this cursor's field and the current form's text.
///
/// Panics if `current_form_postings` is `None`.
fn get_term(&self) -> Term {
    let field = self.field.clone();
    let value = self.current_form_postings.as_ref().unwrap().form;
    Term::from_field_text(field, value)
}
|
||||
|
||||
fn doc_cursor(&self,) -> CIWDocCursor<'a> {
|
||||
@@ -181,7 +179,7 @@ impl<'a> CIWTermCursor<'a> {
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.postings;
|
||||
let num_docs = postings.doc_ids.len();
|
||||
let num_docs = postings.doc_ids.len() as DocId;
|
||||
CIWDocCursor {
|
||||
num_docs: num_docs,
|
||||
docs_it: postings
|
||||
@@ -191,7 +189,6 @@ impl<'a> CIWTermCursor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn next_form(&mut self,) -> bool {
|
||||
match self.form_it.next() {
|
||||
Some(form_postings) => {
|
||||
@@ -220,7 +217,6 @@ impl<'a> CIWTermCursor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a> TermCursor for CIWTermCursor<'a> {
|
||||
|
||||
type DocCur = CIWDocCursor<'a>;
|
||||
@@ -263,7 +259,7 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
|
||||
pub struct CIWDocCursor<'a> {
|
||||
docs_it: slice::Iter<'a, DocId>,
|
||||
current: Option<DocId>,
|
||||
num_docs: usize,
|
||||
num_docs: DocId,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for CIWDocCursor<'a> {
|
||||
@@ -281,7 +277,7 @@ impl<'a> DocCursor for CIWDocCursor<'a> {
|
||||
self.current.unwrap()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
fn len(&self) -> DocId {
|
||||
self.num_docs
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user