This commit is contained in:
Paul Masurel
2016-01-20 01:05:24 +09:00
parent 22ebd6cd5c
commit d002ee2aaf
5 changed files with 50 additions and 23 deletions

View File

@@ -1,7 +1,7 @@
use std::io::{BufWriter, Write};
use std::io;
// NOTE(review): this listing appears to be a diff rendered without +/- markers, so `DocId`
// shows up twice. Presumably the `usize` alias is the removed line and the `u32` alias is the
// added one (the other hunks switch `usize` to `DocId`/`u32`) — confirm against the commit.
pub type DocId = usize;
/// Document identifier type (32-bit unsigned).
pub type DocId = u32;
/// Field identifier type (8-bit unsigned).
pub type FieldId = u8;
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]

View File

@@ -57,7 +57,7 @@ impl<R: Read> Iterator for SimplePostingsIterator<R> {
Ok(num_bytes) => {
if num_bytes == 8 {
unsafe {
let val = *(*buf.as_ptr() as *const usize);
let val = *(*buf.as_ptr() as *const u32);
return Some(val)
}
}

View File

@@ -4,7 +4,11 @@ use core::schema::Term;
use core::directory::SharedMmapMemory;
use fst::Streamer;
use fst;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::borrow::Borrow;
use std::io::Cursor;
use core::global::DocId;
use core::serial::DocCursor;
pub struct SegmentIndexReader {
segment: Segment,
@@ -12,9 +16,7 @@ pub struct SegmentIndexReader {
postings_data: SharedMmapMemory,
}
impl SegmentIndexReader {
fn term_cursor<'a>(&'a self) -> SegmentTermCur<'a> {
SegmentTermCur {
segment: &self.segment,
@@ -22,13 +24,37 @@ impl SegmentIndexReader {
postings_data: self.postings_data.borrow(),
}
}
}
// NOTE(review): this listing appears to be a diff rendered without +/- markers. The
// `postings_data: &'a [u8]` / `offset` pair looks like the removed layout and the
// `Cursor`-based fields look like the added one — confirm against the commit.
/// Cursor over the doc ids stored for one term in a segment's postings data.
pub struct SegmentDocCursor<'a> {
postings_data: &'a [u8],
offset: usize,
/// Reader over the posting bytes; each call to `next` reads one little-endian `u32` from it.
postings_data: Cursor<&'a [u8]>,
/// Number of doc ids not yet yielded; `next` decrements it and stops when it reaches 0.
num_docs: DocId,
/// Doc id most recently read by `next` (the visible constructor initialises it to 0).
current_doc: DocId,
}
/// Iterates over the doc ids of one posting list, decoding them lazily from the
/// underlying byte cursor.
impl<'a> Iterator for SegmentDocCursor<'a> {
    type Item = DocId;

    /// Decodes and returns the next doc id, or `None` once the remaining count
    /// has dropped to zero.
    ///
    /// # Panics
    ///
    /// Panics if a little-endian `u32` cannot be read from the postings data
    /// (for instance when the data ends before the announced number of docs).
    fn next(&mut self) -> Option<DocId> {
        // Nothing left to yield: do not touch the underlying reader.
        if self.num_docs == 0 {
            return None;
        }
        // The remaining count is consumed before the read, as in the original flow.
        self.num_docs -= 1;
        let doc_id = self.postings_data.read_u32::<LittleEndian>().unwrap();
        // Remember the value so that `doc()` can report it afterwards.
        self.current_doc = doc_id;
        Some(doc_id)
    }
}
/// `DocCursor` accessors for a segment posting list.
impl<'a> DocCursor for SegmentDocCursor<'a> {
/// Returns the doc id most recently read by `next`.
fn doc(&self) -> DocId{
self.current_doc
}
/// Returns the number of doc ids still to be yielded. `next` decrements this
/// count, so the value shrinks as the cursor is advanced.
fn len(&self) -> DocId {
self.num_docs
}
}
struct SegmentTermCur<'a> {
@@ -41,11 +67,16 @@ impl<'a> SegmentTermCur<'a> {
fn next(&mut self,) -> Option<(Term, SegmentDocCursor<'a>)> {
match self.fst_streamer.next() {
Some((k, offset)) => {
Some((k, offset_u64)) => {
let term = Term::from(k);
let offset = offset_u64 as usize;
let data = &self.postings_data[offset..];
let mut cursor = Cursor::new(data);
let num_docs = cursor.read_u32::<LittleEndian>().unwrap();
let doc_cursor = SegmentDocCursor {
postings_data: self.postings_data,
offset: offset as usize,
postings_data: cursor,
num_docs: num_docs,
current_doc: 0,
};
Some((term, doc_cursor))
},

View File

@@ -9,7 +9,7 @@ pub trait SerializableSegment<'a> {
/// An iterator over doc ids that additionally exposes a current doc id and a length.
pub trait DocCursor: Iterator<Item=DocId> {
/// Returns the doc id the cursor currently reports.
fn doc(&self) -> DocId;
// NOTE(review): `len` is declared twice because this listing appears to be a diff rendered
// without +/- markers; presumably the `usize` signature is the removed line — confirm.
fn len(&self) -> usize;
/// Returns a document count, typed as `DocId`. Whether it is the total or the remaining
/// count is not specified by the trait itself.
fn len(&self) -> DocId;
}
// TODO make iteration over Fields somehow sorted

View File

@@ -70,7 +70,7 @@ impl FieldWriter {
}
// NOTE(review): `max_doc` appears twice because this listing looks like a diff rendered
// without +/- markers; presumably the `usize` field is the removed line — confirm.
/// Index writer state: a doc counter, one `FieldWriter` per `Field`, and a `Directory`.
pub struct IndexWriter {
max_doc: usize,
// presumably the highest (or next) doc id handed out so far — TODO confirm against its users
max_doc: DocId,
/// Maps each `Field` to its `FieldWriter`.
term_writers: HashMap<Field, FieldWriter>,
// presumably the directory the index is written to — TODO confirm
directory: Directory,
}
@@ -169,11 +169,9 @@ impl<'a> CIWTermCursor<'a> {
}
// Builds a `Term` from a clone of this cursor's field and the `form` of the current
// form postings, via `Term::from_field_text`.
// Panics (through `unwrap`) when `current_form_postings` holds no value.
// NOTE(review): this listing appears to be a diff rendered without +/- markers. The one-line
// call and the commented-out struct literal look like removed lines, and the three-statement
// body at the end looks like the added version — confirm against the commit.
fn get_term(&self) -> Term {
Term::from_field_text(self.field.clone(), self.current_form_postings.as_ref().unwrap().form)
// Term {
// field: self.field.clone(),
// text: self.current_form_postings.as_ref().unwrap().form,
// }
let field = self.field.clone();
let value = self.current_form_postings.as_ref().unwrap().form;
Term::from_field_text(field, value)
}
fn doc_cursor(&self,) -> CIWDocCursor<'a> {
@@ -181,7 +179,7 @@ impl<'a> CIWTermCursor<'a> {
.as_ref()
.unwrap()
.postings;
let num_docs = postings.doc_ids.len();
let num_docs = postings.doc_ids.len() as DocId;
CIWDocCursor {
num_docs: num_docs,
docs_it: postings
@@ -191,7 +189,6 @@ impl<'a> CIWTermCursor<'a> {
}
}
fn next_form(&mut self,) -> bool {
match self.form_it.next() {
Some(form_postings) => {
@@ -220,7 +217,6 @@ impl<'a> CIWTermCursor<'a> {
}
}
impl<'a> TermCursor for CIWTermCursor<'a> {
type DocCur = CIWDocCursor<'a>;
@@ -263,7 +259,7 @@ impl<'a> SerializableSegment<'a> for IndexWriter {
// NOTE(review): `num_docs` appears twice because this listing looks like a diff rendered
// without +/- markers; presumably the `usize` field is the removed line — confirm.
/// Doc cursor backed by a borrowed slice of doc ids.
pub struct CIWDocCursor<'a> {
/// Iterator over the borrowed slice of doc ids.
docs_it: slice::Iter<'a, DocId>,
/// Doc id reported by `doc()`, which unwraps it; presumably `None` until the cursor is
/// first advanced — TODO confirm against the `Iterator` impl.
current: Option<DocId>,
num_docs: usize,
/// Document count returned by `len()`.
num_docs: DocId,
}
impl<'a> Iterator for CIWDocCursor<'a> {
@@ -281,7 +277,7 @@ impl<'a> DocCursor for CIWDocCursor<'a> {
self.current.unwrap()
}
// Returns the stored `num_docs` count unchanged.
// NOTE(review): two signatures appear because this listing looks like a diff rendered without
// +/- markers; presumably the `usize` signature is the removed line — confirm.
fn len(&self) -> usize {
fn len(&self) -> DocId {
self.num_docs
}
}