reader impl serialize.

This commit is contained in:
Paul Masurel
2016-01-20 16:29:57 +09:00
parent cff364be66
commit d69036a35a
4 changed files with 35 additions and 25 deletions

View File

@@ -28,7 +28,7 @@ impl SimpleCodec {
},
}
for doc_id in doc_it {
println!("doc {}", doc_id);
println!(" Doc {}", doc_id);
match postings.write_u32::<LittleEndian>(doc_id as u32) {
Ok(_) => {},
Err(_) => {
@@ -69,6 +69,7 @@ impl Codec for SimpleCodec {
loop {
match term_cursor.next() {
Some((term, doc_it)) => {
println!("Term {}", term.text());
term.write_into(&mut term_buffer);
match term_trie_builder.insert(&term_buffer, offset as u64) {
Ok(_) => {}
@@ -83,6 +84,7 @@ impl Codec for SimpleCodec {
}
}
}
term_trie_builder.finish();
Ok(0)
}

View File

@@ -1,6 +1,7 @@
use core::global::*;
use std::fmt::Write;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::string::FromUtf8Error;
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq)]
pub struct FieldValue {
@@ -11,7 +12,7 @@ pub struct FieldValue {
#[derive(Clone,PartialEq,PartialOrd,Eq,Hash)]
pub struct Term {
pub data: Vec<u8>, // avoid copies
data: Vec<u8>, // avoid copies
// pub field: Field,
// pub text: &'a [u8],
}
@@ -20,6 +21,10 @@ impl Term {
// TODO avoid all these copies.
/// Returns the textual part of this term as an owned `String`.
///
/// The first byte of `data` is skipped (presumably the field index
/// written by `from_field_text` -- confirm against that constructor);
/// the remaining bytes are decoded as UTF-8, with invalid sequences
/// replaced by U+FFFD rather than reported as an error.
///
/// # Panics
///
/// Panics if `data` is empty, because the slice `data[1..]` is then out
/// of range.
pub fn text(&self,) -> String {
    // Everything after the leading byte is the text payload.
    let text_bytes = &self.data[1..];
    // Lossy decoding yields a `Cow`; force an owned copy for the caller.
    let decoded = String::from_utf8_lossy(text_bytes);
    decoded.into_owned()
}
pub fn from_field_text(field: Field, text: &str) -> Term {
let mut buffer = Vec::with_capacity(1 + text.len());
let Field(field_idx) = field;

View File

@@ -17,6 +17,7 @@ use core::serial::*;
use core::error::*;
use std::cell::RefCell;
use std::borrow::BorrowMut;
use core::directory::Segment;
pub struct SimplePostingsWriter {
doc_ids: Vec<DocId>,
@@ -104,9 +105,9 @@ impl IndexWriter {
self.max_doc += 1;
}
pub fn commit(self,) -> Result<usize> {
pub fn commit(self,) -> Result<(Segment, usize)> {
let segment = self.directory.new_segment();
SimpleCodec::write(&self, &segment)
SimpleCodec::write(&self, &segment).map(|sz| (segment, sz))
}
}

View File

@@ -15,6 +15,7 @@ use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
use std::ops::DerefMut;
use tantivy::core::writer::SimplePostingsWriter;
use tantivy::core::postings::PostingsWriter;
use tantivy::core::reader::SegmentIndexReader;
use std::io::{ BufWriter, Write};
use regex::Regex;
use std::convert::From;
@@ -41,7 +42,7 @@ fn test_indexing() {
let mut index_writer = IndexWriter::open(&directory);
{
let mut doc = Document::new();
doc.set(Field(1), "a b");
doc.set(Field(1), "af b");
index_writer.add(doc);
}
{
@@ -54,27 +55,28 @@ fn test_indexing() {
doc.set(Field(1), "a b c d");
index_writer.add(doc);
}
let commit_result = index_writer.commit();
println!("{:?}", commit_result.err());
//debug_assert!(commit_result.is_ok(), commit_result);
// assert!(commit_result.is_ok());
let (segment, num_bytes) = index_writer.commit().unwrap();
// reading the segment
println!("------");
{
let index_reader = SegmentIndexReader::open(segment).unwrap();
let mut term_cursor = index_reader.term_cursor();
loop {
match term_cursor.next() {
Some((term, mut doc_cursor)) => {
println!("Term {:?}", term.text());
for doc in doc_cursor {
println!(" Doc {}", doc);
}
},
None => {
break;
},
}
}
}
assert!(false);
// SimpleCodec::write(closed_index_writer, output);
// let mut term_cursor = closed_index_writer.term_cursor();
// loop {
// match term_cursor.next() {
// Some((term, doc_it)) => {
// println!("{:?}", term);
// for doc in doc_it {
// println!(" doc {}", doc);
// }
// },
// None => {
// break;
// }
// }
// }
// assert!(false);
}
{
// TODO add index opening stuff