mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-29 14:40:40 +00:00
reader impl serialize.
This commit is contained in:
@@ -28,7 +28,7 @@ impl SimpleCodec {
|
||||
},
|
||||
}
|
||||
for doc_id in doc_it {
|
||||
println!("doc {}", doc_id);
|
||||
println!(" Doc {}", doc_id);
|
||||
match postings.write_u32::<LittleEndian>(doc_id as u32) {
|
||||
Ok(_) => {},
|
||||
Err(_) => {
|
||||
@@ -69,6 +69,7 @@ impl Codec for SimpleCodec {
|
||||
loop {
|
||||
match term_cursor.next() {
|
||||
Some((term, doc_it)) => {
|
||||
println!("Term {}", term.text());
|
||||
term.write_into(&mut term_buffer);
|
||||
match term_trie_builder.insert(&term_buffer, offset as u64) {
|
||||
Ok(_) => {}
|
||||
@@ -83,6 +84,7 @@ impl Codec for SimpleCodec {
|
||||
}
|
||||
}
|
||||
}
|
||||
term_trie_builder.finish();
|
||||
Ok(0)
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use core::global::*;
|
||||
use std::fmt::Write;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use std::string::FromUtf8Error;
|
||||
|
||||
#[derive(Clone,Debug,PartialEq,PartialOrd,Eq)]
|
||||
pub struct FieldValue {
|
||||
@@ -11,7 +12,7 @@ pub struct FieldValue {
|
||||
|
||||
#[derive(Clone,PartialEq,PartialOrd,Eq,Hash)]
|
||||
pub struct Term {
|
||||
pub data: Vec<u8>, // avoid copies
|
||||
data: Vec<u8>, // avoid copies
|
||||
// pub field: Field,
|
||||
// pub text: &'a [u8],
|
||||
}
|
||||
@@ -20,6 +21,10 @@ impl Term {
|
||||
|
||||
// TODO avoid all these copies.
|
||||
|
||||
pub fn text(&self,) -> String {
|
||||
String::from_utf8_lossy(&self.data[1..]).into_owned()
|
||||
}
|
||||
|
||||
pub fn from_field_text(field: Field, text: &str) -> Term {
|
||||
let mut buffer = Vec::with_capacity(1 + text.len());
|
||||
let Field(field_idx) = field;
|
||||
|
||||
@@ -17,6 +17,7 @@ use core::serial::*;
|
||||
use core::error::*;
|
||||
use std::cell::RefCell;
|
||||
use std::borrow::BorrowMut;
|
||||
use core::directory::Segment;
|
||||
|
||||
pub struct SimplePostingsWriter {
|
||||
doc_ids: Vec<DocId>,
|
||||
@@ -104,9 +105,9 @@ impl IndexWriter {
|
||||
self.max_doc += 1;
|
||||
}
|
||||
|
||||
pub fn commit(self,) -> Result<usize> {
|
||||
pub fn commit(self,) -> Result<(Segment, usize)> {
|
||||
let segment = self.directory.new_segment();
|
||||
SimpleCodec::write(&self, &segment)
|
||||
SimpleCodec::write(&self, &segment).map(|sz| (segment, sz))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
|
||||
use std::ops::DerefMut;
|
||||
use tantivy::core::writer::SimplePostingsWriter;
|
||||
use tantivy::core::postings::PostingsWriter;
|
||||
use tantivy::core::reader::SegmentIndexReader;
|
||||
use std::io::{ BufWriter, Write};
|
||||
use regex::Regex;
|
||||
use std::convert::From;
|
||||
@@ -41,7 +42,7 @@ fn test_indexing() {
|
||||
let mut index_writer = IndexWriter::open(&directory);
|
||||
{
|
||||
let mut doc = Document::new();
|
||||
doc.set(Field(1), "a b");
|
||||
doc.set(Field(1), "af b");
|
||||
index_writer.add(doc);
|
||||
}
|
||||
{
|
||||
@@ -54,27 +55,28 @@ fn test_indexing() {
|
||||
doc.set(Field(1), "a b c d");
|
||||
index_writer.add(doc);
|
||||
}
|
||||
let commit_result = index_writer.commit();
|
||||
println!("{:?}", commit_result.err());
|
||||
//debug_assert!(commit_result.is_ok(), commit_result);
|
||||
// assert!(commit_result.is_ok());
|
||||
|
||||
let (segment, num_bytes) = index_writer.commit().unwrap();
|
||||
// reading the segment
|
||||
println!("------");
|
||||
{
|
||||
let index_reader = SegmentIndexReader::open(segment).unwrap();
|
||||
let mut term_cursor = index_reader.term_cursor();
|
||||
loop {
|
||||
match term_cursor.next() {
|
||||
Some((term, mut doc_cursor)) => {
|
||||
println!("Term {:?}", term.text());
|
||||
for doc in doc_cursor {
|
||||
println!(" Doc {}", doc);
|
||||
}
|
||||
},
|
||||
None => {
|
||||
break;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
assert!(false);
|
||||
// SimpleCodec::write(closed_index_writer, output);
|
||||
// let mut term_cursor = closed_index_writer.term_cursor();
|
||||
// loop {
|
||||
// match term_cursor.next() {
|
||||
// Some((term, doc_it)) => {
|
||||
// println!("{:?}", term);
|
||||
// for doc in doc_it {
|
||||
// println!(" doc {}", doc);
|
||||
// }
|
||||
// },
|
||||
// None => {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// assert!(false);
|
||||
}
|
||||
{
|
||||
// TODO add index opening stuff
|
||||
|
||||
Reference in New Issue
Block a user