mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 23:20:40 +00:00
blop
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
use std::io;
|
||||
use std::io::{Read, Write};
|
||||
use core::serial::{SegmentSerializer, SerializableSegment};
|
||||
use rustc_serialize::json;
|
||||
use core::directory::WritePtr;
|
||||
use core::index::Segment;
|
||||
use core::index::SegmentInfo;
|
||||
use core::index::SegmentComponent;
|
||||
use core::schema::Term;
|
||||
use core::schema::DocId;
|
||||
@@ -79,6 +81,14 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
|
||||
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
|
||||
let json_data = json::encode(segment_info).unwrap();
|
||||
write.write_all(json_data.as_bytes());
|
||||
write.flush();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(mut self,) -> io::Result<()> {
|
||||
// TODO handle errors on close
|
||||
try!(self.term_fst_builder
|
||||
|
||||
@@ -41,9 +41,6 @@ impl Collector for FirstNCollector {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
|
||||
/// Collector carrying a single running `count`.
///
/// NOTE(review): presumably the number of documents seen so far; the
/// `Collector` impl is outside this view, so confirm there.
pub struct CountCollector {
    /// Current value of the counter.
    count: usize,
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::path::{PathBuf, Path};
|
||||
use std::io;
|
||||
use core::schema::Schema;
|
||||
use core::schema::DocId;
|
||||
use std::io::Write;
|
||||
use std::sync::{Arc, RwLock, RwLockWriteGuard, RwLockReadGuard};
|
||||
use std::fmt;
|
||||
@@ -190,10 +191,18 @@ impl Index {
|
||||
}
|
||||
|
||||
|
||||
|
||||
/////////////////////////
|
||||
// Segment
|
||||
|
||||
#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
|
||||
pub struct SegmentInfo {
|
||||
pub max_doc: DocId,
|
||||
}
|
||||
|
||||
|
||||
pub enum SegmentComponent {
|
||||
INFO,
|
||||
POSTINGS,
|
||||
// POSITIONS,
|
||||
TERMS,
|
||||
@@ -215,6 +224,7 @@ impl Segment {
|
||||
fn path_suffix(component: &SegmentComponent)-> &'static str {
|
||||
match *component {
|
||||
// SegmentComponent::POSITIONS => ".pos",
|
||||
SegmentComponent::INFO => ".info",
|
||||
SegmentComponent::POSTINGS => ".idx",
|
||||
SegmentComponent::TERMS => ".term",
|
||||
SegmentComponent::STORE => ".store",
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
use core::schema::*;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use core::index::SegmentInfo;
|
||||
|
||||
pub trait SegmentSerializer<Output> {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error>; // TODO add size
|
||||
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>);
|
||||
fn close(self,) -> Result<Output, io::Error>;
|
||||
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()>;
|
||||
}
|
||||
|
||||
pub trait SerializableSegment {
|
||||
@@ -67,6 +69,10 @@ impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
fn close(self,) -> Result<String, io::Error> {
|
||||
Ok(self.text)
|
||||
}
|
||||
|
||||
/// No-op: this serializer ignores the segment info and always returns `Ok(())`.
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
    // Intentionally unused.
    let _ = segment_info;
    Ok(())
}
|
||||
}
|
||||
|
||||
pub fn serialize_eq<L: SerializableSegment, R: SerializableSegment>(left: &L, right: &R) -> bool{
|
||||
|
||||
@@ -9,6 +9,7 @@ use core::serial::{SegmentSerializer, SerializableSegment};
|
||||
use core::analyzer::StreamingIterator;
|
||||
use std::io::Error as IOError;
|
||||
use core::index::Segment;
|
||||
use core::index::SegmentInfo;
|
||||
|
||||
|
||||
pub struct PostingsWriter {
|
||||
@@ -69,7 +70,7 @@ impl IndexWriter {
|
||||
match segment_writer_res {
|
||||
Ok(segment_writer) => {
|
||||
let segment = segment_writer.segment();
|
||||
segment_writer.write_pending();
|
||||
segment_writer.finalize();
|
||||
// write(self.segment_serializer);
|
||||
// try!(SimpleCodec::write(&self.segment_writer, &segment).map(|sz| (segment.clone(), sz)));
|
||||
// At this point, the segment is written
|
||||
@@ -88,6 +89,7 @@ impl IndexWriter {
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub struct SegmentWriter {
|
||||
num_tokens: usize,
|
||||
max_doc: DocId,
|
||||
@@ -99,22 +101,33 @@ pub struct SegmentWriter {
|
||||
|
||||
impl SegmentWriter {
|
||||
|
||||
// write on disk all of the stuff that
|
||||
// are still on RAM.
|
||||
// for this version, that's the term dictionary
|
||||
// and the postings
|
||||
fn write_pending(mut self,) -> Result<(), IOError> {
|
||||
// Write on disk all of the stuff that
|
||||
// is still on RAM :
|
||||
// - the dictionary in an fst
|
||||
// - the postings
|
||||
// - the segment info
|
||||
fn finalize(mut self,) -> Result<(), IOError> {
|
||||
{
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
let term_docfreq = doc_ids.len() as u32;
|
||||
self.segment_serializer.new_term(&term, term_docfreq);
|
||||
self.segment_serializer.write_docs(&doc_ids);
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
let term_docfreq = doc_ids.len() as u32;
|
||||
self.segment_serializer.new_term(&term, term_docfreq);
|
||||
self.segment_serializer.write_docs(&doc_ids);
|
||||
}
|
||||
}
|
||||
{
|
||||
let segment_info = SegmentInfo {
|
||||
max_doc: self.max_doc
|
||||
};
|
||||
self.segment_serializer.write_segment_info(&segment_info);
|
||||
}
|
||||
self.segment_serializer.close()
|
||||
}
|
||||
|
||||
/// Returns the writer's current `max_doc` value.
pub fn num_docs(&self) -> DocId {
    let doc_count = self.max_doc;
    doc_count
}
|
||||
|
||||
/// Returns the `Segment` reported by the underlying segment serializer.
pub fn segment(&self) -> Segment {
    let serializer = &self.segment_serializer;
    serializer.segment()
}
|
||||
|
||||
14
src/lib.rs
14
src/lib.rs
@@ -96,18 +96,14 @@ mod tests {
|
||||
}
|
||||
|
||||
let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
|
||||
println!("{:?}", segment_str_before_writing);
|
||||
|
||||
|
||||
let commit_result = index_writer.commit();
|
||||
assert!(commit_result.is_ok());
|
||||
let segment = commit_result.unwrap();
|
||||
SegmentReader::open(segment).unwrap();
|
||||
|
||||
// let segment_reader = SegmentReader::open(segment).unwrap();
|
||||
// TODO ENABLE TEST
|
||||
// let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader);
|
||||
// assert_eq!(segment_str_before_writing, segment_str_after_reading);
|
||||
let segment = commit_result.unwrap();
|
||||
let segment_reader = SegmentReader::open(segment).unwrap();
|
||||
assert_eq!(segment_reader.num_docs(), 3);
|
||||
let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader);
|
||||
assert_eq!(segment_str_before_writing, segment_str_after_reading);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user