mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
blop
This commit is contained in:
@@ -97,6 +97,6 @@ impl SimpleCodec {
|
||||
|
||||
pub fn write<I: SerializableSegment>(index: &I, segment: &Segment) -> Result<(), IOError> {
|
||||
let mut serializer = try!(SimpleCodec::serializer(segment));
|
||||
index.write(&mut serializer)
|
||||
index.write(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,27 +1,29 @@
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::fs::File;
|
||||
use fst::Map;
|
||||
use fst::MapBuilder;
|
||||
use std::rc::Rc;
|
||||
use core::serialize::BinarySerializable;
|
||||
use std::marker::PhantomData;
|
||||
use fst;
|
||||
use std::ops::Deref;
|
||||
|
||||
|
||||
// Adapts an `fst::Error` to an `io::Error` so that fst failures can be
// returned from functions whose error type is `io::Error`.
// The resulting error has kind `io::ErrorKind::Other` and carries the
// original fst error as its payload.
fn convert_fst_error(e: fst::Error) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, e)
|
||||
}
|
||||
|
||||
struct FstMapBuilder<V: BinarySerializable> {
|
||||
fst_builder: MapBuilder<File>,
|
||||
pub struct FstMapBuilder<W: Write, V: BinarySerializable> {
|
||||
fst_builder: MapBuilder<W>,
|
||||
data: Vec<u8>,
|
||||
_phantom_: PhantomData<V>,
|
||||
}
|
||||
|
||||
impl<W: Write, V: BinarySerializable> FstMapBuilder<W, V> {
|
||||
|
||||
impl<V: BinarySerializable> FstMapBuilder<V> {
|
||||
|
||||
fn new(file: File) -> io::Result<FstMapBuilder<V>> {
|
||||
let fst_builder = try!(MapBuilder::new(file).map_err(convert_fst_error));
|
||||
fn new(w: W) -> io::Result<FstMapBuilder<W, V>> {
|
||||
let fst_builder = try!(MapBuilder::new(w).map_err(convert_fst_error));
|
||||
Ok(FstMapBuilder {
|
||||
fst_builder: fst_builder,
|
||||
data: Vec::new(),
|
||||
@@ -37,12 +39,46 @@ impl<V: BinarySerializable> FstMapBuilder<V> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(self,) -> io::Result<()> {
|
||||
fn close(self,) -> io::Result<W> {
|
||||
let mut file = try!(self.fst_builder
|
||||
.into_inner()
|
||||
.map_err(convert_fst_error));
|
||||
file.write_all(&self.data);
|
||||
Ok(())
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Reader-side handle over a byte buffer `R` (anything that derefs to `[u8]`),
// typed by the value type `V` it is meant to yield.
// NOTE(review): presumably the counterpart of `FstMapBuilder`; the `fst::Map`
// field is still commented out, so no fst lookup structure is held yet.
pub struct FstMap<R: Deref<Target=[u8]>, V: BinarySerializable> {
|
||||
//fst::Map,
|
||||
// Raw bytes backing the map.
data: R,
|
||||
// Marker only: ties the struct to `V` without storing a value of that type.
_phantom_: PhantomData<V>,
|
||||
}
|
||||
|
||||
impl<R: Deref<Target=[u8]>, V: BinarySerializable> FstMap<R, V> {
|
||||
// Wraps `data` in an `FstMap`. The buffer is stored as-is; nothing is
// parsed or validated here.
pub fn new(data: R) -> FstMap<R, V> {
|
||||
FstMap {
|
||||
data: data,
|
||||
_phantom_: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
// Placeholder lookup: ignores `key` and always returns `None`.
// NOTE(review): declared without a `self` receiver, so it is an associated
// function and cannot reach `data` yet; looks unfinished, confirm intent.
pub fn read(key: &[u8]) -> Option<V> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Tests for the fst-backed map builder.
mod tests {
|
||||
// `FstMap` is imported but not exercised by the test below.
use super::{FstMapBuilder, FstMap};
|
||||
|
||||
#[test]
|
||||
// Builds a map over an in-memory `Vec<u8>`, inserts two keys and closes the
// builder. Each step is only checked through `unwrap()`; the bytes returned
// by `close` are not inspected.
fn test_fstmap() {
|
||||
let mut fst_map_builder: FstMapBuilder<Vec<u8>, u32> = FstMapBuilder::new(Vec::new()).unwrap();
|
||||
fst_map_builder.insert("abc".as_bytes(), &34).unwrap();
|
||||
fst_map_builder.insert("abcd".as_bytes(), &343).unwrap();
|
||||
let data = fst_map_builder.close().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
use core::schema::*;
|
||||
use std::fmt;
|
||||
use std::io::Error as IOError;
|
||||
use std::io;
|
||||
|
||||
pub trait SegmentSerializer<Output> {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), IOError>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), IOError>; // TODO add size
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error>;
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error>; // TODO add size
|
||||
fn store_doc(&mut self, field: &mut Iterator<Item=&FieldValue>);
|
||||
fn close(self,) -> Result<Output, IOError>;
|
||||
fn close(self,) -> Result<Output, io::Error>;
|
||||
}
|
||||
|
||||
pub trait SerializableSegment {
|
||||
fn write<Output, SegSer: SegmentSerializer<Output>>(&self, serializer: &mut SegSer) -> Result<Output, IOError>;
|
||||
fn write<Output, SegSer: SegmentSerializer<Output>>(&self, serializer: SegSer) -> io::Result<Output>;
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ impl DebugSegmentSerializer {
|
||||
|
||||
pub fn debug_string<S: SerializableSegment>(index: &S) -> String {
|
||||
let mut serializer = DebugSegmentSerializer::new();
|
||||
index.write(&mut serializer).unwrap()
|
||||
index.write(serializer).unwrap()
|
||||
}
|
||||
|
||||
pub fn new() -> DebugSegmentSerializer {
|
||||
@@ -41,7 +41,7 @@ impl DebugSegmentSerializer {
|
||||
}
|
||||
|
||||
impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), IOError> {
|
||||
fn new_term(&mut self, term: &Term, doc_freq: DocId) -> Result<(), io::Error> {
|
||||
self.text.push_str(&format!("{:?}\n", term));
|
||||
Ok(())
|
||||
}
|
||||
@@ -57,14 +57,14 @@ impl SegmentSerializer<String> for DebugSegmentSerializer {
|
||||
}
|
||||
}
|
||||
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), IOError> {
|
||||
fn write_docs(&mut self, docs: &[DocId]) -> Result<(), io::Error> {
|
||||
for doc in docs {
|
||||
self.text.push_str(&format!(" - Doc {:?}\n", doc));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn close(self,) -> Result<String, IOError> {
|
||||
fn close(self,) -> Result<String, io::Error> {
|
||||
Ok(self.text)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use core::schema::*;
|
||||
use core::codec::*;
|
||||
use std::io;
|
||||
use std::rc::Rc;
|
||||
use core::directory::Directory;
|
||||
use core::analyzer::SimpleTokenizer;
|
||||
@@ -35,8 +36,7 @@ pub struct IndexWriter {
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
|
||||
fn new_segment_writer(directory: &Directory, ) -> SegmentWriter {
|
||||
fn new_segment_writer(directory: &Directory, ) -> SegmentWriter {
|
||||
let segment = directory.new_segment();
|
||||
SegmentWriter::for_segment(segment)
|
||||
}
|
||||
@@ -174,15 +174,15 @@ impl SegmentWriter {
|
||||
self.get_postings_writer(term).suscribe(doc);
|
||||
}
|
||||
}
|
||||
//
|
||||
// impl SerializableSegment for SegmentWriter {
|
||||
// fn write<Output, SegSer: SegmentSerializer<Output>>(&self, serializer: &mut SegSer) -> Result<Output> {
|
||||
// for (term, postings_id) in self.term_index.iter() {
|
||||
// let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
// let term_docfreq = doc_ids.len() as u32;
|
||||
// serializer.new_term(&term, term_docfreq);
|
||||
// serializer.write_docs(&doc_ids);
|
||||
// }
|
||||
// serializer.close()
|
||||
// }
|
||||
// }
|
||||
|
||||
impl SerializableSegment for SegmentWriter {
|
||||
fn write<Output, SegSer: SegmentSerializer<Output>>(&self, mut serializer: SegSer) -> io::Result<Output> {
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
let doc_ids = &self.postings[postings_id.clone()].doc_ids;
|
||||
let term_docfreq = doc_ids.len() as u32;
|
||||
serializer.new_term(&term, term_docfreq);
|
||||
serializer.write_docs(&doc_ids);
|
||||
}
|
||||
serializer.close()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,11 +7,11 @@ extern crate tempdir;
|
||||
use tantivy::core::schema::*;
|
||||
use tantivy::core::writer::IndexWriter;
|
||||
use tantivy::core::collector::Collector;
|
||||
use tantivy::core::searcher::{Searcher, DocAddress};
|
||||
use tantivy::core::searcher::Searcher;
|
||||
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
|
||||
use tantivy::core::reader::SegmentReader;
|
||||
use regex::Regex;
|
||||
|
||||
use tantivy::core::serial::DebugSegmentSerializer;
|
||||
|
||||
|
||||
// only makes sense for a single segment
|
||||
@@ -49,7 +49,7 @@ fn test_indexing() {
|
||||
let text_fieldtype = FieldOptions::new().set_tokenized_indexed();
|
||||
let text_field = schema.add_field("text", &text_fieldtype);
|
||||
|
||||
let mut directory = Directory::create_from_tempdir(schema).unwrap();
|
||||
let directory = Directory::create_from_tempdir(schema).unwrap();
|
||||
|
||||
{
|
||||
// writing the segment
|
||||
@@ -70,16 +70,20 @@ fn test_indexing() {
|
||||
index_writer.add(doc);
|
||||
}
|
||||
|
||||
//let debug_serializer = DebugSegmentSerializer::new();
|
||||
//let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
|
||||
let segment_str_before_writing = DebugSegmentSerializer::debug_string(index_writer.current_segment_writer());
|
||||
println!("{:?}", segment_str_before_writing);
|
||||
|
||||
|
||||
let commit_result = index_writer.commit();
|
||||
assert!(commit_result.is_ok());
|
||||
let segment = commit_result.unwrap();
|
||||
SegmentReader::open(segment).unwrap();
|
||||
let segment_reader = SegmentReader::open(segment).unwrap();
|
||||
// TODO ENABLE TEST
|
||||
//let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader);
|
||||
//assert_eq!(segment_str_before_writing, segment_str_after_reading);
|
||||
|
||||
// let segment_str_after_reading = DebugSegmentSerializer::debug_string(&segment_reader);
|
||||
// assert_eq!(segment_str_before_writing, segment_str_after_reading);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -88,7 +92,7 @@ fn test_searcher() {
|
||||
let mut schema = Schema::new();
|
||||
let text_fieldtype = FieldOptions::new().set_tokenized_indexed();
|
||||
let text_field = schema.add_field("text", &text_fieldtype);
|
||||
let mut directory = Directory::create_from_tempdir(schema).unwrap();
|
||||
let directory = Directory::create_from_tempdir(schema).unwrap();
|
||||
|
||||
{
|
||||
// writing the segment
|
||||
|
||||
Reference in New Issue
Block a user