mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-18 09:10:41 +00:00
blop
This commit is contained in:
@@ -13,6 +13,7 @@ use core::serialize::BinarySerializable;
|
||||
use core::simdcompression;
|
||||
use core::schema::TextFieldValue;
|
||||
use core::convert_to_ioerror;
|
||||
use core::fastfield::FastFieldWriters;
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -53,7 +54,6 @@ pub struct SegmentSerializer {
|
||||
|
||||
impl SegmentSerializer {
|
||||
|
||||
|
||||
pub fn for_segment(segment: &Segment) -> io::Result<SegmentSerializer> {
|
||||
let term_write = try!(segment.open_write(SegmentComponent::TERMS));
|
||||
let postings_write = try!(segment.open_write(SegmentComponent::POSTINGS));
|
||||
@@ -89,6 +89,10 @@ impl SegmentSerializer {
|
||||
.insert(term.as_slice(), &term_info)
|
||||
}
|
||||
|
||||
pub fn write_fast_field(&mut self, vals: &Vec<u32>) {
|
||||
|
||||
}
|
||||
|
||||
pub fn write_docs(&mut self, doc_ids: &[DocId]) -> io::Result<()> {
|
||||
// TODO write_all transmuted [u8]
|
||||
let docs_data = self.encoder.encode_sorted(doc_ids);
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
use std::io::Write;
|
||||
use std::io;
|
||||
use std::io::Cursor;
|
||||
use core::directory::WritePtr;
|
||||
use core::serialize::BinarySerializable;
|
||||
use core::directory::ReadOnlySource;
|
||||
use core::schema::DocId;
|
||||
use core::schema::Schema;
|
||||
use core::schema::Document;
|
||||
use std::ops::Deref;
|
||||
use core::fastdivide::count_leading_zeros;
|
||||
use core::fastdivide::DividerU32;
|
||||
@@ -14,12 +16,13 @@ pub fn compute_num_bits(amplitude: u32) -> u8 {
|
||||
32 - count_leading_zeros(amplitude)
|
||||
}
|
||||
|
||||
fn serialize_packed_ints<I: Iterator<Item=u32>>(vals_it: I, num_bits: u8, write: &mut Write) -> io::Result<()> {
|
||||
fn serialize_packed_ints<I: Iterator<Item=u32>>(vals_it: I, num_bits: u8, write: &mut Write) -> io::Result<usize> {
|
||||
let mut mini_buffer_written = 0;
|
||||
let mut mini_buffer = 0u64;
|
||||
let mut written_size = 0;
|
||||
for val in vals_it {
|
||||
if mini_buffer_written + num_bits > 64 {
|
||||
try!(mini_buffer.serialize(write));
|
||||
written_size += try!(mini_buffer.serialize(write));
|
||||
mini_buffer = 0;
|
||||
mini_buffer_written = 0;
|
||||
}
|
||||
@@ -27,33 +30,64 @@ fn serialize_packed_ints<I: Iterator<Item=u32>>(vals_it: I, num_bits: u8, write:
|
||||
mini_buffer_written += num_bits;
|
||||
}
|
||||
if mini_buffer_written > 0 {
|
||||
try!(mini_buffer.serialize(write));
|
||||
written_size += try!(mini_buffer.serialize(write));
|
||||
}
|
||||
Ok(())
|
||||
Ok(written_size)
|
||||
}
|
||||
|
||||
pub struct FastFieldWriters {
|
||||
u32_fast_fields: Vec<U32Field>,
|
||||
u32_fast_field_writers: Vec<U32FastFieldWriter>,
|
||||
write: WritePtr,
|
||||
}
|
||||
|
||||
impl FastFieldWriters {
|
||||
pub fn from_schema(schema: &Schema) -> FastFieldWriters {
|
||||
let u32_fast_fields: Vec<U32Field> = schema
|
||||
.get_u32_fields()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(i, u32_field_entry)| u32_field_entry.option.is_fast())
|
||||
.map(|(i, u32_field_entry)| U32Field(i as u8))
|
||||
.collect();
|
||||
let num_32_fast_fields = u32_fast_fields.len();
|
||||
pub fn with_num_fields(write: WritePtr,) -> FastFieldWriters {
|
||||
FastFieldWriters {
|
||||
u32_fast_fields: u32_fast_fields,
|
||||
u32_fast_field_writers: (0..num_32_fast_fields)
|
||||
.map(|_| U32FastFieldWriter::new())
|
||||
.collect()
|
||||
write: write
|
||||
}
|
||||
}
|
||||
//
|
||||
//
|
||||
// pub fn write_fast_field(&mut self, vals: &Vec<u32>) -> io::Result<()> {
|
||||
// let u32_fast_field_writer = U32FastFieldWriter::new();
|
||||
// for val in vals {
|
||||
// u32_fast_field_writer.add(*val);
|
||||
// }
|
||||
// self.u32_fast_field_writers.finish();
|
||||
// }
|
||||
// pub fn from_schema(schema: &Schema) -> FastFieldWriters {
|
||||
// let u32_fast_fields: Vec<U32Field> = schema
|
||||
// .get_u32_fields()
|
||||
// .iter()
|
||||
// .enumerate()
|
||||
// .filter(|&(_, u32_field_entry)| u32_field_entry.option.is_fast())
|
||||
// .map(|(i, _)| U32Field(i as u8))
|
||||
// .collect();
|
||||
// let num_32_fast_fields = u32_fast_fields.len();
|
||||
// FastFieldWriters {
|
||||
// u32_fast_fields: u32_fast_fields,
|
||||
// u32_fast_field_writers: (0..num_32_fast_fields)
|
||||
// .map(|_| U32FastFieldWriter::new())
|
||||
// .collect()
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
|
||||
|
||||
// pub fn add_doc(&mut self, doc: &Document) -> io::Result<()> {
|
||||
// for (field, field_writer) in self.u32_fast_fields.iter().zip(self.u32_fast_field_writers.iter_mut()) {
|
||||
// let some_val = doc.get_u32(field);
|
||||
// match some_val {
|
||||
// Some(v) => {
|
||||
// field_writer.add(v);
|
||||
// }
|
||||
// None => {
|
||||
// return Err(io::Error::new(io::ErrorKind::InvalidData, "u32 fast field missing"));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
|
||||
@@ -73,22 +107,24 @@ impl U32FastFieldWriter {
|
||||
self.vals.push(val);
|
||||
}
|
||||
|
||||
pub fn close(&self, write: &mut Write) -> io::Result<()> {
|
||||
pub fn close(&self, write: &mut Write) -> io::Result<usize> {
|
||||
if self.vals.is_empty() {
|
||||
return Ok(())
|
||||
return Ok((0))
|
||||
}
|
||||
let mut written_size = 0;
|
||||
let min = self.vals.iter().min().unwrap();
|
||||
let max = self.vals.iter().max().unwrap();
|
||||
try!(min.serialize(write));
|
||||
written_size += try!(min.serialize(write));
|
||||
let amplitude: u32 = max - min;
|
||||
let num_bits: u8 = compute_num_bits(amplitude);
|
||||
try!(num_bits.serialize(write));
|
||||
written_size += try!(num_bits.serialize(write));
|
||||
let vals_it = self.vals.iter().map(|i| i-min);
|
||||
serialize_packed_ints(vals_it, num_bits, write)
|
||||
written_size += try!(serialize_packed_ints(vals_it, num_bits, write));
|
||||
Ok(written_size)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IntFastFieldReader {
|
||||
pub struct U32FastFieldReader {
|
||||
_data: ReadOnlySource,
|
||||
data_ptr: *const u64,
|
||||
min_val: u32,
|
||||
@@ -98,15 +134,15 @@ pub struct IntFastFieldReader {
|
||||
divider: DividerU32,
|
||||
}
|
||||
|
||||
impl IntFastFieldReader {
|
||||
pub fn open(data: &ReadOnlySource) -> io::Result<IntFastFieldReader> {
|
||||
impl U32FastFieldReader {
|
||||
pub fn open(data: &ReadOnlySource) -> io::Result<U32FastFieldReader> {
|
||||
let mut cursor: Cursor<&[u8]> = Cursor::new(&*data);
|
||||
let min_val = try!(u32::deserialize(&mut cursor));
|
||||
let num_bits = try!(u8::deserialize(&mut cursor));
|
||||
let mask = (1 << num_bits) - 1;
|
||||
let num_in_pack = 64u32 / (num_bits as u32);
|
||||
let ptr: *const u8 = &(data.deref()[5]);
|
||||
Ok(IntFastFieldReader {
|
||||
Ok(U32FastFieldReader {
|
||||
_data: data.slice(5, data.len()),
|
||||
data_ptr: ptr as *const u64,
|
||||
min_val: min_val,
|
||||
@@ -132,7 +168,7 @@ mod tests {
|
||||
|
||||
use super::compute_num_bits;
|
||||
use super::U32FastFieldWriter;
|
||||
use super::IntFastFieldReader;
|
||||
use super::U32FastFieldReader;
|
||||
use core::directory::ReadOnlySource;
|
||||
use test::Bencher;
|
||||
use test;
|
||||
@@ -165,7 +201,7 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let source = ReadOnlySource::Anonymous(buffer);
|
||||
let fast_field_reader = IntFastFieldReader::open(&source).unwrap();
|
||||
let fast_field_reader = U32FastFieldReader::open(&source).unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4u32);
|
||||
assert_eq!(fast_field_reader.get(1), 14u32);
|
||||
assert_eq!(fast_field_reader.get(2), 2u32);
|
||||
@@ -186,7 +222,7 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let source = ReadOnlySource::Anonymous(buffer);
|
||||
let fast_field_reader = IntFastFieldReader::open(&source).unwrap();
|
||||
let fast_field_reader = U32FastFieldReader::open(&source).unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4u32);
|
||||
assert_eq!(fast_field_reader.get(1), 14_082_001u32);
|
||||
assert_eq!(fast_field_reader.get(2), 3_052u32);
|
||||
@@ -213,7 +249,7 @@ mod tests {
|
||||
int_fast_field_writer.close(&mut buffer).unwrap();
|
||||
}
|
||||
let source = ReadOnlySource::Anonymous(buffer);
|
||||
let int_fast_field_reader = IntFastFieldReader::open(&source).unwrap();
|
||||
let int_fast_field_reader = U32FastFieldReader::open(&source).unwrap();
|
||||
|
||||
let n = test::black_box(100);
|
||||
let mut a = 0u32;
|
||||
@@ -261,7 +297,7 @@ mod tests {
|
||||
int_fast_field_writer.close(&mut buffer).unwrap();
|
||||
}
|
||||
let source = ReadOnlySource::Anonymous(buffer);
|
||||
let int_fast_field_reader = IntFastFieldReader::open(&source).unwrap();
|
||||
let int_fast_field_reader = U32FastFieldReader::open(&source).unwrap();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
let mut a = 0u32;
|
||||
@@ -284,7 +320,7 @@ mod tests {
|
||||
int_fast_field_writer.close(&mut buffer).unwrap();
|
||||
}
|
||||
let source = ReadOnlySource::Anonymous(buffer);
|
||||
let int_fast_field_reader = IntFastFieldReader::open(&source).unwrap();
|
||||
let int_fast_field_reader = U32FastFieldReader::open(&source).unwrap();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000);
|
||||
let mut a = 0u32;
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::slice;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::io::Read;
|
||||
use std::str;
|
||||
use core::serialize::BinarySerializable;
|
||||
use rustc_serialize::Decodable;
|
||||
use rustc_serialize::Encodable;
|
||||
@@ -370,7 +369,7 @@ impl Term {
|
||||
let U32Field(field_idx) = *field;
|
||||
buffer.clear();
|
||||
buffer.push(128 | field_idx);
|
||||
val.serialize(&mut buffer);
|
||||
val.serialize(&mut buffer).unwrap();
|
||||
Term {
|
||||
data: buffer,
|
||||
}
|
||||
@@ -465,6 +464,15 @@ impl Document {
|
||||
self.u32_field_values.iter()
|
||||
}
|
||||
|
||||
pub fn get_u32(&self, field: &U32Field) -> Option<u32> {
|
||||
self.u32_field_values
|
||||
.iter()
|
||||
.filter(|field_value| field_value.field == *field)
|
||||
.map(|field_value| &field_value.value)
|
||||
.cloned()
|
||||
.next()
|
||||
}
|
||||
|
||||
pub fn get_texts<'a>(&'a self, field: &TextField) -> Vec<&'a String> {
|
||||
self.text_field_values
|
||||
.iter()
|
||||
|
||||
@@ -61,7 +61,6 @@ pub struct SegmentWriter {
|
||||
max_doc: DocId,
|
||||
tokenizer: SimpleTokenizer,
|
||||
postings_writer: PostingsWriter,
|
||||
fastfield_writers: FastFieldWriters,
|
||||
segment_serializer: SegmentSerializer,
|
||||
}
|
||||
|
||||
@@ -98,7 +97,6 @@ impl SegmentWriter {
|
||||
postings_writer: PostingsWriter::new(),
|
||||
segment_serializer: segment_serializer,
|
||||
tokenizer: SimpleTokenizer::new(),
|
||||
fastfield_writers: FastFieldWriters::from_schema(schema),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -129,6 +127,7 @@ impl SegmentWriter {
|
||||
let mut stored_fieldvalues_it = doc.text_fields().filter(|text_field_value| {
|
||||
schema.text_field_options(&text_field_value.field).is_stored()
|
||||
});
|
||||
// try!(self.fastfield_writers.add_doc(&doc));
|
||||
try!(self.segment_serializer.store_doc(&mut stored_fieldvalues_it));
|
||||
self.max_doc += 1;
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user