This commit is contained in:
Paul Masurel
2016-03-07 10:14:41 +09:00
parent 24f6108bc5
commit fbceebcce3
9 changed files with 127 additions and 128 deletions

View File

@@ -14,7 +14,7 @@ use core::store::StoreWriter;
use core::serialize::BinarySerializable;
use core::simdcompression;
use core::schema::FieldValue;
use core::convert_to_ioerror;
#[derive(Debug)]
pub struct TermInfo {
@@ -22,6 +22,7 @@ pub struct TermInfo {
pub postings_offset: u32,
}
impl BinarySerializable for TermInfo {
const SIZE: Size = Size::Constant(8);
@@ -89,9 +90,9 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {
fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
let json_data = json::encode(segment_info).unwrap();
write.write_all(json_data.as_bytes());
write.flush();
let json_data = try!(json::encode(segment_info).map_err(convert_to_ioerror));
try!(write.write_all(json_data.as_bytes()));
try!(write.flush());
Ok(())
}

View File

@@ -215,7 +215,7 @@ impl Directory for RAMDirectory {
}
fn open_write(&mut self, path: &Path) -> io::Result<WritePtr> {
let full_path = PathBuf::from(&path);
let mut data = SharedVec::new();
let data = SharedVec::new();
self.fs.insert(full_path, data.clone());
Ok(Box::new(data))
}

View File

@@ -32,12 +32,6 @@ pub struct IndexMeta {
}
impl IndexMeta {
fn new() -> IndexMeta {
IndexMeta {
segments: Vec::new(),
schema: Schema::new(),
}
}
fn with_schema(schema: Schema) -> IndexMeta {
IndexMeta {
segments: Vec::new(),

View File

@@ -8,9 +8,19 @@ pub mod reader;
pub mod codec;
pub mod searcher;
pub mod collector;
pub mod skip;
pub mod serialize;
pub mod store;
pub mod simdcompression;
pub mod fstmap;
pub mod index;
use std::error;
use std::io;
pub fn convert_to_ioerror<E: 'static + error::Error + Send + Sync>(err: E) -> io::Error {
io::Error::new(
io::ErrorKind::InvalidData,
err
)
}

View File

@@ -17,56 +17,6 @@ pub trait Postings: Iterator<Item=DocId> {
// impl<T: Iterator<Item=DocId>> Postings for T {}
#[derive(Debug)]
pub struct VecPostings {
doc_ids: Vec<DocId>,
cursor: usize,
}
impl VecPostings {
pub fn new(vals: Vec<DocId>) -> VecPostings {
VecPostings {
doc_ids: vals,
cursor: 0,
}
}
}
impl Postings for VecPostings {
// after skipping position
// the iterator in such a way that the
// next call to next() will return a
// value greater or equal to target.
fn skip_next(&mut self, target: DocId) -> Option<DocId> {
loop {
match Iterator::next(self) {
Some(val) if val >= target => {
return Some(val);
},
None => {
return None;
},
_ => {}
}
}
}
}
impl Iterator for VecPostings {
type Item = DocId;
fn next(&mut self,) -> Option<DocId> {
if self.cursor >= self.doc_ids.len() {
None
}
else {
self.cursor += 1;
Some(self.doc_ids[self.cursor - 1])
}
}
}
pub struct IntersectionPostings<T: Postings> {
@@ -127,6 +77,55 @@ mod tests {
use test::Bencher;
use core::schema::DocId;
#[derive(Debug)]
pub struct VecPostings {
doc_ids: Vec<DocId>,
cursor: usize,
}
impl VecPostings {
pub fn new(vals: Vec<DocId>) -> VecPostings {
VecPostings {
doc_ids: vals,
cursor: 0,
}
}
}
impl Postings for VecPostings {
// after skipping position
// the iterator in such a way that the
// next call to next() will return a
// value greater or equal to target.
fn skip_next(&mut self, target: DocId) -> Option<DocId> {
loop {
match Iterator::next(self) {
Some(val) if val >= target => {
return Some(val);
},
None => {
return None;
},
_ => {}
}
}
}
}
impl Iterator for VecPostings {
type Item = DocId;
fn next(&mut self,) -> Option<DocId> {
if self.cursor >= self.doc_ids.len() {
None
}
else {
self.cursor += 1;
Some(self.doc_ids[self.cursor - 1])
}
}
}
#[test]
fn test_intersection() {
{

View File

@@ -14,12 +14,11 @@ use std::io;
use std::str;
use core::codec::TermInfo;
use core::fstmap::FstMap;
use std::error;
use rustc_serialize::json;
use core::serial::SegmentSerializer;
use core::serial::SerializableSegment;
use std::str::Utf8Error;
use core::index::SegmentInfo;
use core::convert_to_ioerror;
// TODO file structure should be in codec
@@ -99,14 +98,6 @@ impl Iterator for SegmentPostings {
}
fn convert_to_ioerror<E: 'static + error::Error + Send + Sync>(err: E) -> io::Error {
io::Error::new(
io::ErrorKind::InvalidData,
err
)
}
impl SegmentReader {

View File

@@ -4,12 +4,12 @@ use std::ptr;
// use std::iter;
extern {
fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
// fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
// fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
// fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
}
pub struct Encoder {
@@ -48,26 +48,26 @@ impl Encoder {
}
pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
self.input_buffer.clear();
let input_len = input.len();
if input_len + 10000 >= self.input_buffer.len() {
let target_length = input_len + 1024;
self.input_buffer.resize(target_length, 0);
self.output_buffer.resize(target_length, 0);
}
// TODO use clone_from when available
unsafe {
ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
let written_size = encode_unsorted_native(
self.input_buffer.as_mut_ptr(),
input_len as size_t,
self.output_buffer.as_mut_ptr(),
self.output_buffer.len() as size_t,
);
return &self.output_buffer[0..written_size];
}
}
// pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
// self.input_buffer.clear();
// let input_len = input.len();
// if input_len + 10000 >= self.input_buffer.len() {
// let target_length = input_len + 1024;
// self.input_buffer.resize(target_length, 0);
// self.output_buffer.resize(target_length, 0);
// }
// // TODO use clone_from when available
// unsafe {
// ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
// let written_size = encode_unsorted_native(
// self.input_buffer.as_mut_ptr(),
// input_len as size_t,
// self.output_buffer.as_mut_ptr(),
// self.output_buffer.len() as size_t,
// );
// return &self.output_buffer[0..written_size];
// }
// }
}
@@ -92,17 +92,17 @@ impl Decoder {
}
}
pub fn decode_unsorted(&self,
compressed_data: &[u32],
uncompressed_values: &mut [u32]) -> size_t {
unsafe {
return decode_unsorted_native(
compressed_data.as_ptr(),
compressed_data.len() as size_t,
uncompressed_values.as_mut_ptr(),
uncompressed_values.len() as size_t);
}
}
// pub fn decode_unsorted(&self,
// compressed_data: &[u32],
// uncompressed_values: &mut [u32]) -> size_t {
// unsafe {
// return decode_unsorted_native(
// compressed_data.as_ptr(),
// compressed_data.len() as size_t,
// uncompressed_values.as_mut_ptr(),
// uncompressed_values.len() as size_t);
// }
// }
}
@@ -176,22 +176,22 @@ mod tests {
assert_eq!(decoded_data, input);
}
#[test]
fn test_encode_unsorted() {
let mut encoder = Encoder::new();
let num_ints = 10_000 as usize;
let expected_length = 4361;
let input: Vec<u32> = (0..num_ints as u32)
.map(|i| i * 213_127 % 501)
.into_iter().collect();
assert_eq!(input.len(), 10_000);
let encoded_data = encoder.encode_unsorted(&input);
assert_eq!(encoded_data.len(), expected_length);
let decoder = Decoder::new();
let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
assert_eq!(num_ints, decoder.decode_unsorted(&encoded_data[..], &mut decoded_data));
assert_eq!(decoded_data, input);
}
// #[test]
// fn test_encode_unsorted() {
// let mut encoder = Encoder::new();
// let num_ints = 10_000 as usize;
// let expected_length = 4361;
// let input: Vec<u32> = (0..num_ints as u32)
// .map(|i| i * 213_127 % 501)
// .into_iter().collect();
// assert_eq!(input.len(), 10_000);
// let encoded_data = encoder.encode_unsorted(&input);
// assert_eq!(encoded_data.len(), expected_length);
// let decoder = Decoder::new();
// let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
// assert_eq!(num_ints, decoder.decode_unsorted(&encoded_data[..], &mut decoded_data));
// assert_eq!(decoded_data, input);
// }
//
// #[test]
// fn test_simd_intersection() {

View File

@@ -139,7 +139,7 @@ impl StoreReader {
return offset;
}
fn read_block(&self, block_offset: usize) {
fn read_block(&self, block_offset: usize) -> io::Result<()> {
let mut current_block_mut = self.current_block.borrow_mut();
current_block_mut.clear();
let total_buffer = self.data.as_slice();
@@ -147,21 +147,21 @@ impl StoreReader {
let block_length = u32::deserialize(&mut cursor).unwrap();
let block_array: &[u8] = &total_buffer[(block_offset + 4 as usize)..(block_offset + 4 + block_length as usize)];
let mut lz4_decoder = lz4::Decoder::new(Cursor::new(block_array)).unwrap();
lz4_decoder.read_to_end(&mut current_block_mut);
lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())
}
pub fn get(&self, doc_id: &DocId) -> io::Result<Document> {
let OffsetIndex(first_doc_id, block_offset) = self.block_offset(doc_id);
self.read_block(block_offset as usize);
try!(self.read_block(block_offset as usize));
let mut current_block_mut = self.current_block.borrow_mut();
let mut cursor = Cursor::new(&mut current_block_mut[..]);
for _ in first_doc_id..*doc_id {
let block_length = try!(u32::deserialize(&mut cursor));
cursor.seek(SeekFrom::Current(block_length as i64));
try!(cursor.seek(SeekFrom::Current(block_length as i64)));
}
try!(u32::deserialize(&mut cursor));
let mut field_values = Vec::new();
let num_fields = u32::deserialize(&mut cursor).unwrap();
let num_fields = try!(u32::deserialize(&mut cursor));
for _ in 0..num_fields {
let field_value = try!(FieldValue::deserialize(&mut cursor));
field_values.push(field_value);

View File

@@ -35,6 +35,10 @@ pub use core::schema::Document;
pub use core::collector;
pub use core::reader::SegmentReader;
#[cfg(test)]
mod tests {