blop

2026-06-01 08:00:41 +00:00 · 2016-03-07 10:14:41 +09:00
parent 24f6108bc5
commit fbceebcce3
9 changed files with 127 additions and 128 deletions
--- a/src/core/codec.rs
+++ b/src/core/codec.rs
@@ -14,7 +14,7 @@ use core::store::StoreWriter;
 use core::serialize::BinarySerializable;
 use core::simdcompression;
 use core::schema::FieldValue;
-
+use core::convert_to_ioerror;

 #[derive(Debug)]
 pub struct TermInfo {
@@ -22,6 +22,7 @@ pub struct TermInfo {
    pub postings_offset: u32,
 }

+
 impl BinarySerializable for TermInfo {

    const SIZE: Size = Size::Constant(8);
@@ -89,9 +90,9 @@ impl SegmentSerializer<()> for SimpleSegmentSerializer {

    fn write_segment_info(&self, segment_info: &SegmentInfo) -> io::Result<()> {
        let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
-        let json_data = json::encode(segment_info).unwrap();
-        write.write_all(json_data.as_bytes());
-        write.flush();
+        let json_data = try!(json::encode(segment_info).map_err(convert_to_ioerror));
+        try!(write.write_all(json_data.as_bytes()));
+        try!(write.flush());
        Ok(())
    }

--- a/src/core/directory.rs
+++ b/src/core/directory.rs
@@ -215,7 +215,7 @@ impl Directory for RAMDirectory {
    }
    fn open_write(&mut self, path: &Path) -> io::Result<WritePtr> {
        let full_path = PathBuf::from(&path);
-        let mut data = SharedVec::new();
+        let data = SharedVec::new();
        self.fs.insert(full_path, data.clone());
        Ok(Box::new(data))
    }
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -32,12 +32,6 @@ pub struct IndexMeta {
 }

 impl IndexMeta {
-    fn new() -> IndexMeta {
-        IndexMeta {
-            segments: Vec::new(),
-            schema: Schema::new(),
-        }
-    }
    fn with_schema(schema: Schema) -> IndexMeta {
        IndexMeta {
            segments: Vec::new(),
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -8,9 +8,19 @@ pub mod reader;
 pub mod codec;
 pub mod searcher;
 pub mod collector;
-pub mod skip;
 pub mod serialize;
 pub mod store;
 pub mod simdcompression;
 pub mod fstmap;
 pub mod index;
+
+
+use std::error;
+use std::io;
+
+pub fn convert_to_ioerror<E: 'static + error::Error + Send + Sync>(err: E) -> io::Error {
+    io::Error::new(
+        io::ErrorKind::InvalidData,
+        err
+    )
+}
--- a/src/core/postings.rs
+++ b/src/core/postings.rs
@@ -17,56 +17,6 @@ pub trait Postings: Iterator<Item=DocId> {
 // impl<T: Iterator<Item=DocId>> Postings for T {}


-#[derive(Debug)]
-pub struct VecPostings {
-    doc_ids: Vec<DocId>,
-	cursor: usize,
-}
-
-impl VecPostings {
-    pub fn new(vals: Vec<DocId>) -> VecPostings {
-        VecPostings {
-            doc_ids: vals,
-			cursor: 0,
-        }
-    }
-}
-
-impl Postings for VecPostings {
-    // after skipping position
-    // the iterator in such a way that the
-    // next call to next() will return a
-    // value greater or equal to target.
-    fn skip_next(&mut self, target: DocId) -> Option<DocId> {
-        loop {
-            match Iterator::next(self) {
-                Some(val) if val >= target => {
-                    return Some(val);
-                },
-                None => {
-                    return None;
-                },
-                _ => {}
-            }
-        }
-    }
-}
-
-impl Iterator for VecPostings {
-	type Item = DocId;
-	fn next(&mut self,) -> Option<DocId> {
-		if self.cursor >= self.doc_ids.len() {
-			None
-		}
-		else {
-            self.cursor += 1;
-			Some(self.doc_ids[self.cursor - 1])
-		}
-	}
-}
-
-
-


 pub struct IntersectionPostings<T: Postings> {
@@ -127,6 +77,55 @@ mod tests {
    use test::Bencher;
    use core::schema::DocId;

+
+    #[derive(Debug)]
+    pub struct VecPostings {
+        doc_ids: Vec<DocId>,
+    	cursor: usize,
+    }
+
+    impl VecPostings {
+        pub fn new(vals: Vec<DocId>) -> VecPostings {
+            VecPostings {
+                doc_ids: vals,
+    			cursor: 0,
+            }
+        }
+    }
+
+    impl Postings for VecPostings {
+        // after skipping position
+        // the iterator in such a way that the
+        // next call to next() will return a
+        // value greater or equal to target.
+        fn skip_next(&mut self, target: DocId) -> Option<DocId> {
+            loop {
+                match Iterator::next(self) {
+                    Some(val) if val >= target => {
+                        return Some(val);
+                    },
+                    None => {
+                        return None;
+                    },
+                    _ => {}
+                }
+            }
+        }
+    }
+
+    impl Iterator for VecPostings {
+    	type Item = DocId;
+    	fn next(&mut self,) -> Option<DocId> {
+    		if self.cursor >= self.doc_ids.len() {
+    			None
+    		}
+    		else {
+                self.cursor += 1;
+    			Some(self.doc_ids[self.cursor - 1])
+    		}
+    	}
+    }
+
    #[test]
    fn test_intersection() {
        {
--- a/src/core/reader.rs
+++ b/src/core/reader.rs
@@ -14,12 +14,11 @@ use std::io;
 use std::str;
 use core::codec::TermInfo;
 use core::fstmap::FstMap;
-use std::error;
 use rustc_serialize::json;
 use core::serial::SegmentSerializer;
 use core::serial::SerializableSegment;
-use std::str::Utf8Error;
 use core::index::SegmentInfo;
+use core::convert_to_ioerror;

 // TODO file structure should be in codec

@@ -99,14 +98,6 @@ impl Iterator for SegmentPostings {
 }


-fn convert_to_ioerror<E: 'static + error::Error + Send + Sync>(err: E) -> io::Error {
-    io::Error::new(
-        io::ErrorKind::InvalidData,
-        err
-    )
-}
-
-

 impl SegmentReader {

--- a/src/core/simdcompression.rs
+++ b/src/core/simdcompression.rs
@@ -4,12 +4,12 @@ use std::ptr;
 // use std::iter;

 extern {
-    fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
-    fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-
+    // fn encode_unsorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
+    // fn decode_unsorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
+    // fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
    fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t;
    fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-    fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
+
 }

 pub struct Encoder {
@@ -48,26 +48,26 @@ impl Encoder {
    }


-    pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
-        self.input_buffer.clear();
-        let input_len = input.len();
-        if input_len + 10000 >= self.input_buffer.len() {
-            let target_length = input_len + 1024;
-            self.input_buffer.resize(target_length, 0);
-            self.output_buffer.resize(target_length, 0);
-        }
-        // TODO use clone_from when available
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
-            let written_size = encode_unsorted_native(
-                self.input_buffer.as_mut_ptr(),
-                input_len as size_t,
-                self.output_buffer.as_mut_ptr(),
-                self.output_buffer.len() as size_t,
-            );
-            return &self.output_buffer[0..written_size];
-        }
-    }
+    // pub fn encode_unsorted(&mut self, input: &[u32]) -> &[u32] {
+    //     self.input_buffer.clear();
+    //     let input_len = input.len();
+    //     if input_len + 10000 >= self.input_buffer.len() {
+    //         let target_length = input_len + 1024;
+    //         self.input_buffer.resize(target_length, 0);
+    //         self.output_buffer.resize(target_length, 0);
+    //     }
+    //     // TODO use clone_from when available
+    //     unsafe {
+    //         ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
+    //         let written_size = encode_unsorted_native(
+    //             self.input_buffer.as_mut_ptr(),
+    //             input_len as size_t,
+    //             self.output_buffer.as_mut_ptr(),
+    //             self.output_buffer.len() as size_t,
+    //         );
+    //         return &self.output_buffer[0..written_size];
+    //     }
+    // }
 }


@@ -92,17 +92,17 @@ impl Decoder {
        }
    }

-    pub fn decode_unsorted(&self,
-                  compressed_data: &[u32],
-                  uncompressed_values: &mut [u32]) -> size_t {
-        unsafe {
-            return decode_unsorted_native(
-                        compressed_data.as_ptr(),
-                        compressed_data.len() as size_t,
-                        uncompressed_values.as_mut_ptr(),
-                        uncompressed_values.len() as size_t);
-        }
-    }
+    // pub fn decode_unsorted(&self,
+    //               compressed_data: &[u32],
+    //               uncompressed_values: &mut [u32]) -> size_t {
+    //     unsafe {
+    //         return decode_unsorted_native(
+    //                     compressed_data.as_ptr(),
+    //                     compressed_data.len() as size_t,
+    //                     uncompressed_values.as_mut_ptr(),
+    //                     uncompressed_values.len() as size_t);
+    //     }
+    // }
 }


@@ -176,22 +176,22 @@ mod tests {
        assert_eq!(decoded_data, input);
    }

-    #[test]
-    fn test_encode_unsorted() {
-        let mut encoder = Encoder::new();
-        let num_ints = 10_000 as usize;
-        let expected_length = 4361;
-        let input: Vec<u32> = (0..num_ints as u32)
-            .map(|i| i * 213_127 % 501)
-            .into_iter().collect();
-        assert_eq!(input.len(), 10_000);
-        let encoded_data = encoder.encode_unsorted(&input);
-        assert_eq!(encoded_data.len(), expected_length);
-        let decoder = Decoder::new();
-        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
-        assert_eq!(num_ints, decoder.decode_unsorted(&encoded_data[..], &mut decoded_data));
-        assert_eq!(decoded_data, input);
-    }
+    // #[test]
+    // fn test_encode_unsorted() {
+    //     let mut encoder = Encoder::new();
+    //     let num_ints = 10_000 as usize;
+    //     let expected_length = 4361;
+    //     let input: Vec<u32> = (0..num_ints as u32)
+    //         .map(|i| i * 213_127 % 501)
+    //         .into_iter().collect();
+    //     assert_eq!(input.len(), 10_000);
+    //     let encoded_data = encoder.encode_unsorted(&input);
+    //     assert_eq!(encoded_data.len(), expected_length);
+    //     let decoder = Decoder::new();
+    //     let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
+    //     assert_eq!(num_ints, decoder.decode_unsorted(&encoded_data[..], &mut decoded_data));
+    //     assert_eq!(decoded_data, input);
+    // }
    //
    // #[test]
    // fn test_simd_intersection() {
--- a/src/core/store.rs
+++ b/src/core/store.rs
@@ -139,7 +139,7 @@ impl StoreReader {
        return offset;
    }

-    fn read_block(&self, block_offset: usize) {
+    fn read_block(&self, block_offset: usize) -> io::Result<()> {
        let mut current_block_mut = self.current_block.borrow_mut();
        current_block_mut.clear();
        let total_buffer = self.data.as_slice();
@@ -147,21 +147,21 @@ impl StoreReader {
        let block_length = u32::deserialize(&mut cursor).unwrap();
        let block_array: &[u8] = &total_buffer[(block_offset + 4 as usize)..(block_offset + 4 + block_length as usize)];
        let mut lz4_decoder = lz4::Decoder::new(Cursor::new(block_array)).unwrap();
-        lz4_decoder.read_to_end(&mut current_block_mut);
+        lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())
    }

    pub fn get(&self, doc_id: &DocId) -> io::Result<Document> {
        let OffsetIndex(first_doc_id, block_offset) = self.block_offset(doc_id);
-        self.read_block(block_offset as usize);
+        try!(self.read_block(block_offset as usize));
        let mut current_block_mut = self.current_block.borrow_mut();
        let mut cursor = Cursor::new(&mut current_block_mut[..]);
        for _ in first_doc_id..*doc_id  {
            let block_length = try!(u32::deserialize(&mut cursor));
-            cursor.seek(SeekFrom::Current(block_length as i64));
+            try!(cursor.seek(SeekFrom::Current(block_length as i64)));
        }
        try!(u32::deserialize(&mut cursor));
        let mut field_values = Vec::new();
-        let num_fields = u32::deserialize(&mut cursor).unwrap();
+        let num_fields = try!(u32::deserialize(&mut cursor));
        for _ in 0..num_fields {
            let field_value = try!(FieldValue::deserialize(&mut cursor));
            field_values.push(field_value);
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,6 +35,10 @@ pub use core::schema::Document;
 pub use core::collector;
 pub use core::reader::SegmentReader;

+
+
+
+
 #[cfg(test)]
 mod tests {