From 0b899e00291d3051658f3c8cef0374ea8e35ab19 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 27 Mar 2016 00:21:28 +0900 Subject: [PATCH] Added fast field reader in the segment reader. --- src/core/fastfield.rs | 38 +++++++++++++++++++------------------- src/core/reader.rs | 30 ++++++++++++++++++------------ 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/core/fastfield.rs b/src/core/fastfield.rs index 3a956d7b3..272d947f7 100644 --- a/src/core/fastfield.rs +++ b/src/core/fastfield.rs @@ -217,17 +217,21 @@ impl U32FastFieldReader { } } -pub struct U32FastFieldReaders { +pub struct U32FastFieldsReader { source: ReadOnlySource, field_offsets: HashMap, } -impl U32FastFieldReaders { - pub fn open(source: &ReadOnlySource) -> io::Result { - let mut cursor = source.cursor(); - let header_offset = try!(u32::deserialize(&mut cursor)); - try!(cursor.seek(SeekFrom::Start(header_offset as u64))); - let field_offsets: Vec<(U32Field, u32)> = try!(Vec::deserialize(&mut cursor)); +impl U32FastFieldsReader { + pub fn open(source: ReadOnlySource) -> io::Result { + let header_offset; + let field_offsets: Vec<(U32Field, u32)>; + { + let mut cursor = source.cursor(); + header_offset = try!(u32::deserialize(&mut cursor)); + try!(cursor.seek(SeekFrom::Start(header_offset as u64))); + field_offsets = try!(Vec::deserialize(&mut cursor)); + } let mut end_offsets: Vec = field_offsets .iter() .map(|&(_, offset)| offset.clone()) @@ -239,9 +243,9 @@ impl U32FastFieldReaders { let (field, start_offset) = field_start_offsets.clone(); field_offsets_map.insert(field.clone(), (start_offset.clone(), stop_offset.clone())); } - Ok(U32FastFieldReaders { + Ok(U32FastFieldsReader { field_offsets: field_offsets_map, - source: (*source).clone(), + source: source, }) } @@ -264,20 +268,16 @@ impl U32FastFieldReaders { mod tests { use super::compute_num_bits; - // use super::U32FastFieldWriter; - // use super::U32FastFieldReader; - use super::U32FastFieldReaders; + use super::U32FastFieldsReader; use super::U32FastFieldWriters; use core::schema::U32Field; use std::path::Path; use core::directory::WritePtr; use core::directory::Directory; use core::schema::Document; - // use core::directory::MmapDirectory; use core::directory::RAMDirectory; use core::schema::Schema; use core::schema::FAST_U32; - // use core::directory::ReadOnlySource; use core::fastfield::FastFieldSerializer; use test::Bencher; use test; @@ -323,7 +323,7 @@ mod tests { assert_eq!(source.len(), 29 as usize); } { - let fast_field_readers = U32FastFieldReaders::open(&source).unwrap(); + let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(&field).unwrap(); assert_eq!(fast_field_reader.get(0), 13u32); assert_eq!(fast_field_reader.get(1), 14u32); @@ -357,7 +357,7 @@ mod tests { assert_eq!(source.len(), 61 as usize); } { - let fast_field_readers = U32FastFieldReaders::open(&source).unwrap(); + let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(&field).unwrap(); assert_eq!(fast_field_reader.get(0), 4u32); assert_eq!(fast_field_reader.get(1), 14_082_001u32); @@ -398,7 +398,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldReaders::open(&source).unwrap(); + let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(&field).unwrap(); let mut a = 0u32; for _ in 0..n { @@ -452,7 +452,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldReaders::open(&source).unwrap(); + let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(&field).unwrap(); b.iter(|| { let n = test::black_box(7000u32); @@ -483,7 +483,7 @@ mod tests { } let source = directory.open_read(&path).unwrap(); { - let fast_field_readers = U32FastFieldReaders::open(&source).unwrap(); + let fast_field_readers = U32FastFieldsReader::open(source).unwrap(); let fast_field_reader = fast_field_readers.get_field(&field).unwrap(); b.iter(|| { let n = test::black_box(1000u32); diff --git a/src/core/reader.rs b/src/core/reader.rs index f83517f33..108603da4 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -18,19 +18,11 @@ use rustc_serialize::json; use core::codec::SegmentSerializer; use core::index::SerializableSegment; use core::index::SegmentInfo; +use core::schema::U32Field; use core::convert_to_ioerror; use core::serialize::BinarySerializable; - -// TODO file structure should be in codec - -pub struct SegmentReader { - segment_info: SegmentInfo, - segment_id: SegmentId, - term_offsets: FstMap, - postings_data: ReadOnlySource, - store_reader: StoreReader, -} - +use core::fastfield::U32FastFieldsReader; +use core::fastfield::U32FastFieldReader; impl fmt::Debug for SegmentReader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -100,6 +92,14 @@ impl Iterator for SegmentPostings { } } +pub struct SegmentReader { + segment_info: SegmentInfo, + segment_id: SegmentId, + term_offsets: FstMap, + postings_data: ReadOnlySource, + store_reader: StoreReader, + fast_fields_reader: U32FastFieldsReader, +} impl SegmentReader { @@ -111,7 +111,6 @@ impl SegmentReader { self.segment_info.max_doc } - /// Open a new segment for reading. pub fn open(segment: Segment) -> io::Result { let segment_info_reader = try!(segment.open_read(SegmentComponent::INFO)); @@ -121,12 +120,15 @@ impl SegmentReader { let term_offsets = try!(FstMap::from_source(source)); let store_reader = StoreReader::new(try!(segment.open_read(SegmentComponent::STORE))); let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS)); + let fast_field_data = try!(segment.open_read(SegmentComponent::FASTFIELDS)); + let fast_fields_reader = try!(U32FastFieldsReader::open(fast_field_data)); Ok(SegmentReader { segment_info: segment_info, postings_data: postings_shared_mmap, term_offsets: term_offsets, segment_id: segment.id(), store_reader: store_reader, + fast_fields_reader: fast_fields_reader, }) } @@ -138,6 +140,10 @@ impl SegmentReader { self.store_reader.get(doc_id) } + pub fn get_fast_field_reader(&self, u32_field: &U32Field) -> io::Result { + self.fast_fields_reader.get_field(u32_field) + } + fn read_postings(&self, offset: u32) -> SegmentPostings { let postings_data = &self.postings_data.as_slice()[(offset as usize)..]; SegmentPostings::from_data(&postings_data)