diff --git a/Cargo.toml b/Cargo.toml index c502fdb90..f1d9ddcbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ matches = "0.1" bitpacking = "0.5" census = "0.1" fnv = "1.0.6" +owned-read = "0.1" [target.'cfg(windows)'.dependencies] winapi = "0.2" diff --git a/src/compression/stream.rs b/src/compression/stream.rs index 762792a9b..4822f3fdc 100644 --- a/src/compression/stream.rs +++ b/src/compression/stream.rs @@ -1,7 +1,8 @@ use compression::compressed_block_size; use compression::BlockDecoder; use compression::COMPRESSION_BLOCK_SIZE; -use directory::{ReadOnlySource, SourceRead}; +use directory::ReadOnlySource; +use owned_read::OwnedRead; /// Reads a stream of compressed ints. /// @@ -10,7 +11,7 @@ use directory::{ReadOnlySource, SourceRead}; /// The `.skip(...)` makes it possible to avoid /// decompressing blocks that are not required. pub struct CompressedIntStream { - buffer: SourceRead, + buffer: OwnedRead, block_decoder: BlockDecoder, cached_addr: usize, // address of the currently decoded block @@ -24,7 +25,7 @@ impl CompressedIntStream { /// Opens a compressed int stream. 
pub(crate) fn wrap(source: ReadOnlySource) -> CompressedIntStream { CompressedIntStream { - buffer: SourceRead::from(source), + buffer: OwnedRead::new(source), block_decoder: BlockDecoder::new(), cached_addr: usize::max_value(), cached_next_addr: usize::max_value(), diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 7620258bc..a3600a702 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,6 +1,6 @@ use common::BinarySerializable; use compression::CompressedIntStream; -use directory::{ReadOnlySource, SourceRead}; +use directory::ReadOnlySource; use postings::FreqReadingOption; use postings::TermInfo; use postings::{BlockSegmentPostings, SegmentPostings}; @@ -8,6 +8,7 @@ use schema::FieldType; use schema::IndexRecordOption; use schema::Term; use termdict::TermDictionary; +use owned_read::OwnedRead; /// The inverted index reader is in charge of accessing /// the inverted index associated to a specific field. @@ -92,7 +93,7 @@ impl InvertedIndexReader { let offset = term_info.postings_offset as usize; let end_source = self.postings_source.len(); let postings_slice = self.postings_source.slice(offset, end_source); - let postings_reader = SourceRead::from(postings_slice); + let postings_reader = OwnedRead::new(postings_slice); block_postings.reset(term_info.doc_freq as usize, postings_reader); } @@ -114,7 +115,7 @@ impl InvertedIndexReader { }; BlockSegmentPostings::from_data( term_info.doc_freq as usize, - SourceRead::from(postings_data), + OwnedRead::new(postings_data), freq_reading_option, ) } diff --git a/src/directory/mod.rs b/src/directory/mod.rs index 72301430b..5667b7b01 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -26,7 +26,6 @@ pub use self::read_only_source::ReadOnlySource; pub use self::mmap_directory::MmapDirectory; pub(crate) use self::managed_directory::ManagedDirectory; -pub(crate) use self::read_only_source::SourceRead; /// Synonym of Seek + Write pub trait 
SeekableWrite: Seek + Write {} diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index 99812155a..d2e9358d4 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs @@ -3,9 +3,8 @@ use common::HasLen; #[cfg(feature = "mmap")] use fst::raw::MmapReadOnly; use stable_deref_trait::{CloneStableDeref, StableDeref}; -use std::io::{self, Read}; use std::ops::Deref; -use std::slice; + /// Read object that represents files in tantivy. /// @@ -120,49 +119,3 @@ impl From<Vec<u8>> for ReadOnlySource { ReadOnlySource::Anonymous(shared_data) } } - -/// Acts as a owning cursor over the data backed up by a `ReadOnlySource` -pub(crate) struct SourceRead { - _data_owner: ReadOnlySource, - cursor: &'static [u8], -} - -impl SourceRead { - // Advance the cursor by a given number of bytes. - pub fn advance(&mut self, len: usize) { - self.cursor = &self.cursor[len..]; - } - - pub fn slice_from(&self, start: usize) -> &[u8] { - &self.cursor[start..] - } - - pub fn get(&self, idx: usize) -> u8 { - self.cursor[idx] - } -} - -impl AsRef<[u8]> for SourceRead { - fn as_ref(&self) -> &[u8] { - self.cursor - } -} - -impl From<ReadOnlySource> for SourceRead { - // Creates a new `SourceRead` from a given `ReadOnlySource` - fn from(source: ReadOnlySource) -> SourceRead { - let len = source.len(); - let slice_ptr = source.as_slice().as_ptr(); - let static_slice = unsafe { slice::from_raw_parts(slice_ptr, len) }; - SourceRead { - _data_owner: source, - cursor: static_slice, - } - } -} - -impl Read for SourceRead { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - self.cursor.read(buf) - } -} diff --git a/src/lib.rs b/src/lib.rs index cf94a1dad..0a7e217dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,6 +181,7 @@ mod macros; pub use error::{Error, ErrorKind, ResultExt}; extern crate census; +extern crate owned_read; /// Tantivy result.
pub type Result<T> = std::result::Result<T, Error>; diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 185732451..c09ec7970 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -5,12 +5,13 @@ use common::BitSet; use common::CountingWriter; use common::HasLen; use compression::compressed_block_size; -use directory::{ReadOnlySource, SourceRead}; +use directory::ReadOnlySource; use docset::{DocSet, SkipResult}; use fst::Streamer; use postings::serializer::PostingsSerializer; use postings::FreqReadingOption; use postings::Postings; +use owned_read::OwnedRead; struct PositionComputer { // store the amount of position int @@ -94,7 +95,7 @@ impl SegmentPostings { let data = ReadOnlySource::from(buffer); let block_segment_postings = BlockSegmentPostings::from_data( docs.len(), - SourceRead::from(data), + OwnedRead::new(data), FreqReadingOption::NoFreq, ); SegmentPostings::from_block_postings(block_segment_postings, None) @@ -306,13 +307,13 @@ pub struct BlockSegmentPostings { doc_offset: DocId, num_bitpacked_blocks: usize, num_vint_docs: usize, - remaining_data: SourceRead, + remaining_data: OwnedRead, } impl BlockSegmentPostings { pub(crate) fn from_data( doc_freq: usize, - data: SourceRead, + data: OwnedRead, freq_reading_option: FreqReadingOption, ) -> BlockSegmentPostings { let num_bitpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE; @@ -339,7 +340,7 @@ impl BlockSegmentPostings { // # Warning // // This does not reset the positions list.
- pub(crate) fn reset(&mut self, doc_freq: usize, postings_data: SourceRead) { + pub(crate) fn reset(&mut self, doc_freq: usize, postings_data: OwnedRead) { let num_binpacked_blocks: usize = doc_freq / COMPRESSION_BLOCK_SIZE; let num_vint_docs = doc_freq & (COMPRESSION_BLOCK_SIZE - 1); self.num_bitpacked_blocks = num_binpacked_blocks; @@ -449,7 +450,7 @@ impl BlockSegmentPostings { freq_decoder: BlockDecoder::with_val(1), freq_reading_option: FreqReadingOption::NoFreq, - remaining_data: From::from(ReadOnlySource::empty()), + remaining_data: OwnedRead::new(ReadOnlySource::empty()), doc_offset: 0, doc_freq: 0, }