mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
Cargo fmt
This commit is contained in:
@@ -34,7 +34,7 @@ impl Searcher {
|
||||
}
|
||||
|
||||
/// Returns the overall number of documents in the index.
|
||||
pub fn num_docs(&self) -> DocId {
|
||||
pub fn num_docs(&self) -> DocId {
|
||||
self.segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.num_docs())
|
||||
|
||||
@@ -40,4 +40,4 @@ impl From<Vec<u8>> for SharedVecSlice {
|
||||
fn from(data: Vec<u8>) -> SharedVecSlice {
|
||||
SharedVecSlice::new(Arc::new(data))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,3 @@ pub use self::termdict::TermDictionaryImpl;
|
||||
pub use self::termdict::TermDictionaryBuilderImpl;
|
||||
pub use self::streamer::TermStreamerImpl;
|
||||
pub use self::streamer::TermStreamerBuilderImpl;
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -12,9 +12,9 @@ pub struct TermStreamerBuilderImpl<'a, V>
|
||||
stream_builder: StreamBuilder<'a>,
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
|
||||
impl<'a, V> TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
pub(crate) fn new(fst_map: &'a TermDictionaryImpl<V>,
|
||||
stream_builder: StreamBuilder<'a>)
|
||||
-> Self {
|
||||
@@ -25,9 +25,9 @@ impl<'a, V> TermStreamerBuilderImpl<'a, V>
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
|
||||
impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
type Streamer = TermStreamerImpl<'a, V>;
|
||||
|
||||
fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
|
||||
@@ -74,16 +74,17 @@ pub struct TermStreamerImpl<'a, V>
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamer<V> for TermStreamerImpl<'a, V>
|
||||
where V: BinarySerializable + Default {
|
||||
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
fn advance(&mut self) -> bool {
|
||||
if let Some((term, offset)) = self.stream.next() {
|
||||
self.current_key.clear();
|
||||
self.current_key.extend_from_slice(term);
|
||||
self.offset = offset;
|
||||
self.current_value = self.fst_map
|
||||
.read_value(self.offset)
|
||||
.expect("Fst data is corrupted. Failed to deserialize a value.");
|
||||
self.current_value =
|
||||
self.fst_map
|
||||
.read_value(self.offset)
|
||||
.expect("Fst data is corrupted. Failed to deserialize a value.");
|
||||
true
|
||||
} else {
|
||||
false
|
||||
|
||||
@@ -14,16 +14,18 @@ fn convert_fst_error(e: fst::Error) -> io::Error {
|
||||
|
||||
/// See [TermDictionaryBuilder](./trait.TermDictionaryBuilder.html)
|
||||
pub struct TermDictionaryBuilderImpl<W, V = TermInfo>
|
||||
where W: Write, V: BinarySerializable + Default
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
fst_builder: fst::MapBuilder<W>,
|
||||
data: Vec<u8>,
|
||||
_phantom_: PhantomData<V>,
|
||||
}
|
||||
|
||||
impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
where W: Write, V: BinarySerializable + Default {
|
||||
|
||||
impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
/// # Warning
|
||||
/// Horribly dangerous internal API
|
||||
///
|
||||
@@ -45,12 +47,12 @@ impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
value.serialize(&mut self.data)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<W, V> TermDictionaryBuilder<W, V> for TermDictionaryBuilderImpl<W, V>
|
||||
where W: Write, V: BinarySerializable + Default {
|
||||
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
fn new(w: W) -> io::Result<Self> {
|
||||
let fst_builder = fst::MapBuilder::new(w).map_err(convert_fst_error)?;
|
||||
Ok(TermDictionaryBuilderImpl {
|
||||
@@ -95,7 +97,7 @@ fn open_fst_index(source: ReadOnlySource) -> io::Result<fst::Map> {
|
||||
/// See [TermDictionary](./trait.TermDictionary.html)
|
||||
pub struct TermDictionaryImpl<V = TermInfo>
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
{
|
||||
fst_index: fst::Map,
|
||||
values_mmap: ReadOnlySource,
|
||||
_phantom_: PhantomData<V>,
|
||||
@@ -114,12 +116,12 @@ impl<V> TermDictionaryImpl<V>
|
||||
|
||||
|
||||
impl<'a, V> TermDictionary<'a, V> for TermDictionaryImpl<V>
|
||||
where V: BinarySerializable + Default + 'a {
|
||||
|
||||
where V: BinarySerializable + Default + 'a
|
||||
{
|
||||
type Streamer = TermStreamerImpl<'a, V>;
|
||||
|
||||
type StreamBuilder = TermStreamerBuilderImpl<'a, V>;
|
||||
|
||||
|
||||
fn from_source(source: ReadOnlySource) -> io::Result<Self> {
|
||||
let total_len = source.len();
|
||||
let length_offset = total_len - 4;
|
||||
|
||||
@@ -132,8 +132,7 @@ impl<'a, V> TermMerger<'a, V>
|
||||
|
||||
|
||||
|
||||
impl<'a> From<&'a [SegmentReader]> for TermMerger<'a, TermInfo>
|
||||
{
|
||||
impl<'a> From<&'a [SegmentReader]> for TermMerger<'a, TermInfo> {
|
||||
fn from(segment_readers: &'a [SegmentReader]) -> TermMerger<'a, TermInfo> {
|
||||
TermMerger::new(segment_readers
|
||||
.iter()
|
||||
|
||||
@@ -85,13 +85,15 @@ pub use self::merger::TermMerger;
|
||||
#[cfg(not(feature="streamdict"))]
|
||||
mod fstdict;
|
||||
#[cfg(not(feature="streamdict"))]
|
||||
pub use self::fstdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl, TermStreamerBuilderImpl};
|
||||
pub use self::fstdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl,
|
||||
TermStreamerBuilderImpl};
|
||||
|
||||
|
||||
#[cfg(feature="streamdict")]
|
||||
mod streamdict;
|
||||
#[cfg(feature="streamdict")]
|
||||
pub use self::streamdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl, TermStreamerBuilderImpl};
|
||||
pub use self::streamdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl,
|
||||
TermStreamerBuilderImpl};
|
||||
|
||||
|
||||
mod merger;
|
||||
@@ -100,16 +102,17 @@ use std::io;
|
||||
|
||||
/// Dictionary associating sorted `&[u8]` to values
|
||||
pub trait TermDictionary<'a, V>
|
||||
where V: BinarySerializable + Default + 'a , Self: Sized {
|
||||
|
||||
where V: BinarySerializable + Default + 'a,
|
||||
Self: Sized
|
||||
{
|
||||
/// Streamer type associated to the term dictionary
|
||||
type Streamer: TermStreamer<V> + 'a;
|
||||
|
||||
/// StreamerBuilder type associated to the term dictionary
|
||||
type StreamBuilder: TermStreamerBuilder<V, Streamer=Self::Streamer> + 'a;
|
||||
type StreamBuilder: TermStreamerBuilder<V, Streamer = Self::Streamer> + 'a;
|
||||
|
||||
/// Opens a `TermDictionary` given a data source.
|
||||
fn from_source(source: ReadOnlySource) -> io::Result<Self>;
|
||||
fn from_source(source: ReadOnlySource) -> io::Result<Self>;
|
||||
|
||||
/// Looks up the value corresponding to the key.
|
||||
fn get<K: AsRef<[u8]>>(&self, target_key: K) -> Option<V>;
|
||||
@@ -138,8 +141,9 @@ pub trait TermDictionary<'a, V>
|
||||
///
|
||||
/// Inserting must be done in the order of the `keys`.
|
||||
pub trait TermDictionaryBuilder<W, V>: Sized
|
||||
where W: io::Write, V: BinarySerializable + Default {
|
||||
|
||||
where W: io::Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
/// Creates a new `TermDictionaryBuilder`
|
||||
fn new(write: W) -> io::Result<Self>;
|
||||
|
||||
@@ -157,7 +161,6 @@ pub trait TermDictionaryBuilder<W, V>: Sized
|
||||
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
|
||||
/// Terms are guaranteed to be sorted.
|
||||
pub trait TermStreamer<V>: Sized {
|
||||
|
||||
/// Advances the stream to the next item.
|
||||
/// Before the first call to `.advance()`, the stream
|
||||
/// is in an uninitialized state.
|
||||
@@ -176,10 +179,10 @@ pub trait TermStreamer<V>: Sized {
|
||||
fn key(&self) -> &[u8];
|
||||
|
||||
/// Accesses the current value.
|
||||
///
|
||||
///
|
||||
/// Calling `.value()` after the end of the stream will return the
|
||||
/// last `.value()` encountered.
|
||||
///
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Calling `.value()` before the first call to `.advance()` returns
|
||||
@@ -199,8 +202,9 @@ pub trait TermStreamer<V>: Sized {
|
||||
|
||||
/// `TermStreamerBuilder` is a helper object used to define
|
||||
/// a range of terms that should be streamed.
|
||||
pub trait TermStreamerBuilder<V> where V: BinarySerializable + Default {
|
||||
|
||||
pub trait TermStreamerBuilder<V>
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
/// Associated `TermStreamer` type that this builder is building.
|
||||
type Streamer: TermStreamer<V>;
|
||||
|
||||
@@ -215,7 +219,7 @@ pub trait TermStreamerBuilder<V> where V: BinarySerializable + Default {
|
||||
|
||||
/// Limit the range to terms less than or equal to the bound
|
||||
fn le<T: AsRef<[u8]>>(self, bound: T) -> Self;
|
||||
|
||||
|
||||
/// Creates the stream corresponding to the range
|
||||
/// of terms defined using the `TermStreamerBuilder`.
|
||||
fn into_stream(self) -> Self::Streamer;
|
||||
@@ -235,7 +239,7 @@ mod tests {
|
||||
use termdict::TermDictionary;
|
||||
use termdict::TermDictionaryBuilder;
|
||||
const BLOCK_SIZE: usize = 1_500;
|
||||
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_term_dictionary() {
|
||||
@@ -319,14 +323,15 @@ mod tests {
|
||||
.map(|i| (format!("doc{:0>6}", i), i))
|
||||
.collect();
|
||||
let buffer: Vec<u8> = {
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec!()).unwrap();
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![]).unwrap();
|
||||
for &(ref id, ref i) in &ids {
|
||||
term_dictionary_builder.insert(id.as_bytes(), i).unwrap();
|
||||
}
|
||||
term_dictionary_builder.finish().unwrap()
|
||||
};
|
||||
let source = ReadOnlySource::from(buffer);
|
||||
let term_dictionary: TermDictionaryImpl<u32> = TermDictionaryImpl::from_source(source).unwrap();
|
||||
let term_dictionary: TermDictionaryImpl<u32> = TermDictionaryImpl::from_source(source)
|
||||
.unwrap();
|
||||
{
|
||||
let mut streamer = term_dictionary.stream();
|
||||
let mut i = 0;
|
||||
@@ -337,7 +342,7 @@ mod tests {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let &(ref key, ref _v) = &ids[2047];
|
||||
term_dictionary.get(key.as_bytes());
|
||||
}
|
||||
@@ -348,7 +353,7 @@ mod tests {
|
||||
.map(|i| (format!("doc{:0>6}", i), i))
|
||||
.collect();
|
||||
let buffer: Vec<u8> = {
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec!()).unwrap();
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![]).unwrap();
|
||||
for &(ref id, ref i) in &ids {
|
||||
term_dictionary_builder.insert(id.as_bytes(), i).unwrap();
|
||||
}
|
||||
@@ -356,8 +361,9 @@ mod tests {
|
||||
};
|
||||
|
||||
let source = ReadOnlySource::from(buffer);
|
||||
|
||||
let term_dictionary: TermDictionaryImpl<u32> = TermDictionaryImpl::from_source(source).unwrap();
|
||||
|
||||
let term_dictionary: TermDictionaryImpl<u32> = TermDictionaryImpl::from_source(source)
|
||||
.unwrap();
|
||||
{
|
||||
for i in (0..20).chain(6000..8_000) {
|
||||
let &(ref target_key, _) = &ids[i];
|
||||
@@ -415,7 +421,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_stream_range_boundaries() {
|
||||
let buffer: Vec<u8> = {
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec!()).unwrap();
|
||||
let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![]).unwrap();
|
||||
for i in 0u8..10u8 {
|
||||
let number_arr = [i; 1];
|
||||
term_dictionary_builder.insert(&number_arr, &i).unwrap();
|
||||
@@ -423,50 +429,36 @@ mod tests {
|
||||
term_dictionary_builder.finish().unwrap()
|
||||
};
|
||||
let source = ReadOnlySource::from(buffer);
|
||||
let term_dictionary: TermDictionaryImpl<u8> = TermDictionaryImpl::from_source(source).unwrap();
|
||||
|
||||
let term_dictionary: TermDictionaryImpl<u8> = TermDictionaryImpl::from_source(source)
|
||||
.unwrap();
|
||||
|
||||
let value_list = |mut streamer: TermStreamerImpl<u8>| {
|
||||
let mut res: Vec<u8> = vec!();
|
||||
let mut res: Vec<u8> = vec![];
|
||||
while let Some((_, &v)) = streamer.next() {
|
||||
res.push(v);
|
||||
}
|
||||
res
|
||||
};
|
||||
{
|
||||
let range = term_dictionary
|
||||
.range()
|
||||
.ge([2u8])
|
||||
.into_stream();
|
||||
assert_eq!(value_list(range), vec!(2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8));
|
||||
let range = term_dictionary.range().ge([2u8]).into_stream();
|
||||
assert_eq!(value_list(range),
|
||||
vec![2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8]);
|
||||
}
|
||||
{
|
||||
let range = term_dictionary
|
||||
.range()
|
||||
.gt([2u8])
|
||||
.into_stream();
|
||||
assert_eq!(value_list(range), vec!(3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8));
|
||||
let range = term_dictionary.range().gt([2u8]).into_stream();
|
||||
assert_eq!(value_list(range), vec![3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8]);
|
||||
}
|
||||
{
|
||||
let range = term_dictionary
|
||||
.range()
|
||||
.lt([6u8])
|
||||
.into_stream();
|
||||
assert_eq!(value_list(range), vec!(0u8, 1u8, 2u8, 3u8, 4u8, 5u8));
|
||||
let range = term_dictionary.range().lt([6u8]).into_stream();
|
||||
assert_eq!(value_list(range), vec![0u8, 1u8, 2u8, 3u8, 4u8, 5u8]);
|
||||
}
|
||||
{
|
||||
let range = term_dictionary
|
||||
.range()
|
||||
.le([6u8])
|
||||
.into_stream();
|
||||
assert_eq!(value_list(range), vec!(0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 6u8));
|
||||
let range = term_dictionary.range().le([6u8]).into_stream();
|
||||
assert_eq!(value_list(range), vec![0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 6u8]);
|
||||
}
|
||||
{
|
||||
let range = term_dictionary
|
||||
.range()
|
||||
.ge([0u8])
|
||||
.lt([5u8])
|
||||
.into_stream();
|
||||
assert_eq!(value_list(range), vec!(0u8, 1u8, 2u8, 3u8, 4u8));
|
||||
let range = term_dictionary.range().ge([0u8]).lt([5u8]).into_stream();
|
||||
assert_eq!(value_list(range), vec![0u8, 1u8, 2u8, 3u8, 4u8]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ impl<W: Write> CountingWriter<W> {
|
||||
written_bytes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn written_bytes(&self,) -> usize {
|
||||
|
||||
pub fn written_bytes(&self) -> usize {
|
||||
self.written_bytes
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_counting_writer() {
|
||||
let buffer: Vec<u8> = vec!();
|
||||
let buffer: Vec<u8> = vec![];
|
||||
let mut counting_writer = CountingWriter::wrap(buffer);
|
||||
let bytes = (0u8..10u8).collect::<Vec<u8>>();
|
||||
counting_writer.write_all(&bytes).unwrap();
|
||||
@@ -55,4 +55,4 @@ mod test {
|
||||
assert_eq!(len, 10);
|
||||
assert_eq!(w.len(), 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,4 +8,3 @@ pub use self::termdict::TermDictionaryImpl;
|
||||
pub use self::termdict::TermDictionaryBuilderImpl;
|
||||
pub use self::streamer::TermStreamerImpl;
|
||||
pub use self::streamer::TermStreamerBuilderImpl;
|
||||
|
||||
|
||||
@@ -7,8 +7,11 @@ use common::BinarySerializable;
|
||||
use super::TermDictionaryImpl;
|
||||
use termdict::{TermStreamerBuilder, TermStreamer};
|
||||
|
||||
pub(crate) fn stream_before<'a, V>(term_dictionary: &'a TermDictionaryImpl<V>, target_key: &[u8]) -> TermStreamerImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
pub(crate) fn stream_before<'a, V>(term_dictionary: &'a TermDictionaryImpl<V>,
|
||||
target_key: &[u8])
|
||||
-> TermStreamerImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
let (prev_key, offset) = term_dictionary.strictly_previous_key(target_key.as_ref());
|
||||
let offset: usize = offset as usize;
|
||||
TermStreamerImpl {
|
||||
@@ -20,7 +23,8 @@ pub(crate) fn stream_before<'a, V>(term_dictionary: &'a TermDictionaryImpl<V>, t
|
||||
|
||||
/// See [TermStreamerBuilder](./trait.TermStreamerBuilder.html)
|
||||
pub struct TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
term_dictionary: &'a TermDictionaryImpl<V>,
|
||||
origin: usize,
|
||||
offset_from: usize,
|
||||
@@ -28,16 +32,16 @@ pub struct TermStreamerBuilderImpl<'a, V>
|
||||
current_key: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
|
||||
impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
type Streamer = TermStreamerImpl<'a, V>;
|
||||
|
||||
/// Limit the range to terms greater or equal to the bound
|
||||
fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
|
||||
let target_key = bound.as_ref();
|
||||
let streamer = stream_before(&self.term_dictionary, target_key.as_ref());
|
||||
let smaller_than = |k: &[u8]| { k.lt(target_key) };
|
||||
let smaller_than = |k: &[u8]| k.lt(target_key);
|
||||
let (offset_before, current_key) = get_offset(smaller_than, streamer);
|
||||
self.current_key = current_key;
|
||||
self.offset_from = offset_before - self.origin;
|
||||
@@ -48,7 +52,7 @@ impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
|
||||
let target_key = bound.as_ref();
|
||||
let streamer = stream_before(self.term_dictionary, target_key.as_ref());
|
||||
let smaller_than = |k: &[u8]| { k.le(target_key) };
|
||||
let smaller_than = |k: &[u8]| k.le(target_key);
|
||||
let (offset_before, current_key) = get_offset(smaller_than, streamer);
|
||||
self.current_key = current_key;
|
||||
self.offset_from = offset_before - self.origin;
|
||||
@@ -59,7 +63,7 @@ impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
|
||||
let target_key = bound.as_ref();
|
||||
let streamer = stream_before(self.term_dictionary, target_key.as_ref());
|
||||
let smaller_than = |k: &[u8]| { k.lt(target_key) };
|
||||
let smaller_than = |k: &[u8]| k.lt(target_key);
|
||||
let (offset_before, _) = get_offset(smaller_than, streamer);
|
||||
self.offset_to = offset_before - self.origin;
|
||||
self
|
||||
@@ -69,7 +73,7 @@ impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
fn le<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
|
||||
let target_key = bound.as_ref();
|
||||
let streamer = stream_before(self.term_dictionary, target_key.as_ref());
|
||||
let smaller_than = |k: &[u8]| { k.le(target_key) };
|
||||
let smaller_than = |k: &[u8]| k.le(target_key);
|
||||
let (offset_before, _) = get_offset(smaller_than, streamer);
|
||||
self.offset_to = offset_before - self.origin;
|
||||
self
|
||||
@@ -88,16 +92,19 @@ impl<'a, V> TermStreamerBuilder<V> for TermStreamerBuilderImpl<'a, V>
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns offset information for the first
|
||||
/// Returns offset information for the first
|
||||
/// key in the stream matching a given predicate.
|
||||
///
|
||||
/// returns (start offset, the data required to load the value)
|
||||
fn get_offset<'a, V, P: Fn(&[u8])->bool>(predicate: P, mut streamer: TermStreamerImpl<V>) -> (usize, Vec<u8>)
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
fn get_offset<'a, V, P: Fn(&[u8]) -> bool>(predicate: P,
|
||||
mut streamer: TermStreamerImpl<V>)
|
||||
-> (usize, Vec<u8>)
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
let mut prev: &[u8] = streamer.cursor;
|
||||
|
||||
|
||||
let mut prev_data: Vec<u8> = streamer.current_key.clone();
|
||||
|
||||
|
||||
while let Some((iter_key, _)) = streamer.next() {
|
||||
if !predicate(iter_key) {
|
||||
return (prev.as_ptr() as usize, prev_data);
|
||||
@@ -110,9 +117,8 @@ fn get_offset<'a, V, P: Fn(&[u8])->bool>(predicate: P, mut streamer: TermStreame
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamerBuilderImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
|
||||
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
pub(crate) fn new(term_dictionary: &'a TermDictionaryImpl<V>) -> Self {
|
||||
let data = term_dictionary.stream_data();
|
||||
let origin = data.as_ptr() as usize;
|
||||
@@ -121,43 +127,46 @@ impl<'a, V> TermStreamerBuilderImpl<'a, V>
|
||||
origin: origin,
|
||||
offset_from: 0,
|
||||
offset_to: data.len(),
|
||||
current_key: vec!(),
|
||||
current_key: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// See [TermStreamer](./trait.TermStreamer.html)
|
||||
pub struct TermStreamerImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
cursor: &'a [u8],
|
||||
current_key: Vec<u8>,
|
||||
current_value: V,
|
||||
}
|
||||
|
||||
|
||||
impl<'a, V: BinarySerializable> TermStreamerImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default {
|
||||
|
||||
|
||||
impl<'a, V: BinarySerializable> TermStreamerImpl<'a, V>
|
||||
where V: 'a + BinarySerializable + Default
|
||||
{
|
||||
pub(crate) fn extract_value(self) -> V {
|
||||
self.current_value
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, V> TermStreamer<V> for TermStreamerImpl<'a, V>
|
||||
where V: BinarySerializable + Default {
|
||||
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.cursor.len() == 0 {
|
||||
return false;
|
||||
}
|
||||
let common_length: usize = VInt::deserialize(&mut self.cursor).unwrap().0 as usize;
|
||||
let new_length: usize = common_length + VInt::deserialize(&mut self.cursor).unwrap().0 as usize;
|
||||
let new_length: usize = common_length +
|
||||
VInt::deserialize(&mut self.cursor).unwrap().0 as usize;
|
||||
self.current_key.reserve(new_length);
|
||||
unsafe {
|
||||
self.current_key.set_len(new_length);
|
||||
}
|
||||
self.cursor.read_exact(&mut self.current_key[common_length..new_length]).unwrap();
|
||||
self.cursor
|
||||
.read_exact(&mut self.current_key[common_length..new_length])
|
||||
.unwrap();
|
||||
self.current_value = V::deserialize(&mut self.cursor).unwrap();
|
||||
return true;
|
||||
}
|
||||
@@ -169,4 +178,4 @@ impl<'a, V> TermStreamer<V> for TermStreamerImpl<'a, V>
|
||||
fn value(&self) -> &V {
|
||||
&self.current_value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,8 +22,10 @@ fn convert_fst_error(e: fst::Error) -> io::Error {
|
||||
}
|
||||
|
||||
/// See [TermDictionaryBuilder](./trait.TermDictionaryBuilder.html)
|
||||
pub struct TermDictionaryBuilderImpl<W, V=TermInfo>
|
||||
where W: Write, V: BinarySerializable + Default {
|
||||
pub struct TermDictionaryBuilderImpl<W, V = TermInfo>
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
write: CountingWriter<W>,
|
||||
block_index: fst::MapBuilder<Vec<u8>>,
|
||||
last_key: Vec<u8>,
|
||||
@@ -32,7 +34,8 @@ pub struct TermDictionaryBuilderImpl<W, V=TermInfo>
|
||||
}
|
||||
|
||||
fn common_prefix_length(left: &[u8], right: &[u8]) -> usize {
|
||||
left.iter().cloned()
|
||||
left.iter()
|
||||
.cloned()
|
||||
.zip(right.iter().cloned())
|
||||
.take_while(|&(b1, b2)| b1 == b2)
|
||||
.count()
|
||||
@@ -43,18 +46,20 @@ fn fill_last<'a>(fst: &'a Fst, mut node: Node<'a>, buffer: &mut Vec<u8>) {
|
||||
if let Some(transition) = node.transitions().last() {
|
||||
buffer.push(transition.inp);
|
||||
node = fst.node(transition.addr);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
where W: Write, V: BinarySerializable + Default {
|
||||
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
fn add_index_entry(&mut self) {
|
||||
self.block_index.insert(&self.last_key, self.write.written_bytes() as u64).unwrap();
|
||||
self.block_index
|
||||
.insert(&self.last_key, self.write.written_bytes() as u64)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// # Warning
|
||||
@@ -64,7 +69,7 @@ impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
/// to insert_key and insert_value.
|
||||
///
|
||||
/// Prefer using `.insert(key, value)`
|
||||
pub(crate) fn insert_key(&mut self, key: &[u8]) -> io::Result<()>{
|
||||
pub(crate) fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
|
||||
if self.len % BLOCK_SIZE == 0 {
|
||||
self.add_index_entry();
|
||||
}
|
||||
@@ -73,37 +78,38 @@ impl<W, V> TermDictionaryBuilderImpl<W, V>
|
||||
VInt(common_len as u64).serialize(&mut self.write)?;
|
||||
self.last_key.truncate(common_len);
|
||||
self.last_key.extend_from_slice(&key[common_len..]);
|
||||
VInt((key.len() - common_len) as u64).serialize(&mut self.write)?;
|
||||
VInt((key.len() - common_len) as u64)
|
||||
.serialize(&mut self.write)?;
|
||||
self.write.write_all(&key[common_len..])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn insert_value(&mut self, value: &V) -> io::Result<()>{
|
||||
pub(crate) fn insert_value(&mut self, value: &V) -> io::Result<()> {
|
||||
value.serialize(&mut self.write)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<W, V> TermDictionaryBuilder<W, V> for TermDictionaryBuilderImpl<W, V>
|
||||
where W: Write, V: BinarySerializable + Default {
|
||||
|
||||
where W: Write,
|
||||
V: BinarySerializable + Default
|
||||
{
|
||||
/// Creates a new `TermDictionaryBuilder`
|
||||
fn new(write: W) -> io::Result<Self> {
|
||||
let buffer: Vec<u8> = vec!();
|
||||
let buffer: Vec<u8> = vec![];
|
||||
Ok(TermDictionaryBuilderImpl {
|
||||
write: CountingWriter::wrap(write),
|
||||
block_index: fst::MapBuilder::new(buffer)
|
||||
.expect("This cannot fail"),
|
||||
last_key: Vec::with_capacity(128),
|
||||
len: 0,
|
||||
_phantom_: PhantomData,
|
||||
})
|
||||
write: CountingWriter::wrap(write),
|
||||
block_index: fst::MapBuilder::new(buffer).expect("This cannot fail"),
|
||||
last_key: Vec::with_capacity(128),
|
||||
len: 0,
|
||||
_phantom_: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Inserts a `(key, value)` pair in the term dictionary.
|
||||
///
|
||||
/// *Keys have to be inserted in order.*
|
||||
fn insert<K: AsRef<[u8]>>(&mut self, key_ref: K, value: &V) -> io::Result<()>{
|
||||
fn insert<K: AsRef<[u8]>>(&mut self, key_ref: K, value: &V) -> io::Result<()> {
|
||||
let key = key_ref.as_ref();
|
||||
self.insert_key(key)?;
|
||||
self.insert_value(value)
|
||||
@@ -114,9 +120,7 @@ impl<W, V> TermDictionaryBuilder<W, V> for TermDictionaryBuilderImpl<W, V>
|
||||
fn finish(mut self) -> io::Result<W> {
|
||||
self.add_index_entry();
|
||||
let (mut w, split_len) = self.write.finish()?;
|
||||
let fst_write = self.block_index
|
||||
.into_inner()
|
||||
.map_err(convert_fst_error)?;
|
||||
let fst_write = self.block_index.into_inner().map_err(convert_fst_error)?;
|
||||
w.write(&fst_write)?;
|
||||
(split_len as u64).serialize(&mut w)?;
|
||||
w.flush()?;
|
||||
@@ -127,44 +131,49 @@ impl<W, V> TermDictionaryBuilder<W, V> for TermDictionaryBuilderImpl<W, V>
|
||||
|
||||
fn open_fst_index(source: ReadOnlySource) -> io::Result<fst::Map> {
|
||||
Ok(fst::Map::from(match source {
|
||||
ReadOnlySource::Anonymous(data) => try!(Fst::from_shared_bytes(data.data, data.start, data.len).map_err(convert_fst_error)),
|
||||
ReadOnlySource::Mmap(mmap_readonly) => try!(Fst::from_mmap(mmap_readonly).map_err(convert_fst_error)),
|
||||
}))
|
||||
ReadOnlySource::Anonymous(data) => {
|
||||
try!(Fst::from_shared_bytes(data.data, data.start, data.len)
|
||||
.map_err(convert_fst_error))
|
||||
}
|
||||
ReadOnlySource::Mmap(mmap_readonly) => {
|
||||
try!(Fst::from_mmap(mmap_readonly).map_err(convert_fst_error))
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
/// See [TermDictionary](./trait.TermDictionary.html)
|
||||
pub struct TermDictionaryImpl<V=TermInfo> where V: BinarySerializable + Default {
|
||||
pub struct TermDictionaryImpl<V = TermInfo>
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
stream_data: ReadOnlySource,
|
||||
fst_index: fst::Map,
|
||||
_phantom_: PhantomData<V>,
|
||||
}
|
||||
|
||||
impl<V> TermDictionaryImpl<V>
|
||||
where V: BinarySerializable + Default {
|
||||
|
||||
where V: BinarySerializable + Default
|
||||
{
|
||||
pub(crate) fn stream_data(&self) -> &[u8] {
|
||||
self.stream_data.as_slice()
|
||||
}
|
||||
|
||||
pub(crate) fn strictly_previous_key(&self, key: &[u8]) -> (Vec<u8>, u64) {
|
||||
let fst_map = &self.fst_index;
|
||||
let fst_map = &self.fst_index;
|
||||
let fst = fst_map.as_fst();
|
||||
let mut node = fst.root();
|
||||
let mut node_stack: Vec<Node> = vec!(node.clone());
|
||||
let mut node_stack: Vec<Node> = vec![node.clone()];
|
||||
|
||||
// first check the longest prefix.
|
||||
for &b in &key[..key.len() - 1] {
|
||||
node = match node.find_input(b) {
|
||||
None => {
|
||||
break;
|
||||
},
|
||||
Some(i) => {
|
||||
fst.node(node.transition_addr(i))
|
||||
},
|
||||
}
|
||||
Some(i) => fst.node(node.transition_addr(i)),
|
||||
};
|
||||
node_stack.push(node);
|
||||
}
|
||||
|
||||
|
||||
let len_node_stack = node_stack.len();
|
||||
for i in (1..len_node_stack).rev() {
|
||||
let cur_node = &node_stack[i];
|
||||
@@ -173,7 +182,7 @@ impl<V> TermDictionaryImpl<V>
|
||||
.transitions()
|
||||
.take_while(|transition| transition.inp < b)
|
||||
.last();
|
||||
|
||||
|
||||
if let Some(last_transition) = last_transition_opt {
|
||||
let mut result_buffer = Vec::from(&key[..i]);
|
||||
result_buffer.push(last_transition.inp);
|
||||
@@ -183,8 +192,7 @@ impl<V> TermDictionaryImpl<V>
|
||||
fill_last(fst, fork_node, &mut result);
|
||||
let val = fst_map.get(&result).unwrap();
|
||||
return (result, val);
|
||||
}
|
||||
else if cur_node.is_final() {
|
||||
} else if cur_node.is_final() {
|
||||
// the previous key is a prefix
|
||||
let result_buffer = Vec::from(&key[..i]);
|
||||
let val = fst_map.get(&result_buffer).unwrap();
|
||||
@@ -192,36 +200,35 @@ impl<V> TermDictionaryImpl<V>
|
||||
}
|
||||
}
|
||||
|
||||
return (vec!(), 0);
|
||||
return (vec![], 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
impl<'a, V> TermDictionary<'a, V> for TermDictionaryImpl<V>
|
||||
where V: BinarySerializable + Default + 'a {
|
||||
|
||||
where V: BinarySerializable + Default + 'a
|
||||
{
|
||||
type Streamer = TermStreamerImpl<'a, V>;
|
||||
|
||||
type StreamBuilder = TermStreamerBuilderImpl<'a, V>;
|
||||
|
||||
/// Opens a `TermDictionary` given a data source.
|
||||
fn from_source(source: ReadOnlySource) -> io::Result<Self> {
|
||||
fn from_source(source: ReadOnlySource) -> io::Result<Self> {
|
||||
let total_len = source.len();
|
||||
let length_offset = total_len - 8;
|
||||
let split_len: usize = {
|
||||
let mut split_len_buffer: &[u8] = &source.as_slice()[length_offset..];
|
||||
u64::deserialize(&mut split_len_buffer)? as usize
|
||||
u64::deserialize(&mut split_len_buffer)? as usize
|
||||
};
|
||||
let stream_data = source.slice(0, split_len);
|
||||
let fst_data = source.slice(split_len, length_offset);
|
||||
let fst_index = open_fst_index(fst_data)?;
|
||||
|
||||
|
||||
Ok(TermDictionaryImpl {
|
||||
stream_data: stream_data,
|
||||
fst_index: fst_index,
|
||||
_phantom_: PhantomData
|
||||
})
|
||||
stream_data: stream_data,
|
||||
fst_index: fst_index,
|
||||
_phantom_: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Looks up the value corresponding to the key.
|
||||
@@ -231,9 +238,7 @@ impl<'a, V> TermDictionary<'a, V> for TermDictionaryImpl<V>
|
||||
let position = streamer.key().cmp(target_key.as_ref());
|
||||
match position {
|
||||
Ordering::Less => {}
|
||||
Ordering::Equal => {
|
||||
return Some(streamer.extract_value())
|
||||
}
|
||||
Ordering::Equal => return Some(streamer.extract_value()),
|
||||
Ordering::Greater => {
|
||||
return None;
|
||||
}
|
||||
@@ -247,4 +252,4 @@ impl<'a, V> TermDictionary<'a, V> for TermDictionaryImpl<V>
|
||||
fn range(&'a self) -> Self::StreamBuilder {
|
||||
Self::StreamBuilder::new(self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user