diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs
index 992e2d1db..1521fd2af 100644
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -4,21 +4,25 @@ use common::serialize::BinarySerializable;
 use std::mem;
 use std::ops::Deref;
 
-
 pub(crate) struct BitPacker {
     mini_buffer: u64,
-    mini_buffer_written: usize
+    mini_buffer_written: usize,
 }
 
 impl BitPacker {
     pub fn new() -> BitPacker {
         BitPacker {
             mini_buffer: 0u64,
-            mini_buffer_written: 0
+            mini_buffer_written: 0,
         }
     }
 
-    pub fn write<TWrite: Write>(&mut self, val: u64, num_bits: u8, output: &mut TWrite) -> io::Result<()> {
+    pub fn write<TWrite: Write>(
+        &mut self,
+        val: u64,
+        num_bits: u8,
+        output: &mut TWrite,
+    ) -> io::Result<()> {
         let val_u64 = val as u64;
         let num_bits = num_bits as usize;
         if self.mini_buffer_written + num_bits > 64 {
@@ -58,8 +62,8 @@ impl BitPacker {
 
 #[derive(Clone)]
 pub struct BitUnpacker<Data>
-    where
-        Data: Deref<Target = [u8]>,
+where
+    Data: Deref<Target = [u8]>,
 {
     num_bits: usize,
     mask: u64,
@@ -67,16 +71,15 @@ pub struct BitUnpacker<Data>
 }
 
 impl<Data> BitUnpacker<Data>
-    where
-        Data: Deref<Target = [u8]>,
+where
+    Data: Deref<Target = [u8]>,
 {
     pub fn new(data: Data, num_bits: u8) -> BitUnpacker<Data> {
-        let mask: u64 =
-            if num_bits == 64 {
-                !0u64
-            } else {
-                (1u64 << num_bits) - 1u64
-            };
+        let mask: u64 = if num_bits == 64 {
+            !0u64
+        } else {
+            (1u64 << num_bits) - 1u64
+        };
         BitUnpacker {
             num_bits: num_bits as usize,
             mask,
@@ -102,8 +105,7 @@ impl<Data> BitUnpacker<Data>
                 addr + 8 <= data.len(),
                 "The fast field field should have been padded with 7 bytes."
             );
-            let val_unshifted_unmasked: u64 =
-                unsafe { *(data[addr..].as_ptr() as *const u64) };
+            let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
             let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
             (val_shifted & mask)
         } else {
@@ -134,8 +136,7 @@ impl<Data> BitUnpacker<Data>
         for output_val in output.iter_mut() {
             let addr = addr_in_bits >> 3;
             let bit_shift = addr_in_bits & 7;
-            let val_unshifted_unmasked: u64 =
-                unsafe { *(data[addr..].as_ptr() as *const u64) };
+            let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
             let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
             *output_val = val_shifted & mask;
             addr_in_bits += num_bits;
@@ -148,7 +149,6 @@ impl<Data> BitUnpacker<Data>
 mod test {
     use super::{BitPacker, BitUnpacker};
 
-
     fn create_fastfield_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
         let mut data = Vec::new();
         let mut bitpacker = BitPacker::new();
@@ -157,10 +157,10 @@ mod test {
             .map(|i| if max_val == 0 { 0 } else { i % max_val })
             .collect();
         for &val in &vals {
-            bitpacker.write(val, num_bits,&mut data).unwrap();
+            bitpacker.write(val, num_bits, &mut data).unwrap();
         }
         bitpacker.close(&mut data).unwrap();
-        assert_eq!(data.len(), ((num_bits as usize)* len + 7) / 8 + 7);
+        assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8 + 7);
         let bitunpacker = BitUnpacker::new(data, num_bits);
         (bitunpacker, vals)
     }
diff --git a/src/common/bitset.rs b/src/common/bitset.rs
index fb01e961e..9381929d4 100644
--- a/src/common/bitset.rs
+++ b/src/common/bitset.rs
@@ -27,7 +27,6 @@ impl IntoIterator for TinySet {
 }
 
 impl TinySet {
-
     /// Returns an empty `TinySet`.
     pub fn empty() -> TinySet {
         TinySet(0u64)
@@ -38,7 +37,6 @@ impl TinySet {
         TinySet(!self.0)
     }
 
-
     /// Returns true iff the `TinySet` contains the element `el`.
     pub fn contains(&self, el: u32) -> bool {
         !self.intersect(TinySet::singleton(el)).is_empty()
@@ -137,7 +135,6 @@ fn num_buckets(max_val: u32) -> u32 {
 }
 
 impl BitSet {
-
     /// Create a new `BitSet` that may contain elements
     /// within `[0, max_val[`.
     pub fn with_max_value(max_value: u32) -> BitSet {
@@ -146,7 +143,7 @@ impl BitSet {
         BitSet {
             tinysets: tinybisets,
             len: 0,
-            max_value
+            max_value,
         }
     }
 
@@ -167,18 +164,16 @@ impl BitSet {
         // we do not check saturated els.
         let higher = el / 64u32;
         let lower = el % 64u32;
-        self.len +=
-            if self.tinysets[higher as usize].insert_mut(lower) {
-                1
-            } else {
-                0
-            };
+        self.len += if self.tinysets[higher as usize].insert_mut(lower) {
+            1
+        } else {
+            0
+        };
     }
 
     /// Returns true iff the elements is in the `BitSet`.
     pub fn contains(&self, el: u32) -> bool {
-        self.tinyset(el / 64u32)
-            .contains(el % 64)
+        self.tinyset(el / 64u32).contains(el % 64)
     }
 
     /// Returns the first non-empty `TinySet` associated to a bucket lower
@@ -206,7 +201,6 @@ impl BitSet {
     }
 }
 
-
 #[cfg(test)]
 mod tests {
 
@@ -229,9 +223,7 @@ mod tests {
             assert!(u.pop_lowest().is_none())
         }
         {
-            let mut u = TinySet::empty()
-                .insert(1u32)
-                .insert(1u32);
+            let mut u = TinySet::empty().insert(1u32).insert(1u32);
             assert_eq!(u.pop_lowest(), Some(1u32));
             assert!(u.pop_lowest().is_none())
         }
@@ -275,7 +267,6 @@ mod tests {
         test_against_hashset(&[62u32, 63u32], 64);
     }
 
-
     #[test]
     fn test_bitset_large() {
         let arr = generate_nonunique_unsorted(1_000_000, 50_000);
@@ -310,16 +301,27 @@ mod tests {
 
     #[test]
     fn test_tinyset_range() {
-        assert_eq!(TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(), [0, 1, 2]);
+        assert_eq!(
+            TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
+            [0, 1, 2]
+        );
         assert!(TinySet::range_lower(0).is_empty());
         assert_eq!(
             TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
             (0u32..63u32).collect::<Vec<u32>>()
         );
-        assert_eq!(TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(), [0]);
-        assert_eq!(TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(), [0, 1]);
         assert_eq!(
-            TinySet::range_greater_or_equal(3).into_iter().collect::<Vec<u32>>(),
+            TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
+            [0]
+        );
+        assert_eq!(
+            TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
+            [0, 1]
+        );
+        assert_eq!(
+            TinySet::range_greater_or_equal(3)
+                .into_iter()
+                .collect::<Vec<u32>>(),
             (3u32..64u32).collect::<Vec<u32>>()
         );
     }
@@ -350,47 +352,31 @@ mod tests {
         assert!(els.iter().all(|el| bitset.contains(*el)));
         bitset.clear();
         for el in 0u32..1000u32 {
-             assert!(!bitset.contains(el));
+            assert!(!bitset.contains(el));
         }
     }
 
     #[bench]
     fn bench_tinyset_pop(b: &mut test::Bencher) {
-        b.iter(|| {
-            test::black_box(TinySet::singleton(31u32))
-                .pop_lowest()
-        });
+        b.iter(|| test::black_box(TinySet::singleton(31u32)).pop_lowest());
     }
 
     #[bench]
     fn bench_tinyset_sum(b: &mut test::Bencher) {
-        let tiny_set = TinySet::empty()
-            .insert(10u32)
-            .insert(14u32)
-            .insert(21u32);
+        let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
         b.iter(|| {
-            assert_eq!(
-                test::black_box(tiny_set).into_iter().sum::<u32>(),
-                45u32);
+            assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
         });
     }
 
     #[bench]
     fn bench_tinyarr_sum(b: &mut test::Bencher) {
-        let v = [10u32, 14u32, 21u32] ;
-        b.iter(|| {
-            test::black_box(v)
-                .iter()
-                .cloned()
-                .sum::<u32>()
-        });
+        let v = [10u32, 14u32, 21u32];
+        b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
     }
 
     #[bench]
     fn bench_bitset_initialize(b: &mut test::Bencher) {
-        b.iter(|| {
-            BitSet::with_max_value(1_000_000)
-        });
+        b.iter(|| BitSet::with_max_value(1_000_000));
     }
 }
-
diff --git a/src/common/mod.rs b/src/common/mod.rs
index c103b468d..66e4bbfde 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -52,7 +52,6 @@ pub(crate) fn compute_num_bits(n: u64) -> u8 {
     }
 }
 
-
 pub(crate) fn is_power_of_2(n: usize) -> bool {
     (n > 0) && (n & (n - 1) == 0)
 }
@@ -128,7 +127,6 @@ pub(crate) mod test {
         }
     }
 
-
     #[test]
     fn test_compute_num_bits() {
         assert_eq!(compute_num_bits(1), 1u8);
@@ -141,4 +139,3 @@ pub(crate) mod test {
         assert_eq!(compute_num_bits(5_000_000_000), 33u8);
     }
 }
-
diff --git a/src/common/serialize.rs b/src/common/serialize.rs
index 9012c0eb2..543b72b19 100644
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -14,7 +14,6 @@ pub trait BinarySerializable: fmt::Debug + Sized {
     fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
 }
 
-
 /// `FixedSize` marks a `BinarySerializable` as
 /// always serializing to the same size.
 pub trait FixedSize: BinarySerializable {
@@ -103,7 +102,6 @@ impl FixedSize for i64 {
     const SIZE_IN_BYTES: usize = 8;
 }
 
-
 impl BinarySerializable for u8 {
     fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
         writer.write_u8(*self)
@@ -134,21 +132,18 @@ impl BinarySerializable for String {
     }
 }
 
-
 #[cfg(test)]
 pub mod test {
 
     use common::VInt;
     use super::*;
 
-
     pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
         let mut buffer = Vec::new();
         O::default().serialize(&mut buffer).unwrap();
         assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
     }
 
-
     fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
         let mut buffer: Vec<u8> = Vec::new();
         v.serialize(&mut buffer).unwrap();
@@ -186,7 +181,10 @@ pub mod test {
     fn test_serialize_string() {
         assert_eq!(serialize_test(String::from("")), 1);
         assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
-        assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
+        assert_eq!(
+            serialize_test(String::from("富士さん見える。")),
+            1 + 3 * 8
+        );
     }
 
     #[test]
diff --git a/src/compression/pack/compression_pack_nosimd.rs b/src/compression/pack/compression_pack_nosimd.rs
index 420cd5dbe..8a083e145 100644
--- a/src/compression/pack/compression_pack_nosimd.rs
+++ b/src/compression/pack/compression_pack_nosimd.rs
@@ -25,7 +25,9 @@ pub fn compress_sorted(vals: &mut [u32], output: &mut [u8], offset: u32) -> usize {
     let mut bit_packer = BitPacker::new();
 
     for val in vals {
-        bit_packer.write(*val as u64, num_bits,&mut counting_writer).unwrap();
+        bit_packer
+            .write(*val as u64, num_bits, &mut counting_writer)
+            .unwrap();
     }
     counting_writer.written_bytes()
 }
@@ -63,7 +65,9 @@ impl BlockEncoder {
         counting_writer.write_all(&[num_bits]).unwrap();
         let mut bit_packer = BitPacker::new();
         for val in vals {
-            bit_packer.write(*val as u64, num_bits, &mut counting_writer).unwrap();
+            bit_packer
+                .write(*val as u64, num_bits, &mut counting_writer)
+                .unwrap();
         }
         for _ in vals.len()..COMPRESSION_BLOCK_SIZE {
             bit_packer
diff --git a/src/compression/pack/compression_pack_simd.rs b/src/compression/pack/compression_pack_simd.rs
index 2db372630..2a900e9ed 100644
--- a/src/compression/pack/compression_pack_simd.rs
+++ b/src/compression/pack/compression_pack_simd.rs
@@ -25,9 +25,7 @@ fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize {
 }
 
 fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize {
-    unsafe {
-        simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset)
-    }
+    unsafe { simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset) }
 }
 
 fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize {
diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs
index a7c11ea88..9382dd3f0 100644
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -14,7 +14,8 @@ pub struct IndexMeta {
     pub segments: Vec<SegmentMeta>,
     pub schema: Schema,
     pub opstamp: u64,
-    #[serde(skip_serializing_if = "Option::is_none")] pub payload: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub payload: Option<String>,
 }
 
 impl IndexMeta {
diff --git a/src/datastruct/skip/mod.rs b/src/datastruct/skip/mod.rs
index 260393e72..7f99888d2 100644
--- a/src/datastruct/skip/mod.rs
+++ b/src/datastruct/skip/mod.rs
@@ -132,7 +132,7 @@ mod tests {
     fn test_skiplist9() {
         let mut output: Vec<u8> = Vec::new();
         let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(4);
-        for i in 0..4*4*4 {
+        for i in 0..4 * 4 * 4 {
             skip_list_builder.insert(i, &i).unwrap();
         }
         skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
@@ -145,7 +145,7 @@ mod tests {
         // checking that void gets serialized to nothing.
         let mut output: Vec<u8> = Vec::new();
         let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
-        for i in 0..((4*4*4) - 1) {
+        for i in 0..((4 * 4 * 4) - 1) {
             skip_list_builder.insert(i, &()).unwrap();
         }
         skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
@@ -158,7 +158,7 @@ mod tests {
         // checking that void gets serialized to nothing.
         let mut output: Vec<u8> = Vec::new();
         let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
-        for i in 0..(4*4) {
+        for i in 0..(4 * 4) {
             skip_list_builder.insert(i, &()).unwrap();
         }
         skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs
index 63aec23dd..0d8b7d416 100644
--- a/src/datastruct/skip/skiplist_builder.rs
+++ b/src/datastruct/skip/skiplist_builder.rs
@@ -1,9 +1,8 @@
 use std::io::Write;
-use common::{is_power_of_2, VInt, BinarySerializable};
+use common::{BinarySerializable, VInt, is_power_of_2};
 use std::marker::PhantomData;
 use std::io;
 
-
 struct LayerBuilder<T: BinarySerializable> {
     period_mask: usize,
     buffer: Vec<u8>,
diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs
index 8fab68e95..43b55daf0 100644
--- a/src/fastfield/serializer.rs
+++ b/src/fastfield/serializer.rs
@@ -93,14 +93,15 @@ impl<'a, W: Write> FastSingleFieldSerializer<'a, W> {
             write,
             bit_packer,
             min_value,
-            num_bits
+            num_bits,
         })
     }
 
     /// Pushes a new value to the currently open u64 fast field.
     pub fn add_val(&mut self, val: u64) -> io::Result<()> {
         let val_to_write: u64 = val - self.min_value;
-        self.bit_packer.write(val_to_write, self.num_bits,&mut self.write)?;
+        self.bit_packer
+            .write(val_to_write, self.num_bits, &mut self.write)?;
         Ok(())
     }
 
diff --git a/src/lib.rs b/src/lib.rs
index ec38e0936..6cdefc7e2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -115,9 +115,6 @@
 //! [literate programming](http://fulmicoton.com/tantivy-examples/simple_search.html) /
 //! [source code](https://github.com/fulmicoton/tantivy/blob/master/examples/simple_search.rs))
 
-
-
-
 #[macro_use]
 extern crate lazy_static;
 
@@ -286,7 +283,7 @@ mod tests {
     use fastfield::{FastFieldReader, I64FastFieldReader, U64FastFieldReader};
     use Postings;
     use rand::{Rng, SeedableRng, XorShiftRng};
-    use rand::distributions::{Range, IndependentSample};
+    use rand::distributions::{IndependentSample, Range};
 
     fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
         let seed: &[u32; 4] = &[1, 2, 3, seed_val];
@@ -306,7 +303,6 @@ mod tests {
             .collect::<Vec<u32>>()
     }
 
-
     pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
         generate_array_with_seed(n, ratio, 4)
     }
diff --git a/src/postings/term_info.rs b/src/postings/term_info.rs
index a6af45e8a..ab42d7253 100644
--- a/src/postings/term_info.rs
+++ b/src/postings/term_info.rs
@@ -29,7 +29,7 @@ impl FixedSize for TermInfo {
     /// of the block are bitpacked.
     ///
     /// See `TermInfoStore`.
-    const SIZE_IN_BYTES: usize = u32::SIZE_IN_BYTES + 2*u64::SIZE_IN_BYTES + u8::SIZE_IN_BYTES;
+    const SIZE_IN_BYTES: usize = u32::SIZE_IN_BYTES + 2 * u64::SIZE_IN_BYTES + u8::SIZE_IN_BYTES;
 }
 
 impl BinarySerializable for TermInfo {
diff --git a/src/query/bitset/mod.rs b/src/query/bitset/mod.rs
index cb8ecde7b..084940a1e 100644
--- a/src/query/bitset/mod.rs
+++ b/src/query/bitset/mod.rs
@@ -50,14 +50,14 @@ impl DocSet for BitSetDocSet {
             return true;
         }
         if let Some(cursor_bucket) = self.docs.first_non_empty_bucket(self.cursor_bucket + 1) {
-        self.go_to_bucket(cursor_bucket);
-        let lower = self.cursor_tinybitset.pop_lowest().unwrap();
-        self.doc = (cursor_bucket * 64u32) | lower;
-        true
+            self.go_to_bucket(cursor_bucket);
+            let lower = self.cursor_tinybitset.pop_lowest().unwrap();
+            self.doc = (cursor_bucket * 64u32) | lower;
+            true
         } else {
             false
         }
-}
+    }
 
     fn skip_next(&mut self, target: DocId) -> SkipResult {
         // skip is required to advance.
@@ -232,14 +232,15 @@ mod tests {
         }
     }
 
-
     #[bench]
     fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
         use tests;
         let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
         b.iter(|| {
             let mut bitset = BitSet::with_max_value(1_000_000);
-            for el in els.iter().cloned() { bitset.insert(el); }
+            for el in els.iter().cloned() {
+                bitset.insert(el);
+            }
         });
     }
 
@@ -248,8 +249,10 @@ mod tests {
         use tests;
         let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
         let mut bitset = BitSet::with_max_value(1_000_000);
-        for el in els { bitset.insert(el); }
-        b.iter(|| { bitset.clone() });
+        for el in els {
+            bitset.insert(el);
+        }
+        b.iter(|| bitset.clone());
     }
 
     #[bench]
@@ -258,11 +261,12 @@ mod tests {
        use tests;
         use DocSet;
         let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
         let mut bitset = BitSet::with_max_value(1_000_000);
-        for el in els { bitset.insert(el); }
+        for el in els {
+            bitset.insert(el);
+        }
         b.iter(|| {
             let mut docset = BitSetDocSet::from(bitset.clone());
             while docset.advance() {}
         });
     }
 }
-
diff --git a/src/query/range_query.rs b/src/query/range_query.rs
index 3b9e65994..162254e1d 100644
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -11,18 +11,18 @@ use query::ConstScorer;
 use std::collections::Bound;
 use std::collections::range::RangeArgument;
 
-
-fn map_bound<TFrom, Transform: Fn(&TFrom) -> Vec<u8>>(bound: Bound<&TFrom>, transform: &Transform) -> Bound<Vec<u8>> {
+fn map_bound<TFrom, Transform: Fn(&TFrom) -> Vec<u8>>(
+    bound: Bound<&TFrom>,
+    transform: &Transform,
+) -> Bound<Vec<u8>> {
     use self::Bound::*;
     match bound {
         Excluded(from_val) => Excluded(transform(from_val)),
         Included(from_val) => Included(transform(from_val)),
-        Unbounded => Unbounded
+        Unbounded => Unbounded,
     }
 }
 
-
-
 /// `RangeQuery` match all documents that have at least one term within a defined range.
 ///
 /// Matched document will all get a constant `Score` of one.
@@ -88,40 +88,42 @@ pub struct RangeQuery {
 }
 
 impl RangeQuery {
-
     /// Create a new `RangeQuery` over a `i64` field.
-    pub fn new_i64<TRangeArgument: RangeArgument<i64>>(field: Field, range: TRangeArgument) -> RangeQuery {
-        let make_term_val = |val: &i64| {
-            Term::from_field_i64(field, *val).value_bytes().to_owned()
-        };
+    pub fn new_i64<TRangeArgument: RangeArgument<i64>>(
+        field: Field,
+        range: TRangeArgument,
+    ) -> RangeQuery {
+        let make_term_val = |val: &i64| Term::from_field_i64(field, *val).value_bytes().to_owned();
         RangeQuery {
             field,
             left_bound: map_bound(range.start(), &make_term_val),
-            right_bound: map_bound(range.end(), &make_term_val)
+            right_bound: map_bound(range.end(), &make_term_val),
         }
     }
 
     /// Create a new `RangeQuery` over a `u64` field.
-    pub fn new_u64<TRangeArgument: RangeArgument<u64>>(field: Field, range: TRangeArgument) -> RangeQuery {
-        let make_term_val = |val: &u64| {
-            Term::from_field_u64(field, *val).value_bytes().to_owned()
-        };
+    pub fn new_u64<TRangeArgument: RangeArgument<u64>>(
+        field: Field,
+        range: TRangeArgument,
+    ) -> RangeQuery {
+        let make_term_val = |val: &u64| Term::from_field_u64(field, *val).value_bytes().to_owned();
         RangeQuery {
             field,
             left_bound: map_bound(range.start(), &make_term_val),
-            right_bound: map_bound(range.end(), &make_term_val)
+            right_bound: map_bound(range.end(), &make_term_val),
         }
     }
 
     /// Create a new `RangeQuery` over a `Str` field.
-    pub fn new_str<'b, TRangeArgument: RangeArgument<&'b str>>(field: Field, range: TRangeArgument) -> RangeQuery {
-        let make_term_val = |val: &&str| {
-            val.as_bytes().to_vec()
-        };
+    pub fn new_str<'b, TRangeArgument: RangeArgument<&'b str>>(
+        field: Field,
+        range: TRangeArgument,
+    ) -> RangeQuery {
+        let make_term_val = |val: &&str| val.as_bytes().to_vec();
         RangeQuery {
             field,
             left_bound: map_bound(range.start(), &make_term_val),
-            right_bound: map_bound(range.end(), &make_term_val)
+            right_bound: map_bound(range.end(), &make_term_val),
         }
     }
 }
@@ -135,7 +137,7 @@ impl Query for RangeQuery {
         Ok(box RangeWeight {
             field: self.field,
             left_bound: self.left_bound.clone(),
-            right_bound: self.right_bound.clone()
+            right_bound: self.right_bound.clone(),
         })
     }
 }
@@ -148,8 +150,8 @@ pub struct RangeWeight {
 
 impl RangeWeight {
     fn term_range<'a, T>(&self, term_dict: &'a T) -> T::Streamer
-        where
-            T: TermDictionary<'a> + 'a,
+    where
+        T: TermDictionary<'a> + 'a,
     {
         use std::collections::Bound::*;
         let mut term_stream_builder = term_dict.range();
@@ -203,10 +205,9 @@ mod tests {
 
     #[test]
     fn test_range_query_simple() {
-
         fn run() -> Result<()> {
             let mut schema_builder = SchemaBuilder::new();
-            let year_field= schema_builder.add_u64_field("year", INT_INDEXED);
+            let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
             let schema = schema_builder.build();
 
             let index = Index::create_in_ram(schema);
@@ -233,7 +234,6 @@ mod tests {
         }
 
         run().unwrap();
-
     }
 
     #[test]
@@ -271,22 +271,22 @@ mod tests {
             count_collector.count()
         };
 
+        assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 10..11)), 9);
         assert_eq!(
-            count_multiples(RangeQuery::new_i64(int_field, 10..11)),
-            9
-        );
-        assert_eq!(
-            count_multiples(RangeQuery::new_i64(int_field, (Bound::Included(10), Bound::Included(11)) )),
+            count_multiples(RangeQuery::new_i64(
+                int_field,
+                (Bound::Included(10), Bound::Included(11))
+            )),
             18
         );
         assert_eq!(
-            count_multiples(RangeQuery::new_i64(int_field, (Bound::Excluded(9), Bound::Included(10)))),
+            count_multiples(RangeQuery::new_i64(
+                int_field,
+                (Bound::Excluded(9), Bound::Included(10))
+            )),
             9
         );
-        assert_eq!(
-            count_multiples(RangeQuery::new_i64(int_field, 9..)),
-            91
-        );
+        assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 9..)), 91);
     }
 }
diff --git a/src/query/scorer.rs b/src/query/scorer.rs
index 2cbeb001d..619e580aa 100644
--- a/src/query/scorer.rs
+++ b/src/query/scorer.rs
@@ -62,7 +62,6 @@ impl Scorer for EmptyScorer {
     }
 }
 
-
 /// Wraps a `DocSet` and simply returns a constant `Scorer`.
 /// The `ConstScorer` is useful if you have a `DocSet` where
 /// you needed a scorer.
@@ -75,7 +74,6 @@ pub struct ConstScorer<TDocSet: DocSet> {
 }
 
 impl<TDocSet: DocSet> ConstScorer<TDocSet> {
-
     /// Creates a new `ConstScorer`.
     pub fn new(docset: TDocSet) -> ConstScorer<TDocSet> {
         ConstScorer {
diff --git a/src/schema/int_options.rs b/src/schema/int_options.rs
index cd1fd8a22..b4a69cf67 100644
--- a/src/schema/int_options.rs
+++ b/src/schema/int_options.rs
@@ -16,7 +16,8 @@ pub enum Cardinality {
 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
 pub struct IntOptions {
     indexed: bool,
-    #[serde(skip_serializing_if = "Option::is_none")] fast: Option<Cardinality>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    fast: Option<Cardinality>,
     stored: bool,
 }
 
diff --git a/src/termdict/fstdict/term_info_store.rs b/src/termdict/fstdict/term_info_store.rs
index 407b68b00..63d8d4957 100644
--- a/src/termdict/fstdict/term_info_store.rs
+++ b/src/termdict/fstdict/term_info_store.rs
@@ -10,10 +10,8 @@ use directory::ReadOnlySource;
 use termdict::TermOrdinal;
 use byteorder::ByteOrder;
 
-
 const BLOCK_LEN: usize = 256;
 
-
 #[derive(Debug, Eq, PartialEq, Default)]
 struct TermInfoBlockMeta {
     offset: u64,
@@ -27,9 +25,11 @@ impl BinarySerializable for TermInfoBlockMeta {
     fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
         self.offset.serialize(write)?;
         self.ref_term_info.serialize(write)?;
-        write.write_all(&[self.doc_freq_nbits,
-                          self.postings_offset_nbits,
-                          self.positions_offset_nbits])?;
+        write.write_all(&[
+            self.doc_freq_nbits,
+            self.postings_offset_nbits,
+            self.positions_offset_nbits,
+        ])?;
         Ok(())
     }
 
@@ -43,17 +43,17 @@ impl BinarySerializable for TermInfoBlockMeta {
             ref_term_info,
             doc_freq_nbits: buffer[0],
             postings_offset_nbits: buffer[1],
-            positions_offset_nbits: buffer[2]
+            positions_offset_nbits: buffer[2],
         })
     }
 }
 
 impl FixedSize for TermInfoBlockMeta {
-    const SIZE_IN_BYTES: usize = u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES;
+    const SIZE_IN_BYTES: usize =
+        u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES;
 }
 
 impl TermInfoBlockMeta {
-
     fn num_bits(&self) -> u8 {
         self.doc_freq_nbits + self.postings_offset_nbits + self.positions_offset_nbits + 7
     }
@@ -82,11 +82,10 @@ impl TermInfoBlockMeta {
     }
 }
 
-
 pub struct TermInfoStore {
     num_terms: usize,
     block_meta_source: ReadOnlySource,
-    term_info_source: ReadOnlySource
+    term_info_source: ReadOnlySource,
 }
 
 fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
@@ -109,7 +108,7 @@ impl TermInfoStore {
         TermInfoStore {
             num_terms,
             block_meta_source,
-            term_info_source
+            term_info_source,
         }
     }
 
@@ -117,13 +116,17 @@ impl TermInfoStore {
         let block_id = (term_ord as usize) / BLOCK_LEN;
         let buffer = self.block_meta_source.as_slice();
         let mut block_data: &[u8] = &buffer[block_id * TermInfoBlockMeta::SIZE_IN_BYTES..];
-        let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data).expect("Failed to deserialize terminfoblockmeta");
+        let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data)
+            .expect("Failed to deserialize terminfoblockmeta");
         let inner_offset = (term_ord as usize) % BLOCK_LEN;
         if inner_offset == 0 {
             term_info_block_data.ref_term_info
         } else {
             let term_info_data = self.term_info_source.as_slice();
-            term_info_block_data.deserialize_term_info(&term_info_data[term_info_block_data.offset as usize..], inner_offset - 1)
+            term_info_block_data.deserialize_term_info(
+                &term_info_data[term_info_block_data.offset as usize..],
+                inner_offset - 1,
+            )
         }
     }
 
@@ -140,13 +143,26 @@ pub struct TermInfoStoreWriter {
 }
 
 fn bitpack_serialize<W: Write>(
-        write: &mut W,
-        bit_packer: &mut BitPacker,
-        term_info_block_meta: &TermInfoBlockMeta,
-        term_info: &TermInfo) -> io::Result<()> {
-    bit_packer.write(term_info.doc_freq as u64,
-                     term_info_block_meta.doc_freq_nbits, write)?;
-    bit_packer.write(term_info.postings_offset, term_info_block_meta.postings_offset_nbits, write)?;
-    bit_packer.write(term_info.positions_offset, term_info_block_meta.positions_offset_nbits, write)?;
+    write: &mut W,
+    bit_packer: &mut BitPacker,
+    term_info_block_meta: &TermInfoBlockMeta,
+    term_info: &TermInfo,
+) -> io::Result<()> {
+    bit_packer.write(
+        term_info.doc_freq as u64,
+        term_info_block_meta.doc_freq_nbits,
+        write,
+    )?;
+    bit_packer.write(
+        term_info.postings_offset,
+        term_info_block_meta.postings_offset_nbits,
+        write,
+    )?;
+    bit_packer.write(
+        term_info.positions_offset,
+        term_info_block_meta.positions_offset_nbits,
+        write,
+    )?;
     bit_packer.write(term_info.positions_inner_offset as u64, 7, write)?;
     Ok(())
 }
 
@@ -157,7 +173,7 @@ impl TermInfoStoreWriter {
             buffer_block_metas: Vec::new(),
             buffer_term_infos: Vec::new(),
             term_infos: Vec::with_capacity(BLOCK_LEN),
-            num_terms: 0u64
+            num_terms: 0u64,
         }
     }
 
@@ -199,7 +215,7 @@ impl TermInfoStoreWriter {
             &mut self.buffer_term_infos,
             &mut bit_packer,
             &term_info_block_meta,
-            &term_info
+            &term_info,
         )?;
     }
 
@@ -276,11 +292,11 @@ mod tests {
                 doc_freq: 512,
                 postings_offset: 51,
                 positions_offset: 3584,
-                positions_inner_offset: 0
+                positions_inner_offset: 0,
             },
             doc_freq_nbits: 10,
             postings_offset_nbits: 5,
-            positions_offset_nbits: 11
+            positions_offset_nbits: 11,
         };
         let mut buffer: Vec<u8> = Vec::new();
         term_info_block_meta.serialize(&mut buffer).unwrap();
@@ -292,7 +308,7 @@ mod tests {
     #[test]
     fn test_pack() {
         let mut store_writer = TermInfoStoreWriter::new();
-        let mut term_infos = vec!();
+        let mut term_infos = vec![];
        for i in 0..1000 {
             let term_info = TermInfo {
                 doc_freq: i as u32,
@@ -304,9 +320,7 @@ mod tests {
             term_infos.push(term_info);
         }
         let mut buffer = Vec::new();
-        store_writer
-            .serialize(&mut buffer)
-            .unwrap();
+        store_writer.serialize(&mut buffer).unwrap();
         let term_info_store = TermInfoStore::open(ReadOnlySource::from(buffer));
         for i in 0..1000 {
             assert_eq!(term_info_store.get(i as u64), term_infos[i]);
@@ -314,5 +328,3 @@ mod tests {
     }
 }
-
-
diff --git a/src/termdict/fstdict/termdict.rs b/src/termdict/fstdict/termdict.rs
index f2d1dfaa6..4a4d1be9a 100644
--- a/src/termdict/fstdict/termdict.rs
+++ b/src/termdict/fstdict/termdict.rs
@@ -7,7 +7,7 @@ use common::CountingWriter;
 use schema::FieldType;
 use postings::TermInfo;
 use termdict::{TermDictionary, TermDictionaryBuilder, TermOrdinal};
-use super::{TermStreamerBuilderImpl, TermStreamerImpl, TermInfoStoreWriter, TermInfoStore};
+use super::{TermInfoStore, TermInfoStoreWriter, TermStreamerBuilderImpl, TermStreamerImpl};
 
 fn convert_fst_error(e: fst::Error) -> io::Error {
     io::Error::new(io::ErrorKind::Other, e)
diff --git a/src/tokenizer/facet_tokenizer.rs b/src/tokenizer/facet_tokenizer.rs
index 982c35f7b..95b0d3711 100644
--- a/src/tokenizer/facet_tokenizer.rs
+++ b/src/tokenizer/facet_tokenizer.rs
@@ -87,6 +87,7 @@ mod tests {
     use tokenizer::{Token, TokenStream, Tokenizer};
     use super::FacetTokenizer;
     use schema::Facet;
+    use std::str;
 
     #[test]
     fn test_facet_tokenizer() {
@@ -98,9 +99,7 @@ mod tests {
                 tokens.push(format!("{}", facet));
             };
             FacetTokenizer
-                .token_stream(unsafe {
-                    ::std::str::from_utf8_unchecked(facet.encoded_bytes())
-                })
+                .token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
                 .process(&mut add_token);
         }
         assert_eq!(tokens.len(), 4);
@@ -120,9 +119,7 @@ mod tests {
                 tokens.push(format!("{}", facet));
             };
             FacetTokenizer
-                .token_stream(unsafe {
-                    ::std::str::from_utf8_unchecked(facet.encoded_bytes())
-                })
+                .token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
                 .process(&mut add_token);
         }
         assert_eq!(tokens.len(), 1);