mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
Rustfmt
This commit is contained in:
@@ -4,21 +4,25 @@ use common::serialize::BinarySerializable;
|
||||
use std::mem;
|
||||
use std::ops::Deref;
|
||||
|
||||
|
||||
pub(crate) struct BitPacker {
|
||||
mini_buffer: u64,
|
||||
mini_buffer_written: usize
|
||||
mini_buffer_written: usize,
|
||||
}
|
||||
|
||||
impl BitPacker {
|
||||
pub fn new() -> BitPacker {
|
||||
BitPacker {
|
||||
mini_buffer: 0u64,
|
||||
mini_buffer_written: 0
|
||||
mini_buffer_written: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write<TWrite: Write>(&mut self, val: u64, num_bits: u8, output: &mut TWrite) -> io::Result<()> {
|
||||
pub fn write<TWrite: Write>(
|
||||
&mut self,
|
||||
val: u64,
|
||||
num_bits: u8,
|
||||
output: &mut TWrite,
|
||||
) -> io::Result<()> {
|
||||
let val_u64 = val as u64;
|
||||
let num_bits = num_bits as usize;
|
||||
if self.mini_buffer_written + num_bits > 64 {
|
||||
@@ -58,8 +62,8 @@ impl BitPacker {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BitUnpacker<Data>
|
||||
where
|
||||
Data: Deref<Target=[u8]>,
|
||||
where
|
||||
Data: Deref<Target = [u8]>,
|
||||
{
|
||||
num_bits: usize,
|
||||
mask: u64,
|
||||
@@ -67,16 +71,15 @@ pub struct BitUnpacker<Data>
|
||||
}
|
||||
|
||||
impl<Data> BitUnpacker<Data>
|
||||
where
|
||||
Data: Deref<Target=[u8]>,
|
||||
where
|
||||
Data: Deref<Target = [u8]>,
|
||||
{
|
||||
pub fn new(data: Data, num_bits: u8) -> BitUnpacker<Data> {
|
||||
let mask: u64 =
|
||||
if num_bits == 64 {
|
||||
!0u64
|
||||
} else {
|
||||
(1u64 << num_bits) - 1u64
|
||||
};
|
||||
let mask: u64 = if num_bits == 64 {
|
||||
!0u64
|
||||
} else {
|
||||
(1u64 << num_bits) - 1u64
|
||||
};
|
||||
BitUnpacker {
|
||||
num_bits: num_bits as usize,
|
||||
mask,
|
||||
@@ -102,8 +105,7 @@ impl<Data> BitUnpacker<Data>
|
||||
addr + 8 <= data.len(),
|
||||
"The fast field field should have been padded with 7 bytes."
|
||||
);
|
||||
let val_unshifted_unmasked: u64 =
|
||||
unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
(val_shifted & mask)
|
||||
} else {
|
||||
@@ -134,8 +136,7 @@ impl<Data> BitUnpacker<Data>
|
||||
for output_val in output.iter_mut() {
|
||||
let addr = addr_in_bits >> 3;
|
||||
let bit_shift = addr_in_bits & 7;
|
||||
let val_unshifted_unmasked: u64 =
|
||||
unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
*output_val = val_shifted & mask;
|
||||
addr_in_bits += num_bits;
|
||||
@@ -148,7 +149,6 @@ impl<Data> BitUnpacker<Data>
|
||||
mod test {
|
||||
use super::{BitPacker, BitUnpacker};
|
||||
|
||||
|
||||
fn create_fastfield_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
|
||||
let mut data = Vec::new();
|
||||
let mut bitpacker = BitPacker::new();
|
||||
@@ -157,10 +157,10 @@ mod test {
|
||||
.map(|i| if max_val == 0 { 0 } else { i % max_val })
|
||||
.collect();
|
||||
for &val in &vals {
|
||||
bitpacker.write(val, num_bits,&mut data).unwrap();
|
||||
bitpacker.write(val, num_bits, &mut data).unwrap();
|
||||
}
|
||||
bitpacker.close(&mut data).unwrap();
|
||||
assert_eq!(data.len(), ((num_bits as usize)* len + 7) / 8 + 7);
|
||||
assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8 + 7);
|
||||
let bitunpacker = BitUnpacker::new(data, num_bits);
|
||||
(bitunpacker, vals)
|
||||
}
|
||||
|
||||
@@ -27,7 +27,6 @@ impl IntoIterator for TinySet {
|
||||
}
|
||||
|
||||
impl TinySet {
|
||||
|
||||
/// Returns an empty `TinySet`.
|
||||
pub fn empty() -> TinySet {
|
||||
TinySet(0u64)
|
||||
@@ -38,7 +37,6 @@ impl TinySet {
|
||||
TinySet(!self.0)
|
||||
}
|
||||
|
||||
|
||||
/// Returns true iff the `TinySet` contains the element `el`.
|
||||
pub fn contains(&self, el: u32) -> bool {
|
||||
!self.intersect(TinySet::singleton(el)).is_empty()
|
||||
@@ -137,7 +135,6 @@ fn num_buckets(max_val: u32) -> u32 {
|
||||
}
|
||||
|
||||
impl BitSet {
|
||||
|
||||
/// Create a new `BitSet` that may contain elements
|
||||
/// within `[0, max_val[`.
|
||||
pub fn with_max_value(max_value: u32) -> BitSet {
|
||||
@@ -146,7 +143,7 @@ impl BitSet {
|
||||
BitSet {
|
||||
tinysets: tinybisets,
|
||||
len: 0,
|
||||
max_value
|
||||
max_value,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,18 +164,16 @@ impl BitSet {
|
||||
// we do not check saturated els.
|
||||
let higher = el / 64u32;
|
||||
let lower = el % 64u32;
|
||||
self.len +=
|
||||
if self.tinysets[higher as usize].insert_mut(lower) {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
self.len += if self.tinysets[higher as usize].insert_mut(lower) {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns true iff the elements is in the `BitSet`.
|
||||
pub fn contains(&self, el: u32) -> bool {
|
||||
self.tinyset(el / 64u32)
|
||||
.contains(el % 64)
|
||||
self.tinyset(el / 64u32).contains(el % 64)
|
||||
}
|
||||
|
||||
/// Returns the first non-empty `TinySet` associated to a bucket lower
|
||||
@@ -206,7 +201,6 @@ impl BitSet {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -229,9 +223,7 @@ mod tests {
|
||||
assert!(u.pop_lowest().is_none())
|
||||
}
|
||||
{
|
||||
let mut u = TinySet::empty()
|
||||
.insert(1u32)
|
||||
.insert(1u32);
|
||||
let mut u = TinySet::empty().insert(1u32).insert(1u32);
|
||||
assert_eq!(u.pop_lowest(), Some(1u32));
|
||||
assert!(u.pop_lowest().is_none())
|
||||
}
|
||||
@@ -275,7 +267,6 @@ mod tests {
|
||||
test_against_hashset(&[62u32, 63u32], 64);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_bitset_large() {
|
||||
let arr = generate_nonunique_unsorted(1_000_000, 50_000);
|
||||
@@ -310,16 +301,27 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_tinyset_range() {
|
||||
assert_eq!(TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(), [0, 1, 2]);
|
||||
assert_eq!(
|
||||
TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
|
||||
[0, 1, 2]
|
||||
);
|
||||
assert!(TinySet::range_lower(0).is_empty());
|
||||
assert_eq!(
|
||||
TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
|
||||
(0u32..63u32).collect::<Vec<_>>()
|
||||
);
|
||||
assert_eq!(TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(), [0]);
|
||||
assert_eq!(TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(), [0, 1]);
|
||||
assert_eq!(
|
||||
TinySet::range_greater_or_equal(3).into_iter().collect::<Vec<u32>>(),
|
||||
TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
|
||||
[0]
|
||||
);
|
||||
assert_eq!(
|
||||
TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
|
||||
[0, 1]
|
||||
);
|
||||
assert_eq!(
|
||||
TinySet::range_greater_or_equal(3)
|
||||
.into_iter()
|
||||
.collect::<Vec<u32>>(),
|
||||
(3u32..64u32).collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
@@ -350,47 +352,31 @@ mod tests {
|
||||
assert!(els.iter().all(|el| bitset.contains(*el)));
|
||||
bitset.clear();
|
||||
for el in 0u32..1000u32 {
|
||||
assert!(!bitset.contains(el));
|
||||
assert!(!bitset.contains(el));
|
||||
}
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyset_pop(b: &mut test::Bencher) {
|
||||
b.iter(|| {
|
||||
test::black_box(TinySet::singleton(31u32))
|
||||
.pop_lowest()
|
||||
});
|
||||
b.iter(|| test::black_box(TinySet::singleton(31u32)).pop_lowest());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyset_sum(b: &mut test::Bencher) {
|
||||
let tiny_set = TinySet::empty()
|
||||
.insert(10u32)
|
||||
.insert(14u32)
|
||||
.insert(21u32);
|
||||
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
|
||||
b.iter(|| {
|
||||
assert_eq!(
|
||||
test::black_box(tiny_set).into_iter().sum::<u32>(),
|
||||
45u32);
|
||||
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyarr_sum(b: &mut test::Bencher) {
|
||||
let v = [10u32, 14u32, 21u32] ;
|
||||
b.iter(|| {
|
||||
test::black_box(v)
|
||||
.iter()
|
||||
.cloned()
|
||||
.sum::<u32>()
|
||||
});
|
||||
let v = [10u32, 14u32, 21u32];
|
||||
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_initialize(b: &mut test::Bencher) {
|
||||
b.iter(|| {
|
||||
BitSet::with_max_value(1_000_000)
|
||||
});
|
||||
b.iter(|| BitSet::with_max_value(1_000_000));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,6 @@ pub(crate) fn compute_num_bits(n: u64) -> u8 {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub(crate) fn is_power_of_2(n: usize) -> bool {
|
||||
(n > 0) && (n & (n - 1) == 0)
|
||||
}
|
||||
@@ -128,7 +127,6 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_compute_num_bits() {
|
||||
assert_eq!(compute_num_bits(1), 1u8);
|
||||
@@ -141,4 +139,3 @@ pub(crate) mod test {
|
||||
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ pub trait BinarySerializable: fmt::Debug + Sized {
|
||||
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
|
||||
}
|
||||
|
||||
|
||||
/// `FixedSize` marks a `BinarySerializable` as
|
||||
/// always serializing to the same size.
|
||||
pub trait FixedSize: BinarySerializable {
|
||||
@@ -103,7 +102,6 @@ impl FixedSize for i64 {
|
||||
const SIZE_IN_BYTES: usize = 8;
|
||||
}
|
||||
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
writer.write_u8(*self)
|
||||
@@ -134,21 +132,18 @@ impl BinarySerializable for String {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
|
||||
use common::VInt;
|
||||
use super::*;
|
||||
|
||||
|
||||
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
|
||||
let mut buffer = Vec::new();
|
||||
O::default().serialize(&mut buffer).unwrap();
|
||||
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
|
||||
}
|
||||
|
||||
|
||||
fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
v.serialize(&mut buffer).unwrap();
|
||||
@@ -186,7 +181,10 @@ pub mod test {
|
||||
fn test_serialize_string() {
|
||||
assert_eq!(serialize_test(String::from("")), 1);
|
||||
assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
|
||||
assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
|
||||
assert_eq!(
|
||||
serialize_test(String::from("富士さん見える。")),
|
||||
1 + 3 * 8
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -25,7 +25,9 @@ pub fn compress_sorted(vals: &mut [u32], output: &mut [u8], offset: u32) -> usiz
|
||||
|
||||
let mut bit_packer = BitPacker::new();
|
||||
for val in vals {
|
||||
bit_packer.write(*val as u64, num_bits,&mut counting_writer).unwrap();
|
||||
bit_packer
|
||||
.write(*val as u64, num_bits, &mut counting_writer)
|
||||
.unwrap();
|
||||
}
|
||||
counting_writer.written_bytes()
|
||||
}
|
||||
@@ -63,7 +65,9 @@ impl BlockEncoder {
|
||||
counting_writer.write_all(&[num_bits]).unwrap();
|
||||
let mut bit_packer = BitPacker::new();
|
||||
for val in vals {
|
||||
bit_packer.write(*val as u64, num_bits, &mut counting_writer).unwrap();
|
||||
bit_packer
|
||||
.write(*val as u64, num_bits, &mut counting_writer)
|
||||
.unwrap();
|
||||
}
|
||||
for _ in vals.len()..COMPRESSION_BLOCK_SIZE {
|
||||
bit_packer
|
||||
|
||||
@@ -25,9 +25,7 @@ fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize {
|
||||
}
|
||||
|
||||
fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize {
|
||||
unsafe {
|
||||
simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset)
|
||||
}
|
||||
unsafe { simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset) }
|
||||
}
|
||||
|
||||
fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize {
|
||||
|
||||
@@ -14,7 +14,8 @@ pub struct IndexMeta {
|
||||
pub segments: Vec<SegmentMeta>,
|
||||
pub schema: Schema,
|
||||
pub opstamp: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")] pub payload: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub payload: Option<String>,
|
||||
}
|
||||
|
||||
impl IndexMeta {
|
||||
|
||||
@@ -132,7 +132,7 @@ mod tests {
|
||||
fn test_skiplist9() {
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(4);
|
||||
for i in 0..4*4*4 {
|
||||
for i in 0..4 * 4 * 4 {
|
||||
skip_list_builder.insert(i, &i).unwrap();
|
||||
}
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
|
||||
@@ -145,7 +145,7 @@ mod tests {
|
||||
// checking that void gets serialized to nothing.
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
|
||||
for i in 0..((4*4*4) - 1) {
|
||||
for i in 0..((4 * 4 * 4) - 1) {
|
||||
skip_list_builder.insert(i, &()).unwrap();
|
||||
}
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
|
||||
@@ -158,7 +158,7 @@ mod tests {
|
||||
// checking that void gets serialized to nothing.
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
|
||||
for i in 0..(4*4) {
|
||||
for i in 0..(4 * 4) {
|
||||
skip_list_builder.insert(i, &()).unwrap();
|
||||
}
|
||||
skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use std::io::Write;
|
||||
use common::{is_power_of_2, VInt, BinarySerializable};
|
||||
use common::{BinarySerializable, VInt, is_power_of_2};
|
||||
use std::marker::PhantomData;
|
||||
use std::io;
|
||||
|
||||
|
||||
struct LayerBuilder<T: BinarySerializable> {
|
||||
period_mask: usize,
|
||||
buffer: Vec<u8>,
|
||||
|
||||
@@ -93,14 +93,15 @@ impl<'a, W: Write> FastSingleFieldSerializer<'a, W> {
|
||||
write,
|
||||
bit_packer,
|
||||
min_value,
|
||||
num_bits
|
||||
num_bits,
|
||||
})
|
||||
}
|
||||
|
||||
/// Pushes a new value to the currently open u64 fast field.
|
||||
pub fn add_val(&mut self, val: u64) -> io::Result<()> {
|
||||
let val_to_write: u64 = val - self.min_value;
|
||||
self.bit_packer.write(val_to_write, self.num_bits,&mut self.write)?;
|
||||
self.bit_packer
|
||||
.write(val_to_write, self.num_bits, &mut self.write)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -115,9 +115,6 @@
|
||||
//! [literate programming](http://fulmicoton.com/tantivy-examples/simple_search.html) /
|
||||
//! [source code](https://github.com/fulmicoton/tantivy/blob/master/examples/simple_search.rs))
|
||||
|
||||
|
||||
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
@@ -286,7 +283,7 @@ mod tests {
|
||||
use fastfield::{FastFieldReader, I64FastFieldReader, U64FastFieldReader};
|
||||
use Postings;
|
||||
use rand::{Rng, SeedableRng, XorShiftRng};
|
||||
use rand::distributions::{Range, IndependentSample};
|
||||
use rand::distributions::{IndependentSample, Range};
|
||||
|
||||
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
|
||||
@@ -306,7 +303,6 @@ mod tests {
|
||||
.collect::<Vec<u32>>()
|
||||
}
|
||||
|
||||
|
||||
pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
|
||||
generate_array_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ impl FixedSize for TermInfo {
|
||||
/// of the block are bitpacked.
|
||||
///
|
||||
/// See `TermInfoStore`.
|
||||
const SIZE_IN_BYTES: usize = u32::SIZE_IN_BYTES + 2*u64::SIZE_IN_BYTES + u8::SIZE_IN_BYTES;
|
||||
const SIZE_IN_BYTES: usize = u32::SIZE_IN_BYTES + 2 * u64::SIZE_IN_BYTES + u8::SIZE_IN_BYTES;
|
||||
}
|
||||
|
||||
impl BinarySerializable for TermInfo {
|
||||
|
||||
@@ -50,14 +50,14 @@ impl DocSet for BitSetDocSet {
|
||||
return true;
|
||||
}
|
||||
if let Some(cursor_bucket) = self.docs.first_non_empty_bucket(self.cursor_bucket + 1) {
|
||||
self.go_to_bucket(cursor_bucket);
|
||||
let lower = self.cursor_tinybitset.pop_lowest().unwrap();
|
||||
self.doc = (cursor_bucket * 64u32) | lower;
|
||||
true
|
||||
self.go_to_bucket(cursor_bucket);
|
||||
let lower = self.cursor_tinybitset.pop_lowest().unwrap();
|
||||
self.doc = (cursor_bucket * 64u32) | lower;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
// skip is required to advance.
|
||||
@@ -232,14 +232,15 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
b.iter(|| {
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els.iter().cloned() { bitset.insert(el); }
|
||||
for el in els.iter().cloned() {
|
||||
bitset.insert(el);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -248,8 +249,10 @@ mod tests {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els { bitset.insert(el); }
|
||||
b.iter(|| { bitset.clone() });
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| bitset.clone());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
@@ -258,11 +261,12 @@ mod tests {
|
||||
use DocSet;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els { bitset.insert(el); }
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| {
|
||||
let mut docset = BitSetDocSet::from(bitset.clone());
|
||||
while docset.advance() {}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,18 +11,18 @@ use query::ConstScorer;
|
||||
use std::collections::Bound;
|
||||
use std::collections::range::RangeArgument;
|
||||
|
||||
|
||||
fn map_bound<TFrom, Transform: Fn(TFrom)->Vec<u8> >(bound: Bound<TFrom>, transform: &Transform) -> Bound<Vec<u8>> {
|
||||
fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
|
||||
bound: Bound<TFrom>,
|
||||
transform: &Transform,
|
||||
) -> Bound<Vec<u8>> {
|
||||
use self::Bound::*;
|
||||
match bound {
|
||||
Excluded(from_val) => Excluded(transform(from_val)),
|
||||
Included(from_val) => Included(transform(from_val)),
|
||||
Unbounded => Unbounded
|
||||
Unbounded => Unbounded,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// `RangeQuery` match all documents that have at least one term within a defined range.
|
||||
///
|
||||
/// Matched document will all get a constant `Score` of one.
|
||||
@@ -88,40 +88,42 @@ pub struct RangeQuery {
|
||||
}
|
||||
|
||||
impl RangeQuery {
|
||||
|
||||
/// Create a new `RangeQuery` over a `i64` field.
|
||||
pub fn new_i64<TRangeArgument: RangeArgument<i64>>(field: Field, range: TRangeArgument) -> RangeQuery {
|
||||
let make_term_val = |val: &i64| {
|
||||
Term::from_field_i64(field, *val).value_bytes().to_owned()
|
||||
};
|
||||
pub fn new_i64<TRangeArgument: RangeArgument<i64>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &i64| Term::from_field_i64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val)
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `u64` field.
|
||||
pub fn new_u64<TRangeArgument: RangeArgument<u64>>(field: Field, range: TRangeArgument) -> RangeQuery {
|
||||
let make_term_val = |val: &u64| {
|
||||
Term::from_field_u64(field, *val).value_bytes().to_owned()
|
||||
};
|
||||
pub fn new_u64<TRangeArgument: RangeArgument<u64>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &u64| Term::from_field_u64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val)
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `Str` field.
|
||||
pub fn new_str<'b, TRangeArgument: RangeArgument<&'b str>>(field: Field, range: TRangeArgument) -> RangeQuery {
|
||||
let make_term_val = |val: &&str| {
|
||||
val.as_bytes().to_vec()
|
||||
};
|
||||
pub fn new_str<'b, TRangeArgument: RangeArgument<&'b str>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &&str| val.as_bytes().to_vec();
|
||||
RangeQuery {
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val)
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -135,7 +137,7 @@ impl Query for RangeQuery {
|
||||
Ok(box RangeWeight {
|
||||
field: self.field,
|
||||
left_bound: self.left_bound.clone(),
|
||||
right_bound: self.right_bound.clone()
|
||||
right_bound: self.right_bound.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -148,8 +150,8 @@ pub struct RangeWeight {
|
||||
|
||||
impl RangeWeight {
|
||||
fn term_range<'a, T>(&self, term_dict: &'a T) -> T::Streamer
|
||||
where
|
||||
T: TermDictionary<'a> + 'a,
|
||||
where
|
||||
T: TermDictionary<'a> + 'a,
|
||||
{
|
||||
use std::collections::Bound::*;
|
||||
let mut term_stream_builder = term_dict.range();
|
||||
@@ -203,10 +205,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_range_query_simple() {
|
||||
|
||||
fn run() -> Result<()> {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let year_field= schema_builder.add_u64_field("year", INT_INDEXED);
|
||||
let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
@@ -233,7 +234,6 @@ mod tests {
|
||||
}
|
||||
|
||||
run().unwrap();
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -271,22 +271,22 @@ mod tests {
|
||||
count_collector.count()
|
||||
};
|
||||
|
||||
assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 10..11)), 9);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_i64(int_field, 10..11)),
|
||||
9
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_i64(int_field, (Bound::Included(10), Bound::Included(11)) )),
|
||||
count_multiples(RangeQuery::new_i64(
|
||||
int_field,
|
||||
(Bound::Included(10), Bound::Included(11))
|
||||
)),
|
||||
18
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_i64(int_field, (Bound::Excluded(9), Bound::Included(10)))),
|
||||
count_multiples(RangeQuery::new_i64(
|
||||
int_field,
|
||||
(Bound::Excluded(9), Bound::Included(10))
|
||||
)),
|
||||
9
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_i64(int_field, 9..)),
|
||||
91
|
||||
);
|
||||
assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 9..)), 91);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -62,7 +62,6 @@ impl Scorer for EmptyScorer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Wraps a `DocSet` and simply returns a constant `Scorer`.
|
||||
/// The `ConstScorer` is useful if you have a `DocSet` where
|
||||
/// you needed a scorer.
|
||||
@@ -75,7 +74,6 @@ pub struct ConstScorer<TDocSet: DocSet> {
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> ConstScorer<TDocSet> {
|
||||
|
||||
/// Creates a new `ConstScorer`.
|
||||
pub fn new(docset: TDocSet) -> ConstScorer<TDocSet> {
|
||||
ConstScorer {
|
||||
|
||||
@@ -16,7 +16,8 @@ pub enum Cardinality {
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct IntOptions {
|
||||
indexed: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")] fast: Option<Cardinality>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
fast: Option<Cardinality>,
|
||||
stored: bool,
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,8 @@ use directory::ReadOnlySource;
|
||||
use termdict::TermOrdinal;
|
||||
use byteorder::ByteOrder;
|
||||
|
||||
|
||||
const BLOCK_LEN: usize = 256;
|
||||
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Default)]
|
||||
struct TermInfoBlockMeta {
|
||||
offset: u64,
|
||||
@@ -27,9 +25,11 @@ impl BinarySerializable for TermInfoBlockMeta {
|
||||
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
|
||||
self.offset.serialize(write)?;
|
||||
self.ref_term_info.serialize(write)?;
|
||||
write.write_all(&[self.doc_freq_nbits,
|
||||
self.postings_offset_nbits,
|
||||
self.positions_offset_nbits])?;
|
||||
write.write_all(&[
|
||||
self.doc_freq_nbits,
|
||||
self.postings_offset_nbits,
|
||||
self.positions_offset_nbits,
|
||||
])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -43,17 +43,17 @@ impl BinarySerializable for TermInfoBlockMeta {
|
||||
ref_term_info,
|
||||
doc_freq_nbits: buffer[0],
|
||||
postings_offset_nbits: buffer[1],
|
||||
positions_offset_nbits: buffer[2]
|
||||
positions_offset_nbits: buffer[2],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FixedSize for TermInfoBlockMeta {
|
||||
const SIZE_IN_BYTES: usize = u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES;
|
||||
const SIZE_IN_BYTES: usize =
|
||||
u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES;
|
||||
}
|
||||
|
||||
impl TermInfoBlockMeta {
|
||||
|
||||
fn num_bits(&self) -> u8 {
|
||||
self.doc_freq_nbits + self.postings_offset_nbits + self.positions_offset_nbits + 7
|
||||
}
|
||||
@@ -82,11 +82,10 @@ impl TermInfoBlockMeta {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub struct TermInfoStore {
|
||||
num_terms: usize,
|
||||
block_meta_source: ReadOnlySource,
|
||||
term_info_source: ReadOnlySource
|
||||
term_info_source: ReadOnlySource,
|
||||
}
|
||||
|
||||
fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
|
||||
@@ -109,7 +108,7 @@ impl TermInfoStore {
|
||||
TermInfoStore {
|
||||
num_terms,
|
||||
block_meta_source,
|
||||
term_info_source
|
||||
term_info_source,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,13 +116,17 @@ impl TermInfoStore {
|
||||
let block_id = (term_ord as usize) / BLOCK_LEN;
|
||||
let buffer = self.block_meta_source.as_slice();
|
||||
let mut block_data: &[u8] = &buffer[block_id * TermInfoBlockMeta::SIZE_IN_BYTES..];
|
||||
let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data).expect("Failed to deserialize terminfoblockmeta");
|
||||
let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data)
|
||||
.expect("Failed to deserialize terminfoblockmeta");
|
||||
let inner_offset = (term_ord as usize) % BLOCK_LEN;
|
||||
if inner_offset == 0 {
|
||||
term_info_block_data.ref_term_info
|
||||
} else {
|
||||
let term_info_data = self.term_info_source.as_slice();
|
||||
term_info_block_data.deserialize_term_info(&term_info_data[term_info_block_data.offset as usize..], inner_offset - 1)
|
||||
term_info_block_data.deserialize_term_info(
|
||||
&term_info_data[term_info_block_data.offset as usize..],
|
||||
inner_offset - 1,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,13 +143,26 @@ pub struct TermInfoStoreWriter {
|
||||
}
|
||||
|
||||
fn bitpack_serialize<W: Write>(
|
||||
write: &mut W,
|
||||
bit_packer: &mut BitPacker,
|
||||
term_info_block_meta: &TermInfoBlockMeta,
|
||||
term_info: &TermInfo) -> io::Result<()> {
|
||||
bit_packer.write(term_info.doc_freq as u64, term_info_block_meta.doc_freq_nbits, write)?;
|
||||
bit_packer.write(term_info.postings_offset, term_info_block_meta.postings_offset_nbits, write)?;
|
||||
bit_packer.write(term_info.positions_offset, term_info_block_meta.positions_offset_nbits, write)?;
|
||||
write: &mut W,
|
||||
bit_packer: &mut BitPacker,
|
||||
term_info_block_meta: &TermInfoBlockMeta,
|
||||
term_info: &TermInfo,
|
||||
) -> io::Result<()> {
|
||||
bit_packer.write(
|
||||
term_info.doc_freq as u64,
|
||||
term_info_block_meta.doc_freq_nbits,
|
||||
write,
|
||||
)?;
|
||||
bit_packer.write(
|
||||
term_info.postings_offset,
|
||||
term_info_block_meta.postings_offset_nbits,
|
||||
write,
|
||||
)?;
|
||||
bit_packer.write(
|
||||
term_info.positions_offset,
|
||||
term_info_block_meta.positions_offset_nbits,
|
||||
write,
|
||||
)?;
|
||||
bit_packer.write(term_info.positions_inner_offset as u64, 7, write)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -157,7 +173,7 @@ impl TermInfoStoreWriter {
|
||||
buffer_block_metas: Vec::new(),
|
||||
buffer_term_infos: Vec::new(),
|
||||
term_infos: Vec::with_capacity(BLOCK_LEN),
|
||||
num_terms: 0u64
|
||||
num_terms: 0u64,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,7 +215,7 @@ impl TermInfoStoreWriter {
|
||||
&mut self.buffer_term_infos,
|
||||
&mut bit_packer,
|
||||
&term_info_block_meta,
|
||||
&term_info
|
||||
&term_info,
|
||||
)?;
|
||||
}
|
||||
|
||||
@@ -276,11 +292,11 @@ mod tests {
|
||||
doc_freq: 512,
|
||||
postings_offset: 51,
|
||||
positions_offset: 3584,
|
||||
positions_inner_offset: 0
|
||||
positions_inner_offset: 0,
|
||||
},
|
||||
doc_freq_nbits: 10,
|
||||
postings_offset_nbits: 5,
|
||||
positions_offset_nbits: 11
|
||||
positions_offset_nbits: 11,
|
||||
};
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
term_info_block_meta.serialize(&mut buffer).unwrap();
|
||||
@@ -292,7 +308,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_pack() {
|
||||
let mut store_writer = TermInfoStoreWriter::new();
|
||||
let mut term_infos = vec!();
|
||||
let mut term_infos = vec![];
|
||||
for i in 0..1000 {
|
||||
let term_info = TermInfo {
|
||||
doc_freq: i as u32,
|
||||
@@ -304,9 +320,7 @@ mod tests {
|
||||
term_infos.push(term_info);
|
||||
}
|
||||
let mut buffer = Vec::new();
|
||||
store_writer
|
||||
.serialize(&mut buffer)
|
||||
.unwrap();
|
||||
store_writer.serialize(&mut buffer).unwrap();
|
||||
let term_info_store = TermInfoStore::open(ReadOnlySource::from(buffer));
|
||||
for i in 0..1000 {
|
||||
assert_eq!(term_info_store.get(i as u64), term_infos[i]);
|
||||
@@ -314,5 +328,3 @@ mod tests {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use common::CountingWriter;
|
||||
use schema::FieldType;
|
||||
use postings::TermInfo;
|
||||
use termdict::{TermDictionary, TermDictionaryBuilder, TermOrdinal};
|
||||
use super::{TermStreamerBuilderImpl, TermStreamerImpl, TermInfoStoreWriter, TermInfoStore};
|
||||
use super::{TermInfoStore, TermInfoStoreWriter, TermStreamerBuilderImpl, TermStreamerImpl};
|
||||
|
||||
fn convert_fst_error(e: fst::Error) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, e)
|
||||
|
||||
@@ -87,6 +87,7 @@ mod tests {
|
||||
use tokenizer::{Token, TokenStream, Tokenizer};
|
||||
use super::FacetTokenizer;
|
||||
use schema::Facet;
|
||||
use std::str;
|
||||
|
||||
#[test]
|
||||
fn test_facet_tokenizer() {
|
||||
@@ -98,9 +99,7 @@ mod tests {
|
||||
tokens.push(format!("{}", facet));
|
||||
};
|
||||
FacetTokenizer
|
||||
.token_stream(unsafe {
|
||||
::std::str::from_utf8_unchecked(facet.encoded_bytes())
|
||||
})
|
||||
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
|
||||
.process(&mut add_token);
|
||||
}
|
||||
assert_eq!(tokens.len(), 4);
|
||||
@@ -120,9 +119,7 @@ mod tests {
|
||||
tokens.push(format!("{}", facet));
|
||||
};
|
||||
FacetTokenizer
|
||||
.token_stream(unsafe {
|
||||
::std::str::from_utf8_unchecked(facet.encoded_bytes())
|
||||
})
|
||||
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
|
||||
.process(&mut add_token);
|
||||
}
|
||||
assert_eq!(tokens.len(), 1);
|
||||
|
||||
Reference in New Issue
Block a user