Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-07 09:32:54 +00:00)
Merge branch 'master' into exp/hash_intable
Conflicts: src/datastruct/stacker/hashmap.rs
@@ -22,9 +22,7 @@ before_script:
- |
pip install 'travis-cargo<0.2' --user &&
export PATH=$HOME/.local/bin:$PATH
- (cargo install rustfmt || true)
script:
- cargo fmt -- --write-mode=diff
- |
travis-cargo build &&
travis-cargo test &&

@@ -42,7 +42,6 @@ futures-cpupool = "0.1"
error-chain = "0.8"
owning_ref = "0.3"
stable_deref_trait = "1.0.0"
murmurhash64 = "0.3"

[target.'cfg(windows)'.dependencies]
winapi = "0.2"

@@ -37,7 +37,6 @@ pub struct BitPacker {
mini_buffer: u64,
mini_buffer_written: usize,
num_bits: usize,
written_size: usize,
}

impl BitPacker {
@@ -46,7 +45,6 @@ impl BitPacker {
mini_buffer: 0u64,
mini_buffer_written: 0,
num_bits: num_bits,
written_size: 0,
}
}

@@ -54,14 +52,14 @@ impl BitPacker {
let val_u64 = val as u64;
if self.mini_buffer_written + self.num_bits > 64 {
self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
self.written_size += self.mini_buffer.serialize(output)?;
self.mini_buffer.serialize(output)?;
self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64;
} else {
self.mini_buffer |= val_u64 << self.mini_buffer_written;
self.mini_buffer_written += self.num_bits;
if self.mini_buffer_written == 64 {
self.written_size += self.mini_buffer.serialize(output)?;
self.mini_buffer.serialize(output)?;
self.mini_buffer_written = 0;
self.mini_buffer = 0u64;
}
@@ -74,18 +72,16 @@ impl BitPacker {
let num_bytes = (self.mini_buffer_written + 7) / 8;
let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) };
output.write_all(&arr[..num_bytes])?;
self.written_size += num_bytes;
self.mini_buffer_written = 0;
}
Ok(())
}

pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<usize> {
pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
self.flush(output)?;
// Padding the write file to simplify reads.
output.write_all(&[0u8; 7])?;
self.written_size += 7;
Ok(self.written_size)
Ok(())
}
}

@@ -127,11 +123,32 @@ impl<Data> BitUnpacker<Data>
let bit_shift = addr_in_bits & 7;
debug_assert!(addr + 8 <= data.len(),
"The fast field field should have been padded with 7 bytes.");
let val_unshifted_unmasked: u64 =
unsafe { *(data.as_ptr().offset(addr as isize) as *const u64) };
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
(val_shifted & mask)
}

pub fn get_range(&self, start: u32, output: &mut [u64]) {
if self.num_bits == 0 {
for val in output.iter_mut() {
*val = 0;
}
} else {
let data: &[u8] = &*self.data;
let num_bits = self.num_bits;
let mask = self.mask;
let mut addr_in_bits = (start as usize) * num_bits;
for output_val in output.iter_mut() {
let addr = addr_in_bits >> 3;
let bit_shift = addr_in_bits & 7;
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
*output_val = val_shifted & mask;
addr_in_bits += num_bits;
}
}

}
}

@@ -153,7 +170,7 @@ mod test {
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
}

fn test_bitpacker_util(len: usize, num_bits: usize) {
fn create_fastfield_bitpacker(len: usize, num_bits: usize) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
let mut data = Vec::new();
let mut bitpacker = BitPacker::new(num_bits);
let max_val: u64 = (1 << num_bits) - 1;
@@ -163,10 +180,14 @@ mod test {
for &val in &vals {
bitpacker.write(val, &mut data).unwrap();
}
let num_bytes = bitpacker.close(&mut data).unwrap();
assert_eq!(num_bytes, (num_bits * len + 7) / 8 + 7);
assert_eq!(data.len(), num_bytes);
bitpacker.close(&mut data).unwrap();
assert_eq!(data.len(), (num_bits * len + 7) / 8 + 7);
let bitunpacker = BitUnpacker::new(data, num_bits);
(bitunpacker, vals)
}

fn test_bitpacker_util(len: usize, num_bits: usize) {
let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits);
for (i, val) in vals.iter().enumerate() {
assert_eq!(bitunpacker.get(i), *val);
}
@@ -180,4 +201,17 @@ mod test {
test_bitpacker_util(6, 14);
test_bitpacker_util(1000, 14);
}

#[test]
fn test_bitpacker_range() {
let (bitunpacker, vals) = create_fastfield_bitpacker(100_000, 12);
let buffer_len = 100;
let mut buffer = vec![0u64; buffer_len];
for start in vec![0, 10, 20, 100, 1_000] {
bitunpacker.get_range(start as u32, &mut buffer[..]);
for i in 0..buffer_len {
assert_eq!(buffer[i], vals[start + i]);
}
}
}
}
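Note: a minimal usage sketch of the round trip these tests exercise, assuming only the `BitPacker`/`BitUnpacker` signatures visible in the hunks above (with `close` now returning `io::Result<()>`, callers check `data.len()` instead of a returned byte count). The helper name is hypothetical and this block is not part of the diff.

// Illustrative sketch only, not part of the commit.
use common::bitpacker::{BitPacker, BitUnpacker};

fn pack_and_unpack(vals: &[u64], num_bits: usize) -> std::io::Result<()> {
    let mut data: Vec<u8> = Vec::new();
    let mut bitpacker = BitPacker::new(num_bits);
    for &val in vals {
        // each value is packed on `num_bits` bits
        bitpacker.write(val, &mut data)?;
    }
    // flushes the pending mini buffer and appends 7 bytes of padding
    bitpacker.close(&mut data)?;
    assert_eq!(data.len(), (num_bits * vals.len() + 7) / 8 + 7);
    let bitunpacker = BitUnpacker::new(data, num_bits);
    for (i, &val) in vals.iter().enumerate() {
        assert_eq!(bitunpacker.get(i), val);
    }
    Ok(())
}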
@@ -1,6 +1,7 @@
mod serialize;
mod timer;
mod vint;
mod counting_writer;
pub mod bitpacker;

pub use self::serialize::BinarySerializable;
@@ -8,6 +9,7 @@ pub use self::timer::Timing;
pub use self::timer::TimerTree;
pub use self::timer::OpenTimer;
pub use self::vint::VInt;
pub use self::counting_writer::CountingWriter;

use std::io;

@@ -6,33 +6,35 @@ use std::io::Read;
use std::io;
use common::VInt;

pub trait BinarySerializable: fmt::Debug + Sized {
fn serialize(&self, writer: &mut Write) -> io::Result<usize>;
fn deserialize(reader: &mut Read) -> io::Result<Self>;
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
}

impl BinarySerializable for () {
fn serialize(&self, _: &mut Write) -> io::Result<usize> {
Ok(0)
fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
Ok(())
}
fn deserialize(_: &mut Read) -> io::Result<Self> {
fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
Ok(())
}
}

impl<T: BinarySerializable> BinarySerializable for Vec<T> {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut total_size = try!(VInt(self.len() as u64).serialize(writer));
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
VInt(self.len() as u64).serialize(writer)?;
for it in self {
total_size += try!(it.serialize(writer));
it.serialize(writer)?;
}
Ok(total_size)
Ok(())
}
fn deserialize(reader: &mut Read) -> io::Result<Vec<T>> {
let num_items = try!(VInt::deserialize(reader)).val();
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
let num_items = VInt::deserialize(reader)?.val();
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
let item = try!(T::deserialize(reader));
let item = T::deserialize(reader)?;
items.push(item);
}
Ok(items)
@@ -41,69 +43,67 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {

impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
fn serialize(&self, write: &mut Write) -> io::Result<usize> {
Ok(try!(self.0.serialize(write)) + try!(self.1.serialize(write)))
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
self.0.serialize(write)?;
self.1.serialize(write)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
Ok((try!(Left::deserialize(reader)), try!(Right::deserialize(reader))))
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
Ok((Left::deserialize(reader)?, Right::deserialize(reader)?))
}
}

impl BinarySerializable for u32 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u32::<Endianness>(*self).map(|_| 4)
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_u32::<Endianness>(*self)
}

fn deserialize(reader: &mut Read) -> io::Result<u32> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
reader.read_u32::<Endianness>()
}
}

impl BinarySerializable for u64 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_u64::<Endianness>(*self).map(|_| 8)
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_u64::<Endianness>(*self)
}
fn deserialize(reader: &mut Read) -> io::Result<u64> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
reader.read_u64::<Endianness>()
}
}

impl BinarySerializable for i64 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
writer.write_i64::<Endianness>(*self).map(|_| 8)
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_i64::<Endianness>(*self)
}
fn deserialize(reader: &mut Read) -> io::Result<i64> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
reader.read_i64::<Endianness>()
}
}

impl BinarySerializable for u8 {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
try!(writer.write_u8(*self));
Ok(1)
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_u8(*self)
}
fn deserialize(reader: &mut Read) -> io::Result<u8> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
reader.read_u8()
}
}

impl BinarySerializable for String {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
let data: &[u8] = self.as_bytes();
let mut size = try!(VInt(data.len() as u64).serialize(writer));
size += data.len();
try!(writer.write_all(data));
Ok(size)
VInt(data.len() as u64).serialize(writer)?;
writer.write_all(data)
}

fn deserialize(reader: &mut Read) -> io::Result<String> {
let string_length = try!(VInt::deserialize(reader)).val() as usize;
fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
let string_length = VInt::deserialize(reader)?.val() as usize;
let mut result = String::with_capacity(string_length);
try!(reader
.take(string_length as u64)
.read_to_string(&mut result));
reader
.take(string_length as u64)
.read_to_string(&mut result)?;
Ok(result)
}
}
@@ -117,9 +117,8 @@ mod test {

fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) {
let mut buffer: Vec<u8> = Vec::new();

if num_bytes != 0 {
assert_eq!(v.serialize(&mut buffer).unwrap(), num_bytes);
v.serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), num_bytes);
} else {
v.serialize(&mut buffer).unwrap();

@@ -16,27 +16,25 @@ impl VInt {
}

impl BinarySerializable for VInt {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
let mut remaining = self.0;
let mut written: usize = 0;
let mut buffer = [0u8; 10];
let mut i = 0;
loop {
let next_byte: u8 = (remaining % 128u64) as u8;
remaining /= 128u64;
if remaining == 0u64 {
buffer[written] = next_byte | 128u8;
written += 1;
break;
buffer[i] = next_byte | 128u8;
return writer.write_all(&buffer[0..i + 1]);
} else {
buffer[written] = next_byte;
written += 1;
buffer[i] = next_byte;
}
i += 1;
}
try!(writer.write_all(&buffer[0..written]));
Ok(written)

}

fn deserialize(reader: &mut Read) -> io::Result<Self> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let mut bytes = reader.bytes();
let mut result = 0u64;
let mut shift = 0u64;

@@ -18,7 +18,7 @@ impl<T: BinarySerializable> LayerBuilder<T> {
}

fn write(&self, output: &mut Write) -> Result<(), io::Error> {
try!(output.write_all(&self.buffer));
output.write_all(&self.buffer)?;
Ok(())
}

@@ -36,8 +36,8 @@ impl<T: BinarySerializable> LayerBuilder<T> {
self.remaining -= 1;
self.len += 1;
let offset = self.written_size() as u32;
try!(doc_id.serialize(&mut self.buffer));
try!(value.serialize(&mut self.buffer));
doc_id.serialize(&mut self.buffer)?;
value.serialize(&mut self.buffer)?;
Ok(if self.remaining == 0 {
self.remaining = self.period;
Some((doc_id, offset))
@@ -89,7 +89,7 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
}
}

pub fn write<W: Write>(self, output: &mut Write) -> io::Result<()> {
pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
let mut size: u32 = 0;
let mut layer_sizes: Vec<u32> = Vec::new();
size += self.data_layer.buffer.len() as u32;
@@ -98,10 +98,10 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
size += layer.buffer.len() as u32;
layer_sizes.push(size);
}
try!(layer_sizes.serialize(output));
try!(self.data_layer.write(output));
layer_sizes.serialize(output)?;
self.data_layer.write(output)?;
for layer in self.skip_layers.iter().rev() {
try!(layer.write(output));
layer.write(output)?;
}
Ok(())
}

@@ -13,7 +13,7 @@ pub fn jump_needed(val: u32) -> bool {
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ExpUnrolledLinkedList {
len: u32,
end: u32,
@@ -52,6 +52,12 @@ impl ExpUnrolledLinkedList {
}

impl HeapAllocable for u32 {
fn with_addr(_addr: u32) -> u32 {
0u32
}
}

impl HeapAllocable for ExpUnrolledLinkedList {
fn with_addr(addr: u32) -> ExpUnrolledLinkedList {
let last_addr = addr + mem::size_of::<u32>() as u32 * 2u32;

@@ -1,11 +1,54 @@
use std::iter;
use super::heap::{Heap, HeapAllocable, BytesRef};
use murmurhash64::murmur_hash64a;

const SEED: u64 = 2915580697u64;
mod murmurhash2 {

fn hash(key: &[u8]) -> u64 {
murmur_hash64a(key, SEED)
const SEED: u32 = 3_242_157_231u32;

#[inline(always)]
pub fn murmurhash2(key: &[u8]) -> u32 {
let mut key_ptr: *const u32 = key.as_ptr() as *const u32;
let m: u32 = 0x5bd1e995;
let r = 24;
let len = key.len() as u32;

let mut h: u32 = SEED ^ len;
let num_blocks = len >> 2;
for _ in 0..num_blocks {
let mut k: u32 = unsafe { *key_ptr };
k = k.wrapping_mul(m);
k ^= k >> r;
k = k.wrapping_mul(m);
k = k.wrapping_mul(m);
h ^= k;
key_ptr = key_ptr.wrapping_offset(1);
}

// Handle the last few bytes of the input array
let remaining = len & 3;
let key_ptr_u8: *const u8 = key_ptr as *const u8;
match remaining {
3 => {
h ^= unsafe { *key_ptr_u8.wrapping_offset(2) as u32 } << 16;
h ^= unsafe { *key_ptr_u8.wrapping_offset(1) as u32 } << 8;
h ^= unsafe { *key_ptr_u8 as u32 };
h = h.wrapping_mul(m);
}
2 => {
h ^= unsafe { *key_ptr_u8.wrapping_offset(1) as u32 } << 8;
h ^= unsafe { *key_ptr_u8 as u32 };
h = h.wrapping_mul(m);
}
1 => {
h ^= unsafe { *key_ptr_u8 as u32 };
h = h.wrapping_mul(m);
}
_ => {}
}
h ^= h >> 13;
h = h.wrapping_mul(m);
h ^ (h >> 15)
}
}

@@ -136,7 +179,7 @@ impl<'a> HashMap<'a> {

pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> &mut V {
let key_bytes: &[u8] = key.as_ref();
let hash = hash(key.as_ref());
let hash = murmurhash2::murmurhash2(key) as usize;
let masked_hash = self.mask_hash(hash);
let mut probe = self.probe(hash);
loop {
@@ -162,7 +205,9 @@ mod tests {

use super::*;
use super::super::heap::{Heap, HeapAllocable};
use super::murmurhash2::murmurhash2;
use test::Bencher;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;

@@ -216,21 +261,41 @@ mod tests {
assert!(iter_values.next().is_none());
}

// #[bench]
// fn bench_djb2(bench: &mut Bencher) {
// let v = String::from("abwer");
// bench.iter(|| djb2(v.as_bytes()));
// }
#[test]
fn test_murmur() {
let s1 = "abcdef";
let s2 = "abcdeg";
for i in 0..5 {
assert_eq!(murmurhash2(&s1[i..5].as_bytes()),
murmurhash2(&s2[i..5].as_bytes()));
}
}

// #[bench]
// fn bench_siphasher(bench: &mut Bencher) {
// let v = String::from("abwer");
// bench.iter(|| {
// let mut h = DefaultHasher::new();
// h.write(v.as_bytes());
// h.finish()
// });
// }
#[test]
fn test_murmur_collisions() {
let mut set: HashSet<u32> = HashSet::default();
for i in 0..10_000 {
let s = format!("hash{}", i);
let hash = murmurhash2(s.as_bytes());
set.insert(hash);
}
assert_eq!(set.len(), 10_000);
}

#[bench]
fn bench_murmurhash_2(b: &mut Bencher) {
let keys: Vec<&'static str> =
vec!["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "];
b.iter(|| {
keys.iter()
.map(|&s| s.as_bytes())
.map(murmurhash2::murmurhash2)
.map(|h| h as u64)
.last()
.unwrap()
});
}
>>>>>>> master

}
@@ -93,8 +93,9 @@ impl Heap {

struct InnerHeap {
buffer: Vec<u8>,
buffer_len: u32,
used: u32,
has_been_resized: bool,
next_heap: Option<Box<InnerHeap>>,
}

@@ -103,13 +104,15 @@ impl InnerHeap {
let buffer: Vec<u8> = vec![0u8; num_bytes];
InnerHeap {
buffer: buffer,
buffer_len: num_bytes as u32,
next_heap: None,
used: 0u32,
has_been_resized: false,
}
}

pub fn clear(&mut self) {
self.used = 0u32;
self.next_heap = None;
}

pub fn capacity(&self) -> u32 {
@@ -119,30 +122,48 @@ impl InnerHeap {
// Returns the number of free bytes. If the buffer
// has reached it's capacity and overflowed to another buffer, return 0.
pub fn num_free_bytes(&self) -> u32 {
if self.has_been_resized {
if self.next_heap.is_some() {
0u32
} else {
(self.buffer.len() as u32) - self.used
self.buffer_len - self.used
}
}

pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
let addr = self.used;
self.used += num_bytes as u32;
let buffer_len = self.buffer.len();
if self.used > buffer_len as u32 {
self.buffer.resize(buffer_len * 2, 0u8);
self.has_been_resized = true
if self.used <= self.buffer_len {
addr
} else {
if self.next_heap.is_none() {
info!(r#"Exceeded heap size.
The segment will be committed right after indexing this document."#,);
self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
}
self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
}
addr
}

fn get_slice(&self, start: u32, stop: u32) -> &[u8] {
&self.buffer[start as usize..stop as usize]
if start >= self.buffer_len {
self.next_heap
.as_ref()
.unwrap()
.get_slice(start - self.buffer_len, stop - self.buffer_len)
} else {
&self.buffer[start as usize..stop as usize]
}
}

fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] {
&mut self.buffer[start as usize..stop as usize]
if start >= self.buffer_len {
self.next_heap
.as_mut()
.unwrap()
.get_mut_slice(start - self.buffer_len, stop - self.buffer_len)
} else {
&mut self.buffer[start as usize..stop as usize]
}
}

fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
@@ -156,23 +177,46 @@ impl InnerHeap {
}

fn get_mut(&mut self, addr: u32) -> *mut u8 {
let addr_isize = addr as isize;
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
if addr >= self.buffer_len {
self.next_heap
.as_mut()
.unwrap()
.get_mut(addr - self.buffer_len)
} else {
let addr_isize = addr as isize;
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
}
}

fn get_mut_ref<Item>(&mut self, addr: u32) -> &mut Item {
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
let v_ptr = v_ptr_u8 as *mut Item;
unsafe { &mut *v_ptr }
if addr >= self.buffer_len {
self.next_heap
.as_mut()
.unwrap()
.get_mut_ref(addr - self.buffer_len)
} else {
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
let v_ptr = v_ptr_u8 as *mut Item;
unsafe { &mut *v_ptr }
}
}

fn set<Item>(&mut self, addr: u32, val: &Item) {
let v_ptr: *const Item = val as *const Item;
let v_ptr_u8: *const u8 = v_ptr as *const u8;
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
unsafe {
let dest_ptr: *mut u8 = self.get_mut(addr);
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
pub fn set<Item>(&mut self, addr: u32, val: &Item) {
if addr >= self.buffer_len {
self.next_heap
.as_mut()
.unwrap()
.set(addr - self.buffer_len, val);
} else {
let v_ptr: *const Item = val as *const Item;
let v_ptr_u8: *const u8 = v_ptr as *const u8;
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
unsafe {
let dest_ptr: *mut u8 = self.get_mut(addr);
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
}
}
}
}
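Note: a standalone toy sketch (not part of the diff, names hypothetical) of the overflow scheme the `InnerHeap` hunks above introduce: addresses below `buffer_len` live in the local buffer, anything beyond is delegated to a lazily created `next_heap`, shifted down by `buffer_len`.

// Illustrative sketch only, not part of the commit.
struct ToyHeap {
    buffer: Vec<u8>,
    used: u32,
    next_heap: Option<Box<ToyHeap>>,
}

impl ToyHeap {
    fn with_capacity(num_bytes: usize) -> ToyHeap {
        ToyHeap { buffer: vec![0u8; num_bytes], used: 0, next_heap: None }
    }

    fn buffer_len(&self) -> u32 {
        self.buffer.len() as u32
    }

    fn allocate_space(&mut self, num_bytes: usize) -> u32 {
        let addr = self.used;
        self.used += num_bytes as u32;
        if self.used <= self.buffer_len() {
            addr
        } else {
            // overflow: allocate in the next heap and translate the address back
            let capacity = self.buffer.len();
            self.next_heap
                .get_or_insert_with(|| Box::new(ToyHeap::with_capacity(capacity)))
                .allocate_space(num_bytes) + self.buffer_len()
        }
    }

    fn get_slice(&self, start: u32, stop: u32) -> &[u8] {
        if start >= self.buffer_len() {
            // the address belongs to the overflow heap: shift it down and recurse
            self.next_heap
                .as_ref()
                .unwrap()
                .get_slice(start - self.buffer_len(), stop - self.buffer_len())
        } else {
            &self.buffer[start as usize..stop as usize]
        }
    }
}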
@@ -463,7 +463,6 @@ mod tests {
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1);
}
let cache_info = mmap_directory.get_cache_info();
println!("{:?}", cache_info);
assert_eq!(cache_info.counters.miss_empty, 30);
assert_eq!(cache_info.counters.miss_weak, 10);
assert_eq!(cache_info.mmapped.len(), 10);

@@ -211,7 +211,6 @@ mod tests {
}
}

#[test]
fn test_signed_intfastfield() {
let path = Path::new("test");
@@ -245,6 +244,11 @@ mod tests {
for (doc, i) in (-100i64..10_000i64).enumerate() {
assert_eq!(fast_field_reader.get(doc as u32), i);
}
let mut buffer = vec![0i64; 100];
fast_field_reader.get_range(53, &mut buffer[..]);
for i in 0..100 {
assert_eq!(buffer[i], -100i64 + 53i64 + i as i64);
}
}
}

@@ -306,10 +310,6 @@ mod tests {
fast_field_readers.open_reader(*FIELD).unwrap();
let mut a = 0u64;
for _ in 0..n {
println!("i {}=> {} {}",
a,
fast_field_reader.get(a as u32),
permutation[a as usize]);
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
a = fast_field_reader.get(a as u32);
}

@@ -12,6 +12,8 @@ use fastfield::FastFieldsWriter;
use common::bitpacker::compute_num_bits;
use common::bitpacker::BitUnpacker;
use schema::FieldType;
use error::ResultExt;
use std::mem;
use common;
use owning_ref::OwningRef;

@@ -26,8 +28,22 @@ pub trait FastFieldReader: Sized {
/// Return the value associated to the given document.
///
/// This accessor should return as fast as possible.
///
/// # Panics
///
/// May panic if `doc` is greater than the segment
// `maxdoc`.
fn get(&self, doc: DocId) -> Self::ValueType;

/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// # Panics
///
/// May panic if `start + output.len()` is greater than
/// the segment's `maxdoc`.
fn get_range(&self, start: u32, output: &mut [Self::ValueType]);

/// Opens a fast field given a source.
fn open(source: ReadOnlySource) -> Self;
@@ -79,6 +95,13 @@ impl FastFieldReader for U64FastFieldReader {
}
}

fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
self.bit_unpacker.get_range(start, output);
for out in output.iter_mut() {
*out += self.min_value;
}
}

/// Opens a new fast field reader given a read only source.
///
/// # Panics
@@ -125,9 +148,20 @@ impl From<Vec<u64>> for U64FastFieldReader {
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
let source = directory.open_read(path).unwrap();
let fast_field_readers = FastFieldsReader::from_source(source).unwrap();
fast_field_readers.open_reader(field).unwrap()
directory
.open_read(path)
.chain_err(|| "Failed to open the file")
.and_then(|source| {
FastFieldsReader::from_source(source)
.chain_err(|| "Failed to read the file.")
})
.and_then(|ff_readers| {
ff_readers
.open_reader(field)
.ok_or_else(|| "Failed to find the requested field".into())
})
.expect("This should never happen, please report.")

}
}

@@ -169,6 +203,19 @@ impl FastFieldReader for I64FastFieldReader {
common::u64_to_i64(self.underlying.get(doc))
}

///
/// # Panics
///
/// May panic or return wrong random result if `doc`
/// is greater or equal to the segment's `maxdoc`.
fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
self.underlying.get_range(start, output_u64);
for mut_val in output_u64.iter_mut() {
*mut_val ^= 1 << 63;
}
}

/// Opens a new fast field reader given a read only source.
///
/// # Panics

@@ -2,9 +2,9 @@ use common::BinarySerializable;
use directory::WritePtr;
use schema::Field;
use common::bitpacker::{compute_num_bits, BitPacker};
use common::CountingWriter;
use std::io::{self, Write, Seek, SeekFrom};

/// `FastFieldSerializer` is in charge of serializing
/// fastfields on disk.
///
@@ -26,8 +26,7 @@ use std::io::{self, Write, Seek, SeekFrom};
/// * `close_field()`
/// * `close()`
pub struct FastFieldSerializer {
write: WritePtr,
written_size: usize,
write: CountingWriter<WritePtr>,
fields: Vec<(Field, u32)>,
min_value: u64,
field_open: bool,
@@ -37,12 +36,12 @@ pub struct FastFieldSerializer {

impl FastFieldSerializer {
/// Constructor
pub fn new(mut write: WritePtr) -> io::Result<FastFieldSerializer> {
pub fn new(write: WritePtr) -> io::Result<FastFieldSerializer> {
// just making room for the pointer to header.
let written_size: usize = try!(0u32.serialize(&mut write));
let mut counting_writer = CountingWriter::wrap(write);
0u32.serialize(&mut counting_writer)?;
Ok(FastFieldSerializer {
write: write,
written_size: written_size,
write: counting_writer,
fields: Vec::new(),
min_value: 0,
field_open: false,
@@ -61,11 +60,11 @@ impl FastFieldSerializer {
}
self.min_value = min_value;
self.field_open = true;
self.fields.push((field, self.written_size as u32));
let write: &mut Write = &mut self.write;
self.written_size += try!(min_value.serialize(write));
self.fields.push((field, self.write.written_bytes() as u32));
let write = &mut self.write;
min_value.serialize(write)?;
let amplitude = max_value - min_value;
self.written_size += try!(amplitude.serialize(write));
amplitude.serialize(write)?;
let num_bits = compute_num_bits(amplitude);
self.bit_packer = BitPacker::new(num_bits as usize);
Ok(())
@@ -88,7 +87,7 @@ impl FastFieldSerializer {
// adding some padding to make sure we
// can read the last elements with our u64
// cursor
self.written_size += self.bit_packer.close(&mut self.write)?;
self.bit_packer.close(&mut self.write)?;
Ok(())
}

@@ -96,15 +95,16 @@ impl FastFieldSerializer {
/// Closes the serializer
///
/// After this call the data must be persistently save on disk.
pub fn close(mut self) -> io::Result<usize> {
pub fn close(self) -> io::Result<usize> {
if self.field_open {
return Err(io::Error::new(io::ErrorKind::Other, "Last field not closed"));
}
let header_offset: usize = self.written_size;
self.written_size += try!(self.fields.serialize(&mut self.write));
try!(self.write.seek(SeekFrom::Start(0)));
try!((header_offset as u32).serialize(&mut self.write));
try!(self.write.flush());
Ok(self.written_size)
let header_offset: usize = self.write.written_bytes() as usize;
let (mut write, written_size) = self.write.finish()?;
self.fields.serialize(&mut write)?;
write.seek(SeekFrom::Start(0))?;
(header_offset as u32).serialize(&mut write)?;
write.flush()?;
Ok(written_size)
}
}
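Note: the recurring change in these serializer hunks is replacing hand-maintained `written_size` counters with `common::CountingWriter`. A minimal sketch of that pattern (illustrative only, the helper name is hypothetical), assuming the `wrap` / `written_bytes` / `finish` API shown above:

// Illustrative sketch only, not part of the commit.
use std::io::{self, Write};
use common::CountingWriter;

fn write_two_sections<W: Write>(dest: W) -> io::Result<(W, usize)> {
    let mut writer = CountingWriter::wrap(dest);
    writer.write_all(b"section 1")?;
    // record where the next section starts instead of accumulating byte counts by hand
    let section2_offset = writer.written_bytes() as usize;
    writer.write_all(b"section 2")?;
    // finish() hands back the wrapped writer together with the total number of bytes written
    let (dest, written_size) = writer.finish()?;
    debug_assert!(section2_offset <= written_size);
    Ok((dest, written_size))
}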
@@ -65,6 +65,10 @@ pub trait DocSet {
None
}
}

/// Returns a best-effort hint of the
/// length of the docset.
fn size_hint(&self) -> usize;
}

@@ -83,6 +87,11 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
let unboxed: &TDocSet = self.borrow();
unboxed.doc()
}

fn size_hint(&self) -> usize {
let unboxed: &TDocSet = self.borrow();
unboxed.size_hint()
}
}

impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
@@ -100,4 +109,9 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
let unref: &TDocSet = *self;
unref.doc()
}

fn size_hint(&self) -> usize {
let unref: &TDocSet = *self;
unref.size_hint()
}
}

@@ -10,12 +10,13 @@ pub struct IntersectionDocSet<TDocSet: DocSet> {
}

impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> {
fn from(docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
fn from(mut docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
assert!(docsets.len() >= 2);
docsets.sort_by_key(|docset| docset.size_hint());
IntersectionDocSet {
docsets: docsets,
finished: false,
doc: DocId::max_value(),
doc: 0u32,
}
}
}
@@ -31,37 +32,51 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {

impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
fn size_hint(&self) -> usize {
self.docsets
.iter()
.map(|docset| docset.size_hint())
.min()
.unwrap() // safe as docsets cannot be empty.
}

#[allow(never_loop)]
fn advance(&mut self) -> bool {
if self.finished {
return false;
}
let num_docsets = self.docsets.len();
let mut count_matching = 0;
let mut doc_candidate = 0;
let mut ord = 0;
loop {
let mut doc_set = &mut self.docsets[ord];
match doc_set.skip_next(doc_candidate) {
SkipResult::Reached => {
count_matching += 1;
if count_matching == num_docsets {
self.doc = doc_candidate;
return true;

let mut candidate_doc = self.doc;
let mut candidate_ord = self.docsets.len();

'outer: loop {

for (ord, docset) in self.docsets.iter_mut().enumerate() {
if ord != candidate_ord {
// `candidate_ord` is already at the
// right position.
//
// Calling `skip_next` would advance this docset
// and miss it.
match docset.skip_next(candidate_doc) {
SkipResult::Reached => {}
SkipResult::OverStep => {
// this is not in the intersection,
// let's update our candidate.
candidate_doc = docset.doc();
candidate_ord = ord;
continue 'outer;
}
SkipResult::End => {
self.finished = true;
return false;
}
}
}
SkipResult::End => {
self.finished = true;
return false;
}
SkipResult::OverStep => {
count_matching = 1;
doc_candidate = doc_set.doc();
}
}
ord += 1;
if ord == num_docsets {
ord = 0;
}

self.doc = candidate_doc;
return true;
}
}

@@ -69,3 +84,51 @@ impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
self.doc
}
}

#[cfg(test)]
mod tests {

use postings::{DocSet, VecPostings, IntersectionDocSet};

#[test]
fn test_intersection() {
{
let left = VecPostings::from(vec![1, 3, 9]);
let right = VecPostings::from(vec![3, 4, 9, 18]);
let mut intersection = IntersectionDocSet::from(vec![left, right]);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 3);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.advance());
}
{
let a = VecPostings::from(vec![1, 3, 9]);
let b = VecPostings::from(vec![3, 4, 9, 18]);
let c = VecPostings::from(vec![1, 5, 9, 111]);
let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.advance());
}
}

#[test]
fn test_intersection_zero() {
let left = VecPostings::from(vec![0]);
let right = VecPostings::from(vec![0]);
let mut intersection = IntersectionDocSet::from(vec![left, right]);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 0);
}

#[test]
fn test_intersection_empty() {
let a = VecPostings::from(vec![1, 3]);
let b = VecPostings::from(vec![1, 4]);
let c = VecPostings::from(vec![3, 9]);
let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
assert!(!intersection.advance());
}
}
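Note: a standalone sketch (not part of the diff, name hypothetical) of the same "leapfrog" idea the new `advance` above implements, written against plain sorted slices so it can be read and run in isolation:

// Illustrative sketch only, not part of the commit.
fn intersect_sorted(lists: &[&[u32]]) -> Vec<u32> {
    assert!(lists.len() >= 2);
    let mut cursors = vec![0usize; lists.len()];
    let mut result = Vec::new();
    // the first element of the first list is the initial candidate
    let mut candidate = match lists[0].first() {
        Some(&doc) => doc,
        None => return result,
    };
    'outer: loop {
        for (ord, list) in lists.iter().enumerate() {
            // advance this cursor until it reaches or oversteps the candidate
            while cursors[ord] < list.len() && list[cursors[ord]] < candidate {
                cursors[ord] += 1;
            }
            match list.get(cursors[ord]) {
                // one list is exhausted: no further match is possible
                None => break 'outer,
                Some(&doc) if doc > candidate => {
                    // overstepped: this doc becomes the new candidate, restart the scan
                    candidate = doc;
                    continue 'outer;
                }
                // reached the candidate exactly: check the next list
                Some(_) => {}
            }
        }
        // every list contains `candidate`
        result.push(candidate);
        match candidate.checked_add(1) {
            Some(next) => candidate = next,
            None => break,
        }
    }
    result
}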
@@ -373,29 +373,6 @@ mod tests {
}
}

#[test]
fn test_intersection() {
{
let left = VecPostings::from(vec![1, 3, 9]);
let right = VecPostings::from(vec![3, 4, 9, 18]);
let mut intersection = IntersectionDocSet::from(vec![left, right]);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 3);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.advance());
}
{
let a = VecPostings::from(vec![1, 3, 9]);
let b = VecPostings::from(vec![3, 4, 9, 18]);
let c = VecPostings::from(vec![1, 5, 9, 111]);
let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.advance());
}
}

lazy_static! {
static ref TERM_A: Term = {
@@ -406,6 +383,14 @@ mod tests {
let field = Field(0);
Term::from_field_text(field, "b")
};
static ref TERM_C: Term = {
let field = Field(0);
Term::from_field_text(field, "c")
};
static ref TERM_D: Term = {
let field = Field(0);
Term::from_field_text(field, "d")
};
static ref INDEX: Index = {
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", STRING);
@@ -415,25 +400,23 @@ mod tests {
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);

let index = Index::create_in_ram(schema);
let mut count_a = 0;
let mut count_b = 0;
let posting_list_size = 100_000;
let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
for _ in 0 .. {
if count_a >= posting_list_size &&
count_b >= posting_list_size {
break;
}
for _ in 0 .. posting_list_size {
let mut doc = Document::default();
if count_a < posting_list_size && rng.gen_weighted_bool(15) {
count_a += 1;
if rng.gen_weighted_bool(15) {
doc.add_text(text_field, "a");
}
if count_b < posting_list_size && rng.gen_weighted_bool(10) {
count_b += 1;
if rng.gen_weighted_bool(10) {
doc.add_text(text_field, "b");
}
if rng.gen_weighted_bool(5) {
doc.add_text(text_field, "c");
}
if rng.gen_weighted_bool(1) {
doc.add_text(text_field, "d");
}
index_writer.add_document(doc);
}
assert!(index_writer.commit().is_ok());
@@ -467,8 +450,16 @@ mod tests {
let segment_postings_b = segment_reader
.read_postings(&*TERM_B, SegmentPostingsOption::NoFreq)
.unwrap();
let segment_postings_c = segment_reader
.read_postings(&*TERM_C, SegmentPostingsOption::NoFreq)
.unwrap();
let segment_postings_d = segment_reader
.read_postings(&*TERM_D, SegmentPostingsOption::NoFreq)
.unwrap();
let mut intersection = IntersectionDocSet::from(vec![segment_postings_a,
segment_postings_b]);
segment_postings_b,
segment_postings_c,
segment_postings_d]);
while intersection.advance() {}
});
}

@@ -215,6 +215,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'
position: u32,
term: &Term,
heap: &Heap) {
debug_assert!(term.as_slice().len() >= 4);
let recorder: &mut Rec = term_index.get_or_create(term);
let current_doc = recorder.current_doc();
if current_doc != doc {

@@ -152,6 +152,10 @@ impl<'a> DocSet for SegmentPostings<'a> {
}
}

fn size_hint(&self) -> usize {
self.len()
}

#[inline]
fn doc(&self) -> DocId {
let docs = self.block_cursor.docs();

@@ -10,12 +10,11 @@ use directory::WritePtr;
use compression::{NUM_DOCS_PER_BLOCK, BlockEncoder, CompositeEncoder};
use DocId;
use core::Segment;
use std::io;
use core::SegmentComponent;
use std::io::Write;
use std::io::{self, Write};
use compression::VIntEncoder;
use common::VInt;
use common::BinarySerializable;
use common::CountingWriter;
use termdict::TermDictionaryBuilder;

@@ -52,10 +51,8 @@ use termdict::TermDictionaryBuilder;
/// [available here](https://fulmicoton.gitbooks.io/tantivy-doc/content/inverted-index.html).
pub struct PostingsSerializer {
terms_fst_builder: TermDictionaryBuilderImpl<WritePtr, TermInfo>,
postings_write: WritePtr,
positions_write: WritePtr,
written_bytes_postings: usize,
written_bytes_positions: usize,
postings_write: CountingWriter<WritePtr>,
positions_write: CountingWriter<WritePtr>,
last_doc_id_encoded: u32,
positions_encoder: CompositeEncoder,
block_encoder: BlockEncoder,
@@ -78,10 +75,8 @@ impl PostingsSerializer {
let terms_fst_builder = try!(TermDictionaryBuilderImpl::new(terms_write));
Ok(PostingsSerializer {
terms_fst_builder: terms_fst_builder,
postings_write: postings_write,
positions_write: positions_write,
written_bytes_postings: 0,
written_bytes_positions: 0,
postings_write: CountingWriter::wrap(postings_write),
positions_write: CountingWriter::wrap(positions_write),
last_doc_id_encoded: 0u32,
positions_encoder: CompositeEncoder::new(),
block_encoder: BlockEncoder::new(),
@@ -98,12 +93,10 @@ impl PostingsSerializer {

/// Open a new `PostingsSerializer` for the given segment
pub fn open(segment: &mut Segment) -> Result<PostingsSerializer> {
let terms_write = try!(segment.open_write(SegmentComponent::TERMS));
let postings_write = try!(segment.open_write(SegmentComponent::POSTINGS));
let positions_write = try!(segment.open_write(SegmentComponent::POSITIONS));
PostingsSerializer::new(terms_write,
postings_write,
positions_write,
use SegmentComponent::{TERMS, POSTINGS, POSITIONS};
PostingsSerializer::new(segment.open_write(TERMS)?,
segment.open_write(POSTINGS)?,
segment.open_write(POSITIONS)?,
segment.schema())
}

@@ -141,8 +134,8 @@ impl PostingsSerializer {
self.position_deltas.clear();
self.current_term_info = TermInfo {
doc_freq: 0,
postings_offset: self.written_bytes_postings as u32,
positions_offset: self.written_bytes_positions as u32,
postings_offset: self.postings_write.written_bytes() as u32,
positions_offset: self.positions_write.written_bytes() as u32,
};
self.terms_fst_builder.insert_key(term)
}
@@ -168,8 +161,7 @@ impl PostingsSerializer {
let block_encoded =
self.block_encoder
.compress_vint_sorted(&self.doc_ids, self.last_doc_id_encoded);
self.written_bytes_postings += block_encoded.len();
try!(self.postings_write.write_all(block_encoded));
self.postings_write.write_all(block_encoded)?;
self.doc_ids.clear();
}
// ... Idem for term frequencies
@@ -177,8 +169,7 @@ impl PostingsSerializer {
let block_encoded = self.block_encoder
.compress_vint_unsorted(&self.term_freqs[..]);
for num in block_encoded {
self.written_bytes_postings +=
try!(num.serialize(&mut self.postings_write));
num.serialize(&mut self.postings_write)?;
}
self.term_freqs.clear();
}
@@ -186,13 +177,11 @@ impl PostingsSerializer {
// On the other hand, positions are entirely buffered until the
// end of the term, at which point they are compressed and written.
if self.text_indexing_options.is_position_enabled() {
self.written_bytes_positions +=
try!(VInt(self.position_deltas.len() as u64)
.serialize(&mut self.positions_write));
let posdelta_len = VInt(self.position_deltas.len() as u64);
posdelta_len.serialize(&mut self.positions_write)?;
let positions_encoded: &[u8] = self.positions_encoder
.compress_unsorted(&self.position_deltas[..]);
try!(self.positions_write.write_all(positions_encoded));
self.written_bytes_positions += positions_encoded.len();
self.positions_write.write_all(positions_encoded)?;
self.position_deltas.clear();
}
self.term_open = false;
@@ -230,15 +219,13 @@ impl PostingsSerializer {
self.block_encoder
.compress_block_sorted(&self.doc_ids, self.last_doc_id_encoded);
self.last_doc_id_encoded = self.doc_ids[self.doc_ids.len() - 1];
try!(self.postings_write.write_all(block_encoded));
self.written_bytes_postings += block_encoded.len();
self.postings_write.write_all(block_encoded)?;
}
if self.text_indexing_options.is_termfreq_enabled() {
// encode the term_freqs
let block_encoded: &[u8] = self.block_encoder
.compress_block_unsorted(&self.term_freqs);
try!(self.postings_write.write_all(block_encoded));
self.written_bytes_postings += block_encoded.len();
self.postings_write.write_all(block_encoded)?;
self.term_freqs.clear();
}
self.doc_ids.clear();

@@ -24,11 +24,13 @@ pub struct TermInfo {

impl BinarySerializable for TermInfo {
fn serialize(&self, writer: &mut io::Write) -> io::Result<usize> {
Ok(try!(self.doc_freq.serialize(writer)) + try!(self.postings_offset.serialize(writer)) +
try!(self.positions_offset.serialize(writer)))
fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
self.doc_freq.serialize(writer)?;
self.postings_offset.serialize(writer)?;
self.positions_offset.serialize(writer)
}
fn deserialize(reader: &mut io::Read) -> io::Result<Self> {

fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let doc_freq = try!(u32::deserialize(reader));
let postings_offset = try!(u32::deserialize(reader));
let positions_offset = try!(u32::deserialize(reader));

@@ -34,6 +34,10 @@ impl DocSet for VecPostings {
fn doc(&self) -> DocId {
self.doc_ids[self.cursor.0]
}

fn size_hint(&self) -> usize {
self.len()
}
}

impl HasLen for VecPostings {

@@ -93,6 +93,18 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
}

impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
fn size_hint(&self) -> usize {
// TODO fix this. it should be the min
// of the MUST scorer
// and the max of the SHOULD scorers.
self.scorers
.iter()
.map(|scorer| scorer.size_hint())
.max()
.unwrap()
}

fn advance(&mut self) -> bool {
loop {
self.score_combiner.clear();

@@ -67,6 +67,10 @@ impl<'a> DocSet for PhraseScorer<'a> {
fn doc(&self) -> DocId {
self.intersection_docset.doc()
}

fn size_hint(&self) -> usize {
self.intersection_docset.size_hint()
}
}

@@ -49,6 +49,10 @@ impl DocSet for EmptyScorer {
fn doc(&self) -> DocId {
DocId::max_value()
}

fn size_hint(&self) -> usize {
0
}
}

impl Scorer for EmptyScorer {

@@ -32,6 +32,11 @@ impl<TPostings> DocSet for TermScorer<TPostings>
fn doc(&self) -> DocId {
self.postings.doc()
}

fn size_hint(&self) -> usize {
self.postings.size_hint()
}
}

impl<TPostings> Scorer for TermScorer<TPostings>

@@ -14,11 +14,11 @@ use common::BinarySerializable;
pub struct Field(pub u32);

impl BinarySerializable for Field {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
self.0.serialize(writer)
}

fn deserialize(reader: &mut Read) -> io::Result<Field> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Field> {
u32::deserialize(reader).map(Field)
}
}

@@ -34,11 +34,12 @@ impl FieldValue {
}

impl BinarySerializable for FieldValue {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
Ok(self.field.serialize(writer)? + self.value.serialize(writer)?)
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
self.field.serialize(writer)?;
self.value.serialize(writer)
}

fn deserialize(reader: &mut Read) -> io::Result<Self> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let field = Field::deserialize(reader)?;
let value = Value::deserialize(reader)?;
Ok(FieldValue::new(field, value))

@@ -381,8 +381,6 @@ mod tests {
}
}
]"#;
println!("{}", schema_json);
println!("{}", expected);
assert_eq!(schema_json, expected);

let schema: Schema = serde_json::from_str(expected).unwrap();

@@ -130,25 +130,23 @@ mod binary_serialize {
const I64_CODE: u8 = 2;

impl BinarySerializable for Value {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut written_size = 0;
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
match *self {
Value::Str(ref text) => {
written_size += try!(TEXT_CODE.serialize(writer));
written_size += try!(text.serialize(writer));
TEXT_CODE.serialize(writer)?;
text.serialize(writer)
}
Value::U64(ref val) => {
written_size += try!(U64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
U64_CODE.serialize(writer)?;
val.serialize(writer)
}
Value::I64(ref val) => {
written_size += try!(I64_CODE.serialize(writer));
written_size += try!(val.serialize(writer));
I64_CODE.serialize(writer)?;
val.serialize(writer)
}
}
Ok(written_size)
}
fn deserialize(reader: &mut Read) -> io::Result<Self> {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let type_code = try!(u8::deserialize(reader));
match type_code {
TEXT_CODE => {

@@ -5,6 +5,7 @@ use common::BinarySerializable;
use std::io::{self, Write};
use lz4;
use datastruct::SkipListBuilder;
use common::CountingWriter;

const BLOCK_SIZE: usize = 16_384;

@@ -19,9 +20,8 @@ const BLOCK_SIZE: usize = 16_384;
///
pub struct StoreWriter {
doc: DocId,
written: u64,
offset_index_writer: SkipListBuilder<u64>,
writer: WritePtr,
writer: CountingWriter<WritePtr>,
intermediary_buffer: Vec<u8>,
current_block: Vec<u8>,
}
@@ -35,9 +35,8 @@ impl StoreWriter {
pub fn new(writer: WritePtr) -> StoreWriter {
StoreWriter {
doc: 0,
written: 0,
offset_index_writer: SkipListBuilder::new(3),
writer: writer,
writer: CountingWriter::wrap(writer),
intermediary_buffer: Vec::new(),
current_block: Vec::new(),
}
@@ -54,11 +53,12 @@ impl StoreWriter {
for field_value in field_values {
try!((*field_value).serialize(&mut self.intermediary_buffer));
}
try!((self.intermediary_buffer.len() as u32).serialize(&mut self.current_block));
try!(self.current_block.write_all(&self.intermediary_buffer[..]));
(self.intermediary_buffer.len() as u32)
.serialize(&mut self.current_block)?;
self.current_block.write_all(&self.intermediary_buffer[..])?;
self.doc += 1;
if self.current_block.len() > BLOCK_SIZE {
try!(self.write_and_compress_block());
self.write_and_compress_block()?;
}
Ok(())
}
@@ -71,11 +71,11 @@ impl StoreWriter {
let (_, encoder_result) = encoder.finish();
try!(encoder_result);
}
let compressed_block_size = self.intermediary_buffer.len() as u64;
self.written += try!((compressed_block_size as u32).serialize(&mut self.writer)) as u64;
try!(self.writer.write_all(&self.intermediary_buffer));
self.written += compressed_block_size;
try!(self.offset_index_writer.insert(self.doc, &self.written));
(self.intermediary_buffer.len() as u32)
.serialize(&mut self.writer)?;
self.writer.write_all(&self.intermediary_buffer)?;
self.offset_index_writer
.insert(self.doc, &(self.writer.written_bytes() as u64))?;
self.current_block.clear();
Ok(())
}
@@ -89,9 +89,9 @@ impl StoreWriter {
if !self.current_block.is_empty() {
try!(self.write_and_compress_block());
}
let header_offset: u64 = self.written;
let header_offset: u64 = self.writer.written_bytes() as u64;
try!(self.offset_index_writer
.write::<Box<Write>>(&mut self.writer));
.write(&mut self.writer));
try!(header_offset.serialize(&mut self.writer));
try!(self.doc.serialize(&mut self.writer));
self.writer.flush()

@@ -48,7 +48,6 @@ impl<'a, V> Ord for HeapItem<'a, V>
/// - the term
/// - a slice with the ordinal of the segments containing
/// the terms.
#[allow(should_implement_trait)]
pub struct TermMerger<'a, V>
where V: 'a + BinarySerializable + Default
{
@@ -131,6 +130,7 @@ impl<'a, V> TermMerger<'a, V>
}

/// Iterates through terms
#[allow(should_implement_trait)]
pub fn next(&mut self) -> Option<Term<&[u8]>> {
if self.advance() {
Some(Term::wrap(self.current_streamers[0].streamer.key()))

@@ -411,7 +411,6 @@ mod tests {
{
for i in (0..20).chain((BLOCK_SIZE - 10..BLOCK_SIZE + 10)) {
for j in 0..3 {
println!("i {} j {}", i, j);
let &(ref fst_key, _) = &ids[i];
let &(ref last_key, _) = &ids[i + j];
let mut streamer = term_dictionary
@@ -420,7 +419,6 @@ mod tests {
.lt(last_key.as_bytes())
.into_stream();
for _ in 0..j {
println!("ij");
assert!(streamer.next().is_some());
}
assert!(streamer.next().is_none());

@@ -1,9 +1,7 @@

mod termdict;
mod streamer;
mod counting_writer;

use self::counting_writer::CountingWriter;
pub use self::termdict::TermDictionaryImpl;
pub use self::termdict::TermDictionaryBuilderImpl;
pub use self::streamer::TermStreamerImpl;

@@ -7,7 +7,7 @@ use common::VInt;
use directory::ReadOnlySource;
use common::BinarySerializable;
use std::marker::PhantomData;
use super::CountingWriter;
use common::CountingWriter;
use std::cmp::Ordering;
use postings::TermInfo;
use fst::raw::Node;