Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-04 16:22:55 +00:00)

Commit: Cargo fmt
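The hunks below are the output of a plain `cargo fmt` run: no behavior changes, only layout. The recurring transformations are alphabetized imports, a space around `=` inside attributes such as `#[cfg(feature = "mmap")]`, `where` clauses broken onto their own lines, trailing commas in multi-line literals, and long method chains either collapsed or re-indented. A minimal sketch of the resulting style (illustrative code, not taken from the commit):

use std::collections::BTreeMap;
use std::fmt;

#[cfg(feature = "mmap")]
fn mmap_only() {}

// `where` on its own line, bound indented, trailing comma:
pub fn counts_to_vec<T>(counts: &BTreeMap<T, u64>) -> Vec<(&T, u64)>
where
    T: Ord + fmt::Debug,
{
    counts.iter().map(|(k, v)| (k, *v)).collect()
}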
@@ -5,11 +5,11 @@ extern crate tempdir;
extern crate serde_json;

use std::path::Path;
use tempdir::TempDir;
use tantivy::Index;
use tantivy::schema::*;
use tantivy::collector::TopCollector;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::Index;
use tempdir::TempDir;

fn main() {
    // Let's create a temporary directory for the

@@ -1,9 +1,9 @@
use Result;
use collector::Collector;
use DocId;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
use DocId;
use Score;

/// Collector that does nothing.
/// This is used in the chain Collector and will hopefully

@@ -1,9 +1,9 @@
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use Score;
use SegmentLocalId;
use SegmentReader;

/// `CountCollector` collector only counts how many
/// documents match the query.

@@ -1,25 +1,25 @@
use std::mem;
use collector::Collector;
use docset::SkipResult;
use fastfield::FacetReader;
use schema::Facet;
use schema::Field;
use std::cell::UnsafeCell;
use schema::Facet;
use std::collections::btree_map;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::BinaryHeap;
use std::collections::Bound;
use std::collections::BTreeSet;
use termdict::TermMerger;
use docset::SkipResult;
use std::collections::btree_map;
use std::{usize, u64};
use std::iter::Peekable;
use std::mem;
use std::{u64, usize};
use termdict::TermMerger;

use std::cmp::Ordering;
use DocId;
use Result;
use Score;
use SegmentReader;
use SegmentLocalId;
use std::cmp::Ordering;
use SegmentReader;

struct Hit<'a> {
    count: u64,

@@ -430,27 +430,22 @@ pub struct FacetCounts {
    facet_counts: BTreeMap<Facet, u64>,
}

pub struct FacetChildIterator<'a> {
    underlying: btree_map::Range<'a, Facet, u64>,
}

impl<'a> Iterator for FacetChildIterator<'a> {
    type Item = (&'a Facet, u64);

    fn next(&mut self) -> Option<Self::Item> {
        self.underlying
            .next()
            .map(|(facet, count)| (facet, *count))
        self.underlying.next().map(|(facet, count)| (facet, *count))
    }
}

impl FacetCounts {
    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator //impl Iterator<Item = (&'a Facet, u64)>
    where Facet: From<T>
    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator
    where
        Facet: From<T>,
    {
        let facet = Facet::from(facet_from);
        let left_bound = Bound::Excluded(facet.clone());

@@ -463,9 +458,7 @@ impl FacetCounts {
            Bound::Excluded(facet_after)
        };
        let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
        FacetChildIterator {
            underlying
        }
        FacetChildIterator { underlying }
    }

    pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>

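The `get` method above follows a standard pattern: materialize range bounds, take a `btree_map::Range`, and wrap it in a small iterator adapter that copies the count out of the borrowed value. A self-contained sketch of the same pattern over plain `String` keys (a hypothetical stand-in for `Facet`, which wraps an encoded path):

use std::collections::btree_map;
use std::collections::BTreeMap;

pub struct ChildIterator<'a> {
    underlying: btree_map::Range<'a, String, u64>,
}

impl<'a> Iterator for ChildIterator<'a> {
    type Item = (&'a String, u64);

    fn next(&mut self) -> Option<Self::Item> {
        // Copy the `u64` out of the `&u64` the range yields.
        self.underlying.next().map(|(key, count)| (key, *count))
    }
}

pub fn children<'a>(counts: &'a BTreeMap<String, u64>, prefix: &str) -> ChildIterator<'a> {
    // Range over all keys starting at `prefix`; the real code computes
    // `Bound::Excluded` endpoints on an encoded facet path instead.
    ChildIterator {
        underlying: counts.range(prefix.to_string()..),
    }
}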
@@ -497,13 +490,13 @@ impl FacetCounts {

#[cfg(test)]
mod tests {
    use core::Index;
    use schema::{Document, Facet, SchemaBuilder};
    use query::AllQuery;
    use super::{FacetCollector, FacetCounts};
    use std::iter;
    use core::Index;
    use query::AllQuery;
    use rand::{thread_rng, Rng};
    use schema::Field;
    use schema::{Document, Facet, SchemaBuilder};
    use std::iter;

    #[test]
    fn test_facet_collector_drilldown() {

@@ -558,8 +551,10 @@ mod tests {
    }

    #[test]
    #[should_panic(expected = "Tried to add a facet which is a descendant of \
                               an already added facet.")]
    #[should_panic(
        expected = "Tried to add a facet which is a descendant of \
                    an already added facet."
    )]
    fn test_misused_facet_collector() {
        let mut facet_collector = FacetCollector::for_field(Field(0));
        facet_collector.add_facet(Facet::from("/country"));

@@ -619,18 +614,16 @@ mod tests {

}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {

    use test::Bencher;
    use schema::SchemaBuilder;
    use Index;
    use collector::FacetCollector;
    use schema::Facet;
    use query::AllQuery;
    use rand::{thread_rng, Rng};

    use schema::Facet;
    use schema::SchemaBuilder;
    use test::Bencher;
    use Index;

    #[bench]
    fn bench_facet_collector(b: &mut Bencher) {

@@ -662,4 +655,4 @@ mod bench {
            searcher.search(&AllQuery, &mut facet_collector).unwrap();
        });
    }
}
}

@@ -2,11 +2,11 @@
Defines how the documents matching a search query should be processed.
*/

use SegmentReader;
use SegmentLocalId;
use DocId;
use Score;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;

mod count_collector;
pub use self::count_collector::CountCollector;

@@ -89,12 +89,12 @@ impl<'a, C: Collector> Collector for &'a mut C {
pub mod tests {

    use super::*;
    use DocId;
    use Score;
    use core::SegmentReader;
    use SegmentLocalId;
    use fastfield::FastFieldReader;
    use schema::Field;
    use DocId;
    use Score;
    use SegmentLocalId;

    /// Stores all of the doc ids.
    /// This collector is only used for tests.

@@ -187,11 +187,10 @@ pub mod tests {

}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
    use test::Bencher;
    use collector::{Collector, CountCollector};
    use test::Bencher;

    #[bench]
    fn build_collector(b: &mut Bencher) {

@@ -204,4 +203,4 @@ mod bench {
            count_collector.count()
        });
    }
}
}

@@ -1,9 +1,9 @@
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use Score;
use SegmentLocalId;
use SegmentReader;

/// Multicollector makes it possible to collect on more than one collector.
/// It should only be used for use cases where the Collector types is unknown

@@ -1,12 +1,12 @@
use super::Collector;
use SegmentReader;
use SegmentLocalId;
use DocAddress;
use Result;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use DocAddress;
use DocId;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;

// Rust heap is a max-heap and we need a min heap.
#[derive(Clone, Copy)]

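The comment above calls out that `std::collections::BinaryHeap` is a max-heap, while a top-K collector needs to evict its current minimum. The diff shows tantivy solving this with a dedicated `Copy` struct (presumably with an inverted `Ord`); a sketch of the same idea using the standard `Reverse` wrapper instead:

use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Keeps the `k` largest scores seen; `Reverse` turns the max-heap into a
/// min-heap, so the smallest retained score is always at the root.
fn top_k(scores: impl IntoIterator<Item = u32>, k: usize) -> Vec<u32> {
    let mut heap = BinaryHeap::new();
    for score in scores {
        heap.push(Reverse(score));
        if heap.len() > k {
            heap.pop(); // drops the current minimum
        }
    }
    let mut best: Vec<u32> = heap.into_iter().map(|Reverse(s)| s).collect();
    best.sort_unstable_by(|a, b| b.cmp(a));
    best
}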
@@ -135,9 +135,9 @@ impl Collector for TopCollector {
mod tests {

    use super::*;
    use collector::Collector;
    use DocId;
    use Score;
    use collector::Collector;

    #[test]
    fn test_top_collector_not_at_capacity() {

@@ -1,6 +1,6 @@
use std::io::Write;
use std::io;
use common::serialize::BinarySerializable;
use std::io;
use std::io::Write;
use std::mem;
use std::ops::Deref;
use std::ptr;

@@ -106,7 +106,8 @@ where
            addr + 8 <= data.len(),
            "The fast field field should have been padded with 7 bytes."
        );
        let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
        let val_unshifted_unmasked: u64 =
            unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
        val_shifted & mask
    } else {

@@ -141,7 +142,8 @@ where
        for output_val in output.iter_mut() {
            let addr = addr_in_bits >> 3;
            let bit_shift = addr_in_bits & 7;
            let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
            let val_unshifted_unmasked: u64 =
                unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
            let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
            *output_val = val_shifted & mask;
            addr_in_bits += num_bits;

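Both hunks above read a potentially unaligned `u64` straight out of the byte buffer, then shift and mask it, which is why the assert demands 7 bytes of padding past the last value. A safe sketch of that read path (the real code uses `ptr::read_unaligned` for speed):

/// Reads value number `idx` from a buffer of `num_bits`-wide integers.
/// Requires `num_bits <= 57` so value plus shift fits in one u64 read,
/// and at least 7 bytes of padding after the last value.
fn get_bits(data: &[u8], idx: usize, num_bits: usize) -> u64 {
    let mask = (1u64 << num_bits) - 1;
    let addr_in_bits = idx * num_bits;
    let addr = addr_in_bits >> 3; // byte address
    let bit_shift = addr_in_bits & 7; // bit offset within that byte
    let mut word = [0u8; 8];
    word.copy_from_slice(&data[addr..addr + 8]);
    (u64::from_le_bytes(word) >> bit_shift) & mask
}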
@@ -202,14 +202,14 @@ impl BitSet {
#[cfg(test)]
mod tests {

    use tests;
    use std::collections::HashSet;
    use super::BitSet;
    use super::TinySet;
    use tests::generate_nonunique_unsorted;
    use std::collections::BTreeSet;
    use query::BitSetDocSet;
    use docset::DocSet;
    use query::BitSetDocSet;
    use std::collections::BTreeSet;
    use std::collections::HashSet;
    use tests;
    use tests::generate_nonunique_unsorted;

    #[test]
    fn test_tiny_set() {

@@ -354,12 +354,12 @@ mod tests {
    }
}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {

    use test;
    use super::TinySet;
    use super::BitSet;
    use super::TinySet;
    use test;

    #[bench]
    fn bench_tinyset_pop(b: &mut test::Bencher) {

@@ -392,4 +392,4 @@ mod bench {
    fn bench_bitset_initialize(b: &mut test::Bencher) {
        b.iter(|| BitSet::with_max_value(1_000_000));
    }
}
}

@@ -1,12 +1,12 @@
use std::io::Write;
use common::CountingWriter;
use std::collections::HashMap;
use schema::Field;
use common::VInt;
use directory::WritePtr;
use std::io::{self, Read};
use directory::ReadOnlySource;
use common::BinarySerializable;
use common::CountingWriter;
use common::VInt;
use directory::ReadOnlySource;
use directory::WritePtr;
use schema::Field;
use std::collections::HashMap;
use std::io::Write;
use std::io::{self, Read};

#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
pub struct FileAddr {

@@ -30,10 +30,7 @@ impl BinarySerializable for FileAddr {
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let field = Field::deserialize(reader)?;
        let idx = VInt::deserialize(reader)?.0 as usize;
        Ok(FileAddr {
            field,
            idx,
        })
        Ok(FileAddr { field, idx })
    }
}

@@ -166,7 +163,7 @@ impl CompositeFile {
    /// to a given `Field` and stored in a `CompositeFile`.
    pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option<ReadOnlySource> {
        self.offsets_index
            .get(&FileAddr { field, idx, })
            .get(&FileAddr { field, idx })
            .map(|&(from, to)| self.data.slice(from, to))
    }
}

@@ -174,12 +171,12 @@ impl CompositeFile {
#[cfg(test)]
mod test {

    use std::io::Write;
    use super::{CompositeFile, CompositeWrite};
    use common::BinarySerializable;
    use common::VInt;
    use directory::{Directory, RAMDirectory};
    use schema::Field;
    use common::VInt;
    use common::BinarySerializable;
    use std::io::Write;
    use std::path::Path;

    #[test]

@@ -1,5 +1,5 @@
use std::io::Write;
use std::io;
use std::io::Write;

pub struct CountingWriter<W> {
    underlying: W,

@@ -1,16 +1,16 @@
mod serialize;
mod vint;
mod counting_writer;
mod composite_file;
pub mod bitpacker;
mod bitset;
mod composite_file;
mod counting_writer;
mod serialize;
mod vint;

pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::serialize::{BinarySerializable, FixedSize};
pub use self::vint::VInt;
pub use self::counting_writer::CountingWriter;
pub use self::bitset::BitSet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::counting_writer::CountingWriter;
pub use self::serialize::{BinarySerializable, FixedSize};
pub use self::vint::VInt;
pub use byteorder::LittleEndian as Endianness;

use std::io;

@@ -104,8 +104,8 @@ pub fn u64_to_i64(val: u64) -> i64 {
#[cfg(test)]
pub(crate) mod test {

    use super::{compute_num_bits, i64_to_u64, u64_to_i64};
    pub use super::serialize::test::fixed_size_test;
    use super::{compute_num_bits, i64_to_u64, u64_to_i64};

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);

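The round-trip test above pins down the contract of `i64_to_u64`/`u64_to_i64`: a bijection that lets fast fields store signed values as `u64`. Flipping the sign bit is the classic order-preserving mapping and satisfies the test; whether it is byte-for-byte what tantivy does here is an assumption:

const HIGHEST_BIT: u64 = 1 << 63;

/// Maps i64::MIN..=i64::MAX monotonically onto 0..=u64::MAX.
pub fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

pub fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

#[test]
fn round_trip() {
    for &val in &[i64::min_value(), -1, 0, 1, i64::max_value()] {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
    }
}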
@@ -1,10 +1,10 @@
use byteorder::{ReadBytesExt, WriteBytesExt};
use common::Endianness;
use std::fmt;
use std::io::Write;
use std::io::Read;
use std::io;
use common::VInt;
use std::fmt;
use std::io;
use std::io::Read;
use std::io::Write;

/// Trait for a simple binary serialization.
pub trait BinarySerializable: fmt::Debug + Sized {

@@ -135,8 +135,8 @@ impl BinarySerializable for String {
#[cfg(test)]
pub mod test {

    use common::VInt;
    use super::*;
    use common::VInt;

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();

@@ -1,7 +1,7 @@
use super::BinarySerializable;
use std::io;
use std::io::Write;
use std::io::Read;
use std::io::Write;

/// Wrapper over a `u64` that serializes as a variable int.
#[derive(Debug, Eq, PartialEq)]

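`VInt` above is a variable-length integer: seven payload bits per byte with the high bit as a continuation flag, so small values cost a single byte. A minimal sketch of that encoding (the usual LEB128-style scheme; treat it as illustrative rather than tantivy's exact wire format):

fn serialize_vint(mut val: u64, out: &mut Vec<u8>) {
    loop {
        let byte = (val & 0x7f) as u8;
        val >>= 7;
        if val == 0 {
            out.push(byte); // high bit clear: last byte
            return;
        }
        out.push(byte | 0x80); // high bit set: more bytes follow
    }
}

fn deserialize_vint(data: &[u8]) -> (u64, usize) {
    let mut result = 0u64;
    for (i, &byte) in data.iter().enumerate() {
        result |= u64::from(byte & 0x7f) << (7 * i);
        if byte & 0x80 == 0 {
            return (result, i + 1); // value and bytes consumed
        }
    }
    panic!("unterminated vint");
}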
@@ -8,10 +8,8 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;

pub use self::stream::CompressedIntStream;

use bitpacking::{BitPacker, BitPacker4x};

/// Returns the size in bytes of a compressed block, given `num_bits`.
pub fn compressed_block_size(num_bits: u8) -> usize {
    1 + (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8

@@ -35,19 +33,21 @@ impl BlockEncoder {
    pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> &[u8] {
        let num_bits = self.bitpacker.num_bits_sorted(offset, block);
        self.output[0] = num_bits;
        let written_size = 1 + self.bitpacker.compress_sorted(offset, block, &mut self.output[1..], num_bits);
        let written_size =
            1 + self.bitpacker
                .compress_sorted(offset, block, &mut self.output[1..], num_bits);
        &self.output[..written_size]
    }

    pub fn compress_block_unsorted(&mut self, block: &[u32]) -> &[u8] {
        let num_bits = self.bitpacker.num_bits(block);
        self.output[0] = num_bits;
        let written_size = 1 + self.bitpacker.compress(block, &mut self.output[1..], num_bits);
        let written_size = 1 + self.bitpacker
            .compress(block, &mut self.output[1..], num_bits);
        &self.output[..written_size]
    }
}

pub struct BlockDecoder {
    bitpacker: BitPacker4x,
    pub output: [u32; COMPRESSION_BLOCK_SIZE + 1],

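The encoder above fixes the block layout: byte 0 stores `num_bits`, followed by 128 integers packed at that width, which matches `compressed_block_size` (for example, `num_bits = 11` gives 1 + 11 * 128 / 8 = 177 bytes). A sketch of a round trip through the same `bitpacking` calls the hunk uses:

use bitpacking::{BitPacker, BitPacker4x};

fn compressed_len(num_bits: u8) -> usize {
    (num_bits as usize) * BitPacker4x::BLOCK_LEN / 8
}

fn round_trip(block: &[u32]) -> Vec<u32> {
    assert_eq!(block.len(), BitPacker4x::BLOCK_LEN); // 128, like COMPRESSION_BLOCK_SIZE
    let bitpacker = BitPacker4x::new();
    let num_bits = bitpacker.num_bits(block);

    // Leading byte carries num_bits, then the packed payload.
    let mut compressed = vec![0u8; 1 + compressed_len(num_bits)];
    compressed[0] = num_bits;
    bitpacker.compress(block, &mut compressed[1..], num_bits);

    let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
    bitpacker.decompress(&compressed[1..], &mut decompressed, num_bits);
    decompressed
}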
@@ -68,17 +68,23 @@ impl BlockDecoder {
            output_len: 0,
        }
    }

    pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
        let num_bits = compressed_data[0];
        self.output_len = COMPRESSION_BLOCK_SIZE;
        1 + self.bitpacker.decompress_sorted(offset, &compressed_data[1..], &mut self.output, num_bits)
        1 + self.bitpacker.decompress_sorted(
            offset,
            &compressed_data[1..],
            &mut self.output,
            num_bits,
        )
    }

    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
        let num_bits = compressed_data[0];
        self.output_len = COMPRESSION_BLOCK_SIZE;
        1 + self.bitpacker.decompress(&compressed_data[1..], &mut self.output, num_bits)
        1 + self.bitpacker
            .decompress(&compressed_data[1..], &mut self.output, num_bits)
    }

    #[inline]

@@ -264,14 +270,13 @@ pub mod tests {
    }
}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {

    use super::*;
    use test::Bencher;
    use tests;

    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
        let seed: &[u32; 4] = &[1, 2, 3, seed_val];
        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);

@@ -1,6 +1,6 @@
use compression::compressed_block_size;
use compression::BlockDecoder;
use compression::COMPRESSION_BLOCK_SIZE;
use compression::compressed_block_size;
use directory::{ReadOnlySource, SourceRead};

/// Reads a stream of compressed ints.

@@ -13,7 +13,7 @@ pub struct CompressedIntStream {
    buffer: SourceRead,

    block_decoder: BlockDecoder,
    cached_addr: usize, // address of the currently decoded block
    cached_addr: usize,      // address of the currently decoded block
    cached_next_addr: usize, // address following the currently decoded block

    addr: usize, // address of the block associated to the current position

@@ -42,7 +42,8 @@ impl CompressedIntStream {
            // no need to read.
            self.cached_next_addr
        } else {
            let next_addr = addr + self.block_decoder.uncompress_block_unsorted(self.buffer.slice_from(addr));
            let next_addr = addr + self.block_decoder
                .uncompress_block_unsorted(self.buffer.slice_from(addr));
            self.cached_addr = addr;
            self.cached_next_addr = next_addr;
            next_addr

@@ -101,8 +102,8 @@ pub mod tests {

    use super::CompressedIntStream;
    use compression::compressed_block_size;
    use compression::COMPRESSION_BLOCK_SIZE;
    use compression::BlockEncoder;
    use compression::COMPRESSION_BLOCK_SIZE;
    use directory::ReadOnlySource;

    fn create_stream_buffer() -> ReadOnlySource {

@@ -1,33 +1,32 @@
use Result;
use core::SegmentId;
use error::{ErrorKind, ResultExt};
use serde_json;
use schema::Schema;
use std::sync::Arc;
use serde_json;
use std::borrow::BorrowMut;
use std::fmt;
use core::SegmentId;
use std::sync::Arc;
use Result;

#[cfg(feature="mmap")]
use super::pool::LeasedItem;
use super::pool::Pool;
use super::segment::create_segment;
use super::segment::Segment;
use core::searcher::Searcher;
use core::IndexMeta;
use core::SegmentMeta;
use core::SegmentReader;
use core::META_FILEPATH;
use directory::ManagedDirectory;
#[cfg(feature = "mmap")]
use directory::MmapDirectory;
use directory::{Directory, RAMDirectory};
use indexer::index_writer::open_index_writer;
use core::searcher::Searcher;
use num_cpus;
use super::segment::Segment;
use core::SegmentReader;
use super::pool::Pool;
use core::SegmentMeta;
use super::pool::LeasedItem;
use std::path::Path;
use core::IndexMeta;
use indexer::DirectoryLock;
use IndexWriter;
use directory::ManagedDirectory;
use core::META_FILEPATH;
use super::segment::create_segment;
use indexer::segment_updater::save_new_metas;
use indexer::DirectoryLock;
use num_cpus;
use std::path::Path;
use tokenizer::TokenizerManager;
use IndexWriter;

const NUM_SEARCHERS: usize = 12;

@@ -64,7 +63,7 @@ impl Index {
    /// The index will use the `MMapDirectory`.
    ///
    /// If a previous index was in this directory, then its meta file will be destroyed.
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    pub fn create<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
        let directory = ManagedDirectory::new(mmap_directory)?;

@@ -84,7 +83,7 @@ impl Index {
    ///
    /// The temp directory is only used for testing the `MmapDirectory`.
    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::create_from_tempdir()?;
        let directory = ManagedDirectory::new(mmap_directory)?;

@@ -112,7 +111,7 @@ impl Index {
    }

    /// Opens a new directory from an index path.
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
        let directory = ManagedDirectory::new(mmap_directory)?;

@@ -224,7 +223,7 @@ impl Index {
            .collect::<Result<_>>()?;
        let schema = self.schema();
        let searchers = (0..NUM_SEARCHERS)
            .map(|_| Searcher::new(schema.clone(),segment_readers.clone()))
            .map(|_| Searcher::new(schema.clone(), segment_readers.clone()))
            .collect();
        self.searcher_pool.publish_new_generation(searchers);
        Ok(())

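For orientation, the `Index` API being reformatted above is the one the example at the top of the diff exercises. A sketch of the typical flow in this era of tantivy (method names as seen elsewhere in this diff; the buffer size and document content are arbitrary, and details may differ across versions):

use tantivy::collector::TopCollector;
use tantivy::query::QueryParser;
use tantivy::schema::{Document, SchemaBuilder, STORED, TEXT};
use tantivy::Index;

fn main() {
    // Build a schema with one stored, tokenized text field.
    let mut schema_builder = SchemaBuilder::default();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());

    // Index a document and commit.
    let mut writer = index.writer(50_000_000).expect("writer");
    let mut doc = Document::default();
    doc.add_text(title, "Of Mice and Men");
    writer.add_document(doc);
    writer.commit().expect("commit");

    // Reload searchers and run a query through a TopCollector.
    index.load_searchers().expect("load searchers");
    let searcher = index.searcher();
    let query_parser = QueryParser::for_index(&index, vec![title]);
    let query = query_parser.parse_query("mice").expect("query");
    let mut top_collector = TopCollector::with_limit(10);
    searcher.search(&*query, &mut top_collector).expect("search");
}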
@@ -1,7 +1,7 @@
use schema::Schema;
use core::SegmentMeta;
use std::fmt;
use schema::Schema;
use serde_json;
use std::fmt;

/// Meta information about the `Index`.
///

@@ -45,9 +45,9 @@ impl fmt::Debug for IndexMeta {
#[cfg(test)]
mod tests {

    use serde_json;
    use super::IndexMeta;
    use schema::{SchemaBuilder, TEXT};
    use serde_json;

    #[test]
    fn test_serialize_metas() {

@@ -1,13 +1,13 @@
use common::BinarySerializable;
use compression::CompressedIntStream;
use directory::{ReadOnlySource, SourceRead};
use termdict::TermDictionary;
use postings::{BlockSegmentPostings, SegmentPostings};
use postings::FreqReadingOption;
use postings::TermInfo;
use postings::{BlockSegmentPostings, SegmentPostings};
use schema::FieldType;
use schema::IndexRecordOption;
use schema::Term;
use compression::CompressedIntStream;
use postings::FreqReadingOption;
use common::BinarySerializable;
use schema::FieldType;
use termdict::TermDictionary;

/// The inverted index reader is in charge of accessing
/// the inverted index associated to a specific field.

@@ -27,7 +27,7 @@ pub struct InvertedIndexReader {
    postings_source: ReadOnlySource,
    positions_source: ReadOnlySource,
    record_option: IndexRecordOption,
    total_num_tokens: u64
    total_num_tokens: u64,
}

impl InvertedIndexReader {

@@ -45,7 +45,7 @@ impl InvertedIndexReader {
            postings_source: postings_source.slice_from(8),
            positions_source,
            record_option,
            total_num_tokens
            total_num_tokens,
        }
    }

@@ -56,11 +56,11 @@ impl InvertedIndexReader {
            .get_index_record_option()
            .unwrap_or(IndexRecordOption::Basic);
        InvertedIndexReader {
            termdict: TermDictionary::empty(field_type),
            termdict: TermDictionary::empty(field_type),
            postings_source: ReadOnlySource::empty(),
            positions_source: ReadOnlySource::empty(),
            record_option,
            total_num_tokens: 0u64
            total_num_tokens: 0u64,
        }
    }

@@ -149,8 +149,6 @@ impl InvertedIndexReader {
        self.total_num_tokens
    }

    /// Returns the segment postings associated with the term, and with the given option,
    /// or `None` if the term has never been encountered and indexed.
    ///

@@ -166,12 +164,15 @@ impl InvertedIndexReader {
        Some(self.read_postings_from_terminfo(&term_info, option))
    }

    pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
    pub(crate) fn read_postings_no_deletes(
        &self,
        term: &Term,
        option: IndexRecordOption,
    ) -> Option<SegmentPostings> {
        let term_info = get!(self.get_term_info(term));
        Some(self.read_postings_from_terminfo(&term_info, option))
    }

    /// Returns the number of documents containing the term.
    pub fn doc_freq(&self, term: &Term) -> u32 {
        self.get_term_info(term)

@@ -179,6 +180,3 @@ impl InvertedIndexReader {
            .unwrap_or(0u32)
    }
}

@@ -1,24 +1,24 @@
pub mod searcher;
pub mod index;
mod segment_reader;
mod segment_id;
mod segment_component;
mod segment;
mod index_meta;
mod pool;
mod segment_meta;
mod inverted_index_reader;
mod pool;
pub mod searcher;
mod segment;
mod segment_component;
mod segment_id;
mod segment_meta;
mod segment_reader;

pub use self::index::Index;
pub use self::index_meta::IndexMeta;
pub use self::inverted_index_reader::InvertedIndexReader;
pub use self::searcher::Searcher;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::SegmentReader;
pub use self::segment::Segment;
pub use self::segment::SerializableSegment;
pub use self::index::Index;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_meta::SegmentMeta;
pub use self::index_meta::IndexMeta;
pub use self::segment_reader::SegmentReader;

use std::path::PathBuf;

@@ -1,8 +1,8 @@
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use crossbeam::sync::MsQueue;
use std::mem;
use std::ops::{Deref, DerefMut};
use crossbeam::sync::MsQueue;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;

pub struct GenerationItem<T> {

@@ -114,8 +114,8 @@ impl<T> Drop for LeasedItem<T> {
#[cfg(test)]
mod tests {

    use std::iter;
    use super::Pool;
    use std::iter;

    #[test]
    fn test_pool() {

@@ -1,15 +1,15 @@
use Result;
use core::SegmentReader;
use schema::Document;
use collector::Collector;
use query::Query;
use DocAddress;
use schema::{Field, Term};
use termdict::TermMerger;
use std::sync::Arc;
use std::fmt;
use schema::Schema;
use core::InvertedIndexReader;
use core::SegmentReader;
use query::Query;
use schema::Document;
use schema::Schema;
use schema::{Field, Term};
use std::fmt;
use std::sync::Arc;
use termdict::TermMerger;
use DocAddress;
use Result;

/// Holds a list of `SegmentReader`s ready for search.
///

@@ -22,14 +22,11 @@ pub struct Searcher {
}

impl Searcher {

    /// Creates a new `Searcher`
    pub(crate) fn new(
        schema: Schema,
        segment_readers: Vec<SegmentReader>) -> Searcher {
    pub(crate) fn new(schema: Schema, segment_readers: Vec<SegmentReader>) -> Searcher {
        Searcher {
            schema,
            segment_readers
            segment_readers,
        }
    }
    /// Fetches a document from tantivy's store given a `DocAddress`.

@@ -109,7 +106,6 @@ impl FieldSearcher {
    }
}

impl fmt::Debug for Searcher {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let segment_ids = self.segment_readers

@@ -1,16 +1,16 @@
use Result;
use std::path::PathBuf;
use schema::Schema;
use std::fmt;
use core::SegmentId;
use directory::{FileProtection, ReadOnlySource, WritePtr};
use indexer::segment_serializer::SegmentSerializer;
use super::SegmentComponent;
use core::Index;
use std::result;
use directory::Directory;
use core::SegmentId;
use core::SegmentMeta;
use directory::error::{OpenReadError, OpenWriteError};
use directory::Directory;
use directory::{FileProtection, ReadOnlySource, WritePtr};
use indexer::segment_serializer::SegmentSerializer;
use schema::Schema;
use std::fmt;
use std::path::PathBuf;
use std::result;
use Result;

/// A segment is a piece of the index.
#[derive(Clone)]

@@ -111,8 +111,8 @@ mod tests {

    use core::SegmentComponent;
    use directory::Directory;
    use std::collections::HashSet;
    use schema::SchemaBuilder;
    use std::collections::HashSet;
    use Index;

    #[test]

@@ -1,6 +1,6 @@
use uuid::Uuid;
use std::fmt;
use std::cmp::{Ord, Ordering};
use std::fmt;
use uuid::Uuid;

#[cfg(test)]
use std::sync::atomic;

@@ -1,7 +1,7 @@
use core::SegmentId;
use super::SegmentComponent;
use std::path::PathBuf;
use core::SegmentId;
use std::collections::HashSet;
use std::path::PathBuf;

#[derive(Clone, Debug, Serialize, Deserialize)]
struct DeleteMeta {

@@ -1,30 +1,30 @@
use Result;
use core::Segment;
use core::SegmentId;
use core::SegmentComponent;
use std::sync::RwLock;
use common::HasLen;
use core::SegmentMeta;
use fastfield::{self, FastFieldNotAvailableError};
use fastfield::DeleteBitSet;
use store::StoreReader;
use schema::Document;
use DocId;
use std::sync::Arc;
use std::collections::HashMap;
use common::CompositeFile;
use std::fmt;
use common::HasLen;
use core::InvertedIndexReader;
use schema::Field;
use schema::FieldType;
use core::Segment;
use core::SegmentComponent;
use core::SegmentId;
use core::SegmentMeta;
use error::ErrorKind;
use fastfield::DeleteBitSet;
use fastfield::FacetReader;
use fastfield::FastFieldReader;
use schema::Schema;
use termdict::TermDictionary;
use fastfield::{self, FastFieldNotAvailableError};
use fastfield::{FastValue, MultiValueIntFastFieldReader};
use schema::Cardinality;
use fieldnorm::FieldNormReader;
use schema::Cardinality;
use schema::Document;
use schema::Field;
use schema::FieldType;
use schema::Schema;
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use std::sync::RwLock;
use store::StoreReader;
use termdict::TermDictionary;
use DocId;
use Result;

/// Entry point to access all of the datastructures of the `Segment`
///

@@ -109,12 +109,12 @@ impl SegmentReader {
    ) -> fastfield::Result<FastFieldReader<Item>> {
        let field_entry = self.schema.get_field_entry(field);
        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
        {
            self.fast_fields_composite
                .open_read(field)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)
        } else {
        {
            self.fast_fields_composite
                .open_read(field)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)
        } else {
            Err(FastFieldNotAvailableError::new(field_entry))
        }
    }

@@ -127,17 +127,17 @@ impl SegmentReader {
    ) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
        let field_entry = self.schema.get_field_entry(field);
        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
        {
            let idx_reader = self.fast_fields_composite
                .open_read_with_idx(field, 0)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)?;
            let vals_reader = self.fast_fields_composite
                .open_read_with_idx(field, 1)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)?;
            Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
        } else {
        {
            let idx_reader = self.fast_fields_composite
                .open_read_with_idx(field, 0)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)?;
            let vals_reader = self.fast_fields_composite
                .open_read_with_idx(field, 1)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
                .map(FastFieldReader::open)?;
            Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
        } else {
            Err(FastFieldNotAvailableError::new(field_entry))
        }
    }

@@ -175,12 +175,14 @@ impl SegmentReader {
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
    pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
        if let Some(fieldnorm_source) = self.fieldnorms_composite
            .open_read(field) {
        if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
            FieldNormReader::open(fieldnorm_source)
        } else {
            let field_name = self.schema.get_field_name(field);
            let err_msg= format!("Field norm not found for field {:?}. Was it market as indexed during indexing.", field_name);
            let err_msg = format!(
                "Field norm not found for field {:?}. Was it market as indexed during indexing.",
                field_name
            );
            panic!(err_msg);
        }
    }

@@ -215,13 +217,12 @@ impl SegmentReader {
        let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
        let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;

        let delete_bitset_opt =
            if segment.meta().has_deletes() {
                let delete_data = segment.open_read(SegmentComponent::DELETE)?;
                Some(DeleteBitSet::open(delete_data))
            } else {
                None
            };
        let delete_bitset_opt = if segment.meta().has_deletes() {
            let delete_data = segment.open_read(SegmentComponent::DELETE)?;
            Some(DeleteBitSet::open(delete_data))
        } else {
            None
        };

        let schema = segment.schema();
        Ok(SegmentReader {

@@ -1,10 +1,10 @@
#![allow(dead_code)]

mod skiplist_builder;
mod skiplist;
mod skiplist_builder;

pub use self::skiplist_builder::SkipListBuilder;
pub use self::skiplist::SkipList;
pub use self::skiplist_builder::SkipListBuilder;

#[cfg(test)]
mod tests {

@@ -1,6 +1,6 @@
use common::{BinarySerializable, VInt};
use std::marker::PhantomData;
use std::cmp::max;
use std::marker::PhantomData;

static EMPTY: [u8; 0] = [];

@@ -1,7 +1,7 @@
use std::io::Write;
use common::{BinarySerializable, VInt, is_power_of_2};
use std::marker::PhantomData;
use common::{is_power_of_2, BinarySerializable, VInt};
use std::io;
use std::io::Write;
use std::marker::PhantomData;

struct LayerBuilder<T: BinarySerializable> {
    period_mask: usize,

@@ -1,5 +1,5 @@
use std::mem;
use super::heap::{Heap, HeapAllocable};
use std::mem;

#[inline]
pub fn is_power_of_2(val: u32) -> bool {

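The hunk boundary cuts `is_power_of_2` off right after its signature. The body is not shown, so as a sketch, the standard constant-time check that fits this signature:

#[inline]
pub fn is_power_of_2(val: u32) -> bool {
    // A power of two has exactly one bit set, so clearing the lowest
    // set bit must leave zero.
    val != 0 && val & (val - 1) == 0
}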
@@ -99,8 +99,8 @@ impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> {
#[cfg(test)]
mod tests {

    use super::*;
    use super::super::heap::Heap;
    use super::*;

    #[test]
    fn test_stack() {

@@ -120,14 +120,13 @@ mod tests {
        }
    }

}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
    use test::Bencher;
    use super::Heap;
    use super::ExpUnrolledLinkedList;
    use super::Heap;
    use test::Bencher;

    const NUM_STACK: usize = 10_000;
    const STACK_SIZE: u32 = 1000;

@@ -166,4 +165,4 @@ mod bench {
            heap.clear();
        });
    }
}
}

@@ -1,7 +1,7 @@
use super::heap::{BytesRef, Heap, HeapAllocable};
use postings::UnorderedTermId;
use std::iter;
use std::mem;
use postings::UnorderedTermId;
use super::heap::{BytesRef, Heap, HeapAllocable};

mod murmurhash2 {

@@ -117,11 +117,7 @@ struct QuadraticProbing {

impl QuadraticProbing {
    fn compute(hash: usize, mask: usize) -> QuadraticProbing {
        QuadraticProbing {
            hash,
            i: 0,
            mask,
        }
        QuadraticProbing { hash, i: 0, mask }
    }

    #[inline]

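`QuadraticProbing` above stores only a `hash`, a step counter `i`, and a power-of-two `mask`; the probe step itself falls outside the hunk. A common scheme with exactly this state is triangular probing, which for a table of size 2^k visits every bucket exactly once; whether tantivy uses precisely this increment is an assumption:

struct QuadraticProbing {
    hash: usize,
    i: usize,
    mask: usize,
}

impl QuadraticProbing {
    fn compute(hash: usize, mask: usize) -> QuadraticProbing {
        QuadraticProbing { hash, i: 0, mask }
    }

    #[inline]
    fn next_probe(&mut self) -> usize {
        // Offsets 1, 3, 6, 10, ... (triangular numbers) from the original
        // hash, wrapped into the table by `mask`.
        self.i += 1;
        self.hash = self.hash.wrapping_add(self.i);
        self.hash & self.mask
    }
}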
@@ -135,21 +131,18 @@ use std::slice;

pub struct Iter<'a: 'b, 'b> {
    hashmap: &'b TermHashMap<'a>,
    inner: slice::Iter<'a, usize>
    inner: slice::Iter<'a, usize>,
}

impl<'a, 'b> Iterator for Iter<'a, 'b> {
    type Item = (&'b [u8], u32, UnorderedTermId);

    fn next(&mut self) -> Option<Self::Item> {
        self.inner
            .next()
            .cloned()
            .map(move |bucket: usize| {
                let kv = self.hashmap.table[bucket];
                let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr);
                (key, offset, bucket as UnorderedTermId)
            })
        self.inner.next().cloned().map(move |bucket: usize| {
            let kv = self.hashmap.table[bucket];
            let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr);
            (key, offset, bucket as UnorderedTermId)
        })
    }
}

@@ -183,14 +176,15 @@ impl<'a> TermHashMap<'a> {
    pub fn set_bucket(&mut self, hash: u32, key_value_addr: BytesRef, bucket: usize) {
        self.occupied.push(bucket);
        self.table[bucket] = KeyValue {
            key_value_addr, hash
            key_value_addr,
            hash,
        };
    }

    pub fn iter<'b: 'a>(&'b self) -> Iter<'a, 'b> {
        Iter {
            inner: self.occupied.iter(),
            hashmap: &self
            hashmap: &self,
        }
    }

@@ -225,8 +219,8 @@ impl<'a> TermHashMap<'a> {

#[cfg(all(test, unstable))]
mod bench {
    use test::Bencher;
    use super::murmurhash2::murmurhash2;
    use test::Bencher;

    #[bench]
    fn bench_murmurhash_2(b: &mut Bencher) {

@@ -246,11 +240,11 @@ mod bench {
#[cfg(test)]
mod tests {

    use super::*;
    use super::super::heap::{Heap, HeapAllocable};
    use super::murmurhash2::murmurhash2;
    use std::collections::HashSet;
    use super::split_memory;
    use super::*;
    use std::collections::HashSet;

    struct TestValue {
        val: u32,

@@ -332,5 +326,4 @@ mod tests {
        assert_eq!(set.len(), 10_000);
    }

}

@@ -1,7 +1,7 @@
use byteorder::{ByteOrder, NativeEndian};
use std::cell::UnsafeCell;
use std::mem;
use std::ptr;
use byteorder::{ByteOrder, NativeEndian};

/// `BytesRef` refers to a slice in tantivy's custom `Heap`.
///

@@ -1,10 +1,10 @@
mod expull;
pub(crate) mod hashmap;
mod heap;
mod expull;

pub use self::heap::{Heap, HeapAllocable};
pub use self::expull::ExpUnrolledLinkedList;
pub use self::hashmap::TermHashMap;
pub use self::heap::{Heap, HeapAllocable};

#[test]
fn test_unrolled_linked_list() {

@@ -1,11 +1,11 @@
use std::marker::Send;
use std::fmt;
use std::path::Path;
use directory::error::{DeleteError, OpenReadError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use std::result;
use std::fmt;
use std::io;
use std::marker::Send;
use std::marker::Sync;
use std::path::Path;
use std::result;

/// Write-once read many (WORM) abstraction for where
/// tantivy's data should be stored.

@@ -1,7 +1,7 @@
use std::error::Error as StdError;
use std::path::PathBuf;
use std::io;
use std::fmt;
use std::io;
use std::path::PathBuf;

/// General IO error with an optional path to the offending file.
#[derive(Debug)]

@@ -1,18 +1,18 @@
use std::path::{Path, PathBuf};
use serde_json;
use core::MANAGED_FILEPATH;
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use std::result;
use std::io;
use Directory;
use std::sync::{Arc, RwLock};
use std::collections::HashSet;
use std::sync::RwLockWriteGuard;
use std::io::Write;
use core::MANAGED_FILEPATH;
use std::collections::HashMap;
use std::fmt;
use error::{ErrorKind, Result, ResultExt};
use serde_json;
use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt;
use std::io;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::result;
use std::sync::RwLockWriteGuard;
use std::sync::{Arc, RwLock};
use Directory;

/// Wrapper of directories that keeps track of files created by Tantivy.
///

@@ -282,10 +282,10 @@ impl Clone for ManagedDirectory {
mod tests {

    use super::*;
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    use directory::MmapDirectory;
    use std::path::Path;
    use std::io::Write;
    use std::path::Path;
    use tempdir::TempDir;

    lazy_static! {

@@ -294,7 +294,7 @@ mod tests {
    }

    #[test]
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    fn test_managed_directory() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());

@@ -343,7 +343,7 @@ mod tests {
    }

    #[test]
    #[cfg(feature="mmap ")]
    #[cfg(feature = "mmap ")]
    fn test_managed_directory_gc_while_mmapped() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());

@@ -373,7 +373,7 @@ mod tests {
    }

    #[test]
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    fn test_managed_directory_protect() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());

@@ -1,17 +1,17 @@
use atomicwrites;
use common::make_io_err;
use directory::Directory;
use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use directory::ReadOnlySource;
use directory::shared_vec_slice::SharedVecSlice;
use directory::Directory;
use directory::ReadOnlySource;
use directory::WritePtr;
use fst::raw::MmapReadOnly;
use std::collections::hash_map::Entry as HashMapEntry;
use std::collections::HashMap;
use std::convert::From;
use std::fmt;
use std::fs::{self, File};
use std::fs::OpenOptions;
use std::fs::{self, File};
use std::io::{self, Seek, SeekFrom};
use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf};

@@ -4,29 +4,29 @@ WORM directory abstraction.

*/

#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
mod mmap_directory;

mod ram_directory;
mod directory;
mod managed_directory;
mod ram_directory;
mod read_only_source;
mod shared_vec_slice;
mod managed_directory;

/// Errors specific to the directory module.
pub mod error;

use std::io::{BufWriter, Seek, Write};

pub use self::read_only_source::ReadOnlySource;
pub use self::directory::Directory;
pub use self::ram_directory::RAMDirectory;
pub use self::read_only_source::ReadOnlySource;

#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
pub use self::mmap_directory::MmapDirectory;

pub(crate) use self::read_only_source::SourceRead;
pub(crate) use self::managed_directory::{FileProtection, ManagedDirectory};
pub(crate) use self::read_only_source::SourceRead;

/// Synonym of Seek + Write
pub trait SeekableWrite: Seek + Write {}

@@ -42,8 +42,8 @@ pub type WritePtr = BufWriter<Box<SeekableWrite>>;
mod tests {

    use super::*;
    use std::path::Path;
    use std::io::{Seek, SeekFrom, Write};
    use std::path::Path;

    lazy_static! {
        static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");

@@ -56,7 +56,7 @@ mod tests {
    }

    #[test]
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    fn test_mmap_directory() {
        let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
        test_directory(&mut mmap_directory);

@@ -1,14 +1,14 @@
use super::shared_vec_slice::SharedVecSlice;
use common::make_io_err;
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::WritePtr;
use directory::{Directory, ReadOnlySource};
use std::collections::HashMap;
use std::fmt;
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::result;
use std::sync::{Arc, RwLock};
use common::make_io_err;
use directory::{Directory, ReadOnlySource};
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::WritePtr;
use super::shared_vec_slice::SharedVecSlice;

/// Writer associated with the `RAMDirectory`
///

@@ -1,11 +1,11 @@
#[cfg(feature="mmap")]
use fst::raw::MmapReadOnly;
use std::ops::Deref;
use super::shared_vec_slice::SharedVecSlice;
use common::HasLen;
use std::slice;
use std::io::{self, Read};
#[cfg(feature = "mmap")]
use fst::raw::MmapReadOnly;
use stable_deref_trait::{CloneStableDeref, StableDeref};
use std::io::{self, Read};
use std::ops::Deref;
use std::slice;

/// Read object that represents files in tantivy.
///

@@ -15,7 +15,7 @@ use stable_deref_trait::{CloneStableDeref, StableDeref};
/// hold by this object should never be altered or destroyed.
pub enum ReadOnlySource {
    /// Mmap source of data
    #[cfg(feature="mmap")]
    #[cfg(feature = "mmap")]
    Mmap(MmapReadOnly),
    /// Wrapping a `Vec<u8>`
    Anonymous(SharedVecSlice),

@@ -41,7 +41,7 @@ impl ReadOnlySource {
    /// Returns the data underlying the ReadOnlySource object.
    pub fn as_slice(&self) -> &[u8] {
        match *self {
            #[cfg(feature="mmap")]
            #[cfg(feature = "mmap")]
            ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
            ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
        }

@@ -66,9 +66,14 @@ impl ReadOnlySource {
    /// 1KB slice is remaining, the whole `500MBs`
    /// are retained in memory.
    pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
        assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset);
        assert!(
            from_offset <= to_offset,
            "Requested negative slice [{}..{}]",
            from_offset,
            to_offset
        );
        match *self {
            #[cfg(feature="mmap")]
            #[cfg(feature = "mmap")]
            ReadOnlySource::Mmap(ref mmap_read_only) => {
                let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
                ReadOnlySource::Mmap(sliced_mmap)

@@ -130,13 +135,11 @@ impl SourceRead {

    pub fn slice_from(&self, start: usize) -> &[u8] {
        &self.cursor[start..]

    }

    pub fn get(&self, idx: usize) -> u8 {
        self.cursor[idx]
    }

}

impl AsRef<[u8]> for SourceRead {

@@ -1,8 +1,8 @@
use DocId;
use common::BitSet;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
use std::cmp::Ordering;
use common::BitSet;
use DocId;

/// Expresses the outcome of a call to `DocSet`'s `.skip_next(...)`.
#[derive(PartialEq, Eq, Debug)]

@@ -2,13 +2,13 @@

use std::io;

use std::path::PathBuf;
use std::sync::PoisonError;
use directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use fastfield::FastFieldNotAvailableError;
use query;
use schema;
use fastfield::FastFieldNotAvailableError;
use serde_json;
use std::path::PathBuf;
use std::sync::PoisonError;

error_chain!(
    errors {

@@ -1,10 +1,10 @@
use bit_set::BitSet;
use directory::WritePtr;
use std::io::Write;
use std::io;
use directory::ReadOnlySource;
use DocId;
use common::HasLen;
use directory::ReadOnlySource;
use directory::WritePtr;
use std::io;
use std::io::Write;
use DocId;

/// Write a delete `BitSet`
///

@@ -62,10 +62,8 @@ impl DeleteBitSet {
        b & (1u8 << shift) != 0
    }
}

}

impl HasLen for DeleteBitSet {
    fn len(&self) -> usize {
        self.len

@@ -74,10 +72,10 @@ impl HasLen for DeleteBitSet {
#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use super::*;
    use bit_set::BitSet;
    use directory::*;
    use super::*;
    use std::path::PathBuf;

    fn test_delete_bitset_helper(bitset: &BitSet) {
        let test_path = PathBuf::from("test");

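The expression `b & (1u8 << shift) != 0` a few hunks up fixes the addressing convention of the delete bitset: one bit per document, eight documents per byte. A sketch of the lookup side under that convention (the bit order within the byte is the visible `shift`; the byte index is an assumption):

fn is_deleted(delete_bitset: &[u8], doc: u32) -> bool {
    let byte = delete_bitset[(doc / 8) as usize];
    let shift = (doc % 8) as u8;
    byte & (1u8 << shift) != 0
}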
@@ -1,5 +1,5 @@
use std::result;
use schema::FieldEntry;
use std::result;

/// `FastFieldNotAvailableError` is returned when the
/// user requested for a fast field reader, and the field was not

@@ -1,8 +1,8 @@
use super::MultiValueIntFastFieldReader;
use DocId;
use termdict::TermOrdinal;
use schema::Facet;
use termdict::TermDictionary;
use termdict::TermOrdinal;
use DocId;

/// The facet reader makes it possible to access the list of
/// facets associated to a given document in a specific

@@ -23,26 +23,26 @@ values stored.
Read access performance is comparable to that of an array lookup.
*/

pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastFieldWriter};
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use common;
use schema::Cardinality;
use schema::FieldType;
use schema::Value;
pub use self::delete::DeleteBitSet;
pub use self::delete::write_delete_bitset;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValueIntFastFieldWriter, MultiValueIntFastFieldReader};
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};

mod reader;
mod writer;
mod serializer;
mod error;
mod delete;
mod error;
mod facet_reader;
mod multivalued;
mod reader;
mod serializer;
mod writer;

/// Trait for types that are allowed for fast fields: (u64 or i64).
pub trait FastValue: Default + Clone + Copy {

@@ -121,19 +121,19 @@ fn value_to_u64(value: &Value) -> u64 {
#[cfg(test)]
mod tests {

    use super::*;
    use common::CompositeFile;
    use directory::{Directory, RAMDirectory, WritePtr};
    use fastfield::FastFieldReader;
    use rand::Rng;
    use rand::SeedableRng;
    use rand::XorShiftRng;
    use schema::{Schema, SchemaBuilder};
    use schema::Document;
    use schema::FAST;
    use schema::Field;
    use schema::FAST;
    use schema::{Schema, SchemaBuilder};
    use std::collections::HashMap;
    use std::path::Path;
    use super::*;

    lazy_static! {
        pub static ref SCHEMA: Schema = {

@@ -141,9 +141,7 @@ mod tests {
            schema_builder.add_u64_field("field", FAST);
            schema_builder.build()
        };
        pub static ref FIELD: Field = {
            SCHEMA.get_field("field").unwrap()
        };
        pub static ref FIELD: Field = { SCHEMA.get_field("field").unwrap() };
    }

    #[test]

@@ -409,17 +407,17 @@ mod tests {

}

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
    use super::tests::{SCHEMA, generate_permutation};
    use test::{self, Bencher};
    use super::tests::FIELD;
    use super::tests::{generate_permutation, SCHEMA};
    use super::*;
    use common::CompositeFile;
    use directory::{Directory, RAMDirectory, WritePtr};
    use fastfield::FastFieldReader;
    use std::collections::HashMap;
    use std::path::Path;
    use super::*;
    use test::{self, Bencher};

    #[bench]
    fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {

@@ -515,4 +513,4 @@ mod bench {
    }
}

}
}

@@ -1,15 +1,15 @@
mod writer;
mod reader;
mod writer;

pub use self::writer::MultiValueIntFastFieldWriter;
pub use self::reader::MultiValueIntFastFieldReader;
pub use self::writer::MultiValueIntFastFieldWriter;

#[cfg(test)]
mod tests {

    use schema::SchemaBuilder;
    use schema::Cardinality;
    use schema::IntOptions;
    use schema::SchemaBuilder;
    use Index;

    #[test]

@@ -1,5 +1,5 @@
use DocId;
use fastfield::{FastFieldReader, FastValue};
use DocId;

/// Reader for a multivalued `u64` fast field.
///

@@ -1,12 +1,12 @@
use fastfield::FastFieldSerializer;
use fastfield::serializer::FastSingleFieldSerializer;
use fastfield::value_to_u64;
use std::collections::HashMap;
use DocId;
use fastfield::FastFieldSerializer;
use itertools::Itertools;
use postings::UnorderedTermId;
use schema::{Document, Field};
use std::collections::HashMap;
use std::io;
use itertools::Itertools;
use DocId;

/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.

@@ -37,7 +37,6 @@ pub struct MultiValueIntFastFieldWriter {
}

impl MultiValueIntFastFieldWriter {

    /// Creates a new `IntFastFieldWriter`
    pub(crate) fn new(field: Field, is_facet: bool) -> Self {
        MultiValueIntFastFieldWriter {

@@ -68,7 +67,7 @@ impl MultiValueIntFastFieldWriter {
    pub fn add_document(&mut self, doc: &Document) {
        self.next_doc();
        // facets are indexed in the `SegmentWriter` as we encode their unordered id.
        if !self.is_facet {
        if !self.is_facet {
            for field_value in doc.field_values() {
                if field_value.field() == self.field {
                    self.add_val(value_to_u64(field_value.value()));

@@ -1,19 +1,19 @@
|
||||
use common::BinarySerializable;
|
||||
use super::FastValue;
|
||||
use common::bitpacker::BitUnpacker;
|
||||
use common::CompositeFile;
|
||||
use common::compute_num_bits;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use common::BinarySerializable;
|
||||
use common::CompositeFile;
|
||||
use directory::ReadOnlySource;
|
||||
use DocId;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use owning_ref::OwningRef;
|
||||
use schema::FAST;
|
||||
use schema::SchemaBuilder;
|
||||
use schema::FAST;
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use super::FastValue;
|
||||
use DocId;
|
||||
|
||||
/// Trait for accessing a fastfield.
|
||||
///
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use common::BinarySerializable;
|
||||
use directory::WritePtr;
|
||||
use schema::Field;
|
||||
use common::bitpacker::BitPacker;
|
||||
use common::compute_num_bits;
|
||||
use common::CountingWriter;
|
||||
use common::BinarySerializable;
|
||||
use common::CompositeWrite;
|
||||
use common::CountingWriter;
|
||||
use directory::WritePtr;
|
||||
use schema::Field;
|
||||
use std::io::{self, Write};
|
||||
|
||||
/// `FastFieldSerializer` is in charge of serializing
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use schema::{Cardinality, Document, Field, Schema};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use std::io;
|
||||
use schema::FieldType;
|
||||
use common;
|
||||
use common::VInt;
|
||||
use std::collections::HashMap;
|
||||
use postings::UnorderedTermId;
|
||||
use super::multivalued::MultiValueIntFastFieldWriter;
|
||||
use common;
|
||||
use common::BinarySerializable;
|
||||
use common::VInt;
|
||||
use fastfield::FastFieldSerializer;
|
||||
use postings::UnorderedTermId;
|
||||
use schema::FieldType;
|
||||
use schema::{Cardinality, Document, Field, Schema};
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
/// The fastfieldswriter regroup all of the fast field writers.
|
||||
pub struct FastFieldsWriter {
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
|
||||
#[inline(always)]
|
||||
pub fn id_to_fieldnorm(id: u8) -> u32 {
|
||||
FIELD_NORMS_TABLE[id as usize]
|
||||
}
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
|
||||
FIELD_NORMS_TABLE
|
||||
@@ -12,45 +10,34 @@ pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
|
||||
.unwrap_or_else(|idx| idx - 1) as u8
|
||||
}
|
||||
|
||||
|
||||
pub const FIELD_NORMS_TABLE: [u32; 256] = [
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54,
|
||||
56, 60, 64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144,
|
||||
152, 168, 184, 200, 216, 232, 248, 264, 280, 312, 344, 376, 408, 440, 472, 504,
|
||||
536, 600, 664, 728, 792, 856, 920, 984,
|
||||
1048, 1176, 1304, 1432, 1560, 1688, 1816, 1944,
|
||||
2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120,
|
||||
4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240,
|
||||
10264, 11288, 12312, 13336, 14360, 15384,
|
||||
16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744,
|
||||
32792, 36888, 40984, 45080, 49176, 53272, 57368, 61464,
|
||||
65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480,
|
||||
163864, 180248, 196632, 213016, 229400, 245784, 262168,
|
||||
294936, 327704, 360472, 393240, 426008, 458776,
|
||||
491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528,
|
||||
983064, 1048600, 1179672, 1310744, 1441816, 1572888, 1703960, 1835032,
|
||||
1966104, 2097176, 2359320, 2621464, 2883608, 3145752, 3407896, 3670040, 3932184,
|
||||
4194328, 4718616, 5242904, 5767192, 6291480, 6815768, 7340056, 7864344, 8388632, 9437208,
|
||||
10485784, 11534360, 12582936, 13631512, 14680088, 15728664, 16777240, 18874392, 20971544,
|
||||
23068696, 25165848, 27263000, 29360152, 31457304, 33554456, 37748760, 41943064,
|
||||
46137368, 50331672, 54525976, 58720280, 62914584, 67108888, 75497496, 83886104,
|
||||
92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968, 167772184,
|
||||
184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912, 335544344,
|
||||
369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800, 671088664,
|
||||
738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576, 1342177304,
|
||||
1476395032, 1610612760, 1744830488, 1879048216, 2013265944
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 60,
|
||||
64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 168, 184, 200, 216, 232,
|
||||
248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, 1048,
|
||||
1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120,
|
||||
4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, 14360, 15384,
|
||||
16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, 45080, 49176,
|
||||
53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480,
|
||||
163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, 393240, 426008,
|
||||
458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, 1048600,
|
||||
1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, 2621464,
|
||||
2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, 6291480,
|
||||
6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, 14680088,
|
||||
15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, 31457304,
|
||||
33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, 67108888,
|
||||
75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968,
|
||||
167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912,
|
||||
335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800,
|
||||
671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576,
|
||||
1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944,
|
||||
];
|
||||
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::{fieldnorm_to_id, id_to_fieldnorm, FIELD_NORMS_TABLE};
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_decode_code() {
|
||||
assert_eq!(fieldnorm_to_id(0), 0);
|
||||
@@ -103,4 +90,4 @@ mod tests {
|
||||
assert_eq!(FIELD_NORMS_TABLE[i], decode_fieldnorm_byte(i as u8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,13 +17,12 @@
//!
//! This trick is used by the [BM25 similarity]().
mod code;
mod reader;
mod serializer;
mod writer;
mod reader;

pub use self::reader::FieldNormReader;
pub use self::writer::FieldNormsWriter;
pub use self::serializer::FieldNormsSerializer;
pub use self::writer::FieldNormsWriter;

use self::code::{fieldnorm_to_id, id_to_fieldnorm};


@@ -1,8 +1,7 @@
use super::{id_to_fieldnorm, fieldnorm_to_id};
use super::{fieldnorm_to_id, id_to_fieldnorm};
use directory::ReadOnlySource;
use DocId;


/// Reads the fieldnorm associated to a document.
/// The fieldnorm represents the length associated to
/// a given Field of a given document.
@@ -21,16 +20,13 @@ use DocId;
/// precompute computationally expensive functions of the fieldnorm
/// in a very short array.
pub struct FieldNormReader {
data: ReadOnlySource
data: ReadOnlySource,
}

impl FieldNormReader {

/// Opens a field norm reader given its data source.
pub fn open(data: ReadOnlySource) -> Self {
FieldNormReader {
data
}
FieldNormReader { data }
}

/// Returns the `fieldnorm` associated to a doc id.
@@ -71,12 +67,13 @@ impl FieldNormReader {
#[cfg(test)]
impl From<Vec<u32>> for FieldNormReader {
fn from(field_norms: Vec<u32>) -> FieldNormReader {
let field_norms_id = field_norms.into_iter()
let field_norms_id = field_norms
.into_iter()
.map(FieldNormReader::fieldnorm_to_id)
.collect::<Vec<u8>>();
let field_norms_data = ReadOnlySource::from(field_norms_id);
FieldNormReader {
data: field_norms_data
data: field_norms_data,
}
}
}
}

@@ -1,26 +1,21 @@
use directory::WritePtr;
use std::io;
use common::CompositeWrite;
use directory::WritePtr;
use schema::Field;
use std::io;
use std::io::Write;


pub struct FieldNormsSerializer {
composite_write: CompositeWrite,
}

impl FieldNormsSerializer {

/// Constructor
pub fn from_write(write: WritePtr) -> io::Result<FieldNormsSerializer> {
// just making room for the pointer to header.
let composite_write = CompositeWrite::wrap(write);
Ok(FieldNormsSerializer {
composite_write
})
Ok(FieldNormsSerializer { composite_write })
}


pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> {
let write = self.composite_write.for_field(field);
write.write_all(fieldnorms_data)?;
@@ -32,6 +27,4 @@ impl FieldNormsSerializer {
self.composite_write.close()?;
Ok(())
}

}


@@ -1,26 +1,23 @@
use DocId;

use schema::Field;
use super::FieldNormsSerializer;
use std::io;
use schema::Schema;
use super::fieldnorm_to_id;
use super::FieldNormsSerializer;
use schema::Field;
use schema::Schema;
use std::io;

pub struct FieldNormsWriter {
fields: Vec<Field>,
fieldnorms_buffer: Vec<Vec<u8>>
fieldnorms_buffer: Vec<Vec<u8>>,
}

impl FieldNormsWriter {

pub fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
schema
.fields()
.iter()
.enumerate()
.filter(|&(_, field_entry)| {
field_entry.is_indexed()
})
.filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field, _)| Field(field as u32))
.collect::<Vec<Field>>()
}
@@ -35,9 +32,7 @@ impl FieldNormsWriter {
.unwrap_or(0);
FieldNormsWriter {
fields,
fieldnorms_buffer: (0..max_field)
.map(|_| Vec::new())
.collect::<Vec<_>>()
fieldnorms_buffer: (0..max_field).map(|_| Vec::new()).collect::<Vec<_>>(),
}
}

@@ -49,7 +44,10 @@ impl FieldNormsWriter {

pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) {
let fieldnorm_buffer: &mut Vec<u8> = &mut self.fieldnorms_buffer[field.0 as usize];
assert!(fieldnorm_buffer.len() <= doc as usize, "Cannot register a given fieldnorm twice");
assert!(
fieldnorm_buffer.len() <= doc as usize,
"Cannot register a given fieldnorm twice"
);
// we fill intermediary `DocId` as having a fieldnorm of 0.
fieldnorm_buffer.resize(doc as usize + 1, 0u8);
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
@@ -62,4 +60,4 @@ impl FieldNormsWriter {
}
Ok(())
}
}
}

@@ -1,10 +1,10 @@
use std::collections::HashSet;
use rand::thread_rng;
use std::collections::HashSet;

use rand::distributions::{IndependentSample, Range};
use schema::*;
use Index;
use Searcher;
use rand::distributions::{IndependentSample, Range};

fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {
assert!(searcher.segment_readers().len() < 20);
@@ -13,7 +13,7 @@ fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {

#[test]
#[ignore]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_indexing() {
let mut schema_builder = SchemaBuilder::default();


@@ -1,7 +1,7 @@
use super::operation::DeleteOperation;
use std::sync::{Arc, RwLock};
use std::mem;
use std::ops::DerefMut;
use std::sync::{Arc, RwLock};

// The DeleteQueue is similar in conceptually to a multiple
// consumer single producer broadcast channel.

@@ -1,6 +1,6 @@
use Directory;
use directory::error::OpenWriteError;
use core::LOCKFILE_FILEPATH;
use directory::error::OpenWriteError;
use Directory;

/// The directory lock is a mechanism used to
/// prevent the creation of two [`IndexWriter`](struct.IndexWriter.html)

@@ -1,3 +1,6 @@
use super::operation::AddOperation;
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use bit_set::BitSet;
use chan;
use core::Index;
@@ -6,31 +9,28 @@ use core::SegmentComponent;
use core::SegmentId;
use core::SegmentMeta;
use core::SegmentReader;
use indexer::stamper::Stamper;
use futures::sync::oneshot::Receiver;
use datastruct::stacker::hashmap::split_memory;
use datastruct::stacker::Heap;
use directory::FileProtection;
use docset::DocSet;
use error::{Error, ErrorKind, Result, ResultExt};
use fastfield::write_delete_bitset;
use futures::sync::oneshot::Receiver;
use indexer::delete_queue::{DeleteCursor, DeleteQueue};
use datastruct::stacker::hashmap::split_memory;
use indexer::doc_opstamp_mapping::DocToOpstampMapping;
use indexer::MergePolicy;
use indexer::operation::DeleteOperation;
use indexer::stamper::Stamper;
use indexer::DirectoryLock;
use indexer::MergePolicy;
use indexer::SegmentEntry;
use indexer::SegmentWriter;
use docset::DocSet;
use schema::IndexRecordOption;
use schema::Document;
use schema::IndexRecordOption;
use schema::Term;
use std::mem;
use std::mem::swap;
use std::thread::JoinHandle;
use indexer::DirectoryLock;
use super::operation::AddOperation;
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use std::thread;
use std::thread::JoinHandle;

// Size of the margin for the heap. A segment is closed when the remaining memory
// in the heap goes below MARGIN_IN_BYTES.
@@ -443,10 +443,7 @@ impl IndexWriter {
}

/// Merges a given list of segments
pub fn merge(
&mut self,
segment_ids: &[SegmentId],
) -> Receiver<SegmentMeta> {
pub fn merge(&mut self, segment_ids: &[SegmentId]) -> Receiver<SegmentMeta> {
self.segment_updater.start_merge(segment_ids)
}

@@ -642,12 +639,12 @@ impl IndexWriter {
#[cfg(test)]
mod tests {

use env_logger;
use error::*;
use indexer::NoMergePolicy;
use schema::{self, Document};
use Index;
use Term;
use error::*;
use env_logger;

#[test]
fn test_lockfile_stops_duplicates() {

@@ -99,8 +99,8 @@ impl Default for LogMergePolicy {
#[cfg(test)]
mod tests {
use super::*;
use indexer::merge_policy::MergePolicy;
use core::{SegmentId, SegmentMeta};
use indexer::merge_policy::MergePolicy;

fn test_merge_policy() -> LogMergePolicy {
let mut log_merge_policy = LogMergePolicy::default();

@@ -1,7 +1,7 @@
use core::SegmentId;
use core::SegmentMeta;
use std::marker;
use std::fmt::Debug;
use std::marker;

/// Set of segment suggested for a merge.
#[derive(Debug, Clone)]

@@ -1,24 +1,23 @@
use error::{ErrorKind, Result};
use core::SegmentReader;
use core::Segment;
use DocId;
use core::SegmentReader;
use core::SerializableSegment;
use indexer::SegmentSerializer;
use postings::InvertedIndexSerializer;
use itertools::Itertools;
use docset::DocSet;
use error::{ErrorKind, Result};
use fastfield::DeleteBitSet;
use schema::{Field, Schema};
use termdict::TermMerger;
use fastfield::FastFieldSerializer;
use fastfield::FastFieldReader;
use store::StoreWriter;
use std::cmp::{max, min};
use fastfield::FastFieldSerializer;
use fieldnorm::FieldNormReader;
use fieldnorm::FieldNormsSerializer;
use fieldnorm::FieldNormsWriter;
use fieldnorm::FieldNormReader;
use indexer::SegmentSerializer;
use itertools::Itertools;
use postings::InvertedIndexSerializer;
use postings::Postings;

use schema::{Field, Schema};
use std::cmp::{max, min};
use store::StoreWriter;
use termdict::TermMerger;
use DocId;

fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
let mut total_tokens = 0u64;
@@ -38,15 +37,17 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
total_tokens += reader.inverted_index(field).total_num_tokens();
}
}
total_tokens + count
.iter()
.cloned()
.enumerate()
.map(|(fieldnorm_ord, count)| count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64)
.sum::<u64>()
total_tokens
+ count
.iter()
.cloned()
.enumerate()
.map(|(fieldnorm_ord, count)| {
count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64
})
.sum::<u64>()
}


pub struct IndexMerger {
schema: Schema,
readers: Vec<SegmentReader>,
@@ -70,7 +71,6 @@ fn compute_min_max_val(
.map(|doc_id| u64_reader.get(doc_id))
.minmax()
.into_option()

}
None => {
// no deleted documents,
@@ -162,7 +162,7 @@ impl IndexMerger {
if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(
&u64_reader,
reader.max_doc(),
reader.delete_bitset()
reader.delete_bitset(),
) {
// the segment has some non-deleted documents
min_val = min(min_val, seg_min_val);
@@ -176,8 +176,10 @@ impl IndexMerger {
}
Err(_) => {
let fieldname = self.schema.get_field_name(field);
let error_msg =
format!("Failed to find a fast field reader for field {:?}", fieldname);
let error_msg = format!(
"Failed to find a fast field reader for field {:?}",
fieldname
);
bail!(ErrorKind::SchemaError(error_msg));
}
}
@@ -211,7 +213,6 @@ impl IndexMerger {
}

fn write_postings(&self, serializer: &mut InvertedIndexSerializer) -> Result<()> {

let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
let mut delta_computer = DeltaComputer::new();

@@ -318,7 +319,7 @@ impl IndexMerger {
for (segment_ord, mut segment_postings) in segment_postings {
let old_to_new_doc_id = &merged_doc_id_map[segment_ord];
loop {
let doc = segment_postings.doc();
let doc = segment_postings.doc();

// `.advance()` has been called once before the loop.
//
@@ -335,7 +336,8 @@ impl IndexMerger {
let term_freq = segment_postings.term_freq();
segment_postings.positions(&mut positions_buffer);

let delta_positions = delta_computer.compute_delta(&positions_buffer);
let delta_positions =
delta_computer.compute_delta(&positions_buffer);
field_serializer.write_doc(
remapped_doc_id,
term_freq,
@@ -389,21 +391,21 @@ impl SerializableSegment for IndexMerger {

#[cfg(test)]
mod tests {
use schema;
use schema::Document;
use schema::Term;
use schema::TextFieldIndexing;
use query::TermQuery;
use schema::Field;
use core::Index;
use Searcher;
use DocAddress;
use collector::tests::FastFieldTestCollector;
use collector::tests::TestCollector;
use query::BooleanQuery;
use schema::IndexRecordOption;
use schema::Cardinality;
use core::Index;
use futures::Future;
use query::BooleanQuery;
use query::TermQuery;
use schema;
use schema::Cardinality;
use schema::Document;
use schema::Field;
use schema::IndexRecordOption;
use schema::Term;
use schema::TextFieldIndexing;
use DocAddress;
use Searcher;

#[test]
fn test_index_merger_no_deletes() {

@@ -1,29 +1,29 @@
pub mod index_writer;
pub mod segment_serializer;
pub mod merger;
pub mod merge_policy;
mod log_merge_policy;
mod segment_register;
mod segment_writer;
mod segment_manager;
pub mod delete_queue;
pub mod segment_updater;
mod directory_lock;
mod segment_entry;
mod doc_opstamp_mapping;
pub mod index_writer;
mod log_merge_policy;
pub mod merge_policy;
pub mod merger;
pub mod operation;
mod stamper;
mod prepared_commit;
mod segment_entry;
mod segment_manager;
mod segment_register;
pub mod segment_serializer;
pub mod segment_updater;
mod segment_writer;
mod stamper;

pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::{SegmentEntry, SegmentState};
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_writer::SegmentWriter;
pub(crate) use self::directory_lock::DirectoryLock;
pub use self::index_writer::IndexWriter;
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::{SegmentEntry, SegmentState};
pub use self::segment_manager::SegmentManager;
pub(crate) use self::directory_lock::DirectoryLock;
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_writer::SegmentWriter;

/// Alias for the default merge policy, which is the `LogMergePolicy`.
pub type DefaultMergePolicy = LogMergePolicy;

@@ -1,5 +1,5 @@
use Result;
use super::IndexWriter;
use Result;

/// A prepared commit
pub struct PreparedCommit<'a> {
@@ -13,7 +13,7 @@ impl<'a> PreparedCommit<'a> {
PreparedCommit {
index_writer,
payload: None,
opstamp
opstamp,
}
}


@@ -1,7 +1,7 @@
use core::SegmentMeta;
use bit_set::BitSet;
use indexer::delete_queue::DeleteCursor;
use core::SegmentId;
use core::SegmentMeta;
use indexer::delete_queue::DeleteCursor;
use std::fmt;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]

@@ -1,14 +1,14 @@
use super::segment_register::SegmentRegister;
use std::sync::RwLock;
use core::SegmentId;
use core::SegmentMeta;
use core::{LOCKFILE_FILEPATH, META_FILEPATH};
use core::SegmentId;
use indexer::SegmentEntry;
use std::path::PathBuf;
use std::collections::hash_set::HashSet;
use std::sync::{RwLockReadGuard, RwLockWriteGuard};
use std::fmt::{self, Debug, Formatter};
use indexer::delete_queue::DeleteCursor;
use indexer::SegmentEntry;
use std::collections::hash_set::HashSet;
use std::fmt::{self, Debug, Formatter};
use std::path::PathBuf;
use std::sync::RwLock;
use std::sync::{RwLockReadGuard, RwLockWriteGuard};

#[derive(Default)]
struct SegmentRegisters {

@@ -1,10 +1,10 @@
use core::SegmentId;
use std::collections::HashMap;
use core::SegmentMeta;
use indexer::delete_queue::DeleteCursor;
use indexer::segment_entry::SegmentEntry;
use std::collections::HashMap;
use std::fmt;
use std::fmt::{Debug, Formatter};
use indexer::segment_entry::SegmentEntry;
use indexer::delete_queue::DeleteCursor;

/// The segment register keeps track
/// of the list of segment, their size as well
@@ -113,11 +113,11 @@ impl SegmentRegister {

#[cfg(test)]
mod tests {
use indexer::SegmentState;
use super::*;
use core::SegmentId;
use core::SegmentMeta;
use indexer::delete_queue::*;
use super::*;
use indexer::SegmentState;

fn segment_ids(segment_register: &SegmentRegister) -> Vec<SegmentId> {
segment_register

@@ -3,9 +3,9 @@ use Result;
use core::Segment;
use core::SegmentComponent;
use fastfield::FastFieldSerializer;
use store::StoreWriter;
use fieldnorm::FieldNormsSerializer;
use postings::InvertedIndexSerializer;
use store::StoreWriter;

/// Segment serializer is in charge of laying out on disk
/// the data accumulated and sorted by the `SegmentWriter`.
@@ -47,7 +47,7 @@ impl SegmentSerializer {
}

/// Accessor to the field norm serializer.
pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer {
pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer {
&mut self.fieldnorms_serializer
}


@@ -1,40 +1,40 @@
use super::segment_manager::{get_mergeable_segments, SegmentManager};
use core::Index;
use core::IndexMeta;
use core::META_FILEPATH;
use core::Segment;
use core::SegmentId;
use core::SegmentMeta;
use core::SerializableSegment;
use core::META_FILEPATH;
use directory::Directory;
use indexer::stamper::Stamper;
use directory::FileProtection;
use error::{Error, ErrorKind, Result};
use futures_cpupool::CpuPool;
use futures::Future;
use futures::oneshot;
use futures::sync::oneshot::Receiver;
use directory::FileProtection;
use indexer::{DefaultMergePolicy, MergePolicy};
use futures::Future;
use futures_cpupool::CpuFuture;
use futures_cpupool::CpuPool;
use indexer::delete_queue::DeleteCursor;
use indexer::index_writer::advance_deletes;
use indexer::MergeCandidate;
use indexer::merger::IndexMerger;
use indexer::stamper::Stamper;
use indexer::MergeCandidate;
use indexer::SegmentEntry;
use indexer::SegmentSerializer;
use futures_cpupool::CpuFuture;
use serde_json;
use indexer::delete_queue::DeleteCursor;
use indexer::{DefaultMergePolicy, MergePolicy};
use schema::Schema;
use serde_json;
use std::borrow::BorrowMut;
use std::collections::HashMap;
use std::io::Write;
use std::mem;
use std::ops::DerefMut;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::atomic::Ordering;
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc;
use std::sync::RwLock;
use std::thread;
use std::thread::JoinHandle;
use super::segment_manager::{get_mergeable_segments, SegmentManager};

/// Save the index meta file.
/// This operation is atomic :
@@ -283,10 +283,7 @@ impl SegmentUpdater {
}).wait()
}

pub fn start_merge(
&self,
segment_ids: &[SegmentId],
) -> Receiver<SegmentMeta> {
pub fn start_merge(&self, segment_ids: &[SegmentId]) -> Receiver<SegmentMeta> {
self.0.segment_manager.start_merge(segment_ids);
let segment_updater_clone = self.clone();

@@ -482,9 +479,9 @@ impl SegmentUpdater {
#[cfg(test)]
mod tests {

use Index;
use schema::*;
use indexer::merge_policy::tests::MergeWheneverPossible;
use schema::*;
use Index;

#[test]
fn test_delete_during_merge() {

@@ -1,23 +1,23 @@
use Result;
use DocId;
use std::io;
use std::str;
use schema::Schema;
use schema::Term;
use super::operation::AddOperation;
use core::Segment;
use core::SerializableSegment;
use fastfield::FastFieldsWriter;
use schema::FieldType;
use indexer::segment_serializer::SegmentSerializer;
use datastruct::stacker::Heap;
use fastfield::FastFieldsWriter;
use fieldnorm::FieldNormsWriter;
use indexer::index_writer::MARGIN_IN_BYTES;
use super::operation::AddOperation;
use indexer::segment_serializer::SegmentSerializer;
use postings::MultiFieldPostingsWriter;
use schema::FieldType;
use schema::Schema;
use schema::Term;
use schema::Value;
use std::io;
use std::str;
use tokenizer::BoxedTokenizer;
use tokenizer::FacetTokenizer;
use tokenizer::{TokenStream, Tokenizer};
use schema::Value;
use fieldnorm::FieldNormsWriter;
use DocId;
use Result;

/// A `SegmentWriter` is in charge of creating segment index from a
/// documents.
@@ -35,7 +35,6 @@ pub struct SegmentWriter<'a> {
tokenizers: Vec<Option<Box<BoxedTokenizer>>>,
}


impl<'a> SegmentWriter<'a> {
/// Creates a new `SegmentWriter`
///
@@ -179,8 +178,7 @@ impl<'a> SegmentWriter<'a> {
} else {
0
};
self.fieldnorms_writer
.record(doc_id, field, num_tokens);
self.fieldnorms_writer.record(doc_id, field, num_tokens);
}
FieldType::U64(ref int_option) => {
if int_option.is_indexed() {

@@ -2,11 +2,11 @@
// For the moment let's just use AtomicUsize on
// x86/64 bit platform, and a mutex on other platform.

#[cfg(target="x86_64")]
#[cfg(target = "x86_64")]
mod archicture_impl {

use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

#[derive(Clone, Default)]
pub struct Stamper(Arc<AtomicU64>);
@@ -22,8 +22,7 @@ mod archicture_impl {
}
}


#[cfg(not(target="x86_64"))]
#[cfg(not(target = "x86_64"))]
mod archicture_impl {

use std::sync::{Arc, Mutex};
@@ -47,7 +46,6 @@ mod archicture_impl {

pub use self::archicture_impl::Stamper;


#[cfg(test)]
mod test {

@@ -65,4 +63,4 @@ mod test {
assert_eq!(stamper.stamp(), 10u64);
assert_eq!(stamper_clone.stamp(), 11u64);
}
}
}

src/lib.rs
@@ -1,8 +1,7 @@
#![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
#![cfg_attr(feature = "cargo-clippy", allow(module_inception))]
#![cfg_attr(feature = "cargo-clippy", allow(inline_always))]

#![cfg_attr(all(feature="unstable", test), feature(test))]
#![cfg_attr(all(feature = "unstable", test), feature(test))]
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
#![allow(unknown_lints)]
#![allow(new_without_default)]
@@ -123,9 +122,10 @@ extern crate log;
#[macro_use]
extern crate error_chain;

#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
extern crate atomicwrites;
extern crate bit_set;
extern crate bitpacking;
extern crate byteorder;
extern crate chan;
extern crate combine;
@@ -145,7 +145,6 @@ extern crate stable_deref_trait;
extern crate tempdir;
extern crate tempfile;
extern crate uuid;
extern crate bitpacking;

#[cfg(test)]
#[macro_use]
@@ -160,7 +159,7 @@ extern crate winapi;
#[cfg(test)]
extern crate rand;

#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
extern crate test;

extern crate tinysegmenter;
@@ -179,36 +178,36 @@ pub use error::{Error, ErrorKind, ResultExt};
/// Tantivy result.
pub type Result<T> = std::result::Result<T, Error>;

mod core;
mod compression;
mod indexer;
mod common;
mod compression;
mod core;
mod indexer;

mod datastruct;
#[allow(unused_doc_comment)]
mod error;
pub mod tokenizer;
mod datastruct;

pub mod termdict;
pub mod store;
pub mod query;
pub mod directory;
pub mod collector;
pub mod postings;
pub mod schema;
pub mod directory;
pub mod fastfield;
pub(crate) mod fieldnorm;
pub mod postings;
pub mod query;
pub mod schema;
pub mod store;
pub mod termdict;

mod docset;
pub use self::docset::{DocSet, SkipResult};

pub use directory::Directory;
pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta};
pub use indexer::IndexWriter;
pub use schema::{Document, Term};
pub use core::{InvertedIndexReader, SegmentReader};
pub use postings::Postings;
pub use core::SegmentComponent;
pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta};
pub use core::{InvertedIndexReader, SegmentReader};
pub use directory::Directory;
pub use indexer::IndexWriter;
pub use postings::Postings;
pub use schema::{Document, Term};

pub use common::{i64_to_u64, u64_to_i64};

@@ -224,10 +223,10 @@ pub fn version() -> &'static str {

/// Defines tantivy's merging strategy
pub mod merge_policy {
pub use indexer::MergePolicy;
pub use indexer::LogMergePolicy;
pub use indexer::NoMergePolicy;
pub use indexer::DefaultMergePolicy;
pub use indexer::LogMergePolicy;
pub use indexer::MergePolicy;
pub use indexer::NoMergePolicy;
}

/// A `u32` identifying a document within a segment.
@@ -276,18 +275,23 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
mod tests {

use collector::tests::TestCollector;
use Index;
use core::SegmentReader;
use query::BooleanQuery;
use schema::*;
use docset::DocSet;
use query::BooleanQuery;
use rand::distributions::{IndependentSample, Range};
use rand::{Rng, SeedableRng, XorShiftRng};
use schema::*;
use Index;
use IndexWriter;
use Postings;
use rand::{Rng, SeedableRng, XorShiftRng};
use rand::distributions::{IndependentSample, Range};

pub fn assert_nearly_equals(expected: f32, val: f32) {
assert!(nearly_equals(val, expected), "Got {}, expected {}.", val, expected);
assert!(
nearly_equals(val, expected),
"Got {}, expected {}.",
val,
expected
);
}

pub fn nearly_equals(a: f32, b: f32) -> bool {
@@ -314,7 +318,7 @@ mod tests {
}

#[test]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_indexing() {
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
@@ -440,7 +444,6 @@ mod tests {
}
}


fn advance_undeleted(docset: &mut DocSet, reader: &SegmentReader) -> bool {
while docset.advance() {
if !reader.is_deleted(docset.doc()) {

@@ -6,20 +6,19 @@ Postings module (also called inverted index)
///
/// Postings, also called inverted lists, is the key datastructure
/// to full-text search.

mod postings;
mod recorder;
mod serializer;
mod postings_writer;
mod term_info;
mod recorder;
mod segment_postings;
mod serializer;
mod term_info;

pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;

pub use self::term_info::TermInfo;
pub use self::postings::Postings;
pub use self::term_info::TermInfo;

pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};

@@ -38,22 +37,22 @@ pub(crate) enum FreqReadingOption {
pub mod tests {

use super::*;
use core::Index;
use core::SegmentComponent;
use core::SegmentReader;
use datastruct::stacker::Heap;
use docset::{DocSet, SkipResult};
use fieldnorm::FieldNormReader;
use indexer::operation::AddOperation;
use indexer::SegmentWriter;
use query::Scorer;
use rand::{Rng, SeedableRng, XorShiftRng};
use schema::Field;
use schema::IndexRecordOption;
use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT};
use std::iter;
use DocId;
use Score;
use query::Scorer;
use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT};
use core::SegmentComponent;
use indexer::SegmentWriter;
use core::SegmentReader;
use core::Index;
use schema::IndexRecordOption;
use std::iter;
use datastruct::stacker::Heap;
use schema::Field;
use indexer::operation::AddOperation;
use rand::{Rng, SeedableRng, XorShiftRng};
use fieldnorm::FieldNormReader;

#[test]
pub fn test_position_write() {
@@ -124,7 +123,6 @@ pub mod tests {
assert_eq!(&[0, 5], &positions[..]);
}
{

let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
@@ -203,13 +201,14 @@ pub mod tests {
{
let segment_reader = SegmentReader::open(&segment).unwrap();
{
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field) ;
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field);
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
for i in 2..1000 {
assert_eq!(
fieldnorm_reader.fieldnorm_id(i),
FieldNormReader::fieldnorm_to_id(i + 1) );
FieldNormReader::fieldnorm_to_id(i + 1)
);
}
}
{
@@ -446,7 +445,7 @@ pub mod tests {
// delete everything else
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer.delete_term(term_1);
index_writer.delete_term(term_1);

assert!(index_writer.commit().is_ok());
}
@@ -504,7 +503,7 @@ pub mod tests {
let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
for _ in 0 .. posting_list_size {
for _ in 0..posting_list_size {
let mut doc = Document::default();
if rng.gen_weighted_bool(15) {
doc.add_text(text_field, "a");
@@ -595,17 +594,16 @@ pub mod tests {

}


#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {

use test::{self, Bencher};
use schema::IndexRecordOption;
use tests;
use super::tests::*;
use docset::SkipResult;
use DocSet;
use query::Intersection;
use schema::IndexRecordOption;
use test::{self, Bencher};
use tests;
use DocSet;

#[bench]
fn bench_segment_postings(b: &mut Bencher) {
@@ -723,4 +721,4 @@ mod bench {
s
});
}
}
}

@@ -1,21 +1,21 @@
use DocId;
use schema::Term;
use postings::{FieldSerializer, InvertedIndexSerializer};
use std::io;
use std::collections::HashMap;
use postings::Recorder;
use Result;
use schema::{Field, Schema};
use std::marker::PhantomData;
use std::ops::DerefMut;
use datastruct::stacker::{Heap, TermHashMap};
use postings::Recorder;
use postings::UnorderedTermId;
use postings::{FieldSerializer, InvertedIndexSerializer};
use postings::{NothingRecorder, TFAndPositionRecorder, TermFrequencyRecorder};
use schema::FieldEntry;
use schema::FieldType;
use schema::IndexRecordOption;
use schema::Term;
use schema::{Field, Schema};
use std::collections::HashMap;
use std::io;
use std::marker::PhantomData;
use std::ops::DerefMut;
use tokenizer::Token;
use tokenizer::TokenStream;
use schema::IndexRecordOption;
use postings::UnorderedTermId;
use DocId;
use Result;

fn posting_from_field_entry<'a>(
field_entry: &FieldEntry,
@@ -123,7 +123,8 @@ impl<'a> MultiFieldPostingsWriter<'a> {
unordered_term_mappings.insert(field, mapping);

let postings_writer = &self.per_field_postings_writers[field.0 as usize];
let mut field_serializer = serializer.new_field(field, postings_writer.total_num_tokens())?;
let mut field_serializer =
serializer.new_field(field, postings_writer.total_num_tokens())?;
postings_writer.serialize(
&term_offsets[start..stop],
&mut field_serializer,

@@ -1,7 +1,7 @@
use DocId;
use std::{self, io};
use postings::FieldSerializer;
use datastruct::stacker::{ExpUnrolledLinkedList, Heap, HeapAllocable};
use postings::FieldSerializer;
use std::{self, io};
use DocId;

const EMPTY_ARRAY: [u32; 0] = [0u32; 0];
const POSITION_END: u32 = std::u32::MAX;

@@ -2,15 +2,15 @@ use compression::{BlockDecoder, CompressedIntStream, VIntDecoder, COMPRESSION_BL
use DocId;

use common::BitSet;
use common::CountingWriter;
use common::HasLen;
use postings::Postings;
use docset::{DocSet, SkipResult};
use fst::Streamer;
use compression::compressed_block_size;
use directory::{ReadOnlySource, SourceRead};
use postings::FreqReadingOption;
use docset::{DocSet, SkipResult};
use fst::Streamer;
use postings::serializer::PostingsSerializer;
use common::CountingWriter;
use postings::FreqReadingOption;
use postings::Postings;

struct PositionComputer {
// store the amount of position int
@@ -84,9 +84,13 @@ impl SegmentPostings {
for &doc in docs {
postings_serializer.write_doc(doc, 1u32).unwrap();
}
postings_serializer.close_term().expect("In memory Serialization should never fail.");
postings_serializer
.close_term()
.expect("In memory Serialization should never fail.");
}
let (buffer , _) = counting_writer.finish().expect("Serializing in a buffer should never fail.");
let (buffer, _) = counting_writer
.finish()
.expect("Serializing in a buffer should never fail.");
let data = ReadOnlySource::from(buffer);
let block_segment_postings = BlockSegmentPostings::from_data(
docs.len(),
@@ -98,7 +102,6 @@ impl SegmentPostings {
}

impl SegmentPostings {

/// Reads a Segment postings from an &[u8]
///
/// * `len` - number of document in the posting lists.
@@ -125,7 +128,7 @@ fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usi
loop {
let new = start + jump;
if new >= end {
return (start, end)
return (start, end);
}
if arr[new] > target {
return (start, new);
@@ -163,7 +166,8 @@ impl DocSet for SegmentPostings {
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..];
let sum_freq: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
self.position_computer
.as_mut()
.unwrap()
.add_skip(sum_freq as usize);
}
@@ -198,7 +202,8 @@ impl DocSet for SegmentPostings {
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
let sum_freqs: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
self.position_computer
.as_mut()
.unwrap()
.add_skip(sum_freqs as usize);
}
@@ -211,7 +216,6 @@ impl DocSet for SegmentPostings {
}
}


// goes to the next element.
// next needs to be called a first time to point to the correct element.
#[inline]
@@ -262,7 +266,6 @@ impl DocSet for SegmentPostings {
}
}


impl HasLen for SegmentPostings {
fn len(&self) -> usize {
self.block_cursor.doc_freq()
@@ -284,7 +287,10 @@ impl Postings for SegmentPostings {
}
unsafe {
output.set_len(term_freq);
self.position_computer.as_mut().unwrap().positions_with_offset(offset, &mut output[..])
self.position_computer
.as_mut()
.unwrap()
.positions_with_offset(offset, &mut output[..])
}
} else {
output.clear();
@@ -473,16 +479,16 @@ impl<'b> Streamer<'b> for BlockSegmentPostings {
#[cfg(test)]
mod tests {

use docset::DocSet;
use super::BlockSegmentPostings;
use super::SegmentPostings;
use schema::SchemaBuilder;
use common::HasLen;
use core::Index;
use schema::INT_INDEXED;
use schema::Term;
use docset::DocSet;
use fst::Streamer;
use schema::IndexRecordOption;
use common::HasLen;
use super::BlockSegmentPostings;
use schema::SchemaBuilder;
use schema::Term;
use schema::INT_INDEXED;

#[test]
fn test_empty_segment_postings() {
@@ -570,4 +576,3 @@ mod tests {
assert_eq!(block_segments.docs(), &[1, 3, 5]);
}
}


@@ -1,19 +1,19 @@
|
||||
use Result;
|
||||
use super::TermInfo;
|
||||
use common::BinarySerializable;
|
||||
use common::CompositeWrite;
|
||||
use common::CountingWriter;
|
||||
use compression::VIntEncoder;
|
||||
use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE};
|
||||
use core::Segment;
|
||||
use directory::WritePtr;
|
||||
use schema::Field;
|
||||
use schema::FieldEntry;
|
||||
use schema::FieldType;
|
||||
use schema::Schema;
|
||||
use directory::WritePtr;
|
||||
use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE};
|
||||
use DocId;
|
||||
use core::Segment;
|
||||
use std::io::{self, Write};
|
||||
use compression::VIntEncoder;
|
||||
use common::BinarySerializable;
|
||||
use common::CountingWriter;
|
||||
use common::CompositeWrite;
|
||||
use termdict::TermDictionaryBuilder;
|
||||
use DocId;
|
||||
use Result;
|
||||
|
||||
/// `PostingsSerializer` is in charge of serializing
|
||||
/// postings on disk, in the
|
||||
@@ -84,7 +84,11 @@ impl InvertedIndexSerializer {
|
||||
/// a given field.
|
||||
///
|
||||
/// Loads the indexing options for the given field.
|
||||
pub fn new_field(&mut self, field: Field, total_num_tokens: u64) -> io::Result<FieldSerializer> {
|
||||
pub fn new_field(
|
||||
&mut self,
|
||||
field: Field,
|
||||
total_num_tokens: u64,
|
||||
) -> io::Result<FieldSerializer> {
|
||||
let field_entry: &FieldEntry = self.schema.get_field_entry(field);
|
||||
let term_dictionary_write = self.terms_write.for_field(field);
|
||||
let postings_write = self.postings_write.for_field(field);
|
||||
@@ -124,7 +128,6 @@ impl<'a> FieldSerializer<'a> {
|
||||
postings_write: &'a mut CountingWriter<WritePtr>,
|
||||
positions_write: &'a mut CountingWriter<WritePtr>,
|
||||
) -> io::Result<FieldSerializer<'a>> {
|
||||
|
||||
let (term_freq_enabled, position_enabled): (bool, bool) = match field_type {
|
||||
FieldType::Str(ref text_options) => {
|
||||
if let Some(text_indexing_options) = text_options.get_indexing_options() {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use query::Query;
|
||||
use query::Weight;
|
||||
use query::Scorer;
|
||||
use core::Searcher;
|
||||
use core::SegmentReader;
|
||||
use docset::DocSet;
|
||||
use query::Query;
|
||||
use query::Scorer;
|
||||
use query::Weight;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
use DocId;
|
||||
use core::Searcher;
|
||||
|
||||
/// Query that matches all of the documents.
|
||||
///
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use common::{BitSet, TinySet};
|
||||
use DocId;
|
||||
use docset::{DocSet, SkipResult};
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
|
||||
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
|
||||
///
|
||||
@@ -120,10 +120,10 @@ impl DocSet for BitSetDocSet {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use DocId;
|
||||
use super::BitSetDocSet;
|
||||
use common::BitSet;
|
||||
use docset::{DocSet, SkipResult};
|
||||
use super::BitSetDocSet;
|
||||
use DocId;
|
||||
|
||||
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
|
||||
let mut docset = BitSet::with_max_value(max_doc);
|
||||
@@ -219,14 +219,13 @@ mod tests {
|
||||
|
||||
}
|
||||
|
||||
|
||||
#[cfg(all(test, feature="unstable"))]
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
|
||||
use tests;
|
||||
use test;
|
||||
use super::BitSet;
|
||||
use super::BitSetDocSet;
|
||||
use test;
|
||||
use tests;
|
||||
use DocSet;
|
||||
|
||||
#[bench]
|
||||
@@ -264,4 +263,4 @@ mod bench {
|
||||
while docset.advance() {}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use fieldnorm::FieldNormReader;
|
||||
use Term;
|
||||
use Searcher;
|
||||
use Score;
|
||||
use Searcher;
|
||||
use Term;
|
||||
|
||||
const K1: f32 = 1.2;
|
||||
const B: f32 = 0.75;
|
||||
@@ -11,7 +11,6 @@ fn idf(doc_freq: u64, doc_count: u64) -> f32 {
|
||||
(1f32 + x).ln()
|
||||
}
|
||||
|
||||
|
||||
fn cached_tf_component(fieldnorm: u32, average_fieldnorm: f32) -> f32 {
|
||||
K1 * (1f32 - B + B * fieldnorm as f32 / average_fieldnorm)
|
||||
}
|
||||
@@ -32,11 +31,10 @@ pub struct BM25Weight {
|
||||
}
|
||||
|
||||
impl BM25Weight {
|
||||
|
||||
pub fn null() -> BM25Weight {
|
||||
BM25Weight {
|
||||
weight: 0f32,
|
||||
cache: [1f32; 256]
|
||||
cache: [1f32; 256],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,7 +42,11 @@ impl BM25Weight {
|
||||
assert!(!terms.is_empty(), "BM25 requires at least one term");
|
||||
let field = terms[0].field();
|
||||
for term in &terms[1..] {
|
||||
assert_eq!(term.field(), field, "All terms must belong to the same field.");
|
||||
assert_eq!(
|
||||
term.field(),
|
||||
field,
|
||||
"All terms must belong to the same field."
|
||||
);
|
||||
}
|
||||
|
||||
let mut total_num_tokens = 0u64;
|
||||
@@ -56,7 +58,8 @@ impl BM25Weight {
|
||||
}
|
||||
let average_fieldnorm = total_num_tokens as f32 / total_num_docs as f32;
|
||||
|
||||
let idf = terms.iter()
|
||||
let idf = terms
|
||||
.iter()
|
||||
.map(|term| {
|
||||
let term_doc_freq = searcher.doc_freq(term);
|
||||
idf(term_doc_freq, total_num_docs)
|
||||
@@ -83,12 +86,12 @@ impl BM25Weight {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use tests::assert_nearly_equals;
|
||||
use super::idf;
|
||||
use tests::assert_nearly_equals;
|
||||
|
||||
#[test]
|
||||
fn test_idf() {
|
||||
assert_nearly_equals(idf(1, 2), 0.6931472);
|
||||
assert_nearly_equals(idf(1, 2), 0.6931472);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use Result;
|
||||
use super::boolean_weight::BooleanWeight;
|
||||
use query::Weight;
|
||||
use Searcher;
|
||||
use query::Query;
|
||||
use schema::Term;
|
||||
use query::TermQuery;
|
||||
use schema::IndexRecordOption;
|
||||
use query::Occur;
|
||||
use query::Query;
|
||||
use query::TermQuery;
|
||||
use query::Weight;
|
||||
use schema::IndexRecordOption;
|
||||
use schema::Term;
|
||||
use Result;
|
||||
use Searcher;
|
||||
|
||||
/// The boolean query combines a set of queries
|
||||
///
|
||||
@@ -48,7 +48,8 @@ impl BooleanQuery {
|
||||
let occur_term_queries: Vec<(Occur, Box<Query>)> = terms
|
||||
.into_iter()
|
||||
.map(|term| {
|
||||
let term_query: Box<Query> = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
|
||||
let term_query: Box<Query> =
|
||||
Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
|
||||
(Occur::Should, term_query)
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -1,19 +1,18 @@
use query::Weight;
use core::SegmentReader;
use query::Union;
use std::collections::HashMap;
use query::EmptyScorer;
use query::Scorer;
use downcast::Downcast;
use std::borrow::Borrow;
use query::intersect_scorers;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::term_query::TermScorer;
use query::EmptyScorer;
use query::Exclude;
use query::Occur;
use query::RequiredOptionalScorer;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::Scorer;
use query::Union;
use query::Weight;
use std::borrow::Borrow;
use std::collections::HashMap;
use Result;
use query::intersect_scorers;
use query::term_query::TermScorer;


fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
where
@@ -41,7 +40,6 @@ where

let scorer: Box<Scorer> = Box::new(Union::<_, TScoreCombiner>::from(scorers));
return scorer;

}

pub struct BooleanWeight {
@@ -78,9 +76,9 @@ impl BooleanWeight {
.remove(&Occur::MustNot)
.map(scorer_union::<TScoreCombiner>);

let must_scorer_opt: Option<Box<Scorer>> =
per_occur_scorers.remove(&Occur::Must)
.map(intersect_scorers);
let must_scorer_opt: Option<Box<Scorer>> = per_occur_scorers
.remove(&Occur::Must)
.map(intersect_scorers);

let positive_scorer: Box<Scorer> = match (should_scorer_opt, must_scorer_opt) {
(Some(should_scorer), Some(must_scorer)) => {

@@ -7,19 +7,19 @@ pub use self::boolean_query::BooleanQuery;
mod tests {

use super::*;
use query::Occur;
use query::Query;
use query::TermQuery;
use query::Intersection;
use query::Scorer;
use collector::tests::TestCollector;
use Index;
use downcast::Downcast;
use schema::*;
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::score_combiner::SumWithCoordsCombiner;
use query::term_query::TermScorer;
use query::Intersection;
use query::Occur;
use query::Query;
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::Scorer;
use query::TermQuery;
use schema::*;
use Index;

fn aux_test_helper() -> (Index, Field) {
let mut schema_builder = SchemaBuilder::default();
@@ -171,7 +171,6 @@ mod tests {
}
}


#[test]
pub fn test_intersection_score() {
let (index, text_field) = aux_test_helper();
@@ -193,7 +192,10 @@ mod tests {
};

{
let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), (Occur::Must, make_term_query("b"))]);
let boolean_query = BooleanQuery::from(vec![
(Occur::Must, make_term_query("a")),
(Occur::Must, make_term_query("b")),
]);
assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]);
}
}

@@ -1,7 +1,7 @@
use query::Scorer;
use docset::{DocSet, SkipResult};
use Score;
use query::Scorer;
use DocId;
use Score;

#[derive(Clone, Copy, Debug)]
enum State {
@@ -129,10 +129,10 @@
#[cfg(test)]
mod tests {

use tests::sample_with_seed;
use postings::tests::test_skip_against_unoptimized;
use super::*;
use postings::tests::test_skip_against_unoptimized;
use query::VecDocSet;
use tests::sample_with_seed;

#[test]
fn test_exclude() {

@@ -1,11 +1,11 @@
use docset::{DocSet, SkipResult};
use query::Scorer;
use query::EmptyScorer;
use DocId;
use downcast::Downcast;
use std::borrow::Borrow;
use Score;
use query::term_query::TermScorer;
use query::EmptyScorer;
use query::Scorer;
use std::borrow::Borrow;
use DocId;
use Score;

/// Returns the intersection scorer.
///
@@ -36,27 +36,29 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
left,
right,
others: scorers,
num_docsets
})
num_docsets,
});
}
}
return Box::new(Intersection {
left,
right,
others: scorers,
num_docsets
})
num_docsets,
});
}
_ => {
unreachable!();
}
_ => { unreachable!(); }
}
}

/// Creates a `DocSet` that iterates through the intersection of two `DocSet`s.
pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet=Box<Scorer>> {
pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet = Box<Scorer>> {
left: TDocSet,
right: TDocSet,
others: Vec<TOtherDocSet>,
num_docsets: usize
num_docsets: usize,
}

impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
@@ -71,18 +73,17 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
left,
right,
others: docsets,
num_docsets
num_docsets,
}
}
}


impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
pub(crate) fn docset_mut_specialized(&mut self, ord: usize) -> &mut TDocSet {
match ord {
0 => &mut self.left,
1 => &mut self.right,
n => &mut self.others[n - 2]
n => &mut self.others[n - 2],
}
}
}
@@ -92,7 +93,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> Intersection<TDocSet, TOtherDocSet>
match ord {
0 => &mut self.left,
1 => &mut self.right,
n => &mut self.others[n - 2]
n => &mut self.others[n - 2],
}
}
}
@@ -114,23 +115,30 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
// of the two rarest `DocSet` in the intersection.
loop {
match right.skip_next(candidate) {
SkipResult::Reached => { break; }
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = right.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}

match left.skip_next(candidate) {
SkipResult::Reached => { break; }
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}

}
// test the remaining scorers;
for (ord, docset) in self.others.iter_mut().enumerate() {
@@ -147,16 +155,22 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
// let's update our candidate.
candidate = docset.doc();
match left.skip_next(candidate) {
SkipResult::Reached => { other_candidate_ord = ord; }
SkipResult::Reached => {
other_candidate_ord = ord;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
continue 'outer;
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
}
}
@@ -164,9 +178,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
}
}


fn skip_next(&mut self, target: DocId) -> SkipResult {

// We optimize skipping by skipping every single member
// of the intersection to target.
let mut current_target: DocId = target;
@@ -211,18 +223,22 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
}

impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
where TScorer: Scorer, TOtherScorer: Scorer {
where
TScorer: Scorer,
TOtherScorer: Scorer,
{
fn score(&mut self) -> Score {
self.left.score() + self.right.score() + self.others.iter_mut().map(Scorer::score).sum::<Score>()
self.left.score() + self.right.score()
+ self.others.iter_mut().map(Scorer::score).sum::<Score>()
}
}
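The advance and skip logic above is the classic leapfrog pattern: every docset is skipped to the current candidate, and whenever one overshoots, its doc becomes the new candidate. A minimal standalone sketch over plain sorted slices (the helper name first_common is hypothetical; the SkipResult comments map to the match arms above):

// Find the first doc id present in every sorted list, leapfrog-style.
fn first_common(lists: &[&[u32]]) -> Option<u32> {
    let mut cursors = vec![0usize; lists.len()];
    let mut candidate = *lists.get(0)?.first()?;
    'outer: loop {
        for (i, list) in lists.iter().enumerate() {
            // skip_next-style advance: move this cursor to the first doc >= candidate.
            while cursors[i] < list.len() && list[cursors[i]] < candidate {
                cursors[i] += 1;
            }
            match list.get(cursors[i]) {
                None => return None,               // SkipResult::End
                Some(&doc) if doc > candidate => { // SkipResult::OverStep
                    candidate = doc;
                    continue 'outer;
                }
                Some(_) => {}                      // SkipResult::Reached
            }
        }
        return Some(candidate); // every list reached the candidate
    }
}

fn main() {
    let lists: Vec<&[u32]> = vec![&[1, 3, 5, 7], &[3, 4, 5], &[2, 3, 5]];
    assert_eq!(first_common(&lists), Some(3));
}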

#[cfg(test)]
mod tests {
use docset::{DocSet, SkipResult};
use super::Intersection;
use query::VecDocSet;
use docset::{DocSet, SkipResult};
use postings::tests::test_skip_against_unoptimized;
use query::VecDocSet;

#[test]
fn test_intersection() {

@@ -2,22 +2,22 @@
Query
*/

mod query;
mod boolean_query;
mod scorer;
mod occur;
mod weight;
mod term_query;
mod query_parser;
mod phrase_query;
mod all_query;
mod bitset;
mod range_query;
mod exclude;
mod union;
mod intersection;
mod reqopt_scorer;
mod bm25;
mod boolean_query;
mod exclude;
mod intersection;
mod occur;
mod phrase_query;
mod query;
mod query_parser;
mod range_query;
mod reqopt_scorer;
mod scorer;
mod term_query;
mod union;
mod weight;

#[cfg(test)]
mod vec_docset;
@@ -30,20 +30,20 @@ pub use self::union::Union;
#[cfg(test)]
pub use self::vec_docset::VecDocSet;

pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::exclude::Exclude;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::bitset::BitSetDocSet;
pub use self::boolean_query::BooleanQuery;
pub use self::exclude::Exclude;
pub use self::intersection::intersect_scorers;
pub use self::occur::Occur;
pub use self::phrase_query::PhraseQuery;
pub use self::query_parser::QueryParserError;
pub use self::query_parser::QueryParser;
pub use self::query::Query;
pub use self::query_parser::QueryParser;
pub use self::query_parser::QueryParserError;
pub use self::range_query::RangeQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::scorer::ConstScorer;
pub use self::scorer::EmptyScorer;
pub use self::scorer::Scorer;
pub use self::term_query::TermQuery;
pub use self::weight::Weight;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::range_query::RangeQuery;
pub use self::scorer::ConstScorer;
pub use self::intersection::intersect_scorers;

@@ -1,20 +1,20 @@
mod phrase_query;
mod phrase_weight;
mod phrase_scorer;
mod phrase_weight;

pub use self::phrase_query::PhraseQuery;
pub use self::phrase_weight::PhraseWeight;
pub use self::phrase_scorer::PhraseScorer;
pub use self::phrase_weight::PhraseWeight;

#[cfg(test)]
mod tests {

use super::*;
use core::Index;
use schema::{SchemaBuilder, Term, TEXT};
use collector::tests::TestCollector;
use tests::assert_nearly_equals;
use core::Index;
use error::ErrorKind;
use schema::{SchemaBuilder, Term, TEXT};
use tests::assert_nearly_equals;

fn create_index(texts: &[&'static str]) -> Index {
let mut schema_builder = SchemaBuilder::default();
@@ -40,7 +40,7 @@ mod tests {
"a b b d c g c",
"a b a b c",
"c a b a d ga a",
"a b c"
"a b c",
]);
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
@@ -68,13 +68,14 @@ mod tests {
#[test]
pub fn test_phrase_query_no_positions() {
let mut schema_builder = SchemaBuilder::default();
use schema::TextOptions;
use schema::TextFieldIndexing;
use schema::IndexRecordOption;
let no_positions = TextOptions::default()
.set_indexing_options(TextFieldIndexing::default()
use schema::TextFieldIndexing;
use schema::TextOptions;
let no_positions = TextOptions::default().set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqs));
.set_index_option(IndexRecordOption::WithFreqs),
);

let text_field = schema_builder.add_text_field("text", no_positions);
let schema = schema_builder.build();
@@ -88,11 +89,18 @@ mod tests {
let searcher = index.searcher();
let phrase_query = PhraseQuery::new(vec![
Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b")
Term::from_field_text(text_field, "b"),
]);
let mut test_collector = TestCollector::default();
if let &ErrorKind::SchemaError(ref msg) = searcher.search(&phrase_query, &mut test_collector).unwrap_err().kind() {
assert_eq!("Applied phrase query on field \"text\", which does not have positions indexed", msg.as_str());
if let &ErrorKind::SchemaError(ref msg) = searcher
.search(&phrase_query, &mut test_collector)
.unwrap_err()
.kind()
{
assert_eq!(
"Applied phrase query on field \"text\", which does not have positions indexed",
msg.as_str()
);
} else {
panic!("Should have returned an error");
}
@@ -120,7 +128,6 @@ mod tests {
let scores = test_query(vec!["a", "b"]);
assert_nearly_equals(scores[0], 0.40618482);
assert_nearly_equals(scores[1], 0.46844664);

}

#[test] // motivated by #234

@@ -1,11 +1,11 @@
use schema::{Field, Term};
use query::Query;
use core::searcher::Searcher;
use super::PhraseWeight;
use query::Weight;
use Result;
use query::bm25::BM25Weight;
use core::searcher::Searcher;
use error::ErrorKind;
use query::bm25::BM25Weight;
use query::Query;
use query::Weight;
use schema::{Field, Term};
use Result;

/// `PhraseQuery` matches a specific sequence of words.
///
@@ -28,18 +28,23 @@ pub struct PhraseQuery {
}

impl PhraseQuery {

/// Creates a new `PhraseQuery` given a list of terms.
///
/// There must be at least two terms, and all terms
/// must belong to the same field.
pub fn new(terms: Vec<Term>) -> PhraseQuery {
assert!(terms.len() > 1, "A phrase query is required to have strictly more than one term.");
assert!(
terms.len() > 1,
"A phrase query is required to have strictly more than one term."
);
let field = terms[0].field();
assert!(terms[1..].iter().all(|term| term.field() == field), "All terms from a phrase query must belong to the same field");
assert!(
terms[1..].iter().all(|term| term.field() == field),
"All terms from a phrase query must belong to the same field"
);
PhraseQuery {
field,
phrase_terms: terms
phrase_terms: terms,
}
}
}
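A hedged usage sketch of this constructor (the field name and term texts are hypothetical; at least two terms on the same field, as the asserts require):

extern crate tantivy;

use tantivy::query::PhraseQuery;
use tantivy::schema::{SchemaBuilder, Term, TEXT};

fn main() {
    let mut schema_builder = SchemaBuilder::default();
    let text_field = schema_builder.add_text_field("text", TEXT);
    let _schema = schema_builder.build();
    // Matches documents containing "hello" immediately followed by "world".
    let _phrase_query = PhraseQuery::new(vec![
        Term::from_field_text(text_field, "hello"),
        Term::from_field_text(text_field, "world"),
    ]);
}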
@@ -50,26 +55,29 @@ impl Query for PhraseQuery {
/// See [`Weight`](./trait.Weight.html).
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>> {
let schema = searcher.schema();
let field_entry= schema.get_field_entry(self.field);
let has_positions = field_entry.field_type().get_index_record_option()
let field_entry = schema.get_field_entry(self.field);
let has_positions = field_entry
.field_type()
.get_index_record_option()
.map(|index_record_option| index_record_option.has_positions())
.unwrap_or(false);
if !has_positions {
let field_name = field_entry.name();
bail!(ErrorKind::SchemaError(format!("Applied phrase query on field {:?}, which does not have positions indexed",
field_name)))
bail!(ErrorKind::SchemaError(format!(
"Applied phrase query on field {:?}, which does not have positions indexed",
field_name
)))
}
let terms = self.phrase_terms.clone();
if scoring_enabled {
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
Ok(Box::new(PhraseWeight::new(terms, bm25_weight, true)))
} else {
Ok(Box::new(PhraseWeight::new(
terms,
bm25_weight,
true
BM25Weight::null(),
false,
)))
} else {
Ok(Box::new(PhraseWeight::new(terms, BM25Weight::null(), false)))
}

}
}

@@ -1,20 +1,20 @@
use DocId;
use docset::{DocSet, SkipResult};
use postings::Postings;
use query::{Intersection, Scorer};
use query::bm25::BM25Weight;
use fieldnorm::FieldNormReader;
use postings::Postings;
use query::bm25::BM25Weight;
use query::{Intersection, Scorer};
use DocId;

struct PostingsWithOffset<TPostings> {
offset: u32,
postings: TPostings
postings: TPostings,
}

impl<TPostings: Postings> PostingsWithOffset<TPostings> {
pub fn new(segment_postings: TPostings, offset: u32) -> PostingsWithOffset<TPostings> {
PostingsWithOffset {
offset,
postings: segment_postings
postings: segment_postings,
}
}

@@ -49,10 +49,9 @@ pub struct PhraseScorer<TPostings: Postings> {
phrase_count: u32,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight,
score_needed: bool
score_needed: bool,
}


/// Returns true iff the two sorted arrays contain a common element
fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
let mut left_i = 0;
@@ -118,18 +117,20 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize {
count
}
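intersection_exists, intersection, and intersection_count all walk two sorted position lists with the same two-pointer pattern. A standalone illustrative re-implementation under a hypothetical name (not tantivy's exact source):

use std::cmp::Ordering;

// Count elements common to two sorted u32 slices with two cursors.
fn count_common(left: &[u32], right: &[u32]) -> usize {
    let (mut left_i, mut right_i, mut count) = (0, 0, 0);
    while left_i < left.len() && right_i < right.len() {
        match left[left_i].cmp(&right[right_i]) {
            Ordering::Less => left_i += 1,
            Ordering::Greater => right_i += 1,
            Ordering::Equal => {
                count += 1;
                left_i += 1;
                right_i += 1;
            }
        }
    }
    count
}

fn main() {
    assert_eq!(count_common(&[1, 3, 5, 7], &[3, 4, 5]), 2);
}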


impl<TPostings: Postings> PhraseScorer<TPostings> {

pub fn new(term_postings: Vec<TPostings>,
similarity_weight: BM25Weight,
fieldnorm_reader: FieldNormReader,
score_needed: bool) -> PhraseScorer<TPostings> {
pub fn new(
term_postings: Vec<TPostings>,
similarity_weight: BM25Weight,
fieldnorm_reader: FieldNormReader,
score_needed: bool,
) -> PhraseScorer<TPostings> {
let num_docsets = term_postings.len();
let postings_with_offsets = term_postings
.into_iter()
.enumerate()
.map(|(offset, postings)| PostingsWithOffset::new(postings, (num_docsets - offset) as u32))
.map(|(offset, postings)| {
PostingsWithOffset::new(postings, (num_docsets - offset) as u32)
})
.collect::<Vec<_>>();
PhraseScorer {
intersection_docset: Intersection::new(postings_with_offsets),
@@ -153,7 +154,6 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}


fn phrase_exists(&mut self) -> bool {
{
self.intersection_docset
@@ -163,7 +163,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
let mut intersection_len = self.left.len();
for i in 1..self.num_docsets - 1 {
{
self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(i)
.positions(&mut self.right);
}
intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
if intersection_len == 0 {
@@ -171,7 +173,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}

self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right);
intersection_exists(&self.left[..intersection_len], &self.right[..])
}

@@ -184,7 +188,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
let mut intersection_len = self.left.len();
for i in 1..self.num_docsets - 1 {
{
self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(i)
.positions(&mut self.right);
}
intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
if intersection_len == 0 {
@@ -192,7 +198,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}

self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right);
intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
}
}
@@ -238,15 +246,15 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
fn score(&mut self) -> f32 {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
self.similarity_weight.score(fieldnorm_id, self.phrase_count)
self.similarity_weight
.score(fieldnorm_id, self.phrase_count)
}
}

#[cfg(test)]
mod tests {

use super::{intersection_count, intersection};

use super::{intersection, intersection_count};

fn test_intersection_sym(left: &[u32], right: &[u32], expected: &[u32]) {
test_intersection_aux(left, right, expected);
@@ -271,12 +279,11 @@ mod tests {
}
}


#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {

use super::{intersection, intersection_count};
use test::Bencher;
use super::{intersection_count, intersection};

#[bench]
fn bench_intersection_short(b: &mut Bencher) {
@@ -287,7 +294,6 @@ mod bench {
});
}


#[bench]
fn bench_intersection_count_short(b: &mut Bencher) {
b.iter(|| {
@@ -296,4 +302,4 @@ mod bench {
intersection_count(&left, &right);
});
}
}
}

@@ -1,12 +1,12 @@
use query::Weight;
use query::Scorer;
use schema::Term;
use schema::IndexRecordOption;
use core::SegmentReader;
use super::PhraseScorer;
use query::EmptyScorer;
use Result;
use core::SegmentReader;
use query::bm25::BM25Weight;
use query::EmptyScorer;
use query::Scorer;
use query::Weight;
use schema::IndexRecordOption;
use schema::Term;
use Result;

pub struct PhraseWeight {
phrase_terms: Vec<Term>,
@@ -16,13 +16,15 @@ pub struct PhraseWeight {

impl PhraseWeight {
/// Creates a new phrase weight.
pub fn new(phrase_terms: Vec<Term>,
similarity_weight: BM25Weight,
score_needed: bool) -> PhraseWeight {
pub fn new(
phrase_terms: Vec<Term>,
similarity_weight: BM25Weight,
score_needed: bool,
) -> PhraseWeight {
PhraseWeight {
phrase_terms,
similarity_weight,
score_needed
score_needed,
}
}
}
@@ -37,25 +39,37 @@ impl Weight for PhraseWeight {
for term in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())
.read_postings(term, IndexRecordOption::WithFreqsAndPositions) {
.read_postings(term, IndexRecordOption::WithFreqsAndPositions)
{
term_postings_list.push(postings);
} else {
return Ok(Box::new(EmptyScorer));
}
}
Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)))
Ok(Box::new(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
self.score_needed,
)))
} else {
let mut term_postings_list = Vec::new();
for term in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())
.read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions) {
.read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions)
{
term_postings_list.push(postings);
} else {
return Ok(Box::new(EmptyScorer));
}
}
Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)))
Ok(Box::new(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
self.score_needed,
)))
}
}
}

@@ -1,9 +1,9 @@
use Result;
use super::Weight;
use collector::Collector;
use core::searcher::Searcher;
use SegmentLocalId;
use super::Weight;
use std::fmt;
use Result;
use SegmentLocalId;

/// The `Query` trait defines a set of documents and a scoring method
/// for those documents.

@@ -1,6 +1,6 @@
use std::fmt;
use schema::Term;
use query::Occur;
use schema::Term;
use std::fmt;

#[derive(Clone)]
pub enum LogicalLiteral {

@@ -1,5 +1,5 @@
mod query_parser;
mod query_grammar;
mod query_parser;
mod user_input_ast;

pub mod logical_ast;

@@ -1,6 +1,6 @@
use combine::*;
use combine::char::*;
use super::user_input_ast::*;
use combine::char::*;
use combine::*;

fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
where

@@ -1,18 +1,18 @@
use schema::{Field, Schema};
use query::Query;
use query::BooleanQuery;
use super::logical_ast::*;
use super::user_input_ast::*;
use super::query_grammar::parse_to_ast;
use super::user_input_ast::*;
use core::Index;
use query::BooleanQuery;
use query::Occur;
use query::PhraseQuery;
use query::Query;
use query::TermQuery;
use schema::IndexRecordOption;
use query::PhraseQuery;
use schema::{Field, Schema};
use schema::{FieldType, Term};
use std::num::ParseIntError;
use std::str::FromStr;
use tokenizer::TokenizerManager;
use std::num::ParseIntError;
use core::Index;

/// Possible error that may happen when parsing a query.
#[derive(Debug, PartialEq, Eq)]
@@ -179,14 +179,14 @@ impl QueryParser {
}
FieldType::Str(ref str_options) => {
if let Some(option) = str_options.get_indexing_options() {
let mut tokenizer = self.tokenizer_manager
.get(option.tokenizer())
.ok_or_else(|| {
let mut tokenizer = self.tokenizer_manager.get(option.tokenizer()).ok_or_else(
|| {
QueryParserError::UnknownTokenizer(
field_entry.name().to_string(),
option.tokenizer().to_string(),
)
})?;
},
)?;
let mut terms: Vec<Term> = Vec::new();
let mut token_stream = tokenizer.token_stream(phrase);
token_stream.process(&mut |token| {
@@ -207,13 +207,14 @@ impl QueryParser {
Ok(Some(LogicalLiteral::Phrase(terms)))
} else {
let fieldname = self.schema.get_field_name(field).to_string();
Err(QueryParserError::FieldDoesNotHavePositionsIndexed(fieldname))
Err(QueryParserError::FieldDoesNotHavePositionsIndexed(
fieldname,
))
}
} else {
let fieldname = self.schema.get_field_name(field).to_string();
Err(QueryParserError::FieldNotIndexed(fieldname))
}

}
} else {
// This should have been seen earlier really.
@@ -340,16 +341,16 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box<Query> {

#[cfg(test)]
mod test {
use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT};
use tokenizer::TokenizerManager;
use super::super::logical_ast::*;
use super::QueryParser;
use super::QueryParserError;
use query::Query;
use schema::Field;
use schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use super::QueryParser;
use super::QueryParserError;
use Index;
use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT};
use tokenizer::SimpleTokenizer;
use super::super::logical_ast::*;
use tokenizer::TokenizerManager;
use Index;

fn make_query_parser() -> QueryParser {
let mut schema_builder = SchemaBuilder::default();

@@ -1,16 +1,16 @@
use schema::{Field, IndexRecordOption, Term};
use query::{Query, Scorer, Weight};
use termdict::{TermDictionary, TermStreamer};
use core::SegmentReader;
use common::BitSet;
use Result;
use core::Searcher;
use core::SegmentReader;
use error::ErrorKind;
use query::BitSetDocSet;
use query::ConstScorer;
use std::ops::Range;
use query::{Query, Scorer, Weight};
use schema::Type;
use error::ErrorKind;
use schema::{Field, IndexRecordOption, Term};
use std::collections::Bound;
use std::ops::Range;
use termdict::{TermDictionary, TermStreamer};
use Result;

fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
bound: Bound<TFrom>,
@@ -89,16 +89,16 @@ pub struct RangeQuery {
}

impl RangeQuery {

/// Creates a new `RangeQuery` over an `i64` field.
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64(
field: Field,
range: Range<i64>
) -> RangeQuery {
RangeQuery::new_i64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_i64(field: Field, range: Range<i64>) -> RangeQuery {
RangeQuery::new_i64_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}

/// Creates a new `RangeQuery` over an `i64` field.
@@ -111,7 +111,7 @@ impl RangeQuery {
pub fn new_i64_bounds(
field: Field,
left_bound: Bound<i64>,
right_bound: Bound<i64>
right_bound: Bound<i64>,
) -> RangeQuery {
let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
RangeQuery {
@@ -132,7 +132,7 @@ impl RangeQuery {
pub fn new_u64_bounds(
field: Field,
left_bound: Bound<u64>,
right_bound: Bound<u64>
right_bound: Bound<u64>,
) -> RangeQuery {
let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
RangeQuery {
@@ -147,11 +147,12 @@ impl RangeQuery {
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64(
field: Field,
range: Range<u64>
) -> RangeQuery {
RangeQuery::new_u64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_u64(field: Field, range: Range<u64>) -> RangeQuery {
RangeQuery::new_u64_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}

/// Creates a new `RangeQuery` over a `Str` field.
@@ -164,7 +165,7 @@ impl RangeQuery {
pub fn new_str_bounds<'b>(
field: Field,
left: Bound<&'b str>,
right: Bound<&'b str>
right: Bound<&'b str>,
) -> RangeQuery {
let make_term_val = |val: &str| val.as_bytes().to_vec();
RangeQuery {
@@ -179,11 +180,12 @@ impl RangeQuery {
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str<'b>(
field: Field,
range: Range<&'b str>
) -> RangeQuery {
RangeQuery::new_str_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_str<'b>(field: Field, range: Range<&'b str>) -> RangeQuery {
RangeQuery::new_str_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}
}
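A hedged usage sketch of these constructors (the "year" field and its add_i64_field/INT_INDEXED setup are assumed, following the test imports in this diff): a Range<i64> maps to an inclusive lower bound and an exclusive upper bound, so new_i64 and new_i64_bounds below build the same query.

extern crate tantivy;

use std::collections::Bound;
use tantivy::query::RangeQuery;
use tantivy::schema::{SchemaBuilder, INT_INDEXED};

fn main() {
    let mut schema_builder = SchemaBuilder::default();
    let year_field = schema_builder.add_i64_field("year", INT_INDEXED);
    let _schema = schema_builder.build();
    // Half-open range [1990, 2000): 1990 included, 2000 excluded.
    let _nineties = RangeQuery::new_i64(year_field, 1990..2000);
    let _same_query = RangeQuery::new_i64_bounds(
        year_field,
        Bound::Included(1990),
        Bound::Excluded(2000),
    );
}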

@@ -256,13 +258,13 @@ impl Weight for RangeWeight {
#[cfg(test)]
mod tests {

use Index;
use schema::{Document, Field, SchemaBuilder, INT_INDEXED};
use collector::CountCollector;
use std::collections::Bound;
use query::Query;
use Result;
use super::RangeQuery;
use collector::CountCollector;
use query::Query;
use schema::{Document, Field, SchemaBuilder, INT_INDEXED};
use std::collections::Bound;
use Index;
use Result;

#[test]
fn test_range_query_simple() {
@@ -349,7 +351,14 @@ mod tests {
)),
9
);
assert_eq!(count_multiples(RangeQuery::new_i64_bounds(int_field, Bound::Included(9), Bound::Unbounded)), 91);
assert_eq!(
count_multiples(RangeQuery::new_i64_bounds(
int_field,
Bound::Included(9),
Bound::Unbounded
)),
91
);
}

}

@@ -1,10 +1,10 @@
use DocId;
use query::Scorer;
use query::score_combiner::ScoreCombiner;
use Score;
use docset::{DocSet, SkipResult};
use query::score_combiner::ScoreCombiner;
use query::Scorer;
use std::cmp::Ordering;
use std::marker::PhantomData;
use DocId;
use Score;

/// Given a required scorer and an optional scorer,
/// matches all documents from the required scorer
@@ -101,14 +101,14 @@
#[cfg(test)]
mod tests {
use tests::sample_with_seed;
use super::RequiredOptionalScorer;
use query::VecDocSet;
use query::ConstScorer;
use docset::DocSet;
use postings::tests::test_skip_against_unoptimized;
use query::Scorer;
use query::score_combiner::{DoNothingCombiner, SumCombiner};
use query::ConstScorer;
use query::Scorer;
use query::VecDocSet;
use tests::sample_with_seed;

#[test]
fn test_reqopt_scorer_empty() {

@@ -1,5 +1,5 @@
use Score;
use query::Scorer;
use Score;

/// The `ScoreCombiner` trait defines how to compute
/// an overall score given a list of scores.
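To make the idea concrete, a hypothetical stand-in (not tantivy's actual trait definition): a combiner accumulates the scores of the matching sub-scorers, and a sum combiner like the SumCombiner used in the tests above simply adds them up.

type Score = f32;

// Hypothetical sketch of the combiner idea, not tantivy's real trait.
trait ScoreCombinerSketch: Default {
    fn update(&mut self, score: Score);
    fn score(&self) -> Score;
}

// A sum combiner adds up every sub-score it sees.
#[derive(Default)]
struct SumCombinerSketch {
    total: Score,
}

impl ScoreCombinerSketch for SumCombinerSketch {
    fn update(&mut self, score: Score) {
        self.total += score;
    }
    fn score(&self) -> Score {
        self.total
    }
}

fn main() {
    let mut combiner = SumCombinerSketch::default();
    for s in &[0.5f32, 1.25, 0.25] {
        combiner.update(*s);
    }
    assert!((combiner.score() - 2.0).abs() < 1e-6);
}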