Cargo fmt

Paul Masurel
2018-04-21 20:05:36 +09:00
parent 175b76f119
commit 78673172d0
134 changed files with 1229 additions and 1228 deletions
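
Every hunk below is a mechanical formatting change produced by running rustfmt over the crate (the `cargo fmt` subcommand): `use` statements are sorted, short struct literals and closures are collapsed onto one line, and expressions longer than the default line width are rewrapped. As a minimal, self-contained sketch of the resulting style, here is a standalone snippet written the way rustfmt leaves it; the facet strings, counts, and `main` function are illustrative only and do not come from any file in this commit.

// Illustrative example only (not part of the commit): the sorted import order
// and one-line collection style that rustfmt settles on after `cargo fmt`.
use std::collections::BTreeMap;
use std::io;
use std::io::Write;

fn main() {
    // Hypothetical facet counts, standing in for the crate-local types in the diff.
    let mut counts: BTreeMap<String, u64> = BTreeMap::new();
    counts.insert("/country/france".to_string(), 2);
    let mut out = io::stdout();
    for (facet, count) in &counts {
        writeln!(out, "{} => {}", facet, count).unwrap();
    }
}

Running `cargo fmt` (with `--all` in a multi-crate workspace) and committing the result reproduces a formatting-only diff of exactly this shape.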

View File

@@ -5,11 +5,11 @@ extern crate tempdir;
extern crate serde_json;
use std::path::Path;
use tempdir::TempDir;
use tantivy::Index;
use tantivy::schema::*;
use tantivy::collector::TopCollector;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::Index;
use tempdir::TempDir;
fn main() {
// Let's create a temporary directory for the

View File

@@ -1,9 +1,9 @@
use Result;
use collector::Collector;
use DocId;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
use DocId;
use Score;
/// Collector that does nothing.
/// This is used in the chain Collector and will hopefully

View File

@@ -1,9 +1,9 @@
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use Score;
use SegmentLocalId;
use SegmentReader;
/// `CountCollector` collector only counts how many
/// documents match the query.

View File

@@ -1,25 +1,25 @@
use std::mem;
use collector::Collector;
use docset::SkipResult;
use fastfield::FacetReader;
use schema::Facet;
use schema::Field;
use std::cell::UnsafeCell;
use schema::Facet;
use std::collections::btree_map;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::BinaryHeap;
use std::collections::Bound;
use std::collections::BTreeSet;
use termdict::TermMerger;
use docset::SkipResult;
use std::collections::btree_map;
use std::{usize, u64};
use std::iter::Peekable;
use std::mem;
use std::{u64, usize};
use termdict::TermMerger;
use std::cmp::Ordering;
use DocId;
use Result;
use Score;
use SegmentReader;
use SegmentLocalId;
use std::cmp::Ordering;
use SegmentReader;
struct Hit<'a> {
count: u64,
@@ -430,27 +430,22 @@ pub struct FacetCounts {
facet_counts: BTreeMap<Facet, u64>,
}
pub struct FacetChildIterator<'a> {
underlying: btree_map::Range<'a, Facet, u64>,
}
impl<'a> Iterator for FacetChildIterator<'a> {
type Item = (&'a Facet, u64);
fn next(&mut self) -> Option<Self::Item> {
self.underlying
.next()
.map(|(facet, count)| (facet, *count))
self.underlying.next().map(|(facet, count)| (facet, *count))
}
}
impl FacetCounts {
pub fn get<T>(&self, facet_from: T) -> FacetChildIterator //impl Iterator<Item = (&'a Facet, u64)>
where Facet: From<T>
pub fn get<T>(&self, facet_from: T) -> FacetChildIterator
where
Facet: From<T>,
{
let facet = Facet::from(facet_from);
let left_bound = Bound::Excluded(facet.clone());
@@ -463,9 +458,7 @@ impl FacetCounts {
Bound::Excluded(facet_after)
};
let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
FacetChildIterator {
underlying
}
FacetChildIterator { underlying }
}
pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
@@ -497,13 +490,13 @@ impl FacetCounts {
#[cfg(test)]
mod tests {
use core::Index;
use schema::{Document, Facet, SchemaBuilder};
use query::AllQuery;
use super::{FacetCollector, FacetCounts};
use std::iter;
use core::Index;
use query::AllQuery;
use rand::{thread_rng, Rng};
use schema::Field;
use schema::{Document, Facet, SchemaBuilder};
use std::iter;
#[test]
fn test_facet_collector_drilldown() {
@@ -558,8 +551,10 @@ mod tests {
}
#[test]
#[should_panic(expected = "Tried to add a facet which is a descendant of \
an already added facet.")]
#[should_panic(
expected = "Tried to add a facet which is a descendant of \
an already added facet."
)]
fn test_misused_facet_collector() {
let mut facet_collector = FacetCollector::for_field(Field(0));
facet_collector.add_facet(Facet::from("/country"));
@@ -619,18 +614,16 @@ mod tests {
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test::Bencher;
use schema::SchemaBuilder;
use Index;
use collector::FacetCollector;
use schema::Facet;
use query::AllQuery;
use rand::{thread_rng, Rng};
use schema::Facet;
use schema::SchemaBuilder;
use test::Bencher;
use Index;
#[bench]
fn bench_facet_collector(b: &mut Bencher) {
@@ -662,4 +655,4 @@ mod bench {
searcher.search(&AllQuery, &mut facet_collector).unwrap();
});
}
}
}

View File

@@ -2,11 +2,11 @@
Defines how the documents matching a search query should be processed.
*/
use SegmentReader;
use SegmentLocalId;
use DocId;
use Score;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
mod count_collector;
pub use self::count_collector::CountCollector;
@@ -89,12 +89,12 @@ impl<'a, C: Collector> Collector for &'a mut C {
pub mod tests {
use super::*;
use DocId;
use Score;
use core::SegmentReader;
use SegmentLocalId;
use fastfield::FastFieldReader;
use schema::Field;
use DocId;
use Score;
use SegmentLocalId;
/// Stores all of the doc ids.
/// This collector is only used for tests.
@@ -187,11 +187,10 @@ pub mod tests {
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test::Bencher;
use collector::{Collector, CountCollector};
use test::Bencher;
#[bench]
fn build_collector(b: &mut Bencher) {
@@ -204,4 +203,4 @@ mod bench {
count_collector.count()
});
}
}
}

View File

@@ -1,9 +1,9 @@
use super::Collector;
use DocId;
use Score;
use Result;
use SegmentReader;
use Score;
use SegmentLocalId;
use SegmentReader;
/// Multicollector makes it possible to collect on more than one collector.
/// It should only be used for use cases where the Collector types is unknown

View File

@@ -1,12 +1,12 @@
use super::Collector;
use SegmentReader;
use SegmentLocalId;
use DocAddress;
use Result;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use DocAddress;
use DocId;
use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
// Rust heap is a max-heap and we need a min heap.
#[derive(Clone, Copy)]
@@ -135,9 +135,9 @@ impl Collector for TopCollector {
mod tests {
use super::*;
use collector::Collector;
use DocId;
use Score;
use collector::Collector;
#[test]
fn test_top_collector_not_at_capacity() {

View File

@@ -1,6 +1,6 @@
use std::io::Write;
use std::io;
use common::serialize::BinarySerializable;
use std::io;
use std::io::Write;
use std::mem;
use std::ops::Deref;
use std::ptr;
@@ -106,7 +106,8 @@ where
addr + 8 <= data.len(),
"The fast field field should have been padded with 7 bytes."
);
let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
let val_unshifted_unmasked: u64 =
unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
val_shifted & mask
} else {
@@ -141,7 +142,8 @@ where
for output_val in output.iter_mut() {
let addr = addr_in_bits >> 3;
let bit_shift = addr_in_bits & 7;
let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
let val_unshifted_unmasked: u64 =
unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
*output_val = val_shifted & mask;
addr_in_bits += num_bits;

View File

@@ -202,14 +202,14 @@ impl BitSet {
#[cfg(test)]
mod tests {
use tests;
use std::collections::HashSet;
use super::BitSet;
use super::TinySet;
use tests::generate_nonunique_unsorted;
use std::collections::BTreeSet;
use query::BitSetDocSet;
use docset::DocSet;
use query::BitSetDocSet;
use std::collections::BTreeSet;
use std::collections::HashSet;
use tests;
use tests::generate_nonunique_unsorted;
#[test]
fn test_tiny_set() {
@@ -354,12 +354,12 @@ mod tests {
}
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test;
use super::TinySet;
use super::BitSet;
use super::TinySet;
use test;
#[bench]
fn bench_tinyset_pop(b: &mut test::Bencher) {
@@ -392,4 +392,4 @@ mod bench {
fn bench_bitset_initialize(b: &mut test::Bencher) {
b.iter(|| BitSet::with_max_value(1_000_000));
}
}
}

View File

@@ -1,12 +1,12 @@
use std::io::Write;
use common::CountingWriter;
use std::collections::HashMap;
use schema::Field;
use common::VInt;
use directory::WritePtr;
use std::io::{self, Read};
use directory::ReadOnlySource;
use common::BinarySerializable;
use common::CountingWriter;
use common::VInt;
use directory::ReadOnlySource;
use directory::WritePtr;
use schema::Field;
use std::collections::HashMap;
use std::io::Write;
use std::io::{self, Read};
#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
pub struct FileAddr {
@@ -30,10 +30,7 @@ impl BinarySerializable for FileAddr {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let field = Field::deserialize(reader)?;
let idx = VInt::deserialize(reader)?.0 as usize;
Ok(FileAddr {
field,
idx,
})
Ok(FileAddr { field, idx })
}
}
@@ -166,7 +163,7 @@ impl CompositeFile {
/// to a given `Field` and stored in a `CompositeFile`.
pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option<ReadOnlySource> {
self.offsets_index
.get(&FileAddr { field, idx, })
.get(&FileAddr { field, idx })
.map(|&(from, to)| self.data.slice(from, to))
}
}
@@ -174,12 +171,12 @@ impl CompositeFile {
#[cfg(test)]
mod test {
use std::io::Write;
use super::{CompositeFile, CompositeWrite};
use common::BinarySerializable;
use common::VInt;
use directory::{Directory, RAMDirectory};
use schema::Field;
use common::VInt;
use common::BinarySerializable;
use std::io::Write;
use std::path::Path;
#[test]

View File

@@ -1,5 +1,5 @@
use std::io::Write;
use std::io;
use std::io::Write;
pub struct CountingWriter<W> {
underlying: W,

View File

@@ -1,16 +1,16 @@
mod serialize;
mod vint;
mod counting_writer;
mod composite_file;
pub mod bitpacker;
mod bitset;
mod composite_file;
mod counting_writer;
mod serialize;
mod vint;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::serialize::{BinarySerializable, FixedSize};
pub use self::vint::VInt;
pub use self::counting_writer::CountingWriter;
pub use self::bitset::BitSet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::counting_writer::CountingWriter;
pub use self::serialize::{BinarySerializable, FixedSize};
pub use self::vint::VInt;
pub use byteorder::LittleEndian as Endianness;
use std::io;
@@ -104,8 +104,8 @@ pub fn u64_to_i64(val: u64) -> i64 {
#[cfg(test)]
pub(crate) mod test {
use super::{compute_num_bits, i64_to_u64, u64_to_i64};
pub use super::serialize::test::fixed_size_test;
use super::{compute_num_bits, i64_to_u64, u64_to_i64};
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);

View File

@@ -1,10 +1,10 @@
use byteorder::{ReadBytesExt, WriteBytesExt};
use common::Endianness;
use std::fmt;
use std::io::Write;
use std::io::Read;
use std::io;
use common::VInt;
use std::fmt;
use std::io;
use std::io::Read;
use std::io::Write;
/// Trait for a simple binary serialization.
pub trait BinarySerializable: fmt::Debug + Sized {
@@ -135,8 +135,8 @@ impl BinarySerializable for String {
#[cfg(test)]
pub mod test {
use common::VInt;
use super::*;
use common::VInt;
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();

View File

@@ -1,7 +1,7 @@
use super::BinarySerializable;
use std::io;
use std::io::Write;
use std::io::Read;
use std::io::Write;
/// Wrapper over a `u64` that serializes as a variable int.
#[derive(Debug, Eq, PartialEq)]

View File

@@ -8,10 +8,8 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
pub use self::stream::CompressedIntStream;
use bitpacking::{BitPacker, BitPacker4x};
/// Returns the size in bytes of a compressed block, given `num_bits`.
pub fn compressed_block_size(num_bits: u8) -> usize {
1 + (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8
@@ -35,19 +33,21 @@ impl BlockEncoder {
pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> &[u8] {
let num_bits = self.bitpacker.num_bits_sorted(offset, block);
self.output[0] = num_bits;
let written_size = 1 + self.bitpacker.compress_sorted(offset, block, &mut self.output[1..], num_bits);
let written_size =
1 + self.bitpacker
.compress_sorted(offset, block, &mut self.output[1..], num_bits);
&self.output[..written_size]
}
pub fn compress_block_unsorted(&mut self, block: &[u32]) -> &[u8] {
let num_bits = self.bitpacker.num_bits(block);
self.output[0] = num_bits;
let written_size = 1 + self.bitpacker.compress(block, &mut self.output[1..], num_bits);
let written_size = 1 + self.bitpacker
.compress(block, &mut self.output[1..], num_bits);
&self.output[..written_size]
}
}
pub struct BlockDecoder {
bitpacker: BitPacker4x,
pub output: [u32; COMPRESSION_BLOCK_SIZE + 1],
@@ -68,17 +68,23 @@ impl BlockDecoder {
output_len: 0,
}
}
pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
let num_bits = compressed_data[0];
self.output_len = COMPRESSION_BLOCK_SIZE;
1 + self.bitpacker.decompress_sorted(offset, &compressed_data[1..], &mut self.output, num_bits)
1 + self.bitpacker.decompress_sorted(
offset,
&compressed_data[1..],
&mut self.output,
num_bits,
)
}
pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
let num_bits = compressed_data[0];
self.output_len = COMPRESSION_BLOCK_SIZE;
1 + self.bitpacker.decompress(&compressed_data[1..], &mut self.output, num_bits)
1 + self.bitpacker
.decompress(&compressed_data[1..], &mut self.output, num_bits)
}
#[inline]
@@ -264,14 +270,13 @@ pub mod tests {
}
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::*;
use test::Bencher;
use tests;
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);

View File

@@ -1,6 +1,6 @@
use compression::compressed_block_size;
use compression::BlockDecoder;
use compression::COMPRESSION_BLOCK_SIZE;
use compression::compressed_block_size;
use directory::{ReadOnlySource, SourceRead};
/// Reads a stream of compressed ints.
@@ -13,7 +13,7 @@ pub struct CompressedIntStream {
buffer: SourceRead,
block_decoder: BlockDecoder,
cached_addr: usize, // address of the currently decoded block
cached_addr: usize, // address of the currently decoded block
cached_next_addr: usize, // address following the currently decoded block
addr: usize, // address of the block associated to the current position
@@ -42,7 +42,8 @@ impl CompressedIntStream {
// no need to read.
self.cached_next_addr
} else {
let next_addr = addr + self.block_decoder.uncompress_block_unsorted(self.buffer.slice_from(addr));
let next_addr = addr + self.block_decoder
.uncompress_block_unsorted(self.buffer.slice_from(addr));
self.cached_addr = addr;
self.cached_next_addr = next_addr;
next_addr
@@ -101,8 +102,8 @@ pub mod tests {
use super::CompressedIntStream;
use compression::compressed_block_size;
use compression::COMPRESSION_BLOCK_SIZE;
use compression::BlockEncoder;
use compression::COMPRESSION_BLOCK_SIZE;
use directory::ReadOnlySource;
fn create_stream_buffer() -> ReadOnlySource {

View File

@@ -1,33 +1,32 @@
use Result;
use core::SegmentId;
use error::{ErrorKind, ResultExt};
use serde_json;
use schema::Schema;
use std::sync::Arc;
use serde_json;
use std::borrow::BorrowMut;
use std::fmt;
use core::SegmentId;
use std::sync::Arc;
use Result;
#[cfg(feature="mmap")]
use super::pool::LeasedItem;
use super::pool::Pool;
use super::segment::create_segment;
use super::segment::Segment;
use core::searcher::Searcher;
use core::IndexMeta;
use core::SegmentMeta;
use core::SegmentReader;
use core::META_FILEPATH;
use directory::ManagedDirectory;
#[cfg(feature = "mmap")]
use directory::MmapDirectory;
use directory::{Directory, RAMDirectory};
use indexer::index_writer::open_index_writer;
use core::searcher::Searcher;
use num_cpus;
use super::segment::Segment;
use core::SegmentReader;
use super::pool::Pool;
use core::SegmentMeta;
use super::pool::LeasedItem;
use std::path::Path;
use core::IndexMeta;
use indexer::DirectoryLock;
use IndexWriter;
use directory::ManagedDirectory;
use core::META_FILEPATH;
use super::segment::create_segment;
use indexer::segment_updater::save_new_metas;
use indexer::DirectoryLock;
use num_cpus;
use std::path::Path;
use tokenizer::TokenizerManager;
use IndexWriter;
const NUM_SEARCHERS: usize = 12;
@@ -64,7 +63,7 @@ impl Index {
/// The index will use the `MMapDirectory`.
///
/// If a previous index was in this directory, then its meta file will be destroyed.
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
pub fn create<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
let directory = ManagedDirectory::new(mmap_directory)?;
@@ -84,7 +83,7 @@ impl Index {
///
/// The temp directory is only used for testing the `MmapDirectory`.
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
let mmap_directory = MmapDirectory::create_from_tempdir()?;
let directory = ManagedDirectory::new(mmap_directory)?;
@@ -112,7 +111,7 @@ impl Index {
}
/// Opens a new directory from an index path.
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
let directory = ManagedDirectory::new(mmap_directory)?;
@@ -224,7 +223,7 @@ impl Index {
.collect::<Result<_>>()?;
let schema = self.schema();
let searchers = (0..NUM_SEARCHERS)
.map(|_| Searcher::new(schema.clone(),segment_readers.clone()))
.map(|_| Searcher::new(schema.clone(), segment_readers.clone()))
.collect();
self.searcher_pool.publish_new_generation(searchers);
Ok(())

View File

@@ -1,7 +1,7 @@
use schema::Schema;
use core::SegmentMeta;
use std::fmt;
use schema::Schema;
use serde_json;
use std::fmt;
/// Meta information about the `Index`.
///
@@ -45,9 +45,9 @@ impl fmt::Debug for IndexMeta {
#[cfg(test)]
mod tests {
use serde_json;
use super::IndexMeta;
use schema::{SchemaBuilder, TEXT};
use serde_json;
#[test]
fn test_serialize_metas() {

View File

@@ -1,13 +1,13 @@
use common::BinarySerializable;
use compression::CompressedIntStream;
use directory::{ReadOnlySource, SourceRead};
use termdict::TermDictionary;
use postings::{BlockSegmentPostings, SegmentPostings};
use postings::FreqReadingOption;
use postings::TermInfo;
use postings::{BlockSegmentPostings, SegmentPostings};
use schema::FieldType;
use schema::IndexRecordOption;
use schema::Term;
use compression::CompressedIntStream;
use postings::FreqReadingOption;
use common::BinarySerializable;
use schema::FieldType;
use termdict::TermDictionary;
/// The inverted index reader is in charge of accessing
/// the inverted index associated to a specific field.
@@ -27,7 +27,7 @@ pub struct InvertedIndexReader {
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
record_option: IndexRecordOption,
total_num_tokens: u64
total_num_tokens: u64,
}
impl InvertedIndexReader {
@@ -45,7 +45,7 @@ impl InvertedIndexReader {
postings_source: postings_source.slice_from(8),
positions_source,
record_option,
total_num_tokens
total_num_tokens,
}
}
@@ -56,11 +56,11 @@ impl InvertedIndexReader {
.get_index_record_option()
.unwrap_or(IndexRecordOption::Basic);
InvertedIndexReader {
termdict: TermDictionary::empty(field_type),
termdict: TermDictionary::empty(field_type),
postings_source: ReadOnlySource::empty(),
positions_source: ReadOnlySource::empty(),
record_option,
total_num_tokens: 0u64
total_num_tokens: 0u64,
}
}
@@ -149,8 +149,6 @@ impl InvertedIndexReader {
self.total_num_tokens
}
/// Returns the segment postings associated with the term, and with the given option,
/// or `None` if the term has never been encountered and indexed.
///
@@ -166,12 +164,15 @@ impl InvertedIndexReader {
Some(self.read_postings_from_terminfo(&term_info, option))
}
pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
pub(crate) fn read_postings_no_deletes(
&self,
term: &Term,
option: IndexRecordOption,
) -> Option<SegmentPostings> {
let term_info = get!(self.get_term_info(term));
Some(self.read_postings_from_terminfo(&term_info, option))
}
/// Returns the number of documents containing the term.
pub fn doc_freq(&self, term: &Term) -> u32 {
self.get_term_info(term)
@@ -179,6 +180,3 @@ impl InvertedIndexReader {
.unwrap_or(0u32)
}
}

View File

@@ -1,24 +1,24 @@
pub mod searcher;
pub mod index;
mod segment_reader;
mod segment_id;
mod segment_component;
mod segment;
mod index_meta;
mod pool;
mod segment_meta;
mod inverted_index_reader;
mod pool;
pub mod searcher;
mod segment;
mod segment_component;
mod segment_id;
mod segment_meta;
mod segment_reader;
pub use self::index::Index;
pub use self::index_meta::IndexMeta;
pub use self::inverted_index_reader::InvertedIndexReader;
pub use self::searcher::Searcher;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::SegmentReader;
pub use self::segment::Segment;
pub use self::segment::SerializableSegment;
pub use self::index::Index;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_meta::SegmentMeta;
pub use self::index_meta::IndexMeta;
pub use self::segment_reader::SegmentReader;
use std::path::PathBuf;

View File

@@ -1,8 +1,8 @@
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use crossbeam::sync::MsQueue;
use std::mem;
use std::ops::{Deref, DerefMut};
use crossbeam::sync::MsQueue;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
pub struct GenerationItem<T> {
@@ -114,8 +114,8 @@ impl<T> Drop for LeasedItem<T> {
#[cfg(test)]
mod tests {
use std::iter;
use super::Pool;
use std::iter;
#[test]
fn test_pool() {

View File

@@ -1,15 +1,15 @@
use Result;
use core::SegmentReader;
use schema::Document;
use collector::Collector;
use query::Query;
use DocAddress;
use schema::{Field, Term};
use termdict::TermMerger;
use std::sync::Arc;
use std::fmt;
use schema::Schema;
use core::InvertedIndexReader;
use core::SegmentReader;
use query::Query;
use schema::Document;
use schema::Schema;
use schema::{Field, Term};
use std::fmt;
use std::sync::Arc;
use termdict::TermMerger;
use DocAddress;
use Result;
/// Holds a list of `SegmentReader`s ready for search.
///
@@ -22,14 +22,11 @@ pub struct Searcher {
}
impl Searcher {
/// Creates a new `Searcher`
pub(crate) fn new(
schema: Schema,
segment_readers: Vec<SegmentReader>) -> Searcher {
pub(crate) fn new(schema: Schema, segment_readers: Vec<SegmentReader>) -> Searcher {
Searcher {
schema,
segment_readers
segment_readers,
}
}
/// Fetches a document from tantivy's store given a `DocAddress`.
@@ -109,7 +106,6 @@ impl FieldSearcher {
}
}
impl fmt::Debug for Searcher {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let segment_ids = self.segment_readers

View File

@@ -1,16 +1,16 @@
use Result;
use std::path::PathBuf;
use schema::Schema;
use std::fmt;
use core::SegmentId;
use directory::{FileProtection, ReadOnlySource, WritePtr};
use indexer::segment_serializer::SegmentSerializer;
use super::SegmentComponent;
use core::Index;
use std::result;
use directory::Directory;
use core::SegmentId;
use core::SegmentMeta;
use directory::error::{OpenReadError, OpenWriteError};
use directory::Directory;
use directory::{FileProtection, ReadOnlySource, WritePtr};
use indexer::segment_serializer::SegmentSerializer;
use schema::Schema;
use std::fmt;
use std::path::PathBuf;
use std::result;
use Result;
/// A segment is a piece of the index.
#[derive(Clone)]
@@ -111,8 +111,8 @@ mod tests {
use core::SegmentComponent;
use directory::Directory;
use std::collections::HashSet;
use schema::SchemaBuilder;
use std::collections::HashSet;
use Index;
#[test]

View File

@@ -1,6 +1,6 @@
use uuid::Uuid;
use std::fmt;
use std::cmp::{Ord, Ordering};
use std::fmt;
use uuid::Uuid;
#[cfg(test)]
use std::sync::atomic;

View File

@@ -1,7 +1,7 @@
use core::SegmentId;
use super::SegmentComponent;
use std::path::PathBuf;
use core::SegmentId;
use std::collections::HashSet;
use std::path::PathBuf;
#[derive(Clone, Debug, Serialize, Deserialize)]
struct DeleteMeta {

View File

@@ -1,30 +1,30 @@
use Result;
use core::Segment;
use core::SegmentId;
use core::SegmentComponent;
use std::sync::RwLock;
use common::HasLen;
use core::SegmentMeta;
use fastfield::{self, FastFieldNotAvailableError};
use fastfield::DeleteBitSet;
use store::StoreReader;
use schema::Document;
use DocId;
use std::sync::Arc;
use std::collections::HashMap;
use common::CompositeFile;
use std::fmt;
use common::HasLen;
use core::InvertedIndexReader;
use schema::Field;
use schema::FieldType;
use core::Segment;
use core::SegmentComponent;
use core::SegmentId;
use core::SegmentMeta;
use error::ErrorKind;
use fastfield::DeleteBitSet;
use fastfield::FacetReader;
use fastfield::FastFieldReader;
use schema::Schema;
use termdict::TermDictionary;
use fastfield::{self, FastFieldNotAvailableError};
use fastfield::{FastValue, MultiValueIntFastFieldReader};
use schema::Cardinality;
use fieldnorm::FieldNormReader;
use schema::Cardinality;
use schema::Document;
use schema::Field;
use schema::FieldType;
use schema::Schema;
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use std::sync::RwLock;
use store::StoreReader;
use termdict::TermDictionary;
use DocId;
use Result;
/// Entry point to access all of the datastructures of the `Segment`
///
@@ -109,12 +109,12 @@ impl SegmentReader {
) -> fastfield::Result<FastFieldReader<Item>> {
let field_entry = self.schema.get_field_entry(field);
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
{
self.fast_fields_composite
.open_read(field)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)
} else {
{
self.fast_fields_composite
.open_read(field)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)
} else {
Err(FastFieldNotAvailableError::new(field_entry))
}
}
@@ -127,17 +127,17 @@ impl SegmentReader {
) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
let field_entry = self.schema.get_field_entry(field);
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
{
let idx_reader = self.fast_fields_composite
.open_read_with_idx(field, 0)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
let vals_reader = self.fast_fields_composite
.open_read_with_idx(field, 1)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
} else {
{
let idx_reader = self.fast_fields_composite
.open_read_with_idx(field, 0)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
let vals_reader = self.fast_fields_composite
.open_read_with_idx(field, 1)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
} else {
Err(FastFieldNotAvailableError::new(field_entry))
}
}
@@ -175,12 +175,14 @@ impl SegmentReader {
/// They are simply stored as a fast field, serialized in
/// the `.fieldnorm` file of the segment.
pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
if let Some(fieldnorm_source) = self.fieldnorms_composite
.open_read(field) {
if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
FieldNormReader::open(fieldnorm_source)
} else {
let field_name = self.schema.get_field_name(field);
let err_msg= format!("Field norm not found for field {:?}. Was it market as indexed during indexing.", field_name);
let err_msg = format!(
"Field norm not found for field {:?}. Was it market as indexed during indexing.",
field_name
);
panic!(err_msg);
}
}
@@ -215,13 +217,12 @@ impl SegmentReader {
let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
let delete_bitset_opt =
if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
Some(DeleteBitSet::open(delete_data))
} else {
None
};
let delete_bitset_opt = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
Some(DeleteBitSet::open(delete_data))
} else {
None
};
let schema = segment.schema();
Ok(SegmentReader {

View File

@@ -1,10 +1,10 @@
#![allow(dead_code)]
mod skiplist_builder;
mod skiplist;
mod skiplist_builder;
pub use self::skiplist_builder::SkipListBuilder;
pub use self::skiplist::SkipList;
pub use self::skiplist_builder::SkipListBuilder;
#[cfg(test)]
mod tests {

View File

@@ -1,6 +1,6 @@
use common::{BinarySerializable, VInt};
use std::marker::PhantomData;
use std::cmp::max;
use std::marker::PhantomData;
static EMPTY: [u8; 0] = [];

View File

@@ -1,7 +1,7 @@
use std::io::Write;
use common::{BinarySerializable, VInt, is_power_of_2};
use std::marker::PhantomData;
use common::{is_power_of_2, BinarySerializable, VInt};
use std::io;
use std::io::Write;
use std::marker::PhantomData;
struct LayerBuilder<T: BinarySerializable> {
period_mask: usize,

View File

@@ -1,5 +1,5 @@
use std::mem;
use super::heap::{Heap, HeapAllocable};
use std::mem;
#[inline]
pub fn is_power_of_2(val: u32) -> bool {
@@ -99,8 +99,8 @@ impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> {
#[cfg(test)]
mod tests {
use super::*;
use super::super::heap::Heap;
use super::*;
#[test]
fn test_stack() {
@@ -120,14 +120,13 @@ mod tests {
}
}
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test::Bencher;
use super::Heap;
use super::ExpUnrolledLinkedList;
use super::Heap;
use test::Bencher;
const NUM_STACK: usize = 10_000;
const STACK_SIZE: u32 = 1000;
@@ -166,4 +165,4 @@ mod bench {
heap.clear();
});
}
}
}

View File

@@ -1,7 +1,7 @@
use super::heap::{BytesRef, Heap, HeapAllocable};
use postings::UnorderedTermId;
use std::iter;
use std::mem;
use postings::UnorderedTermId;
use super::heap::{BytesRef, Heap, HeapAllocable};
mod murmurhash2 {
@@ -117,11 +117,7 @@ struct QuadraticProbing {
impl QuadraticProbing {
fn compute(hash: usize, mask: usize) -> QuadraticProbing {
QuadraticProbing {
hash,
i: 0,
mask,
}
QuadraticProbing { hash, i: 0, mask }
}
#[inline]
@@ -135,21 +131,18 @@ use std::slice;
pub struct Iter<'a: 'b, 'b> {
hashmap: &'b TermHashMap<'a>,
inner: slice::Iter<'a, usize>
inner: slice::Iter<'a, usize>,
}
impl<'a, 'b> Iterator for Iter<'a, 'b> {
type Item = (&'b [u8], u32, UnorderedTermId);
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.cloned()
.map(move |bucket: usize| {
let kv = self.hashmap.table[bucket];
let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr);
(key, offset, bucket as UnorderedTermId)
})
self.inner.next().cloned().map(move |bucket: usize| {
let kv = self.hashmap.table[bucket];
let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr);
(key, offset, bucket as UnorderedTermId)
})
}
}
@@ -183,14 +176,15 @@ impl<'a> TermHashMap<'a> {
pub fn set_bucket(&mut self, hash: u32, key_value_addr: BytesRef, bucket: usize) {
self.occupied.push(bucket);
self.table[bucket] = KeyValue {
key_value_addr, hash
key_value_addr,
hash,
};
}
pub fn iter<'b: 'a>(&'b self) -> Iter<'a, 'b> {
Iter {
inner: self.occupied.iter(),
hashmap: &self
hashmap: &self,
}
}
@@ -225,8 +219,8 @@ impl<'a> TermHashMap<'a> {
#[cfg(all(test, unstable))]
mod bench {
use test::Bencher;
use super::murmurhash2::murmurhash2;
use test::Bencher;
#[bench]
fn bench_murmurhash_2(b: &mut Bencher) {
@@ -246,11 +240,11 @@ mod bench {
#[cfg(test)]
mod tests {
use super::*;
use super::super::heap::{Heap, HeapAllocable};
use super::murmurhash2::murmurhash2;
use std::collections::HashSet;
use super::split_memory;
use super::*;
use std::collections::HashSet;
struct TestValue {
val: u32,
@@ -332,5 +326,4 @@ mod tests {
assert_eq!(set.len(), 10_000);
}
}

View File

@@ -1,7 +1,7 @@
use byteorder::{ByteOrder, NativeEndian};
use std::cell::UnsafeCell;
use std::mem;
use std::ptr;
use byteorder::{ByteOrder, NativeEndian};
/// `BytesRef` refers to a slice in tantivy's custom `Heap`.
///

View File

@@ -1,10 +1,10 @@
mod expull;
pub(crate) mod hashmap;
mod heap;
mod expull;
pub use self::heap::{Heap, HeapAllocable};
pub use self::expull::ExpUnrolledLinkedList;
pub use self::hashmap::TermHashMap;
pub use self::heap::{Heap, HeapAllocable};
#[test]
fn test_unrolled_linked_list() {

View File

@@ -1,11 +1,11 @@
use std::marker::Send;
use std::fmt;
use std::path::Path;
use directory::error::{DeleteError, OpenReadError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use std::result;
use std::fmt;
use std::io;
use std::marker::Send;
use std::marker::Sync;
use std::path::Path;
use std::result;
/// Write-once read many (WORM) abstraction for where
/// tantivy's data should be stored.

View File

@@ -1,7 +1,7 @@
use std::error::Error as StdError;
use std::path::PathBuf;
use std::io;
use std::fmt;
use std::io;
use std::path::PathBuf;
/// General IO error with an optional path to the offending file.
#[derive(Debug)]

View File

@@ -1,18 +1,18 @@
use std::path::{Path, PathBuf};
use serde_json;
use core::MANAGED_FILEPATH;
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use std::result;
use std::io;
use Directory;
use std::sync::{Arc, RwLock};
use std::collections::HashSet;
use std::sync::RwLockWriteGuard;
use std::io::Write;
use core::MANAGED_FILEPATH;
use std::collections::HashMap;
use std::fmt;
use error::{ErrorKind, Result, ResultExt};
use serde_json;
use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt;
use std::io;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::result;
use std::sync::RwLockWriteGuard;
use std::sync::{Arc, RwLock};
use Directory;
/// Wrapper of directories that keeps track of files created by Tantivy.
///
@@ -282,10 +282,10 @@ impl Clone for ManagedDirectory {
mod tests {
use super::*;
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
use directory::MmapDirectory;
use std::path::Path;
use std::io::Write;
use std::path::Path;
use tempdir::TempDir;
lazy_static! {
@@ -294,7 +294,7 @@ mod tests {
}
#[test]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_managed_directory() {
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
@@ -343,7 +343,7 @@ mod tests {
}
#[test]
#[cfg(feature="mmap ")]
#[cfg(feature = "mmap ")]
fn test_managed_directory_gc_while_mmapped() {
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
@@ -373,7 +373,7 @@ mod tests {
}
#[test]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_managed_directory_protect() {
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());

View File

@@ -1,17 +1,17 @@
use atomicwrites;
use common::make_io_err;
use directory::Directory;
use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use directory::ReadOnlySource;
use directory::shared_vec_slice::SharedVecSlice;
use directory::Directory;
use directory::ReadOnlySource;
use directory::WritePtr;
use fst::raw::MmapReadOnly;
use std::collections::hash_map::Entry as HashMapEntry;
use std::collections::HashMap;
use std::convert::From;
use std::fmt;
use std::fs::{self, File};
use std::fs::OpenOptions;
use std::fs::{self, File};
use std::io::{self, Seek, SeekFrom};
use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf};

View File

@@ -4,29 +4,29 @@ WORM directory abstraction.
*/
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
mod mmap_directory;
mod ram_directory;
mod directory;
mod managed_directory;
mod ram_directory;
mod read_only_source;
mod shared_vec_slice;
mod managed_directory;
/// Errors specific to the directory module.
pub mod error;
use std::io::{BufWriter, Seek, Write};
pub use self::read_only_source::ReadOnlySource;
pub use self::directory::Directory;
pub use self::ram_directory::RAMDirectory;
pub use self::read_only_source::ReadOnlySource;
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
pub use self::mmap_directory::MmapDirectory;
pub(crate) use self::read_only_source::SourceRead;
pub(crate) use self::managed_directory::{FileProtection, ManagedDirectory};
pub(crate) use self::read_only_source::SourceRead;
/// Synonym of Seek + Write
pub trait SeekableWrite: Seek + Write {}
@@ -42,8 +42,8 @@ pub type WritePtr = BufWriter<Box<SeekableWrite>>;
mod tests {
use super::*;
use std::path::Path;
use std::io::{Seek, SeekFrom, Write};
use std::path::Path;
lazy_static! {
static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");
@@ -56,7 +56,7 @@ mod tests {
}
#[test]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_mmap_directory() {
let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
test_directory(&mut mmap_directory);

View File

@@ -1,14 +1,14 @@
use super::shared_vec_slice::SharedVecSlice;
use common::make_io_err;
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::WritePtr;
use directory::{Directory, ReadOnlySource};
use std::collections::HashMap;
use std::fmt;
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::result;
use std::sync::{Arc, RwLock};
use common::make_io_err;
use directory::{Directory, ReadOnlySource};
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::WritePtr;
use super::shared_vec_slice::SharedVecSlice;
/// Writer associated with the `RAMDirectory`
///

View File

@@ -1,11 +1,11 @@
#[cfg(feature="mmap")]
use fst::raw::MmapReadOnly;
use std::ops::Deref;
use super::shared_vec_slice::SharedVecSlice;
use common::HasLen;
use std::slice;
use std::io::{self, Read};
#[cfg(feature = "mmap")]
use fst::raw::MmapReadOnly;
use stable_deref_trait::{CloneStableDeref, StableDeref};
use std::io::{self, Read};
use std::ops::Deref;
use std::slice;
/// Read object that represents files in tantivy.
///
@@ -15,7 +15,7 @@ use stable_deref_trait::{CloneStableDeref, StableDeref};
/// hold by this object should never be altered or destroyed.
pub enum ReadOnlySource {
/// Mmap source of data
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
Mmap(MmapReadOnly),
/// Wrapping a `Vec<u8>`
Anonymous(SharedVecSlice),
@@ -41,7 +41,7 @@ impl ReadOnlySource {
/// Returns the data underlying the ReadOnlySource object.
pub fn as_slice(&self) -> &[u8] {
match *self {
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
}
@@ -66,9 +66,14 @@ impl ReadOnlySource {
/// 1KB slice is remaining, the whole `500MBs`
/// are retained in memory.
pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset);
assert!(
from_offset <= to_offset,
"Requested negative slice [{}..{}]",
from_offset,
to_offset
);
match *self {
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
ReadOnlySource::Mmap(ref mmap_read_only) => {
let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
ReadOnlySource::Mmap(sliced_mmap)
@@ -130,13 +135,11 @@ impl SourceRead {
pub fn slice_from(&self, start: usize) -> &[u8] {
&self.cursor[start..]
}
pub fn get(&self, idx: usize) -> u8 {
self.cursor[idx]
}
}
impl AsRef<[u8]> for SourceRead {

View File

@@ -1,8 +1,8 @@
use DocId;
use common::BitSet;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
use std::cmp::Ordering;
use common::BitSet;
use DocId;
/// Expresses the outcome of a call to `DocSet`'s `.skip_next(...)`.
#[derive(PartialEq, Eq, Debug)]

View File

@@ -2,13 +2,13 @@
use std::io;
use std::path::PathBuf;
use std::sync::PoisonError;
use directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use fastfield::FastFieldNotAvailableError;
use query;
use schema;
use fastfield::FastFieldNotAvailableError;
use serde_json;
use std::path::PathBuf;
use std::sync::PoisonError;
error_chain!(
errors {

View File

@@ -1,10 +1,10 @@
use bit_set::BitSet;
use directory::WritePtr;
use std::io::Write;
use std::io;
use directory::ReadOnlySource;
use DocId;
use common::HasLen;
use directory::ReadOnlySource;
use directory::WritePtr;
use std::io;
use std::io::Write;
use DocId;
/// Write a delete `BitSet`
///
@@ -62,10 +62,8 @@ impl DeleteBitSet {
b & (1u8 << shift) != 0
}
}
}
impl HasLen for DeleteBitSet {
fn len(&self) -> usize {
self.len
@@ -74,10 +72,10 @@ impl HasLen for DeleteBitSet {
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::*;
use bit_set::BitSet;
use directory::*;
use super::*;
use std::path::PathBuf;
fn test_delete_bitset_helper(bitset: &BitSet) {
let test_path = PathBuf::from("test");

View File

@@ -1,5 +1,5 @@
use std::result;
use schema::FieldEntry;
use std::result;
/// `FastFieldNotAvailableError` is returned when the
/// user requested for a fast field reader, and the field was not

View File

@@ -1,8 +1,8 @@
use super::MultiValueIntFastFieldReader;
use DocId;
use termdict::TermOrdinal;
use schema::Facet;
use termdict::TermDictionary;
use termdict::TermOrdinal;
use DocId;
/// The facet reader makes it possible to access the list of
/// facets associated to a given document in a specific

View File

@@ -23,26 +23,26 @@ values stored.
Read access performance is comparable to that of an array lookup.
*/
pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastFieldWriter};
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use common;
use schema::Cardinality;
use schema::FieldType;
use schema::Value;
pub use self::delete::DeleteBitSet;
pub use self::delete::write_delete_bitset;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValueIntFastFieldWriter, MultiValueIntFastFieldReader};
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
mod reader;
mod writer;
mod serializer;
mod error;
mod delete;
mod error;
mod facet_reader;
mod multivalued;
mod reader;
mod serializer;
mod writer;
/// Trait for types that are allowed for fast fields: (u64 or i64).
pub trait FastValue: Default + Clone + Copy {
@@ -121,19 +121,19 @@ fn value_to_u64(value: &Value) -> u64 {
#[cfg(test)]
mod tests {
use super::*;
use common::CompositeFile;
use directory::{Directory, RAMDirectory, WritePtr};
use fastfield::FastFieldReader;
use rand::Rng;
use rand::SeedableRng;
use rand::XorShiftRng;
use schema::{Schema, SchemaBuilder};
use schema::Document;
use schema::FAST;
use schema::Field;
use schema::FAST;
use schema::{Schema, SchemaBuilder};
use std::collections::HashMap;
use std::path::Path;
use super::*;
lazy_static! {
pub static ref SCHEMA: Schema = {
@@ -141,9 +141,7 @@ mod tests {
schema_builder.add_u64_field("field", FAST);
schema_builder.build()
};
pub static ref FIELD: Field = {
SCHEMA.get_field("field").unwrap()
};
pub static ref FIELD: Field = { SCHEMA.get_field("field").unwrap() };
}
#[test]
@@ -409,17 +407,17 @@ mod tests {
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::tests::{SCHEMA, generate_permutation};
use test::{self, Bencher};
use super::tests::FIELD;
use super::tests::{generate_permutation, SCHEMA};
use super::*;
use common::CompositeFile;
use directory::{Directory, RAMDirectory, WritePtr};
use fastfield::FastFieldReader;
use std::collections::HashMap;
use std::path::Path;
use super::*;
use test::{self, Bencher};
#[bench]
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
@@ -515,4 +513,4 @@ mod bench {
}
}
}
}

View File

@@ -1,15 +1,15 @@
mod writer;
mod reader;
mod writer;
pub use self::writer::MultiValueIntFastFieldWriter;
pub use self::reader::MultiValueIntFastFieldReader;
pub use self::writer::MultiValueIntFastFieldWriter;
#[cfg(test)]
mod tests {
use schema::SchemaBuilder;
use schema::Cardinality;
use schema::IntOptions;
use schema::SchemaBuilder;
use Index;
#[test]

View File

@@ -1,5 +1,5 @@
use DocId;
use fastfield::{FastFieldReader, FastValue};
use DocId;
/// Reader for a multivalued `u64` fast field.
///

View File

@@ -1,12 +1,12 @@
use fastfield::FastFieldSerializer;
use fastfield::serializer::FastSingleFieldSerializer;
use fastfield::value_to_u64;
use std::collections::HashMap;
use DocId;
use fastfield::FastFieldSerializer;
use itertools::Itertools;
use postings::UnorderedTermId;
use schema::{Document, Field};
use std::collections::HashMap;
use std::io;
use itertools::Itertools;
use DocId;
/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.
@@ -37,7 +37,6 @@ pub struct MultiValueIntFastFieldWriter {
}
impl MultiValueIntFastFieldWriter {
/// Creates a new `IntFastFieldWriter`
pub(crate) fn new(field: Field, is_facet: bool) -> Self {
MultiValueIntFastFieldWriter {
@@ -68,7 +67,7 @@ impl MultiValueIntFastFieldWriter {
pub fn add_document(&mut self, doc: &Document) {
self.next_doc();
// facets are indexed in the `SegmentWriter` as we encode their unordered id.
if !self.is_facet {
if !self.is_facet {
for field_value in doc.field_values() {
if field_value.field() == self.field {
self.add_val(value_to_u64(field_value.value()));

View File

@@ -1,19 +1,19 @@
use common::BinarySerializable;
use super::FastValue;
use common::bitpacker::BitUnpacker;
use common::CompositeFile;
use common::compute_num_bits;
use directory::{Directory, RAMDirectory, WritePtr};
use common::BinarySerializable;
use common::CompositeFile;
use directory::ReadOnlySource;
use DocId;
use directory::{Directory, RAMDirectory, WritePtr};
use fastfield::{FastFieldSerializer, FastFieldsWriter};
use owning_ref::OwningRef;
use schema::FAST;
use schema::SchemaBuilder;
use schema::FAST;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::mem;
use std::path::Path;
use super::FastValue;
use DocId;
/// Trait for accessing a fastfield.
///

View File

@@ -1,10 +1,10 @@
use common::BinarySerializable;
use directory::WritePtr;
use schema::Field;
use common::bitpacker::BitPacker;
use common::compute_num_bits;
use common::CountingWriter;
use common::BinarySerializable;
use common::CompositeWrite;
use common::CountingWriter;
use directory::WritePtr;
use schema::Field;
use std::io::{self, Write};
/// `FastFieldSerializer` is in charge of serializing

View File

@@ -1,13 +1,13 @@
use schema::{Cardinality, Document, Field, Schema};
use fastfield::FastFieldSerializer;
use std::io;
use schema::FieldType;
use common;
use common::VInt;
use std::collections::HashMap;
use postings::UnorderedTermId;
use super::multivalued::MultiValueIntFastFieldWriter;
use common;
use common::BinarySerializable;
use common::VInt;
use fastfield::FastFieldSerializer;
use postings::UnorderedTermId;
use schema::FieldType;
use schema::{Cardinality, Document, Field, Schema};
use std::collections::HashMap;
use std::io;
/// The fastfieldswriter regroup all of the fast field writers.
pub struct FastFieldsWriter {

View File

@@ -1,10 +1,8 @@
#[inline(always)]
pub fn id_to_fieldnorm(id: u8) -> u32 {
FIELD_NORMS_TABLE[id as usize]
}
#[inline(always)]
pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
FIELD_NORMS_TABLE
@@ -12,45 +10,34 @@ pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
.unwrap_or_else(|idx| idx - 1) as u8
}
pub const FIELD_NORMS_TABLE: [u32; 256] = [
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54,
56, 60, 64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144,
152, 168, 184, 200, 216, 232, 248, 264, 280, 312, 344, 376, 408, 440, 472, 504,
536, 600, 664, 728, 792, 856, 920, 984,
1048, 1176, 1304, 1432, 1560, 1688, 1816, 1944,
2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120,
4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240,
10264, 11288, 12312, 13336, 14360, 15384,
16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744,
32792, 36888, 40984, 45080, 49176, 53272, 57368, 61464,
65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480,
163864, 180248, 196632, 213016, 229400, 245784, 262168,
294936, 327704, 360472, 393240, 426008, 458776,
491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528,
983064, 1048600, 1179672, 1310744, 1441816, 1572888, 1703960, 1835032,
1966104, 2097176, 2359320, 2621464, 2883608, 3145752, 3407896, 3670040, 3932184,
4194328, 4718616, 5242904, 5767192, 6291480, 6815768, 7340056, 7864344, 8388632, 9437208,
10485784, 11534360, 12582936, 13631512, 14680088, 15728664, 16777240, 18874392, 20971544,
23068696, 25165848, 27263000, 29360152, 31457304, 33554456, 37748760, 41943064,
46137368, 50331672, 54525976, 58720280, 62914584, 67108888, 75497496, 83886104,
92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968, 167772184,
184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912, 335544344,
369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800, 671088664,
738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576, 1342177304,
1476395032, 1610612760, 1744830488, 1879048216, 2013265944
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 60,
64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 168, 184, 200, 216, 232,
248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, 1048,
1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120,
4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, 14360, 15384,
16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, 45080, 49176,
53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480,
163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, 393240, 426008,
458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, 1048600,
1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, 2621464,
2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, 6291480,
6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, 14680088,
15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, 31457304,
33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, 67108888,
75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968,
167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912,
335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800,
671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576,
1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944,
];
#[cfg(test)]
mod tests {
use super::{fieldnorm_to_id, id_to_fieldnorm, FIELD_NORMS_TABLE};
#[test]
fn test_decode_code() {
assert_eq!(fieldnorm_to_id(0), 0);
@@ -103,4 +90,4 @@ mod tests {
assert_eq!(FIELD_NORMS_TABLE[i], decode_fieldnorm_byte(i as u8));
}
}
}
}

View File

@@ -17,13 +17,12 @@
//!
//! This trick is used by the [BM25 similarity]().
mod code;
mod reader;
mod serializer;
mod writer;
mod reader;
pub use self::reader::FieldNormReader;
pub use self::writer::FieldNormsWriter;
pub use self::serializer::FieldNormsSerializer;
pub use self::writer::FieldNormsWriter;
use self::code::{fieldnorm_to_id, id_to_fieldnorm};

View File

@@ -1,8 +1,7 @@
use super::{id_to_fieldnorm, fieldnorm_to_id};
use super::{fieldnorm_to_id, id_to_fieldnorm};
use directory::ReadOnlySource;
use DocId;
/// Reads the fieldnorm associated to a document.
/// The fieldnorm represents the length associated to
/// a given Field of a given document.
@@ -21,16 +20,13 @@ use DocId;
/// precompute computationally expensive functions of the fieldnorm
/// in a very short array.
pub struct FieldNormReader {
data: ReadOnlySource
data: ReadOnlySource,
}
impl FieldNormReader {
/// Opens a field norm reader given its data source.
pub fn open(data: ReadOnlySource) -> Self {
FieldNormReader {
data
}
FieldNormReader { data }
}
/// Returns the `fieldnorm` associated to a doc id.
@@ -71,12 +67,13 @@ impl FieldNormReader {
#[cfg(test)]
impl From<Vec<u32>> for FieldNormReader {
fn from(field_norms: Vec<u32>) -> FieldNormReader {
let field_norms_id = field_norms.into_iter()
let field_norms_id = field_norms
.into_iter()
.map(FieldNormReader::fieldnorm_to_id)
.collect::<Vec<u8>>();
let field_norms_data = ReadOnlySource::from(field_norms_id);
FieldNormReader {
data: field_norms_data
data: field_norms_data,
}
}
}
}

View File

@@ -1,26 +1,21 @@
use directory::WritePtr;
use std::io;
use common::CompositeWrite;
use directory::WritePtr;
use schema::Field;
use std::io;
use std::io::Write;
pub struct FieldNormsSerializer {
composite_write: CompositeWrite,
}
impl FieldNormsSerializer {
/// Constructor
pub fn from_write(write: WritePtr) -> io::Result<FieldNormsSerializer> {
// just making room for the pointer to header.
let composite_write = CompositeWrite::wrap(write);
Ok(FieldNormsSerializer {
composite_write
})
Ok(FieldNormsSerializer { composite_write })
}
pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> {
let write = self.composite_write.for_field(field);
write.write_all(fieldnorms_data)?;
@@ -32,6 +27,4 @@ impl FieldNormsSerializer {
self.composite_write.close()?;
Ok(())
}
}

View File

@@ -1,26 +1,23 @@
use DocId;
use schema::Field;
use super::FieldNormsSerializer;
use std::io;
use schema::Schema;
use super::fieldnorm_to_id;
use super::FieldNormsSerializer;
use schema::Field;
use schema::Schema;
use std::io;
pub struct FieldNormsWriter {
fields: Vec<Field>,
fieldnorms_buffer: Vec<Vec<u8>>
fieldnorms_buffer: Vec<Vec<u8>>,
}
impl FieldNormsWriter {
pub fn fields_with_fieldnorm(schema: &Schema) -> Vec<Field> {
schema
.fields()
.iter()
.enumerate()
.filter(|&(_, field_entry)| {
field_entry.is_indexed()
})
.filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field, _)| Field(field as u32))
.collect::<Vec<Field>>()
}
@@ -35,9 +32,7 @@ impl FieldNormsWriter {
.unwrap_or(0);
FieldNormsWriter {
fields,
fieldnorms_buffer: (0..max_field)
.map(|_| Vec::new())
.collect::<Vec<_>>()
fieldnorms_buffer: (0..max_field).map(|_| Vec::new()).collect::<Vec<_>>(),
}
}
@@ -49,7 +44,10 @@ impl FieldNormsWriter {
pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) {
let fieldnorm_buffer: &mut Vec<u8> = &mut self.fieldnorms_buffer[field.0 as usize];
assert!(fieldnorm_buffer.len() <= doc as usize, "Cannot register a given fieldnorm twice");
assert!(
fieldnorm_buffer.len() <= doc as usize,
"Cannot register a given fieldnorm twice"
);
// we fill intermediary `DocId` as having a fieldnorm of 0.
fieldnorm_buffer.resize(doc as usize + 1, 0u8);
fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm);
@@ -62,4 +60,4 @@ impl FieldNormsWriter {
}
Ok(())
}
}
}

View File

@@ -1,10 +1,10 @@
use std::collections::HashSet;
use rand::thread_rng;
use std::collections::HashSet;
use rand::distributions::{IndependentSample, Range};
use schema::*;
use Index;
use Searcher;
use rand::distributions::{IndependentSample, Range};
fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {
assert!(searcher.segment_readers().len() < 20);
@@ -13,7 +13,7 @@ fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {
#[test]
#[ignore]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_indexing() {
let mut schema_builder = SchemaBuilder::default();

View File

@@ -1,7 +1,7 @@
use super::operation::DeleteOperation;
use std::sync::{Arc, RwLock};
use std::mem;
use std::ops::DerefMut;
use std::sync::{Arc, RwLock};
// The DeleteQueue is similar in conceptually to a multiple
// consumer single producer broadcast channel.

View File

@@ -1,6 +1,6 @@
use Directory;
use directory::error::OpenWriteError;
use core::LOCKFILE_FILEPATH;
use directory::error::OpenWriteError;
use Directory;
/// The directory lock is a mechanism used to
/// prevent the creation of two [`IndexWriter`](struct.IndexWriter.html)

View File

@@ -1,3 +1,6 @@
use super::operation::AddOperation;
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use bit_set::BitSet;
use chan;
use core::Index;
@@ -6,31 +9,28 @@ use core::SegmentComponent;
use core::SegmentId;
use core::SegmentMeta;
use core::SegmentReader;
use indexer::stamper::Stamper;
use futures::sync::oneshot::Receiver;
use datastruct::stacker::hashmap::split_memory;
use datastruct::stacker::Heap;
use directory::FileProtection;
use docset::DocSet;
use error::{Error, ErrorKind, Result, ResultExt};
use fastfield::write_delete_bitset;
use futures::sync::oneshot::Receiver;
use indexer::delete_queue::{DeleteCursor, DeleteQueue};
use datastruct::stacker::hashmap::split_memory;
use indexer::doc_opstamp_mapping::DocToOpstampMapping;
use indexer::MergePolicy;
use indexer::operation::DeleteOperation;
use indexer::stamper::Stamper;
use indexer::DirectoryLock;
use indexer::MergePolicy;
use indexer::SegmentEntry;
use indexer::SegmentWriter;
use docset::DocSet;
use schema::IndexRecordOption;
use schema::Document;
use schema::IndexRecordOption;
use schema::Term;
use std::mem;
use std::mem::swap;
use std::thread::JoinHandle;
use indexer::DirectoryLock;
use super::operation::AddOperation;
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use std::thread;
use std::thread::JoinHandle;
// Size of the margin for the heap. A segment is closed when the remaining memory
// in the heap goes below MARGIN_IN_BYTES.
@@ -443,10 +443,7 @@ impl IndexWriter {
}
/// Merges a given list of segments
pub fn merge(
&mut self,
segment_ids: &[SegmentId],
) -> Receiver<SegmentMeta> {
pub fn merge(&mut self, segment_ids: &[SegmentId]) -> Receiver<SegmentMeta> {
self.segment_updater.start_merge(segment_ids)
}
@@ -642,12 +639,12 @@ impl IndexWriter {
#[cfg(test)]
mod tests {
use env_logger;
use error::*;
use indexer::NoMergePolicy;
use schema::{self, Document};
use Index;
use Term;
use error::*;
use env_logger;
#[test]
fn test_lockfile_stops_duplicates() {

View File

@@ -99,8 +99,8 @@ impl Default for LogMergePolicy {
#[cfg(test)]
mod tests {
use super::*;
use indexer::merge_policy::MergePolicy;
use core::{SegmentId, SegmentMeta};
use indexer::merge_policy::MergePolicy;
fn test_merge_policy() -> LogMergePolicy {
let mut log_merge_policy = LogMergePolicy::default();

View File

@@ -1,7 +1,7 @@
use core::SegmentId;
use core::SegmentMeta;
use std::marker;
use std::fmt::Debug;
use std::marker;
/// Set of segment suggested for a merge.
#[derive(Debug, Clone)]

View File

@@ -1,24 +1,23 @@
use error::{ErrorKind, Result};
use core::SegmentReader;
use core::Segment;
use DocId;
use core::SegmentReader;
use core::SerializableSegment;
use indexer::SegmentSerializer;
use postings::InvertedIndexSerializer;
use itertools::Itertools;
use docset::DocSet;
use error::{ErrorKind, Result};
use fastfield::DeleteBitSet;
use schema::{Field, Schema};
use termdict::TermMerger;
use fastfield::FastFieldSerializer;
use fastfield::FastFieldReader;
use store::StoreWriter;
use std::cmp::{max, min};
use fastfield::FastFieldSerializer;
use fieldnorm::FieldNormReader;
use fieldnorm::FieldNormsSerializer;
use fieldnorm::FieldNormsWriter;
use fieldnorm::FieldNormReader;
use indexer::SegmentSerializer;
use itertools::Itertools;
use postings::InvertedIndexSerializer;
use postings::Postings;
use schema::{Field, Schema};
use std::cmp::{max, min};
use store::StoreWriter;
use termdict::TermMerger;
use DocId;
fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
let mut total_tokens = 0u64;
@@ -38,15 +37,17 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
total_tokens += reader.inverted_index(field).total_num_tokens();
}
}
total_tokens + count
.iter()
.cloned()
.enumerate()
.map(|(fieldnorm_ord, count)| count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64)
.sum::<u64>()
total_tokens
+ count
.iter()
.cloned()
.enumerate()
.map(|(fieldnorm_ord, count)| {
count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64
})
.sum::<u64>()
}
pub struct IndexMerger {
schema: Schema,
readers: Vec<SegmentReader>,
@@ -70,7 +71,6 @@ fn compute_min_max_val(
.map(|doc_id| u64_reader.get(doc_id))
.minmax()
.into_option()
}
None => {
// no deleted documents,
@@ -162,7 +162,7 @@ impl IndexMerger {
if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(
&u64_reader,
reader.max_doc(),
reader.delete_bitset()
reader.delete_bitset(),
) {
// the segment has some non-deleted documents
min_val = min(min_val, seg_min_val);
@@ -176,8 +176,10 @@ impl IndexMerger {
}
Err(_) => {
let fieldname = self.schema.get_field_name(field);
let error_msg =
format!("Failed to find a fast field reader for field {:?}", fieldname);
let error_msg = format!(
"Failed to find a fast field reader for field {:?}",
fieldname
);
bail!(ErrorKind::SchemaError(error_msg));
}
}
@@ -211,7 +213,6 @@ impl IndexMerger {
}
fn write_postings(&self, serializer: &mut InvertedIndexSerializer) -> Result<()> {
let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
let mut delta_computer = DeltaComputer::new();
@@ -318,7 +319,7 @@ impl IndexMerger {
for (segment_ord, mut segment_postings) in segment_postings {
let old_to_new_doc_id = &merged_doc_id_map[segment_ord];
loop {
let doc = segment_postings.doc();
// `.advance()` has been called once before the loop.
//
@@ -335,7 +336,8 @@ impl IndexMerger {
let term_freq = segment_postings.term_freq();
segment_postings.positions(&mut positions_buffer);
let delta_positions = delta_computer.compute_delta(&positions_buffer);
let delta_positions =
delta_computer.compute_delta(&positions_buffer);
field_serializer.write_doc(
remapped_doc_id,
term_freq,
@@ -389,21 +391,21 @@ impl SerializableSegment for IndexMerger {
#[cfg(test)]
mod tests {
use schema;
use schema::Document;
use schema::Term;
use schema::TextFieldIndexing;
use query::TermQuery;
use schema::Field;
use core::Index;
use Searcher;
use DocAddress;
use collector::tests::FastFieldTestCollector;
use collector::tests::TestCollector;
use query::BooleanQuery;
use schema::IndexRecordOption;
use schema::Cardinality;
use core::Index;
use futures::Future;
use query::BooleanQuery;
use query::TermQuery;
use schema;
use schema::Cardinality;
use schema::Document;
use schema::Field;
use schema::IndexRecordOption;
use schema::Term;
use schema::TextFieldIndexing;
use DocAddress;
use Searcher;
#[test]
fn test_index_merger_no_deletes() {

View File

@@ -1,29 +1,29 @@
pub mod index_writer;
pub mod segment_serializer;
pub mod merger;
pub mod merge_policy;
mod log_merge_policy;
mod segment_register;
mod segment_writer;
mod segment_manager;
pub mod delete_queue;
pub mod segment_updater;
mod directory_lock;
mod segment_entry;
mod doc_opstamp_mapping;
pub mod index_writer;
mod log_merge_policy;
pub mod merge_policy;
pub mod merger;
pub mod operation;
mod stamper;
mod prepared_commit;
mod segment_entry;
mod segment_manager;
mod segment_register;
pub mod segment_serializer;
pub mod segment_updater;
mod segment_writer;
mod stamper;
pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::{SegmentEntry, SegmentState};
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_writer::SegmentWriter;
pub(crate) use self::directory_lock::DirectoryLock;
pub use self::index_writer::IndexWriter;
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::{SegmentEntry, SegmentState};
pub use self::segment_manager::SegmentManager;
pub(crate) use self::directory_lock::DirectoryLock;
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_writer::SegmentWriter;
/// Alias for the default merge policy, which is the `LogMergePolicy`.
pub type DefaultMergePolicy = LogMergePolicy;

View File

@@ -1,5 +1,5 @@
use Result;
use super::IndexWriter;
use Result;
/// A prepared commit
pub struct PreparedCommit<'a> {
@@ -13,7 +13,7 @@ impl<'a> PreparedCommit<'a> {
PreparedCommit {
index_writer,
payload: None,
opstamp
opstamp,
}
}

View File

@@ -1,7 +1,7 @@
use core::SegmentMeta;
use bit_set::BitSet;
use indexer::delete_queue::DeleteCursor;
use core::SegmentId;
use core::SegmentMeta;
use indexer::delete_queue::DeleteCursor;
use std::fmt;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]

View File

@@ -1,14 +1,14 @@
use super::segment_register::SegmentRegister;
use std::sync::RwLock;
use core::SegmentId;
use core::SegmentMeta;
use core::{LOCKFILE_FILEPATH, META_FILEPATH};
use core::SegmentId;
use indexer::SegmentEntry;
use std::path::PathBuf;
use std::collections::hash_set::HashSet;
use std::sync::{RwLockReadGuard, RwLockWriteGuard};
use std::fmt::{self, Debug, Formatter};
use indexer::delete_queue::DeleteCursor;
use indexer::SegmentEntry;
use std::collections::hash_set::HashSet;
use std::fmt::{self, Debug, Formatter};
use std::path::PathBuf;
use std::sync::RwLock;
use std::sync::{RwLockReadGuard, RwLockWriteGuard};
#[derive(Default)]
struct SegmentRegisters {

View File

@@ -1,10 +1,10 @@
use core::SegmentId;
use std::collections::HashMap;
use core::SegmentMeta;
use indexer::delete_queue::DeleteCursor;
use indexer::segment_entry::SegmentEntry;
use std::collections::HashMap;
use std::fmt;
use std::fmt::{Debug, Formatter};
use indexer::segment_entry::SegmentEntry;
use indexer::delete_queue::DeleteCursor;
/// The segment register keeps track
/// of the list of segments, their sizes, as well
@@ -113,11 +113,11 @@ impl SegmentRegister {
#[cfg(test)]
mod tests {
use indexer::SegmentState;
use super::*;
use core::SegmentId;
use core::SegmentMeta;
use indexer::delete_queue::*;
use super::*;
use indexer::SegmentState;
fn segment_ids(segment_register: &SegmentRegister) -> Vec<SegmentId> {
segment_register

View File

@@ -3,9 +3,9 @@ use Result;
use core::Segment;
use core::SegmentComponent;
use fastfield::FastFieldSerializer;
use store::StoreWriter;
use fieldnorm::FieldNormsSerializer;
use postings::InvertedIndexSerializer;
use store::StoreWriter;
/// Segment serializer is in charge of laying out on disk
/// the data accumulated and sorted by the `SegmentWriter`.
@@ -47,7 +47,7 @@ impl SegmentSerializer {
}
/// Accessor to the field norm serializer.
pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer {
&mut self.fieldnorms_serializer
}

View File

@@ -1,40 +1,40 @@
use super::segment_manager::{get_mergeable_segments, SegmentManager};
use core::Index;
use core::IndexMeta;
use core::META_FILEPATH;
use core::Segment;
use core::SegmentId;
use core::SegmentMeta;
use core::SerializableSegment;
use core::META_FILEPATH;
use directory::Directory;
use indexer::stamper::Stamper;
use directory::FileProtection;
use error::{Error, ErrorKind, Result};
use futures_cpupool::CpuPool;
use futures::Future;
use futures::oneshot;
use futures::sync::oneshot::Receiver;
use directory::FileProtection;
use indexer::{DefaultMergePolicy, MergePolicy};
use futures::Future;
use futures_cpupool::CpuFuture;
use futures_cpupool::CpuPool;
use indexer::delete_queue::DeleteCursor;
use indexer::index_writer::advance_deletes;
use indexer::MergeCandidate;
use indexer::merger::IndexMerger;
use indexer::stamper::Stamper;
use indexer::MergeCandidate;
use indexer::SegmentEntry;
use indexer::SegmentSerializer;
use futures_cpupool::CpuFuture;
use serde_json;
use indexer::delete_queue::DeleteCursor;
use indexer::{DefaultMergePolicy, MergePolicy};
use schema::Schema;
use serde_json;
use std::borrow::BorrowMut;
use std::collections::HashMap;
use std::io::Write;
use std::mem;
use std::ops::DerefMut;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::atomic::Ordering;
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc;
use std::sync::RwLock;
use std::thread;
use std::thread::JoinHandle;
use super::segment_manager::{get_mergeable_segments, SegmentManager};
/// Save the index meta file.
/// This operation is atomic:
@@ -283,10 +283,7 @@ impl SegmentUpdater {
}).wait()
}
pub fn start_merge(
&self,
segment_ids: &[SegmentId],
) -> Receiver<SegmentMeta> {
pub fn start_merge(&self, segment_ids: &[SegmentId]) -> Receiver<SegmentMeta> {
self.0.segment_manager.start_merge(segment_ids);
let segment_updater_clone = self.clone();
@@ -482,9 +479,9 @@ impl SegmentUpdater {
#[cfg(test)]
mod tests {
use Index;
use schema::*;
use indexer::merge_policy::tests::MergeWheneverPossible;
use schema::*;
use Index;
#[test]
fn test_delete_during_merge() {

View File

@@ -1,23 +1,23 @@
use Result;
use DocId;
use std::io;
use std::str;
use schema::Schema;
use schema::Term;
use super::operation::AddOperation;
use core::Segment;
use core::SerializableSegment;
use fastfield::FastFieldsWriter;
use schema::FieldType;
use indexer::segment_serializer::SegmentSerializer;
use datastruct::stacker::Heap;
use fastfield::FastFieldsWriter;
use fieldnorm::FieldNormsWriter;
use indexer::index_writer::MARGIN_IN_BYTES;
use super::operation::AddOperation;
use indexer::segment_serializer::SegmentSerializer;
use postings::MultiFieldPostingsWriter;
use schema::FieldType;
use schema::Schema;
use schema::Term;
use schema::Value;
use std::io;
use std::str;
use tokenizer::BoxedTokenizer;
use tokenizer::FacetTokenizer;
use tokenizer::{TokenStream, Tokenizer};
use schema::Value;
use fieldnorm::FieldNormsWriter;
use DocId;
use Result;
/// A `SegmentWriter` is in charge of creating a segment index from
/// documents.
@@ -35,7 +35,6 @@ pub struct SegmentWriter<'a> {
tokenizers: Vec<Option<Box<BoxedTokenizer>>>,
}
impl<'a> SegmentWriter<'a> {
/// Creates a new `SegmentWriter`
///
@@ -179,8 +178,7 @@ impl<'a> SegmentWriter<'a> {
} else {
0
};
self.fieldnorms_writer
.record(doc_id, field, num_tokens);
self.fieldnorms_writer.record(doc_id, field, num_tokens);
}
FieldType::U64(ref int_option) => {
if int_option.is_indexed() {

View File

@@ -2,11 +2,11 @@
// For the moment, let's just use AtomicUsize on
// x86/64-bit platforms, and a mutex on other platforms.
#[cfg(target="x86_64")]
#[cfg(target = "x86_64")]
mod archicture_impl {
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
#[derive(Clone, Default)]
pub struct Stamper(Arc<AtomicU64>);
@@ -22,8 +22,7 @@ mod archicture_impl {
}
}
#[cfg(not(target="x86_64"))]
#[cfg(not(target = "x86_64"))]
mod archicture_impl {
use std::sync::{Arc, Mutex};
@@ -47,7 +46,6 @@ mod archicture_impl {
pub use self::archicture_impl::Stamper;
#[cfg(test)]
mod test {
@@ -65,4 +63,4 @@ mod test {
assert_eq!(stamper.stamp(), 10u64);
assert_eq!(stamper_clone.stamp(), 11u64);
}
}
}

View File

@@ -1,8 +1,7 @@
#![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
#![cfg_attr(feature = "cargo-clippy", allow(module_inception))]
#![cfg_attr(feature = "cargo-clippy", allow(inline_always))]
#![cfg_attr(all(feature="unstable", test), feature(test))]
#![cfg_attr(all(feature = "unstable", test), feature(test))]
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
#![allow(unknown_lints)]
#![allow(new_without_default)]
@@ -123,9 +122,10 @@ extern crate log;
#[macro_use]
extern crate error_chain;
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
extern crate atomicwrites;
extern crate bit_set;
extern crate bitpacking;
extern crate byteorder;
extern crate chan;
extern crate combine;
@@ -145,7 +145,6 @@ extern crate stable_deref_trait;
extern crate tempdir;
extern crate tempfile;
extern crate uuid;
extern crate bitpacking;
#[cfg(test)]
#[macro_use]
@@ -160,7 +159,7 @@ extern crate winapi;
#[cfg(test)]
extern crate rand;
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
extern crate test;
extern crate tinysegmenter;
@@ -179,36 +178,36 @@ pub use error::{Error, ErrorKind, ResultExt};
/// Tantivy result.
pub type Result<T> = std::result::Result<T, Error>;
mod core;
mod compression;
mod indexer;
mod common;
mod compression;
mod core;
mod indexer;
mod datastruct;
#[allow(unused_doc_comment)]
mod error;
pub mod tokenizer;
mod datastruct;
pub mod termdict;
pub mod store;
pub mod query;
pub mod directory;
pub mod collector;
pub mod postings;
pub mod schema;
pub mod directory;
pub mod fastfield;
pub(crate) mod fieldnorm;
pub mod postings;
pub mod query;
pub mod schema;
pub mod store;
pub mod termdict;
mod docset;
pub use self::docset::{DocSet, SkipResult};
pub use directory::Directory;
pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta};
pub use indexer::IndexWriter;
pub use schema::{Document, Term};
pub use core::{InvertedIndexReader, SegmentReader};
pub use postings::Postings;
pub use core::SegmentComponent;
pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta};
pub use core::{InvertedIndexReader, SegmentReader};
pub use directory::Directory;
pub use indexer::IndexWriter;
pub use postings::Postings;
pub use schema::{Document, Term};
pub use common::{i64_to_u64, u64_to_i64};
@@ -224,10 +223,10 @@ pub fn version() -> &'static str {
/// Defines tantivy's merging strategy
pub mod merge_policy {
pub use indexer::MergePolicy;
pub use indexer::LogMergePolicy;
pub use indexer::NoMergePolicy;
pub use indexer::DefaultMergePolicy;
pub use indexer::LogMergePolicy;
pub use indexer::MergePolicy;
pub use indexer::NoMergePolicy;
}
/// A `u32` identifying a document within a segment.
@@ -276,18 +275,23 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
mod tests {
use collector::tests::TestCollector;
use Index;
use core::SegmentReader;
use query::BooleanQuery;
use schema::*;
use docset::DocSet;
use query::BooleanQuery;
use rand::distributions::{IndependentSample, Range};
use rand::{Rng, SeedableRng, XorShiftRng};
use schema::*;
use Index;
use IndexWriter;
use Postings;
use rand::{Rng, SeedableRng, XorShiftRng};
use rand::distributions::{IndependentSample, Range};
pub fn assert_nearly_equals(expected: f32, val: f32) {
assert!(nearly_equals(val, expected), "Got {}, expected {}.", val, expected);
assert!(
nearly_equals(val, expected),
"Got {}, expected {}.",
val,
expected
);
}
pub fn nearly_equals(a: f32, b: f32) -> bool {
@@ -314,7 +318,7 @@ mod tests {
}
#[test]
#[cfg(feature="mmap")]
#[cfg(feature = "mmap")]
fn test_indexing() {
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", TEXT);
@@ -440,7 +444,6 @@ mod tests {
}
}
fn advance_undeleted(docset: &mut DocSet, reader: &SegmentReader) -> bool {
while docset.advance() {
if !reader.is_deleted(docset.doc()) {

View File

@@ -6,20 +6,19 @@ Postings module (also called inverted index)
///
/// Postings, also called inverted lists, are the key data structure
/// to full-text search.
mod postings;
mod recorder;
mod serializer;
mod postings_writer;
mod term_info;
mod recorder;
mod segment_postings;
mod serializer;
mod term_info;
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
pub use self::term_info::TermInfo;
pub use self::postings::Postings;
pub use self::term_info::TermInfo;
pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
@@ -38,22 +37,22 @@ pub(crate) enum FreqReadingOption {
pub mod tests {
use super::*;
use core::Index;
use core::SegmentComponent;
use core::SegmentReader;
use datastruct::stacker::Heap;
use docset::{DocSet, SkipResult};
use fieldnorm::FieldNormReader;
use indexer::operation::AddOperation;
use indexer::SegmentWriter;
use query::Scorer;
use rand::{Rng, SeedableRng, XorShiftRng};
use schema::Field;
use schema::IndexRecordOption;
use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT};
use std::iter;
use DocId;
use Score;
use query::Scorer;
use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT};
use core::SegmentComponent;
use indexer::SegmentWriter;
use core::SegmentReader;
use core::Index;
use schema::IndexRecordOption;
use std::iter;
use datastruct::stacker::Heap;
use schema::Field;
use indexer::operation::AddOperation;
use rand::{Rng, SeedableRng, XorShiftRng};
use fieldnorm::FieldNormReader;
#[test]
pub fn test_position_write() {
@@ -124,7 +123,6 @@ pub mod tests {
assert_eq!(&[0, 5], &positions[..]);
}
{
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
@@ -203,13 +201,14 @@ pub mod tests {
{
let segment_reader = SegmentReader::open(&segment).unwrap();
{
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field) ;
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field);
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
for i in 2..1000 {
assert_eq!(
fieldnorm_reader.fieldnorm_id(i),
FieldNormReader::fieldnorm_to_id(i + 1) );
FieldNormReader::fieldnorm_to_id(i + 1)
);
}
}
{
@@ -446,7 +445,7 @@ pub mod tests {
// delete everything else
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer.delete_term(term_1);
assert!(index_writer.commit().is_ok());
}
@@ -504,7 +503,7 @@ pub mod tests {
let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
for _ in 0 .. posting_list_size {
for _ in 0..posting_list_size {
let mut doc = Document::default();
if rng.gen_weighted_bool(15) {
doc.add_text(text_field, "a");
@@ -595,17 +594,16 @@ pub mod tests {
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test::{self, Bencher};
use schema::IndexRecordOption;
use tests;
use super::tests::*;
use docset::SkipResult;
use DocSet;
use query::Intersection;
use schema::IndexRecordOption;
use test::{self, Bencher};
use tests;
use DocSet;
#[bench]
fn bench_segment_postings(b: &mut Bencher) {
@@ -723,4 +721,4 @@ mod bench {
s
});
}
}
}

View File

@@ -1,21 +1,21 @@
use DocId;
use schema::Term;
use postings::{FieldSerializer, InvertedIndexSerializer};
use std::io;
use std::collections::HashMap;
use postings::Recorder;
use Result;
use schema::{Field, Schema};
use std::marker::PhantomData;
use std::ops::DerefMut;
use datastruct::stacker::{Heap, TermHashMap};
use postings::Recorder;
use postings::UnorderedTermId;
use postings::{FieldSerializer, InvertedIndexSerializer};
use postings::{NothingRecorder, TFAndPositionRecorder, TermFrequencyRecorder};
use schema::FieldEntry;
use schema::FieldType;
use schema::IndexRecordOption;
use schema::Term;
use schema::{Field, Schema};
use std::collections::HashMap;
use std::io;
use std::marker::PhantomData;
use std::ops::DerefMut;
use tokenizer::Token;
use tokenizer::TokenStream;
use schema::IndexRecordOption;
use postings::UnorderedTermId;
use DocId;
use Result;
fn posting_from_field_entry<'a>(
field_entry: &FieldEntry,
@@ -123,7 +123,8 @@ impl<'a> MultiFieldPostingsWriter<'a> {
unordered_term_mappings.insert(field, mapping);
let postings_writer = &self.per_field_postings_writers[field.0 as usize];
let mut field_serializer = serializer.new_field(field, postings_writer.total_num_tokens())?;
let mut field_serializer =
serializer.new_field(field, postings_writer.total_num_tokens())?;
postings_writer.serialize(
&term_offsets[start..stop],
&mut field_serializer,

View File

@@ -1,7 +1,7 @@
use DocId;
use std::{self, io};
use postings::FieldSerializer;
use datastruct::stacker::{ExpUnrolledLinkedList, Heap, HeapAllocable};
use postings::FieldSerializer;
use std::{self, io};
use DocId;
const EMPTY_ARRAY: [u32; 0] = [0u32; 0];
const POSITION_END: u32 = std::u32::MAX;

View File

@@ -2,15 +2,15 @@ use compression::{BlockDecoder, CompressedIntStream, VIntDecoder, COMPRESSION_BL
use DocId;
use common::BitSet;
use common::CountingWriter;
use common::HasLen;
use postings::Postings;
use docset::{DocSet, SkipResult};
use fst::Streamer;
use compression::compressed_block_size;
use directory::{ReadOnlySource, SourceRead};
use postings::FreqReadingOption;
use docset::{DocSet, SkipResult};
use fst::Streamer;
use postings::serializer::PostingsSerializer;
use common::CountingWriter;
use postings::FreqReadingOption;
use postings::Postings;
struct PositionComputer {
// store the amount of position int
@@ -84,9 +84,13 @@ impl SegmentPostings {
for &doc in docs {
postings_serializer.write_doc(doc, 1u32).unwrap();
}
postings_serializer.close_term().expect("In memory Serialization should never fail.");
postings_serializer
.close_term()
.expect("In memory Serialization should never fail.");
}
let (buffer , _) = counting_writer.finish().expect("Serializing in a buffer should never fail.");
let (buffer, _) = counting_writer
.finish()
.expect("Serializing in a buffer should never fail.");
let data = ReadOnlySource::from(buffer);
let block_segment_postings = BlockSegmentPostings::from_data(
docs.len(),
@@ -98,7 +102,6 @@ impl SegmentPostings {
}
impl SegmentPostings {
/// Reads segment postings from an `&[u8]`
///
/// * `len` - number of documents in the posting lists.
@@ -125,7 +128,7 @@ fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usi
loop {
let new = start + jump;
if new >= end {
return (start, end)
return (start, end);
}
if arr[new] > target {
return (start, new);
@@ -163,7 +166,8 @@ impl DocSet for SegmentPostings {
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..];
let sum_freq: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
self.position_computer
.as_mut()
.unwrap()
.add_skip(sum_freq as usize);
}
@@ -198,7 +202,8 @@ impl DocSet for SegmentPostings {
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
let sum_freqs: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
self.position_computer
.as_mut()
.unwrap()
.add_skip(sum_freqs as usize);
}
@@ -211,7 +216,6 @@ impl DocSet for SegmentPostings {
}
}
// goes to the next element.
// next needs to be called a first time to point to the correct element.
#[inline]
@@ -262,7 +266,6 @@ impl DocSet for SegmentPostings {
}
}
impl HasLen for SegmentPostings {
fn len(&self) -> usize {
self.block_cursor.doc_freq()
@@ -284,7 +287,10 @@ impl Postings for SegmentPostings {
}
unsafe {
output.set_len(term_freq);
self.position_computer.as_mut().unwrap().positions_with_offset(offset, &mut output[..])
self.position_computer
.as_mut()
.unwrap()
.positions_with_offset(offset, &mut output[..])
}
} else {
output.clear();
@@ -473,16 +479,16 @@ impl<'b> Streamer<'b> for BlockSegmentPostings {
#[cfg(test)]
mod tests {
use docset::DocSet;
use super::BlockSegmentPostings;
use super::SegmentPostings;
use schema::SchemaBuilder;
use common::HasLen;
use core::Index;
use schema::INT_INDEXED;
use schema::Term;
use docset::DocSet;
use fst::Streamer;
use schema::IndexRecordOption;
use common::HasLen;
use super::BlockSegmentPostings;
use schema::SchemaBuilder;
use schema::Term;
use schema::INT_INDEXED;
#[test]
fn test_empty_segment_postings() {
@@ -570,4 +576,3 @@ mod tests {
assert_eq!(block_segments.docs(), &[1, 3, 5]);
}
}

View File

@@ -1,19 +1,19 @@
use Result;
use super::TermInfo;
use common::BinarySerializable;
use common::CompositeWrite;
use common::CountingWriter;
use compression::VIntEncoder;
use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE};
use core::Segment;
use directory::WritePtr;
use schema::Field;
use schema::FieldEntry;
use schema::FieldType;
use schema::Schema;
use directory::WritePtr;
use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE};
use DocId;
use core::Segment;
use std::io::{self, Write};
use compression::VIntEncoder;
use common::BinarySerializable;
use common::CountingWriter;
use common::CompositeWrite;
use termdict::TermDictionaryBuilder;
use DocId;
use Result;
/// `PostingsSerializer` is in charge of serializing
/// postings on disk, in the
@@ -84,7 +84,11 @@ impl InvertedIndexSerializer {
/// a given field.
///
/// Loads the indexing options for the given field.
pub fn new_field(&mut self, field: Field, total_num_tokens: u64) -> io::Result<FieldSerializer> {
pub fn new_field(
&mut self,
field: Field,
total_num_tokens: u64,
) -> io::Result<FieldSerializer> {
let field_entry: &FieldEntry = self.schema.get_field_entry(field);
let term_dictionary_write = self.terms_write.for_field(field);
let postings_write = self.postings_write.for_field(field);
@@ -124,7 +128,6 @@ impl<'a> FieldSerializer<'a> {
postings_write: &'a mut CountingWriter<WritePtr>,
positions_write: &'a mut CountingWriter<WritePtr>,
) -> io::Result<FieldSerializer<'a>> {
let (term_freq_enabled, position_enabled): (bool, bool) = match field_type {
FieldType::Str(ref text_options) => {
if let Some(text_indexing_options) = text_options.get_indexing_options() {

View File

@@ -1,12 +1,12 @@
use query::Query;
use query::Weight;
use query::Scorer;
use core::Searcher;
use core::SegmentReader;
use docset::DocSet;
use query::Query;
use query::Scorer;
use query::Weight;
use DocId;
use Result;
use Score;
use DocId;
use core::Searcher;
/// Query that matches all of the documents.
///

View File

@@ -1,7 +1,7 @@
use common::{BitSet, TinySet};
use DocId;
use docset::{DocSet, SkipResult};
use std::cmp::Ordering;
use DocId;
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
///
@@ -120,10 +120,10 @@ impl DocSet for BitSetDocSet {
#[cfg(test)]
mod tests {
use DocId;
use super::BitSetDocSet;
use common::BitSet;
use docset::{DocSet, SkipResult};
use super::BitSetDocSet;
use DocId;
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
let mut docset = BitSet::with_max_value(max_doc);
@@ -219,14 +219,13 @@ mod tests {
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use tests;
use test;
use super::BitSet;
use super::BitSetDocSet;
use test;
use tests;
use DocSet;
#[bench]
@@ -264,4 +263,4 @@ mod bench {
while docset.advance() {}
});
}
}
}

View File

@@ -1,7 +1,7 @@
use fieldnorm::FieldNormReader;
use Term;
use Searcher;
use Score;
use Searcher;
use Term;
const K1: f32 = 1.2;
const B: f32 = 0.75;
@@ -11,7 +11,6 @@ fn idf(doc_freq: u64, doc_count: u64) -> f32 {
(1f32 + x).ln()
}
fn cached_tf_component(fieldnorm: u32, average_fieldnorm: f32) -> f32 {
K1 * (1f32 - B + B * fieldnorm as f32 / average_fieldnorm)
}
@@ -32,11 +31,10 @@ pub struct BM25Weight {
}
impl BM25Weight {
pub fn null() -> BM25Weight {
BM25Weight {
weight: 0f32,
cache: [1f32; 256]
cache: [1f32; 256],
}
}
@@ -44,7 +42,11 @@ impl BM25Weight {
assert!(!terms.is_empty(), "BM25 requires at least one term");
let field = terms[0].field();
for term in &terms[1..] {
assert_eq!(term.field(), field, "All terms must belong to the same field.");
assert_eq!(
term.field(),
field,
"All terms must belong to the same field."
);
}
let mut total_num_tokens = 0u64;
@@ -56,7 +58,8 @@ impl BM25Weight {
}
let average_fieldnorm = total_num_tokens as f32 / total_num_docs as f32;
let idf = terms.iter()
let idf = terms
.iter()
.map(|term| {
let term_doc_freq = searcher.doc_freq(term);
idf(term_doc_freq, total_num_docs)
@@ -83,12 +86,12 @@ impl BM25Weight {
#[cfg(test)]
mod tests {
use tests::assert_nearly_equals;
use super::idf;
use tests::assert_nearly_equals;
#[test]
fn test_idf() {
assert_nearly_equals(idf(1, 2), 0.6931472);
}
}

View File

@@ -1,12 +1,12 @@
use Result;
use super::boolean_weight::BooleanWeight;
use query::Weight;
use Searcher;
use query::Query;
use schema::Term;
use query::TermQuery;
use schema::IndexRecordOption;
use query::Occur;
use query::Query;
use query::TermQuery;
use query::Weight;
use schema::IndexRecordOption;
use schema::Term;
use Result;
use Searcher;
/// The boolean query combines a set of queries
///
@@ -48,7 +48,8 @@ impl BooleanQuery {
let occur_term_queries: Vec<(Occur, Box<Query>)> = terms
.into_iter()
.map(|term| {
let term_query: Box<Query> = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
let term_query: Box<Query> =
Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
(Occur::Should, term_query)
})
.collect();

View File

@@ -1,19 +1,18 @@
use query::Weight;
use core::SegmentReader;
use query::Union;
use std::collections::HashMap;
use query::EmptyScorer;
use query::Scorer;
use downcast::Downcast;
use std::borrow::Borrow;
use query::intersect_scorers;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::term_query::TermScorer;
use query::EmptyScorer;
use query::Exclude;
use query::Occur;
use query::RequiredOptionalScorer;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::Scorer;
use query::Union;
use query::Weight;
use std::borrow::Borrow;
use std::collections::HashMap;
use Result;
use query::intersect_scorers;
use query::term_query::TermScorer;
fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
where
@@ -41,7 +40,6 @@ where
let scorer: Box<Scorer> = Box::new(Union::<_, TScoreCombiner>::from(scorers));
return scorer;
}
pub struct BooleanWeight {
@@ -78,9 +76,9 @@ impl BooleanWeight {
.remove(&Occur::MustNot)
.map(scorer_union::<TScoreCombiner>);
let must_scorer_opt: Option<Box<Scorer>> =
per_occur_scorers.remove(&Occur::Must)
.map(intersect_scorers);
let must_scorer_opt: Option<Box<Scorer>> = per_occur_scorers
.remove(&Occur::Must)
.map(intersect_scorers);
let positive_scorer: Box<Scorer> = match (should_scorer_opt, must_scorer_opt) {
(Some(should_scorer), Some(must_scorer)) => {

View File

@@ -7,19 +7,19 @@ pub use self::boolean_query::BooleanQuery;
mod tests {
use super::*;
use query::Occur;
use query::Query;
use query::TermQuery;
use query::Intersection;
use query::Scorer;
use collector::tests::TestCollector;
use Index;
use downcast::Downcast;
use schema::*;
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::score_combiner::SumWithCoordsCombiner;
use query::term_query::TermScorer;
use query::Intersection;
use query::Occur;
use query::Query;
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::Scorer;
use query::TermQuery;
use schema::*;
use Index;
fn aux_test_helper() -> (Index, Field) {
let mut schema_builder = SchemaBuilder::default();
@@ -171,7 +171,6 @@ mod tests {
}
}
#[test]
pub fn test_intersection_score() {
let (index, text_field) = aux_test_helper();
@@ -193,7 +192,10 @@ mod tests {
};
{
let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), (Occur::Must, make_term_query("b"))]);
let boolean_query = BooleanQuery::from(vec![
(Occur::Must, make_term_query("a")),
(Occur::Must, make_term_query("b")),
]);
assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]);
}
}

View File

@@ -1,7 +1,7 @@
use query::Scorer;
use docset::{DocSet, SkipResult};
use Score;
use query::Scorer;
use DocId;
use Score;
#[derive(Clone, Copy, Debug)]
enum State {
@@ -129,10 +129,10 @@ where
#[cfg(test)]
mod tests {
use tests::sample_with_seed;
use postings::tests::test_skip_against_unoptimized;
use super::*;
use postings::tests::test_skip_against_unoptimized;
use query::VecDocSet;
use tests::sample_with_seed;
#[test]
fn test_exclude() {

View File

@@ -1,11 +1,11 @@
use docset::{DocSet, SkipResult};
use query::Scorer;
use query::EmptyScorer;
use DocId;
use downcast::Downcast;
use std::borrow::Borrow;
use Score;
use query::term_query::TermScorer;
use query::EmptyScorer;
use query::Scorer;
use std::borrow::Borrow;
use DocId;
use Score;
/// Returns the intersection scorer.
///
@@ -36,27 +36,29 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
left,
right,
others: scorers,
num_docsets
})
num_docsets,
});
}
}
return Box::new(Intersection {
left,
right,
others: scorers,
num_docsets
})
num_docsets,
});
}
_ => {
unreachable!();
}
_ => { unreachable!(); }
}
}
/// Creates a `DocSet` that iterates through the intersection of two `DocSet`s.
pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet=Box<Scorer>> {
pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet = Box<Scorer>> {
left: TDocSet,
right: TDocSet,
others: Vec<TOtherDocSet>,
num_docsets: usize
num_docsets: usize,
}
impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
@@ -71,18 +73,17 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
left,
right,
others: docsets,
num_docsets
num_docsets,
}
}
}
impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
pub(crate) fn docset_mut_specialized(&mut self, ord: usize) -> &mut TDocSet {
match ord {
0 => &mut self.left,
1 => &mut self.right,
n => &mut self.others[n - 2]
n => &mut self.others[n - 2],
}
}
}
@@ -92,7 +93,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> Intersection<TDocSet, TOtherDocSet>
match ord {
0 => &mut self.left,
1 => &mut self.right,
n => &mut self.others[n - 2]
n => &mut self.others[n - 2],
}
}
}
@@ -114,23 +115,30 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
// of the two rarest `DocSet` in the intersection.
loop {
match right.skip_next(candidate) {
SkipResult::Reached => { break; }
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = right.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
match left.skip_next(candidate) {
SkipResult::Reached => { break; }
SkipResult::Reached => {
break;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
}
// test the remaining scorers;
for (ord, docset) in self.others.iter_mut().enumerate() {
@@ -147,16 +155,22 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
// let's update our candidate.
candidate = docset.doc();
match left.skip_next(candidate) {
SkipResult::Reached => { other_candidate_ord = ord; }
SkipResult::Reached => {
other_candidate_ord = ord;
}
SkipResult::OverStep => {
candidate = left.doc();
other_candidate_ord = usize::max_value();
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
continue 'outer;
}
SkipResult::End => { return false; }
SkipResult::End => {
return false;
}
}
}
}
@@ -164,9 +178,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
}
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
// We optimize skipping by skipping every single member
// of the intersection to target.
let mut current_target: DocId = target;
@@ -211,18 +223,22 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
}
impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
where TScorer: Scorer, TOtherScorer: Scorer {
where
TScorer: Scorer,
TOtherScorer: Scorer,
{
fn score(&mut self) -> Score {
self.left.score() + self.right.score() + self.others.iter_mut().map(Scorer::score).sum::<Score>()
self.left.score() + self.right.score()
+ self.others.iter_mut().map(Scorer::score).sum::<Score>()
}
}
#[cfg(test)]
mod tests {
use docset::{DocSet, SkipResult};
use super::Intersection;
use query::VecDocSet;
use docset::{DocSet, SkipResult};
use postings::tests::test_skip_against_unoptimized;
use query::VecDocSet;
#[test]
fn test_intersection() {

View File

@@ -2,22 +2,22 @@
Query
*/
mod query;
mod boolean_query;
mod scorer;
mod occur;
mod weight;
mod term_query;
mod query_parser;
mod phrase_query;
mod all_query;
mod bitset;
mod range_query;
mod exclude;
mod union;
mod intersection;
mod reqopt_scorer;
mod bm25;
mod boolean_query;
mod exclude;
mod intersection;
mod occur;
mod phrase_query;
mod query;
mod query_parser;
mod range_query;
mod reqopt_scorer;
mod scorer;
mod term_query;
mod union;
mod weight;
#[cfg(test)]
mod vec_docset;
@@ -30,20 +30,20 @@ pub use self::union::Union;
#[cfg(test)]
pub use self::vec_docset::VecDocSet;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::exclude::Exclude;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::bitset::BitSetDocSet;
pub use self::boolean_query::BooleanQuery;
pub use self::exclude::Exclude;
pub use self::intersection::intersect_scorers;
pub use self::occur::Occur;
pub use self::phrase_query::PhraseQuery;
pub use self::query_parser::QueryParserError;
pub use self::query_parser::QueryParser;
pub use self::query::Query;
pub use self::query_parser::QueryParser;
pub use self::query_parser::QueryParserError;
pub use self::range_query::RangeQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::scorer::ConstScorer;
pub use self::scorer::EmptyScorer;
pub use self::scorer::Scorer;
pub use self::term_query::TermQuery;
pub use self::weight::Weight;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::range_query::RangeQuery;
pub use self::scorer::ConstScorer;
pub use self::intersection::intersect_scorers;

View File

@@ -1,20 +1,20 @@
mod phrase_query;
mod phrase_weight;
mod phrase_scorer;
mod phrase_weight;
pub use self::phrase_query::PhraseQuery;
pub use self::phrase_weight::PhraseWeight;
pub use self::phrase_scorer::PhraseScorer;
pub use self::phrase_weight::PhraseWeight;
#[cfg(test)]
mod tests {
use super::*;
use core::Index;
use schema::{SchemaBuilder, Term, TEXT};
use collector::tests::TestCollector;
use tests::assert_nearly_equals;
use core::Index;
use error::ErrorKind;
use schema::{SchemaBuilder, Term, TEXT};
use tests::assert_nearly_equals;
fn create_index(texts: &[&'static str]) -> Index {
let mut schema_builder = SchemaBuilder::default();
@@ -40,7 +40,7 @@ mod tests {
"a b b d c g c",
"a b a b c",
"c a b a d ga a",
"a b c"
"a b c",
]);
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
@@ -68,13 +68,14 @@ mod tests {
#[test]
pub fn test_phrase_query_no_positions() {
let mut schema_builder = SchemaBuilder::default();
use schema::TextOptions;
use schema::TextFieldIndexing;
use schema::IndexRecordOption;
let no_positions = TextOptions::default()
.set_indexing_options(TextFieldIndexing::default()
use schema::TextFieldIndexing;
use schema::TextOptions;
let no_positions = TextOptions::default().set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqs));
.set_index_option(IndexRecordOption::WithFreqs),
);
let text_field = schema_builder.add_text_field("text", no_positions);
let schema = schema_builder.build();
@@ -88,11 +89,18 @@ mod tests {
let searcher = index.searcher();
let phrase_query = PhraseQuery::new(vec![
Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b")
Term::from_field_text(text_field, "b"),
]);
let mut test_collector = TestCollector::default();
if let &ErrorKind::SchemaError(ref msg) = searcher.search(&phrase_query, &mut test_collector).unwrap_err().kind() {
assert_eq!("Applied phrase query on field \"text\", which does not have positions indexed", msg.as_str());
if let &ErrorKind::SchemaError(ref msg) = searcher
.search(&phrase_query, &mut test_collector)
.unwrap_err()
.kind()
{
assert_eq!(
"Applied phrase query on field \"text\", which does not have positions indexed",
msg.as_str()
);
} else {
panic!("Should have returned an error");
}
@@ -120,7 +128,6 @@ mod tests {
let scores = test_query(vec!["a", "b"]);
assert_nearly_equals(scores[0], 0.40618482);
assert_nearly_equals(scores[1], 0.46844664);
}
#[test] // motivated by #234

View File

@@ -1,11 +1,11 @@
use schema::{Field, Term};
use query::Query;
use core::searcher::Searcher;
use super::PhraseWeight;
use query::Weight;
use Result;
use query::bm25::BM25Weight;
use core::searcher::Searcher;
use error::ErrorKind;
use query::bm25::BM25Weight;
use query::Query;
use query::Weight;
use schema::{Field, Term};
use Result;
/// `PhraseQuery` matches a specific sequence of words.
///
@@ -28,18 +28,23 @@ pub struct PhraseQuery {
}
impl PhraseQuery {
/// Creates a new `PhraseQuery` given a list of terms.
///
/// There must be at least two terms, and all terms
/// must belong to the same field.
pub fn new(terms: Vec<Term>) -> PhraseQuery {
assert!(terms.len() > 1, "A phrase query is required to have strictly more than one term.");
assert!(
terms.len() > 1,
"A phrase query is required to have strictly more than one term."
);
let field = terms[0].field();
assert!(terms[1..].iter().all(|term| term.field() == field), "All terms from a phrase query must belong to the same field");
assert!(
terms[1..].iter().all(|term| term.field() == field),
"All terms from a phrase query must belong to the same field"
);
PhraseQuery {
field,
phrase_terms: terms
phrase_terms: terms,
}
}
}
@@ -50,26 +55,29 @@ impl Query for PhraseQuery {
/// See [`Weight`](./trait.Weight.html).
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>> {
let schema = searcher.schema();
let field_entry= schema.get_field_entry(self.field);
let has_positions = field_entry.field_type().get_index_record_option()
let field_entry = schema.get_field_entry(self.field);
let has_positions = field_entry
.field_type()
.get_index_record_option()
.map(|index_record_option| index_record_option.has_positions())
.unwrap_or(false);
if !has_positions {
let field_name = field_entry.name();
bail!(ErrorKind::SchemaError(format!("Applied phrase query on field {:?}, which does not have positions indexed",
field_name)))
bail!(ErrorKind::SchemaError(format!(
"Applied phrase query on field {:?}, which does not have positions indexed",
field_name
)))
}
let terms = self.phrase_terms.clone();
if scoring_enabled {
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
Ok(Box::new(PhraseWeight::new(terms, bm25_weight, true)))
} else {
Ok(Box::new(PhraseWeight::new(
terms,
bm25_weight,
true
BM25Weight::null(),
false,
)))
} else {
Ok(Box::new(PhraseWeight::new(terms, BM25Weight::null(), false)))
}
}
}

View File

@@ -1,20 +1,20 @@
use DocId;
use docset::{DocSet, SkipResult};
use postings::Postings;
use query::{Intersection, Scorer};
use query::bm25::BM25Weight;
use fieldnorm::FieldNormReader;
use postings::Postings;
use query::bm25::BM25Weight;
use query::{Intersection, Scorer};
use DocId;
struct PostingsWithOffset<TPostings> {
offset: u32,
postings: TPostings
postings: TPostings,
}
impl<TPostings: Postings> PostingsWithOffset<TPostings> {
pub fn new(segment_postings: TPostings, offset: u32) -> PostingsWithOffset<TPostings> {
PostingsWithOffset {
offset,
postings: segment_postings
postings: segment_postings,
}
}
@@ -49,10 +49,9 @@ pub struct PhraseScorer<TPostings: Postings> {
phrase_count: u32,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight,
score_needed: bool
score_needed: bool,
}
/// Returns true iff the two sorted arrays contain a common element
fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
let mut left_i = 0;
@@ -118,18 +117,20 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize {
count
}
impl<TPostings: Postings> PhraseScorer<TPostings> {
pub fn new(term_postings: Vec<TPostings>,
similarity_weight: BM25Weight,
fieldnorm_reader: FieldNormReader,
score_needed: bool) -> PhraseScorer<TPostings> {
pub fn new(
term_postings: Vec<TPostings>,
similarity_weight: BM25Weight,
fieldnorm_reader: FieldNormReader,
score_needed: bool,
) -> PhraseScorer<TPostings> {
let num_docsets = term_postings.len();
let postings_with_offsets = term_postings
.into_iter()
.enumerate()
.map(|(offset, postings)| PostingsWithOffset::new(postings, (num_docsets - offset) as u32))
.map(|(offset, postings)| {
PostingsWithOffset::new(postings, (num_docsets - offset) as u32)
})
.collect::<Vec<_>>();
PhraseScorer {
intersection_docset: Intersection::new(postings_with_offsets),
@@ -153,7 +154,6 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}
fn phrase_exists(&mut self) -> bool {
{
self.intersection_docset
@@ -163,7 +163,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
let mut intersection_len = self.left.len();
for i in 1..self.num_docsets - 1 {
{
self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(i)
.positions(&mut self.right);
}
intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
if intersection_len == 0 {
@@ -171,7 +173,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}
self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right);
intersection_exists(&self.left[..intersection_len], &self.right[..])
}
@@ -184,7 +188,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
let mut intersection_len = self.left.len();
for i in 1..self.num_docsets - 1 {
{
self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(i)
.positions(&mut self.right);
}
intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]);
if intersection_len == 0 {
@@ -192,7 +198,9 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}
self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right);
self.intersection_docset
.docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right);
intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
}
}
@@ -238,15 +246,15 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
fn score(&mut self) -> f32 {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
self.similarity_weight.score(fieldnorm_id, self.phrase_count)
self.similarity_weight
.score(fieldnorm_id, self.phrase_count)
}
}
#[cfg(test)]
mod tests {
use super::{intersection_count, intersection};
use super::{intersection, intersection_count};
fn test_intersection_sym(left: &[u32], right: &[u32], expected: &[u32]) {
test_intersection_aux(left, right, expected);
@@ -271,12 +279,11 @@ mod tests {
}
}
#[cfg(all(test, feature="unstable"))]
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::{intersection, intersection_count};
use test::Bencher;
use super::{intersection_count, intersection};
#[bench]
fn bench_intersection_short(b: &mut Bencher) {
@@ -287,7 +294,6 @@ mod bench {
});
}
#[bench]
fn bench_intersection_count_short(b: &mut Bencher) {
b.iter(|| {
@@ -296,4 +302,4 @@ mod bench {
intersection_count(&left, &right);
});
}
}
}

View File

@@ -1,12 +1,12 @@
use query::Weight;
use query::Scorer;
use schema::Term;
use schema::IndexRecordOption;
use core::SegmentReader;
use super::PhraseScorer;
use query::EmptyScorer;
use Result;
use core::SegmentReader;
use query::bm25::BM25Weight;
use query::EmptyScorer;
use query::Scorer;
use query::Weight;
use schema::IndexRecordOption;
use schema::Term;
use Result;
pub struct PhraseWeight {
phrase_terms: Vec<Term>,
@@ -16,13 +16,15 @@ pub struct PhraseWeight {
impl PhraseWeight {
/// Creates a new phrase weight.
pub fn new(phrase_terms: Vec<Term>,
similarity_weight: BM25Weight,
score_needed: bool) -> PhraseWeight {
pub fn new(
phrase_terms: Vec<Term>,
similarity_weight: BM25Weight,
score_needed: bool,
) -> PhraseWeight {
PhraseWeight {
phrase_terms,
similarity_weight,
score_needed
score_needed,
}
}
}
@@ -37,25 +39,37 @@ impl Weight for PhraseWeight {
for term in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())
.read_postings(term, IndexRecordOption::WithFreqsAndPositions) {
.read_postings(term, IndexRecordOption::WithFreqsAndPositions)
{
term_postings_list.push(postings);
} else {
return Ok(Box::new(EmptyScorer));
}
}
Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)))
Ok(Box::new(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
self.score_needed,
)))
} else {
let mut term_postings_list = Vec::new();
for term in &self.phrase_terms {
if let Some(postings) = reader
.inverted_index(term.field())
.read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions) {
.read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions)
{
term_postings_list.push(postings);
} else {
return Ok(Box::new(EmptyScorer));
}
}
Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed)))
Ok(Box::new(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
self.score_needed,
)))
}
}
}

View File

@@ -1,9 +1,9 @@
use Result;
use super::Weight;
use collector::Collector;
use core::searcher::Searcher;
use SegmentLocalId;
use super::Weight;
use std::fmt;
use Result;
use SegmentLocalId;
/// The `Query` trait defines a set of documents and a scoring method
/// for those documents.

View File

@@ -1,6 +1,6 @@
use std::fmt;
use schema::Term;
use query::Occur;
use schema::Term;
use std::fmt;
#[derive(Clone)]
pub enum LogicalLiteral {

View File

@@ -1,5 +1,5 @@
mod query_parser;
mod query_grammar;
mod query_parser;
mod user_input_ast;
pub mod logical_ast;

View File

@@ -1,6 +1,6 @@
use combine::*;
use combine::char::*;
use super::user_input_ast::*;
use combine::char::*;
use combine::*;
fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
where

View File

@@ -1,18 +1,18 @@
use schema::{Field, Schema};
use query::Query;
use query::BooleanQuery;
use super::logical_ast::*;
use super::user_input_ast::*;
use super::query_grammar::parse_to_ast;
use super::user_input_ast::*;
use core::Index;
use query::BooleanQuery;
use query::Occur;
use query::PhraseQuery;
use query::Query;
use query::TermQuery;
use schema::IndexRecordOption;
use query::PhraseQuery;
use schema::{Field, Schema};
use schema::{FieldType, Term};
use std::num::ParseIntError;
use std::str::FromStr;
use tokenizer::TokenizerManager;
use std::num::ParseIntError;
use core::Index;
/// Possible error that may happen when parsing a query.
#[derive(Debug, PartialEq, Eq)]
@@ -179,14 +179,14 @@ impl QueryParser {
}
FieldType::Str(ref str_options) => {
if let Some(option) = str_options.get_indexing_options() {
let mut tokenizer = self.tokenizer_manager
.get(option.tokenizer())
.ok_or_else(|| {
let mut tokenizer = self.tokenizer_manager.get(option.tokenizer()).ok_or_else(
|| {
QueryParserError::UnknownTokenizer(
field_entry.name().to_string(),
option.tokenizer().to_string(),
)
})?;
},
)?;
let mut terms: Vec<Term> = Vec::new();
let mut token_stream = tokenizer.token_stream(phrase);
token_stream.process(&mut |token| {
@@ -207,13 +207,14 @@ impl QueryParser {
Ok(Some(LogicalLiteral::Phrase(terms)))
} else {
let fieldname = self.schema.get_field_name(field).to_string();
Err(QueryParserError::FieldDoesNotHavePositionsIndexed(fieldname))
Err(QueryParserError::FieldDoesNotHavePositionsIndexed(
fieldname,
))
}
} else {
let fieldname = self.schema.get_field_name(field).to_string();
Err(QueryParserError::FieldNotIndexed(fieldname))
}
}
} else {
// This should have been seen earlier really.
@@ -340,16 +341,16 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box<Query> {
#[cfg(test)]
mod test {
use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT};
use tokenizer::TokenizerManager;
use super::super::logical_ast::*;
use super::QueryParser;
use super::QueryParserError;
use query::Query;
use schema::Field;
use schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use super::QueryParser;
use super::QueryParserError;
use Index;
use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT};
use tokenizer::SimpleTokenizer;
use super::super::logical_ast::*;
use tokenizer::TokenizerManager;
use Index;
fn make_query_parser() -> QueryParser {
let mut schema_builder = SchemaBuilder::default();

View File

@@ -1,16 +1,16 @@
use schema::{Field, IndexRecordOption, Term};
use query::{Query, Scorer, Weight};
use termdict::{TermDictionary, TermStreamer};
use core::SegmentReader;
use common::BitSet;
use Result;
use core::Searcher;
use core::SegmentReader;
use error::ErrorKind;
use query::BitSetDocSet;
use query::ConstScorer;
use std::ops::Range;
use query::{Query, Scorer, Weight};
use schema::Type;
use error::ErrorKind;
use schema::{Field, IndexRecordOption, Term};
use std::collections::Bound;
use std::ops::Range;
use termdict::{TermDictionary, TermStreamer};
use Result;
fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
bound: Bound<TFrom>,
@@ -89,16 +89,16 @@ pub struct RangeQuery {
}
impl RangeQuery {
/// Creates a new `RangeQuery` over an `i64` field.
///
/// If the field is not of the type `i64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_i64(
field: Field,
range: Range<i64>
) -> RangeQuery {
RangeQuery::new_i64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_i64(field: Field, range: Range<i64>) -> RangeQuery {
RangeQuery::new_i64_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}
/// Create a new `RangeQuery` over an `i64` field.
@@ -111,7 +111,7 @@ impl RangeQuery {
pub fn new_i64_bounds(
field: Field,
left_bound: Bound<i64>,
right_bound: Bound<i64>
right_bound: Bound<i64>,
) -> RangeQuery {
let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned();
RangeQuery {
@@ -132,7 +132,7 @@ impl RangeQuery {
pub fn new_u64_bounds(
field: Field,
left_bound: Bound<u64>,
right_bound: Bound<u64>
right_bound: Bound<u64>,
) -> RangeQuery {
let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned();
RangeQuery {
@@ -147,11 +147,12 @@ impl RangeQuery {
///
/// If the field is not of the type `u64`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_u64(
field: Field,
range: Range<u64>
) -> RangeQuery {
RangeQuery::new_u64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_u64(field: Field, range: Range<u64>) -> RangeQuery {
RangeQuery::new_u64_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}
/// Create a new `RangeQuery` over a `Str` field.
@@ -164,7 +165,7 @@ impl RangeQuery {
pub fn new_str_bounds<'b>(
field: Field,
left: Bound<&'b str>,
right: Bound<&'b str>
right: Bound<&'b str>,
) -> RangeQuery {
let make_term_val = |val: &str| val.as_bytes().to_vec();
RangeQuery {
@@ -179,11 +180,12 @@ impl RangeQuery {
///
/// If the field is not of the type `Str`, tantivy
/// will panic when the `Weight` object is created.
pub fn new_str<'b>(
field: Field,
range: Range<&'b str>
) -> RangeQuery {
RangeQuery::new_str_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end))
pub fn new_str<'b>(field: Field, range: Range<&'b str>) -> RangeQuery {
RangeQuery::new_str_bounds(
field,
Bound::Included(range.start),
Bound::Excluded(range.end),
)
}
}
@@ -256,13 +258,13 @@ impl Weight for RangeWeight {
#[cfg(test)]
mod tests {
use Index;
use schema::{Document, Field, SchemaBuilder, INT_INDEXED};
use collector::CountCollector;
use std::collections::Bound;
use query::Query;
use Result;
use super::RangeQuery;
use collector::CountCollector;
use query::Query;
use schema::{Document, Field, SchemaBuilder, INT_INDEXED};
use std::collections::Bound;
use Index;
use Result;
#[test]
fn test_range_query_simple() {
@@ -349,7 +351,14 @@ mod tests {
)),
9
);
assert_eq!(count_multiples(RangeQuery::new_i64_bounds(int_field, Bound::Included(9), Bound::Unbounded)), 91);
assert_eq!(
count_multiples(RangeQuery::new_i64_bounds(
int_field,
Bound::Included(9),
Bound::Unbounded
)),
91
);
}
}

View File

@@ -1,10 +1,10 @@
use DocId;
use query::Scorer;
use query::score_combiner::ScoreCombiner;
use Score;
use docset::{DocSet, SkipResult};
use query::score_combiner::ScoreCombiner;
use query::Scorer;
use std::cmp::Ordering;
use std::marker::PhantomData;
use DocId;
use Score;
/// Given a required scorer and an optional scorer
/// matches all document from the required scorer
@@ -101,14 +101,14 @@ where
#[cfg(test)]
mod tests {
use tests::sample_with_seed;
use super::RequiredOptionalScorer;
use query::VecDocSet;
use query::ConstScorer;
use docset::DocSet;
use postings::tests::test_skip_against_unoptimized;
use query::Scorer;
use query::score_combiner::{DoNothingCombiner, SumCombiner};
use query::ConstScorer;
use query::Scorer;
use query::VecDocSet;
use tests::sample_with_seed;
#[test]
fn test_reqopt_scorer_empty() {

View File

@@ -1,5 +1,5 @@
use Score;
use query::Scorer;
use Score;
/// The `ScoreCombiner` trait defines how to compute
/// an overall score given a list of scores.

Some files were not shown because too many files have changed in this diff.