Compare commits


2 Commits

Author        SHA1        Message                                    Date
Paul Masurel  a7c579f5c9  Added method to convert named doc to doc   2019-08-06 08:00:32 +09:00
Paul Masurel  f2e546bdff  Changes required for python binding        2019-08-01 17:23:49 +09:00
41 changed files with 593 additions and 593 deletions

View File

@@ -2,10 +2,6 @@ Tantivy 0.11.0
 =====================
 - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
-- Various bugfixes in the query parser.
-- Better handling of hyphens in query parser. (#609)
-- Better handling of whitespaces.
 Tantivy 0.10.1
 =====================

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.11.0"
+version = "0.10.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -25,6 +25,7 @@ atomicwrites = {version="0.2.2", optional=true}
 tempfile = "3.0"
 log = "0.4"
 combine = ">=3.6.0,<4.0.0"
+tempdir = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
@@ -35,7 +36,7 @@ levenshtein_automata = {version="0.1", features=["fst_automaton"]}
 notify = {version="4", optional=true}
 bit-set = "0.5"
 uuid = { version = "0.7.2", features = ["v4", "serde"] }
-crossbeam = "0.7"
+crossbeam = "0.5"
 futures = "0.1"
 futures-cpupool = "0.1"
 owning_ref = "0.4"
@@ -62,7 +63,6 @@ rand = "0.7"
 maplit = "1"
 matches = "0.1.8"
 time = "0.1.42"
-criterion = "0.2"
 [profile.release]
 opt-level = 3
@@ -75,7 +75,6 @@ overflow-checks = true
 [features]
 default = ["mmap"]
-forbench = []
 mmap = ["atomicwrites", "fs2", "memmap", "notify"]
 lz4-compression = ["lz4"]
 failpoints = ["fail/failpoints"]
@@ -88,6 +87,7 @@ travis-ci = { repository = "tantivy-search/tantivy" }
 [dev-dependencies.fail]
 features = ["failpoints"]
 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
 # in a different binary.
@@ -99,15 +99,3 @@ features = ["failpoints"]
 name = "failpoints"
 path = "tests/failpoints/mod.rs"
 required-features = ["fail/failpoints"]
-[profile.bench]
-lto = true
-[[bench]]
-name = "vint"
-harness = false
-[[bench]]
-name = "fastfield"
-harness = false

View File

@@ -1,73 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rand::distributions::{Bernoulli, Uniform};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tantivy::forbench::bitset::{BitSet, TinySet};
use tantivy::query::BitSetDocSet;
use tantivy::DocSet;
fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
StdRng::from_seed([seed_val; 32])
.sample_iter(&Bernoulli::new(ratio).unwrap())
.take(n as usize)
.enumerate()
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
.collect()
}
fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
let seed: [u8; 32] = [1; 32];
StdRng::from_seed(seed)
.sample_iter(&Uniform::new(0u32, max_value))
.take(n_elems)
.collect::<Vec<u32>>()
}
fn bench_tinyset_pop(criterion: &mut Criterion) {
criterion.bench_function("pop_lowest", |b| {
b.iter(|| {
let mut tinyset = TinySet::singleton(criterion::black_box(31u32));
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
})
});
}
fn bench_bitset_insert(criterion: &mut Criterion) {
criterion.bench_function_over_inputs(
"bitset_insert",
|bench, (max_value, n_elems)| {
let els = generate_nonunique_unsorted(*max_value, *n_elems);
bench.iter(move || {
let mut bitset = BitSet::with_max_value(1_000_000);
for el in els.iter().cloned() {
bitset.insert(el);
}
});
},
vec![(1_000_000u32, 10_000)],
);
}
fn bench_bitsetdocset_iterate(b: &mut test::Bencher) {
let mut bitset = BitSet::with_max_value(1_000_000);
for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
bitset.insert(el);
}
b.iter(|| {
let mut docset = BitSetDocSet::from(bitset.clone());
while docset.advance() {}
});
}
criterion_group!(
benches,
bench_tinyset_pop,
bench_bitset_insert,
bench_bitsetdocset_iterate
);
criterion_main!(benches);
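Note: the deleted file above still declares `bench_bitsetdocset_iterate` with a `test::Bencher` argument even though the rest of the file is criterion-based. A criterion-style port would look roughly like this (a hypothetical sketch, not part of either commit):

    fn bench_bitsetdocset_iterate(criterion: &mut Criterion) {
        // Build the bitset once, outside the measured loop.
        let mut bitset = BitSet::with_max_value(1_000_000);
        for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
            bitset.insert(el);
        }
        criterion.bench_function("bitsetdocset_iterate", move |b| {
            b.iter(|| {
                let mut docset = BitSetDocSet::from(bitset.clone());
                while docset.advance() {}
            })
        });
    }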

View File

@@ -1,107 +0,0 @@
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use criterion::ParameterizedBenchmark;
use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use tantivy::schema::{Schema, FAST};
use tantivy::{doc, DocId, Index};
const NUM_LOOKUPS: usize = 1_000;
fn generate_permutation(stride: usize, bit_width: u8) -> Vec<u64> {
let mut permutation: Vec<u64> = (0u64..(NUM_LOOKUPS * stride) as u64).collect();
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
permutation.push(1u64 << (bit_width as u64)); //< just to force the bit_width
permutation
}
fn bench_linear_lookup(c: &mut Criterion) {
c.bench(
"lookup_stride",
ParameterizedBenchmark::new(
"baseline_vec",
|bench, (stride, num_bits)| {
let arr = generate_permutation(*stride, *num_bits);
bench.iter(move || {
let mut a = 0u64;
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
a ^= arr[i as usize];
}
a
})
},
vec![(7, 1), (7, 5), (7, 20)],
)
.with_function("fastfield", |bench, (stride, num_bits)| {
let mut schema_builder = Schema::builder();
let val_field = schema_builder.add_u64_field("val", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
for el in generate_permutation(*stride, *num_bits) {
index_writer.add_document(doc!(val_field=>el));
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
bench.iter(move || {
let mut a = 0u64;
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
a ^= fast_field_reader.get(i as DocId);
}
a
})
}),
);
}
fn bench_jumpy_lookup(c: &mut Criterion) {
c.bench(
"lookup_jumpy",
ParameterizedBenchmark::new(
"baseline_vec",
|bench, (stride, num_bits)| {
let arr = generate_permutation(*stride, *num_bits);
bench.iter(move || {
let mut a = 0u64;
for _ in 0..NUM_LOOKUPS {
a = arr[a as usize];
}
a
})
},
vec![(7, 1), (7, 5), (7, 20)],
)
.with_function("fastfield", |bench, (stride, num_bits)| {
let mut schema_builder = Schema::builder();
let val_field = schema_builder.add_u64_field("val", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
for el in generate_permutation(*stride, *num_bits) {
index_writer.add_document(doc!(val_field=>el));
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
bench.iter(move || {
let mut a = 0u64;
for _ in 0..NUM_LOOKUPS {
a = fast_field_reader.get(a as DocId);
}
a
})
}),
);
}
criterion_group!(benches, bench_linear_lookup, bench_jumpy_lookup);
criterion_main!(benches);

View File

@@ -1,50 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STRING};
use tantivy::{Document, Index};
fn bench_union(criterion: &mut Criterion) {
criterion.bench_function_over_inputs(
"union_docset_fulladvance",
|bench, (ratio_left, ratio_right)| {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_text_field("val", STRING);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
let mut stdrng = StdRng::from_seed([0u8; 32]);
for _ in 0u32..100_000u32 {
let mut doc = Document::default();
if stdrng.gen_bool(*ratio_left) {
doc.add_text(field, "left");
}
if stdrng.gen_bool(*ratio_right) {
doc.add_text(field, "right");
}
index_writer.add_document(doc);
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let query = QueryParser::for_index(&index, vec![field])
.parse_query("left right")
.unwrap();
bench.iter(move || {
let weight = query.weight(&searcher, false).unwrap();
let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
let mut sum_docs = 0u64;
scorer.for_each(&mut |doc_id, _score| {
sum_docs += doc_id as u64;
});
});
},
vec![(0.2, 0.1), (0.2, 0.02)],
);
}
criterion_group!(benches, bench_union);
criterion_main!(benches);

View File

@@ -1,72 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion, ParameterizedBenchmark};
use rand::rngs::StdRng;
use rand::Rng;
use rand::SeedableRng;
use tantivy::forbench::compression::{compressed_block_size, BlockDecoder};
use tantivy::forbench::compression::{BlockEncoder, VIntEncoder};
use tantivy::forbench::compression::{VIntDecoder, COMPRESSION_BLOCK_SIZE};
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
let seed: [u8; 32] = [seed_val; 32];
let mut rng = StdRng::from_seed(seed);
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
}
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
generate_array_with_seed(n, ratio, 4)
}
fn bench_compress(criterion: &mut Criterion) {
criterion.bench(
"compress_sorted",
ParameterizedBenchmark::new(
"bitpack",
|bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
bench.iter(|| {
encoder.compress_block_sorted(&data, 0u32);
});
},
vec![0.1],
)
.with_function("vint", |bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
bench.iter(|| {
encoder.compress_vint_sorted(&data, 0u32);
});
}),
);
}
fn bench_uncompress(criterion: &mut Criterion) {
criterion.bench(
"uncompress_sorted",
ParameterizedBenchmark::new(
"bitpack",
|bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
bench.iter(|| {
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
});
},
vec![0.1],
)
.with_function("vint", |bench, ratio| {
let mut encoder = BlockEncoder::new();
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
let compressed = encoder.compress_vint_sorted(&data, 0u32);
let mut decoder = BlockDecoder::new();
bench.iter(move || {
decoder.uncompress_vint_sorted(compressed, 0u32, COMPRESSION_BLOCK_SIZE);
});
}),
);
}
criterion_group!(benches, bench_compress, bench_uncompress);
criterion_main!(benches);

View File

@@ -19,12 +19,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::ReloadPolicy;
-use tempfile::TempDir;
+use tempdir::TempDir;
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_example_dir")?;
     // # Defining the schema
     //

View File

@@ -18,12 +18,11 @@ use tantivy::collector::FacetCollector;
 use tantivy::query::AllQuery;
 use tantivy::schema::*;
 use tantivy::Index;
-use tempfile::TempDir;
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_facet_example_dir")?;
     let mut schema_builder = Schema::builder();
     schema_builder.add_text_field("name", TEXT | STORED);
@@ -75,3 +74,5 @@ fn main() -> tantivy::Result<()> {
     Ok(())
 }
+use tempdir::TempDir;

View File

@@ -14,12 +14,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::{Snippet, SnippetGenerator};
-use tempfile::TempDir;
+use tempdir::TempDir;
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_example_dir")?;
     // # Defining the schema
     let mut schema_builder = Schema::builder();

View File

@@ -1,2 +0,0 @@
#!/usr/bin/env bash
cargo bench --features forbench

View File

@@ -591,7 +591,7 @@ mod tests {
     query_field: Field,
     schema: Schema,
     mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
-) -> (Index, Box<dyn Query>) {
+) -> (Index, Box<Query>) {
     let index = Index::create_in_ram(schema);
     let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();

View File

@@ -86,7 +86,6 @@ where
     }
 }
-#[inline(always)]
 pub fn get(&self, idx: u64) -> u64 {
     if self.num_bits == 0 {
         return 0u64;

View File

@@ -2,7 +2,7 @@ use std::fmt;
 use std::u64;
 #[derive(Clone, Copy, Eq, PartialEq)]
-pub struct TinySet(u64);
+pub(crate) struct TinySet(u64);
 impl fmt::Debug for TinySet {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -353,3 +353,43 @@ mod tests {
         }
     }
 }
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::BitSet;
+    use super::TinySet;
+    use test;
+    #[bench]
+    fn bench_tinyset_pop(b: &mut test::Bencher) {
+        b.iter(|| {
+            let mut tinyset = TinySet::singleton(test::black_box(31u32));
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+        });
+    }
+    #[bench]
+    fn bench_tinyset_sum(b: &mut test::Bencher) {
+        let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
+        b.iter(|| {
+            assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
+        });
+    }
+    #[bench]
+    fn bench_tinyarr_sum(b: &mut test::Bencher) {
+        let v = [10u32, 14u32, 21u32];
+        b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
+    }
+    #[bench]
+    fn bench_bitset_initialize(b: &mut test::Bencher) {
+        b.iter(|| BitSet::with_max_value(1_000_000));
+    }
+}
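Note: for readers unfamiliar with `TinySet`, `pop_lowest` removes and returns the lowest set bit of the underlying `u64`. A minimal standalone sketch of that operation (assumed semantics, mirroring what the benchmark above exercises):

    fn pop_lowest(bits: &mut u64) -> Option<u32> {
        if *bits == 0 {
            None
        } else {
            let lowest = bits.trailing_zeros();
            *bits &= *bits - 1; // clear the lowest set bit
            Some(lowest)
        }
    }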

View File

@@ -6,7 +6,7 @@ mod serialize;
 mod vint;
 pub use self::bitset::BitSet;
-pub use self::bitset::TinySet;
+pub(crate) use self::bitset::TinySet;
 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
 pub use self::counting_writer::CountingWriter;
 pub use self::serialize::{BinarySerializable, FixedSize};
@@ -124,18 +124,20 @@ pub fn f64_to_u64(val: f64) -> u64 {
 /// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
 #[inline(always)]
 pub fn u64_to_f64(val: u64) -> f64 {
-    f64::from_bits(if val & HIGHEST_BIT != 0 {
-        val ^ HIGHEST_BIT
-    } else {
-        !val
-    })
+    f64::from_bits(
+        if val & HIGHEST_BIT != 0 {
+            val ^ HIGHEST_BIT
+        } else {
+            !val
+        }
+    )
 }
 #[cfg(test)]
 pub(crate) mod test {
     pub use super::serialize::test::fixed_size_test;
-    use super::{compute_num_bits, f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
+    use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
     use std::f64;
     fn test_i64_converter_helper(val: i64) {
@@ -170,8 +172,7 @@ pub(crate) mod test {
     #[test]
     fn test_f64_order() {
-        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
-            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number
         assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
         assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
         assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
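Note: `u64_to_f64` above is the inverse of the order-preserving `f64_to_u64` mapping that `test_f64_order` exercises. The forward direction can be sketched as follows (a minimal version inferred from the inverse shown in this hunk):

    const HIGHEST_BIT: u64 = 1 << 63;
    fn f64_to_u64(val: f64) -> u64 {
        let bits = val.to_bits();
        if bits & HIGHEST_BIT == 0 {
            bits | HIGHEST_BIT // positive floats: set the sign bit so they sort above
        } else {
            !bits // negative floats: invert all bits so they sort in increasing order
        }
    }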

View File

@@ -459,13 +459,13 @@ mod tests {
     use super::*;
     use std::path::PathBuf;
-    use tempfile::TempDir;
+    use tempdir::TempDir;
     #[test]
     fn test_index_on_commit_reload_policy_mmap() {
         let schema = throw_away_schema();
         let field = schema.get_field("num_likes").unwrap();
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("index").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
         let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -504,7 +504,7 @@
     fn test_index_on_commit_reload_policy_different_directories() {
         let schema = throw_away_schema();
         let field = schema.get_field("num_likes").unwrap();
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("index").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let write_index = Index::create_in_dir(&tempdir_path, schema).unwrap();
         let read_index = Index::open_in_dir(&tempdir_path).unwrap();

View File

@@ -48,14 +48,14 @@ impl RetryPolicy {
 ///
 /// It is transparently associated to a lock file, that gets deleted
 /// on `Drop.` The lock is released automatically on `Drop`.
-pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
+pub struct DirectoryLock(Box<dyn Drop + Send + Sync + 'static>);
 struct DirectoryLockGuard {
     directory: Box<dyn Directory>,
     path: PathBuf,
 }
-impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
+impl<T: Drop + Send + Sync + 'static> From<Box<T>> for DirectoryLock {
     fn from(underlying: Box<T>) -> Self {
         DirectoryLock(underlying)
     }
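Note: both sides rely on the guard being released by `Drop`; the explicit `dyn Drop` bound adds nothing, because dropping a boxed trait object always runs the concrete type's destructor. A minimal sketch of the pattern (illustrative only, not the library's code):

    struct Guard; // e.g. a DirectoryLockGuard that deletes its lock file
    impl Drop for Guard {
        fn drop(&mut self) { /* delete the lock file here */ }
    }
    fn main() {
        let _lock: Box<dyn Send + Sync + 'static> = Box::new(Guard);
    } // Guard::drop runs here, even without a `Drop` bound on the box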

View File

@@ -263,11 +263,11 @@ mod tests_mmap_specific {
     use std::collections::HashSet;
     use std::io::Write;
     use std::path::{Path, PathBuf};
-    use tempfile::TempDir;
+    use tempdir::TempDir;
     #[test]
     fn test_managed_directory() {
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("tantivy-test").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let test_path1: &'static Path = Path::new("some_path_for_test");
@@ -304,7 +304,7 @@
     fn test_managed_directory_gc_while_mmapped() {
         let test_path1: &'static Path = Path::new("some_path_for_test");
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("index").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let living_files = HashSet::new();

View File

@@ -36,7 +36,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
-use tempfile::TempDir;
+use tempdir::TempDir;
 /// Create a default io error given a string.
 pub(crate) fn make_io_err(msg: String) -> io::Error {
@@ -294,7 +294,7 @@ impl MmapDirectory {
     /// This is mostly useful to test the MmapDirectory itself.
     /// For your unit tests, prefer the RAMDirectory.
     pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
-        let tempdir = TempDir::new().map_err(OpenDirectoryError::IoError)?;
+        let tempdir = TempDir::new("index").map_err(OpenDirectoryError::IoError)?;
         let tempdir_path = PathBuf::from(tempdir.path());
         MmapDirectory::new(tempdir_path, Some(tempdir))
     }
@@ -642,7 +642,7 @@
     fn test_watch_wrapper() {
         let counter: Arc<AtomicUsize> = Default::default();
         let counter_clone = counter.clone();
-        let tmp_dir = tempfile::TempDir::new().unwrap();
+        let tmp_dir: TempDir = tempdir::TempDir::new("test_watch_wrapper").unwrap();
         let tmp_dirpath = tmp_dir.path().to_owned();
         let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
         let tmp_file = tmp_dirpath.join("coucou");

View File

@@ -177,7 +177,7 @@ impl Directory for RAMDirectory {
     fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
         fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new(
             io::ErrorKind::Other,
-            msg.unwrap_or_else(|| "Undefined".to_string())
+            msg.unwrap_or("Undefined".to_string())
         )));
         let path_buf = PathBuf::from(path);

View File

@@ -431,3 +431,111 @@ mod tests {
     }
 }
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::tests::FIELD;
+    use super::tests::{generate_permutation, SCHEMA};
+    use super::*;
+    use common::CompositeFile;
+    use directory::{Directory, RAMDirectory, WritePtr};
+    use fastfield::FastFieldReader;
+    use std::collections::HashMap;
+    use std::path::Path;
+    use test::{self, Bencher};
+    #[bench]
+    fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let n = test::black_box(7000u32);
+            let mut a = 0u64;
+            for i in (0u32..n / 7).map(|v| v * 7) {
+                a ^= permutation[i as usize];
+            }
+            a
+        });
+    }
+    #[bench]
+    fn bench_intfastfield_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let n = test::black_box(1000u32);
+            let mut a = 0u64;
+            for _ in 0u32..n {
+                a = permutation[a as usize];
+            }
+            a
+        });
+    }
+    #[bench]
+    fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
+        let path = Path::new("test");
+        let permutation = generate_permutation();
+        let mut directory: RAMDirectory = RAMDirectory::create();
+        {
+            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
+            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
+            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
+            }
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
+            serializer.close().unwrap();
+        }
+        let source = directory.open_read(&path).unwrap();
+        {
+            let fast_fields_composite = CompositeFile::open(&source).unwrap();
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
+            b.iter(|| {
+                let n = test::black_box(7000u32);
+                let mut a = 0u64;
+                for i in (0u32..n / 7).map(|val| val * 7) {
+                    a ^= fast_field_reader.get(i);
+                }
+                a
+            });
+        }
+    }
+    #[bench]
+    fn bench_intfastfield_fflookup(b: &mut Bencher) {
+        let path = Path::new("test");
+        let permutation = generate_permutation();
+        let mut directory: RAMDirectory = RAMDirectory::create();
+        {
+            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
+            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
+            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
+            }
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
+            serializer.close().unwrap();
+        }
+        let source = directory.open_read(&path).unwrap();
+        {
+            let fast_fields_composite = CompositeFile::open(&source).unwrap();
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
+            b.iter(|| {
+                let n = test::black_box(1000u32);
+                let mut a = 0u32;
+                for _ in 0u32..n {
+                    a = fast_field_reader.get(a) as u32;
+                }
+                a
+            });
+        }
+    }
+}

View File

@@ -67,12 +67,10 @@ impl<Item: FastValue> FastFieldReader<Item> {
     ///
     /// May panic if `doc` is greater than the segment
     // `maxdoc`.
-    #[inline(always)]
     pub fn get(&self, doc: DocId) -> Item {
         self.get_u64(u64::from(doc))
     }
-    #[inline(always)]
     pub(crate) fn get_u64(&self, doc: u64) -> Item {
         Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
     }
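Note: `get` is a thin wrapper: the stored value is a bit-packed delta above a segment-wide minimum, i.e. `min_value_u64 + bit_unpacker.get(doc)`. A rough standalone sketch of such a lookup (assumptions: little-endian packing, `num_bits <= 56`, and trailing padding bytes so the 8-byte window read never goes out of bounds):

    fn get(packed: &[u8], num_bits: u64, min_value: u64, idx: u64) -> u64 {
        if num_bits == 0 {
            return min_value; // all documents share the minimum value
        }
        let bit_pos = idx * num_bits;
        let mut window = [0u8; 8];
        window.copy_from_slice(&packed[(bit_pos / 8) as usize..][..8]);
        let shifted = u64::from_le_bytes(window) >> (bit_pos % 8);
        min_value + (shifted & ((1u64 << num_bits) - 1))
    }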

View File

@@ -31,9 +31,7 @@ impl FastFieldsWriter {
         _ => 0u64,
     };
     match *field_entry.field_type() {
-        FieldType::I64(ref int_options)
-        | FieldType::U64(ref int_options)
-        | FieldType::F64(ref int_options) => {
+        FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => {
             match int_options.get_fastfield_cardinality() {
                 Some(Cardinality::SingleValue) => {
                     let mut fast_field_writer = IntFastFieldWriter::new(field);

View File

@@ -761,6 +761,7 @@ mod tests {
     use crate::Index;
     use crate::ReloadPolicy;
     use crate::Term;
+    use fail;
     #[test]
     fn test_operations_group() {

View File

@@ -1,9 +1,9 @@
 #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
-#![recursion_limit = "100"]
 #![cfg_attr(all(feature = "unstable", test), feature(test))]
 #![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
 #![doc(test(attr(allow(unused_variables), deny(warnings))))]
 #![warn(missing_docs)]
+#![recursion_limit = "80"]
 //! # `tantivy`
 //!
@@ -12,7 +12,7 @@
 //!
 //! ```rust
-//! # extern crate tempfile;
+//! # extern crate tempdir;
 //! #
 //! #[macro_use]
 //! extern crate tantivy;
@@ -20,7 +20,7 @@
 //! // ...
 //!
 //! # use std::path::Path;
-//! # use tempfile::TempDir;
+//! # use tempdir::TempDir;
 //! # use tantivy::Index;
 //! # use tantivy::schema::*;
 //! # use tantivy::{Score, DocAddress};
@@ -30,7 +30,7 @@
 //! # fn main() {
 //! #    // Let's create a temporary directory for the
 //! #    // sake of this example
-//! #    if let Ok(dir) = TempDir::new() {
+//! #    if let Ok(dir) = TempDir::new("tantivy_example_dir") {
 //! #        run_example(dir.path()).unwrap();
 //! #        dir.close().unwrap();
 //! #    }
@@ -249,6 +249,7 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
 #[cfg(test)]
 mod tests {
+    use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
     use crate::core::SegmentReader;
     use crate::docset::DocSet;
@@ -894,15 +895,3 @@
         }
     }
 }
-#[cfg(feature = "forbench")]
-pub mod forbench {
-    pub mod compression {
-        pub use crate::postings::compression::*;
-    }
-    pub mod bitset {
-        pub use crate::common::BitSet;
-        pub use crate::common::TinySet;
-    }
-}

View File

@@ -160,9 +160,9 @@ impl VIntEncoder for BlockEncoder {
     }
 }
 impl VIntDecoder for BlockDecoder {
-    fn uncompress_vint_sorted(
+    fn uncompress_vint_sorted<'a>(
         &mut self,
-        compressed_data: &[u8],
+        compressed_data: &'a [u8],
         offset: u32,
         num_els: usize,
     ) -> usize {
@@ -170,7 +170,7 @@ impl VIntDecoder for BlockDecoder {
         vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
     }
-    fn uncompress_vint_unsorted(&mut self, compressed_data: &[u8], num_els: usize) -> usize {
+    fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize {
         self.output_len = num_els;
         vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
     }
@@ -268,17 +268,78 @@ pub mod tests {
         }
     }
 }
+}
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::*;
+    use rand::SeedableRng;
+    use rand::{Rng, XorShiftRng};
+    use test::Bencher;
+    fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
+        let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val];
+        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
+        (0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
+    }
+    pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
+        generate_array_with_seed(n, ratio, 4)
+    }
+    #[bench]
+    fn bench_compress(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        b.iter(|| {
+            encoder.compress_block_sorted(&data, 0u32);
+        });
+    }
+    #[bench]
+    fn bench_uncompress(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
+        let mut decoder = BlockDecoder::new();
+        b.iter(|| {
+            decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
+        });
+    }
 #[test]
 fn test_all_docs_compression_numbits() {
-    for expected_num_bits in 0u8..33u8 {
+    for expected_num_bits in 0u8.. {
         let mut data = [0u32; 128];
         if expected_num_bits > 0 {
-            data[0] = (1u64 << (expected_num_bits as u64) - 1u64) as u32;
+            data[0] = (1u64 << (expected_num_bits as usize) - 1) as u32;
         }
         let mut encoder = BlockEncoder::new();
         let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
         assert_eq!(compressed.len(), compressed_block_size(num_bits));
     }
 }
+    const NUM_INTS_BENCH_VINT: usize = 10;
+    #[bench]
+    fn bench_compress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        b.iter(|| {
+            encoder.compress_vint_sorted(&data, 0u32);
+        });
+    }
+    #[bench]
+    fn bench_uncompress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let compressed = encoder.compress_vint_sorted(&data, 0u32);
+        let mut decoder = BlockDecoder::new();
+        b.iter(|| {
+            decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
+        });
+    }
 }

View File

@@ -3,7 +3,6 @@ Postings module (also called inverted index)
 */
 mod block_search;
 pub(crate) mod compression;
 /// Postings module
 ///

View File

@@ -218,3 +218,49 @@ mod tests {
     }
 }
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::BitSet;
+    use super::BitSetDocSet;
+    use test;
+    use tests;
+    use DocSet;
+    #[bench]
+    fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
+        use tests;
+        let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
+        b.iter(|| {
+            let mut bitset = BitSet::with_max_value(1_000_000);
+            for el in els.iter().cloned() {
+                bitset.insert(el);
+            }
+        });
+    }
+    #[bench]
+    fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
+        use tests;
+        let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in els {
+            bitset.insert(el);
+        }
+        b.iter(|| bitset.clone());
+    }
+    #[bench]
+    fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
+        let els = tests::sample(1_000_000u32, 0.01);
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in els {
+            bitset.insert(el);
+        }
+        b.iter(|| {
+            let mut docset = BitSetDocSet::from(bitset.clone());
+            while docset.advance() {}
+        });
+    }
+}

View File

@@ -1,4 +1,3 @@
-use crate::error::TantivyError::InvalidArgument;
 use crate::query::{AutomatonWeight, Query, Weight};
 use crate::schema::Term;
 use crate::Result;
@@ -6,16 +5,11 @@ use crate::Searcher;
 use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
 use once_cell::sync::Lazy;
 use std::collections::HashMap;
-use std::ops::Range;
-/// A range of Levenshtein distances that we will build DFAs for our terms
-/// The computation is exponential, so best keep it to low single digits
-const VALID_LEVENSHTEIN_DISTANCE_RANGE: Range<u8> = (0..3);
 static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Lazy::new(|| {
     let mut lev_builder_cache = HashMap::new();
     // TODO make population lazy on a `(distance, val)` basis
-    for distance in VALID_LEVENSHTEIN_DISTANCE_RANGE {
+    for distance in 0..3 {
         for &transposition in &[false, true] {
             let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
             lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
@@ -106,18 +100,10 @@
     }
     fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
-        // LEV_BUILDER is a HashMap, whose `get` method returns an Option
-        match LEV_BUILDER.get(&(self.distance, false)) {
-            // Unwrap the option and build the Ok(AutomatonWeight)
-            Some(automaton_builder) => {
-                let automaton = automaton_builder.build_dfa(self.term.text());
-                Ok(AutomatonWeight::new(self.term.field(), automaton))
-            }
-            None => Err(InvalidArgument(format!(
-                "Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
-                self.distance, VALID_LEVENSHTEIN_DISTANCE_RANGE
-            ))),
-        }
+        let automaton = LEV_BUILDER.get(&(self.distance, false))
+            .unwrap() // TODO return an error
+            .build_dfa(self.term.text());
+        Ok(AutomatonWeight::new(self.term.field(), automaton))
     }
 }
} }

View File

@@ -18,6 +18,7 @@ pub enum LogicalLiteral {
     All,
 }
+#[derive(Clone)]
 pub enum LogicalAST {
     Clause(Vec<(Occur, LogicalAST)>),
     Leaf(Box<LogicalLiteral>),

View File

@@ -1,3 +1,4 @@
+use super::query_grammar;
 use super::user_input_ast::*;
 use crate::query::occur::Occur;
 use crate::query::query_parser::user_input_ast::UserInputBound;
@@ -12,25 +13,22 @@ parser! {
         (
             letter(),
             many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
-        ).skip(char(':')).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+        ).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
     }
 }
 parser! {
     fn word[I]()(I) -> String
     where [I: Stream<Item = char>] {
-        (
-            satisfy(|c: char| !c.is_whitespace() && !['-', '`', ':', '{', '}', '"', '[', ']', '(',')'].contains(&c) ),
-            many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c)))
-        )
-            .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
-            .and_then(|s: String|
-                match s.as_str() {
-                    "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
-                    "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
-                    "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
-                    _ => Ok(s)
-                })
+        many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
+            .and_then(|s: String| {
+                match s.as_str() {
+                    "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
+                    "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
+                    "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
+                    _ => Ok(s)
+                }
+            })
     }
 }
@@ -39,13 +37,12 @@
     where [I: Stream<Item = char>]
     {
         let term_val = || {
-            let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
+            let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
             phrase.or(word())
         };
         let term_val_with_field = negative_number().or(term_val());
         let term_query =
-            (field(), term_val_with_field)
-                .map(|(field_name, phrase)| UserInputLiteral {
+            (field(), char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral {
                 field_name: Some(field_name),
                 phrase,
             });
@@ -63,15 +60,8 @@
     fn negative_number[I]()(I) -> String
     where [I: Stream<Item = char>]
     {
-        (char('-'), many1(satisfy(char::is_numeric)),
-            optional((char('.'), many1(satisfy(char::is_numeric)))))
-            .map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
-                if let Some(('.', s3)) = s3 {
-                    format!("{}{}.{}", s1, s2, s3)
-                } else {
-                    format!("{}{}", s1, s2)
-                }
-            })
+        (char('-'), many1(satisfy(char::is_numeric)))
+            .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
     }
 }
@@ -85,23 +75,27 @@
 parser! {
     fn range[I]()(I) -> UserInputLeaf
     where [I: Stream<Item = char>] {
-        let range_term_val = || {
-            word().or(negative_number()).or(char('*').with(value("*".to_string())))
-        };
-        let lower_bound = (one_of("{[".chars()), range_term_val())
-            .map(|(boundary_char, lower_bound): (char, String)|
-                if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
-                else { UserInputBound::Inclusive(lower_bound) });
-        let upper_bound = (range_term_val(), one_of("}]".chars()))
-            .map(|(higher_bound, boundary_char): (String, char)|
-                if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
-                else { UserInputBound::Inclusive(higher_bound) });
+        let term_val = || {
+            word().or(negative_number()).or(char('*').map(|_| "*".to_string()))
+        };
+        let lower_bound = {
+            let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w));
+            let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w));
+            attempt(excl).or(incl)
+        };
+        let upper_bound = {
+            let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w));
+            let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w));
+            attempt(excl).or(incl)
+        };
         (
-            optional(field()),
-            lower_bound
-                .skip((spaces(), string("TO"), spaces())),
+            optional((field(), char(':')).map(|x| x.0)),
+            lower_bound,
+            spaces(),
+            string("TO"),
+            spaces(),
             upper_bound,
-        ).map(|(field, lower, upper)| UserInputLeaf::Range {
+        ).map(|(field, lower, _, _, _, upper)| UserInputLeaf::Range {
             field,
             lower,
             upper
@@ -109,28 +103,25 @@
     }
 }
-fn negate(expr: UserInputAST) -> UserInputAST {
-    expr.unary(Occur::MustNot)
-}
-fn must(expr: UserInputAST) -> UserInputAST {
-    expr.unary(Occur::Must)
-}
 parser! {
     fn leaf[I]()(I) -> UserInputAST
     where [I: Stream<Item = char>] {
-        char('-').with(leaf()).map(negate)
-            .or(char('+').with(leaf()).map(must))
-            .or(char('(').with(ast()).skip(char(')')))
-            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
-            .or(attempt(string("NOT").skip(spaces1()).with(leaf()).map(negate)))
-            .or(attempt(range().map(UserInputAST::from)))
-            .or(literal().map(UserInputAST::from))
+        (char('-'), leaf()).map(|(_, expr)| expr.unary(Occur::MustNot) )
+            .or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) ))
+            .or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr))
+            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) ))
+            .or(attempt(
+                (string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot))
+            )
+            )
+            .or(attempt(
+                range().map(UserInputAST::from)
+            )
+            )
+            .or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf))))
     }
 }
+#[derive(Clone, Copy)]
 enum BinaryOperand {
     Or,
     And,
@@ -138,62 +129,84 @@
 parser! {
     fn binary_operand[I]()(I) -> BinaryOperand
-    where [I: Stream<Item = char>]
-    {
-        string("AND").with(value(BinaryOperand::And))
-            .or(string("OR").with(value(BinaryOperand::Or)))
+    where [I: Stream<Item = char>] {
+        (spaces1(),
+            (
+                string("AND").map(|_| BinaryOperand::And)
+                .or(string("OR").map(|_| BinaryOperand::Or))
+            ),
+            spaces1()).map(|(_, op,_)| op)
     }
 }
-fn aggregate_binary_expressions(
-    left: UserInputAST,
-    others: Vec<(BinaryOperand, UserInputAST)>,
-) -> UserInputAST {
-    let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
-    for (operator, operand_ast) in others {
-        match operator {
-            BinaryOperand::And => {
-                if let Some(last) = dnf.last_mut() {
-                    last.push(operand_ast);
-                }
-            }
-            BinaryOperand::Or => {
-                dnf.push(vec![operand_ast]);
-            }
-        }
-    }
-    if dnf.len() == 1 {
-        UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
-    } else {
-        let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
-        UserInputAST::or(conjunctions)
-    }
-}
-parser! {
-    pub fn ast[I]()(I) -> UserInputAST
-    where [I: Stream<Item = char>]
-    {
-        let operand_leaf = (binary_operand().skip(spaces()), leaf().skip(spaces()));
-        let boolean_expr = (leaf().skip(spaces().silent()), many1(operand_leaf)).map(
-            |(left, right)| aggregate_binary_expressions(left,right));
-        let whitespace_separated_leaves = many1(leaf().skip(spaces().silent()))
-            .map(|subqueries: Vec<UserInputAST>|
-                if subqueries.len() == 1 {
-                    subqueries.into_iter().next().unwrap()
-                } else {
-                    UserInputAST::Clause(subqueries.into_iter().collect())
-                });
-        let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
-        spaces().with(expr).skip(spaces())
-    }
-}
+enum Element {
+    SingleEl(UserInputAST),
+    NormalDisjunctive(Vec<Vec<UserInputAST>>),
+}
+impl Element {
+    pub fn into_dnf(self) -> Vec<Vec<UserInputAST>> {
+        match self {
+            Element::NormalDisjunctive(conjunctions) => conjunctions,
+            Element::SingleEl(el) => vec![vec![el]],
+        }
+    }
+}
 parser! {
     pub fn parse_to_ast[I]()(I) -> UserInputAST
     where [I: Stream<Item = char>]
     {
-        spaces().with(optional(ast()).skip(eof())).map(|opt_ast| opt_ast.unwrap_or_else(UserInputAST::empty_query))
+        (
+            attempt(
+                chainl1(
+                    leaf().map(Element::SingleEl),
+                    binary_operand().map(|op: BinaryOperand|
+                        move |left: Element, right: Element| {
+                            let mut dnf = left.into_dnf();
+                            if let Element::SingleEl(el) = right {
+                                match op {
+                                    BinaryOperand::And => {
+                                        if let Some(last) = dnf.last_mut() {
+                                            last.push(el);
+                                        }
+                                    }
+                                    BinaryOperand::Or => {
+                                        dnf.push(vec!(el));
+                                    }
+                                }
+                            } else {
+                                unreachable!("Please report.")
+                            }
+                            Element::NormalDisjunctive(dnf)
+                        }
+                    )
+                )
+                .map(query_grammar::Element::into_dnf)
+                .map(|fnd| {
+                    if fnd.len() == 1 {
+                        UserInputAST::and(fnd.into_iter().next().unwrap()) //< safe
+                    } else {
+                        let conjunctions = fnd
+                            .into_iter()
+                            .map(UserInputAST::and)
+                            .collect();
+                        UserInputAST::or(conjunctions)
+                    }
+                })
+            )
+            .or(
+                sep_by(leaf(), spaces())
+                    .map(|subqueries: Vec<UserInputAST>| {
+                        if subqueries.len() == 1 {
+                            subqueries.into_iter().next().unwrap()
+                        } else {
+                            UserInputAST::Clause(subqueries.into_iter().collect())
+                        }
+                    })
+            )
+        )
     }
 }
@@ -212,18 +225,6 @@ mod test {
         assert!(parse_to_ast().parse(query).is_err());
     }
-    #[test]
-    fn test_parse_empty_to_ast() {
-        test_parse_query_to_ast_helper("", "<emptyclause>");
-    }
-    #[test]
-    fn test_parse_query_to_ast_hyphen() {
-        test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
-        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
-        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
-    }
     #[test]
     fn test_parse_query_to_ast_not_op() {
         assert_eq!(
@@ -258,24 +259,8 @@
         );
     }
-    #[test]
-    fn test_parse_query_to_triming_spaces() {
-        test_parse_query_to_ast_helper(" abc", "\"abc\"");
-        test_parse_query_to_ast_helper("abc ", "\"abc\"");
-        test_parse_query_to_ast_helper("( a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("a OR abc ", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc )", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc) ", "(?(\"a\") ?(\"abc\"))");
-    }
     #[test]
     fn test_parse_query_to_ast() {
-        test_parse_query_to_ast_helper("abc", "\"abc\"");
-        test_parse_query_to_ast_helper("a b", "(\"a\" \"b\")");
-        test_parse_query_to_ast_helper("+(a b)", "+((\"a\" \"b\"))");
-        test_parse_query_to_ast_helper("+d", "+(\"d\")");
         test_parse_query_to_ast_helper("+(a b) +d", "(+((\"a\" \"b\")) +(\"d\"))");
         test_parse_query_to_ast_helper("(+a +b) d", "((+(\"a\") +(\"b\")) \"d\")");
         test_parse_query_to_ast_helper("(+a)", "+(\"a\")");

View File

@@ -690,7 +690,7 @@ mod test {
     }
     #[test]
-    pub fn test_parse_query_to_ast_single_term() {
+    pub fn test_parse_query_to_ast_disjunction() {
         test_parse_query_to_logical_ast_helper(
             "title:toto",
             "Term([0, 0, 0, 0, 116, 111, 116, 111])",
@@ -714,10 +714,6 @@
             .unwrap(),
             QueryParserError::AllButQueryForbidden
         );
-    }
-    #[test]
-    pub fn test_parse_query_to_ast_two_terms() {
         test_parse_query_to_logical_ast_helper(
             "title:a b",
             "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \
@@ -730,10 +726,6 @@
             (1, Term([0, 0, 0, 0, 98]))]\"",
             false,
         );
-    }
-    #[test]
-    pub fn test_parse_query_to_ast_ranges() {
         test_parse_query_to_logical_ast_helper(
             "title:[a TO b]",
             "(Included(Term([0, 0, 0, 0, 97])) TO \
@@ -765,19 +757,6 @@
             "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO Unbounded)",
             false,
         );
-        test_parse_query_to_logical_ast_helper(
-            "signed:{-5 TO 3}",
-            "(Excluded(Term([0, 0, 0, 2, 127, 255, 255, 255, 255, 255, 255, 251])) TO \
-             Excluded(Term([0, 0, 0, 2, 128, 0, 0, 0, 0, 0, 0, 3])))",
-            false,
-        );
-        test_parse_query_to_logical_ast_helper(
-            "float:{-1.5 TO 1.5}",
-            "(Excluded(Term([0, 0, 0, 10, 64, 7, 255, 255, 255, 255, 255, 255])) TO \
-             Excluded(Term([0, 0, 0, 10, 191, 248, 0, 0, 0, 0, 0, 0])))",
-            false,
-        );
         test_parse_query_to_logical_ast_helper("*", "*", false);
     }
@@ -914,15 +893,4 @@
             true,
         );
     }
-    #[test]
-    pub fn test_query_parser_hyphen() {
-        test_parse_query_to_logical_ast_helper(
-            "title:www-form-encoded",
-            "\"[(0, Term([0, 0, 0, 0, 119, 119, 119])), \
-             (1, Term([0, 0, 0, 0, 102, 111, 114, 109])), \
-             (2, Term([0, 0, 0, 0, 101, 110, 99, 111, 100, 101, 100]))]\"",
-            false,
-        );
-    }
 }

View File

@@ -0,0 +1,44 @@
use std::sync::Arc;
use stemmer;
pub struct StemmerTokenStream<TailTokenStream>
where TailTokenStream: TokenStream {
tail: TailTokenStream,
stemmer: Arc<stemmer::Stemmer>,
}
impl<TailTokenStream> TokenStream for StemmerTokenStream<TailTokenStream>
where TailTokenStream: TokenStream {
fn token(&self) -> &Token {
self.tail.token()
}
fn token_mut(&mut self) -> &mut Token {
self.tail.token_mut()
}
fn advance(&mut self) -> bool {
if self.tail.advance() {
// self.tail.token_mut().term.make_ascii_lowercase();
let new_str = self.stemmer.stem_str(&self.token().term);
true
}
else {
false
}
}
}
impl<TailTokenStream> StemmerTokenStream<TailTokenStream>
where TailTokenStream: TokenStream {
fn wrap(stemmer: Arc<stemmer::Stemmer>, tail: TailTokenStream) -> StemmerTokenStream<TailTokenStream> {
StemmerTokenStream {
tail,
stemmer,
}
}
}

View File

@@ -80,6 +80,9 @@ impl UserInputBound {
 pub enum UserInputAST {
     Clause(Vec<UserInputAST>),
     Unary(Occur, Box<UserInputAST>),
+    //  Not(Box<UserInputAST>),
+    //  Should(Box<UserInputAST>),
+    //  Must(Box<UserInputAST>),
     Leaf(Box<UserInputLeaf>),
 }
@@ -89,7 +92,7 @@ impl UserInputAST {
     }
     fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
-        assert_ne!(occur, Occur::MustNot);
+        assert!(occur != Occur::MustNot);
         assert!(!asts.is_empty());
         if asts.len() == 1 {
             asts.into_iter().next().unwrap() //< safe
@@ -102,10 +105,6 @@
         }
     }
-    pub fn empty_query() -> UserInputAST {
-        UserInputAST::Clause(Vec::default())
-    }
     pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
         UserInputAST::compose(Occur::Must, asts)
     }
@@ -115,6 +114,42 @@
     }
 }
+/*
+impl UserInputAST {
+    fn compose_occur(self, occur: Occur) -> UserInputAST {
+        match self {
+            UserInputAST::Not(other) => {
+                let new_occur = compose_occur(Occur::MustNot, occur);
+                other.simplify()
+            }
+            _ => {
+                self
+            }
+        }
+    }
+    pub fn simplify(self) -> UserInputAST {
+        match self {
+            UserInputAST::Clause(els) => {
+                if els.len() == 1 {
+                    return els.into_iter().next().unwrap();
+                } else {
+                    return self;
+                }
+            }
+            UserInputAST::Not(els) => {
+                if els.len() == 1 {
+                    return els.into_iter().next().unwrap();
+                } else {
+                    return self;
+                }
+            }
+        }
+    }
+}
+*/
 impl From<UserInputLiteral> for UserInputLeaf {
     fn from(literal: UserInputLiteral) -> UserInputLeaf {
         UserInputLeaf::Literal(literal)

View File

@@ -460,10 +460,7 @@
     let count_multiples =
         |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
-    assert_eq!(
-        count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)),
-        9
-    );
+    assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9);
     assert_eq!(
         count_multiples(RangeQuery::new_f64_bounds(
             float_field,

View File

@@ -411,3 +411,52 @@ mod tests {
     }
 }
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use query::score_combiner::DoNothingCombiner;
+    use query::ConstScorer;
+    use query::Union;
+    use query::VecDocSet;
+    use test::Bencher;
+    use tests;
+    use DocId;
+    use DocSet;
+    #[bench]
+    fn bench_union_3_high(bench: &mut Bencher) {
+        let union_docset: Vec<Vec<DocId>> = vec![
+            tests::sample_with_seed(100_000, 0.1, 0),
+            tests::sample_with_seed(100_000, 0.2, 1),
+        ];
+        bench.iter(|| {
+            let mut v = Union::<_, DoNothingCombiner>::from(
+                union_docset
+                    .iter()
+                    .map(|doc_ids| VecDocSet::from(doc_ids.clone()))
+                    .map(ConstScorer::new)
+                    .collect::<Vec<_>>(),
+            );
+            while v.advance() {}
+        });
+    }
+    #[bench]
+    fn bench_union_3_low(bench: &mut Bencher) {
+        let union_docset: Vec<Vec<DocId>> = vec![
+            tests::sample_with_seed(100_000, 0.01, 0),
+            tests::sample_with_seed(100_000, 0.05, 1),
+            tests::sample_with_seed(100_000, 0.001, 2),
+        ];
+        bench.iter(|| {
+            let mut v = Union::<_, DoNothingCombiner>::from(
+                union_docset
+                    .iter()
+                    .map(|doc_ids| VecDocSet::from(doc_ids.clone()))
+                    .map(ConstScorer::new)
+                    .collect::<Vec<_>>(),
+            );
+            while v.advance() {}
+        });
+    }
+}

View File

@@ -120,7 +120,9 @@ impl Facet {
     /// Extract path from the `Facet`.
     pub fn to_path(&self) -> Vec<&str> {
-        self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
+        self.encoded_str()
+            .split(|c| c == FACET_SEP_CHAR)
+            .collect()
     }
 }

View File

@@ -108,9 +108,7 @@ impl FieldEntry {
     /// Returns true iff the field is a int (signed or unsigned) fast field
     pub fn is_int_fast(&self) -> bool {
         match self.field_type {
-            FieldType::U64(ref options)
-            | FieldType::I64(ref options)
-            | FieldType::F64(ref options) => options.is_fast(),
+            FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) => options.is_fast(),
             _ => false,
         }
     }

View File

@@ -83,9 +83,9 @@ impl FieldType {
     pub fn is_indexed(&self) -> bool {
         match *self {
             FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(),
-            FieldType::U64(ref int_options)
-            | FieldType::I64(ref int_options)
-            | FieldType::F64(ref int_options) => int_options.is_indexed(),
+            FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => {
+                int_options.is_indexed()
+            }
             FieldType::Date(ref date_options) => date_options.is_indexed(),
             FieldType::HierarchicalFacet => true,
             FieldType::Bytes => false,
@@ -125,12 +125,9 @@
         match *json {
             JsonValue::String(ref field_text) => match *self {
                 FieldType::Str(_) => Ok(Value::Str(field_text.clone())),
-                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {
-                    Err(ValueParsingError::TypeError(format!(
-                        "Expected an integer, got {:?}",
-                        json
-                    )))
-                }
+                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => Err(
+                    ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)),
+                ),
                 FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))),
                 FieldType::Bytes => decode(field_text).map(Value::Bytes).map_err(|_| {
                     ValueParsingError::InvalidBase64(format!(
@@ -155,7 +152,7 @@
                     let msg = format!("Expected a u64 int, got {:?}", json);
                     Err(ValueParsingError::OverflowError(msg))
                 }
-            }
+            },
             FieldType::F64(_) => {
                 if let Some(field_val_f64) = field_val_num.as_f64() {
                     Ok(Value::F64(field_val_f64))

View File

@@ -261,6 +261,24 @@ impl Schema {
         NamedFieldDocument(field_map)
     }
+    /// Converts a named doc into a document.
+    pub fn from_named_doc(
+        &self,
+        named_doc: NamedFieldDocument,
+    ) -> Result<Document, DocParsingError> {
+        let mut doc = Document::default();
+        for (field_name, field_values) in named_doc.0 {
+            if let Some(field) = self.get_field(&field_name) {
+                for field_value in field_values {
+                    doc.add(FieldValue::new(field, field_value));
+                }
+            } else {
+                return Err(DocParsingError::NoSuchFieldInSchema(field_name.clone()));
+            }
+        }
+        Ok(doc)
+    }
     /// Encode the schema in JSON.
     ///
     /// Encoding a document cannot fail.
@@ -279,7 +297,6 @@
         };
         DocParsingError::NotJSON(doc_json_sample)
     })?;
     let mut doc = Document::default();
     for (field_name, json_value) in json_obj.iter() {
         match self.get_field(field_name) {
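Note: `from_named_doc` is the inverse of `Schema::to_named_doc`, which is what the "Added method to convert named doc to doc" commit is about. The intended round-trip looks roughly like this (hypothetical usage, assuming a `schema` and a `doc` built against it):

    let named_doc = schema.to_named_doc(&doc);          // Document -> NamedFieldDocument
    let doc_again = schema.from_named_doc(named_doc)?;  // NamedFieldDocument -> Document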

View File

@@ -2,7 +2,7 @@ use crate::schema::Facet;
 use crate::DateTime;
 use serde::de::Visitor;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
-use std::{cmp::Ordering, fmt};
+use std::{fmt, cmp::Ordering};
 /// Value represents the value of a any field.
 /// It is an enum over all over all of the possible field type.
@@ -27,7 +27,7 @@ pub enum Value {
 impl Eq for Value {}
 impl Ord for Value {
     fn cmp(&self, other: &Self) -> Ordering {
-        match (self, other) {
+        match (self,other) {
             (Value::Str(l), Value::Str(r)) => l.cmp(r),
             (Value::U64(l), Value::U64(r)) => l.cmp(r),
             (Value::I64(l), Value::I64(r)) => l.cmp(r),
@@ -35,7 +35,7 @@ impl Ord for Value {
             (Value::Facet(l), Value::Facet(r)) => l.cmp(r),
             (Value::Bytes(l), Value::Bytes(r)) => l.cmp(r),
             (Value::F64(l), Value::F64(r)) => {
-                match (l.is_nan(), r.is_nan()) {
+                match (l.is_nan(),r.is_nan()) {
                     (false, false) => l.partial_cmp(r).unwrap(), // only fail on NaN
                     (true, true) => Ordering::Equal,
                     (true, false) => Ordering::Less, // we define NaN as less than -∞
@@ -219,7 +219,7 @@ impl From<Vec<u8>> for Value {
 mod binary_serialize {
     use super::Value;
-    use crate::common::{f64_to_u64, u64_to_f64, BinarySerializable};
+    use crate::common::{BinarySerializable, f64_to_u64, u64_to_f64};
     use crate::schema::Facet;
     use chrono::{TimeZone, Utc};
     use std::io::{self, Read, Write};
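Note: the `Ord` impl shown above gives `f64` values a total order by special-casing NaN below everything else. In isolation, the comparison is (same logic as the hunk, extracted for clarity):

    fn cmp_f64(l: f64, r: f64) -> std::cmp::Ordering {
        use std::cmp::Ordering;
        match (l.is_nan(), r.is_nan()) {
            (false, false) => l.partial_cmp(&r).unwrap(), // only fails on NaN
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Less,    // NaN sorts below -inf
            (false, true) => Ordering::Greater,
        }
    }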

View File

@@ -8,7 +8,7 @@ use tantivy::{Index, Term};
 #[test]
 fn test_failpoints_managed_directory_gc_if_delete_fails() {
-    let _scenario = fail::FailScenario::setup();
+    let scenario = fail::FailScenario::setup();
     let test_path: &'static Path = Path::new("some_path_for_test");