Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-01 23:12:54 +00:00

Compare commits: criterion...python-bin
2 commits: a7c579f5c9, f2e546bdff
@@ -2,10 +2,6 @@ Tantivy 0.11.0
 =====================
 - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
-- Various bugfixes in the query parser.
-- Better handling of hyphens in query parser. (#609)
-- Better handling of whitespaces.
-
 
 
 Tantivy 0.10.1
 =====================
Cargo.toml (22 changed lines)
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.11.0"
+version = "0.10.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -25,6 +25,7 @@ atomicwrites = {version="0.2.2", optional=true}
 tempfile = "3.0"
 log = "0.4"
 combine = ">=3.6.0,<4.0.0"
+tempdir = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
@@ -35,7 +36,7 @@ levenshtein_automata = {version="0.1", features=["fst_automaton"]}
 notify = {version="4", optional=true}
 bit-set = "0.5"
 uuid = { version = "0.7.2", features = ["v4", "serde"] }
-crossbeam = "0.7"
+crossbeam = "0.5"
 futures = "0.1"
 futures-cpupool = "0.1"
 owning_ref = "0.4"
@@ -62,7 +63,6 @@ rand = "0.7"
 maplit = "1"
 matches = "0.1.8"
 time = "0.1.42"
-criterion = "0.2"
 
 [profile.release]
 opt-level = 3
@@ -75,7 +75,6 @@ overflow-checks = true
 
 [features]
 default = ["mmap"]
-forbench = []
 mmap = ["atomicwrites", "fs2", "memmap", "notify"]
 lz4-compression = ["lz4"]
 failpoints = ["fail/failpoints"]
@@ -88,6 +87,7 @@ travis-ci = { repository = "tantivy-search/tantivy" }
 [dev-dependencies.fail]
 features = ["failpoints"]
 
+
 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
 # in a different binary.
@@ -98,16 +98,4 @@ features = ["failpoints"]
 [[test]]
 name = "failpoints"
 path = "tests/failpoints/mod.rs"
 required-features = ["fail/failpoints"]
-
-[profile.bench]
-lto = true
-
-[[bench]]
-name = "vint"
-harness = false
-
-
-[[bench]]
-name = "fastfield"
-harness = false
@@ -1,73 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use rand::distributions::{Bernoulli, Uniform};
-use rand::rngs::StdRng;
-use rand::{Rng, SeedableRng};
-use tantivy::forbench::bitset::{BitSet, TinySet};
-use tantivy::query::BitSetDocSet;
-use tantivy::DocSet;
-
-fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
-    StdRng::from_seed([seed_val; 32])
-        .sample_iter(&Bernoulli::new(ratio).unwrap())
-        .take(n as usize)
-        .enumerate()
-        .filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
-        .collect()
-}
-
-fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
-    let seed: [u8; 32] = [1; 32];
-    StdRng::from_seed(seed)
-        .sample_iter(&Uniform::new(0u32, max_value))
-        .take(n_elems)
-        .collect::<Vec<u32>>()
-}
-
-fn bench_tinyset_pop(criterion: &mut Criterion) {
-    criterion.bench_function("pop_lowest", |b| {
-        b.iter(|| {
-            let mut tinyset = TinySet::singleton(criterion::black_box(31u32));
-            tinyset.pop_lowest();
-            tinyset.pop_lowest();
-            tinyset.pop_lowest();
-            tinyset.pop_lowest();
-            tinyset.pop_lowest();
-            tinyset.pop_lowest();
-        })
-    });
-}
-
-fn bench_bitset_insert(criterion: &mut Criterion) {
-    criterion.bench_function_over_inputs(
-        "bitset_insert",
-        |bench, (max_value, n_elems)| {
-            let els = generate_nonunique_unsorted(*max_value, *n_elems);
-            bench.iter(move || {
-                let mut bitset = BitSet::with_max_value(1_000_000);
-                for el in els.iter().cloned() {
-                    bitset.insert(el);
-                }
-            });
-        },
-        vec![(1_000_000u32, 10_000)],
-    );
-}
-
-fn bench_bitsetdocset_iterate(b: &mut test::Bencher) {
-    let mut bitset = BitSet::with_max_value(1_000_000);
-    for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
-        bitset.insert(el);
-    }
-    b.iter(|| {
-        let mut docset = BitSetDocSet::from(bitset.clone());
-        while docset.advance() {}
-    });
-}
-
-criterion_group!(
-    benches,
-    bench_tinyset_pop,
-    bench_bitset_insert,
-    bench_bitsetdocset_iterate
-);
-criterion_main!(benches);
@@ -1,107 +0,0 @@
-use criterion::criterion_group;
-use criterion::criterion_main;
-use criterion::Criterion;
-use criterion::ParameterizedBenchmark;
-use rand::rngs::StdRng;
-use rand::seq::SliceRandom;
-use rand::SeedableRng;
-use tantivy::schema::{Schema, FAST};
-use tantivy::{doc, DocId, Index};
-
-const NUM_LOOKUPS: usize = 1_000;
-
-fn generate_permutation(stride: usize, bit_width: u8) -> Vec<u64> {
-    let mut permutation: Vec<u64> = (0u64..(NUM_LOOKUPS * stride) as u64).collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation.push(1u64 << (bit_width as u64)); //< just to force the bit_width
-    permutation
-}
-
-fn bench_linear_lookup(c: &mut Criterion) {
-    c.bench(
-        "lookup_stride",
-        ParameterizedBenchmark::new(
-            "baseline_vec",
-            |bench, (stride, num_bits)| {
-                let arr = generate_permutation(*stride, *num_bits);
-                bench.iter(move || {
-                    let mut a = 0u64;
-                    for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
-                        a ^= arr[i as usize];
-                    }
-                    a
-                })
-            },
-            vec![(7, 1), (7, 5), (7, 20)],
-        )
-        .with_function("fastfield", |bench, (stride, num_bits)| {
-            let mut schema_builder = Schema::builder();
-            let val_field = schema_builder.add_u64_field("val", FAST);
-            let schema = schema_builder.build();
-
-            let index = Index::create_in_ram(schema);
-            let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
-            for el in generate_permutation(*stride, *num_bits) {
-                index_writer.add_document(doc!(val_field=>el));
-            }
-            index_writer.commit().unwrap();
-            let reader = index.reader().unwrap();
-            let searcher = reader.searcher();
-            let segment_reader = searcher.segment_reader(0u32);
-            let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
-            bench.iter(move || {
-                let mut a = 0u64;
-                for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
-                    a ^= fast_field_reader.get(i as DocId);
-                }
-                a
-            })
-        }),
-    );
-}
-
-fn bench_jumpy_lookup(c: &mut Criterion) {
-    c.bench(
-        "lookup_jumpy",
-        ParameterizedBenchmark::new(
-            "baseline_vec",
-            |bench, (stride, num_bits)| {
-                let arr = generate_permutation(*stride, *num_bits);
-                bench.iter(move || {
-                    let mut a = 0u64;
-                    for _ in 0..NUM_LOOKUPS {
-                        a = arr[a as usize];
-                    }
-                    a
-                })
-            },
-            vec![(7, 1), (7, 5), (7, 20)],
-        )
-        .with_function("fastfield", |bench, (stride, num_bits)| {
-            let mut schema_builder = Schema::builder();
-            let val_field = schema_builder.add_u64_field("val", FAST);
-            let schema = schema_builder.build();
-
-            let index = Index::create_in_ram(schema);
-            let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
-            for el in generate_permutation(*stride, *num_bits) {
-                index_writer.add_document(doc!(val_field=>el));
-            }
-            index_writer.commit().unwrap();
-            let reader = index.reader().unwrap();
-            let searcher = reader.searcher();
-            let segment_reader = searcher.segment_reader(0u32);
-            let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
-            bench.iter(move || {
-                let mut a = 0u64;
-                for _ in 0..NUM_LOOKUPS {
-                    a = fast_field_reader.get(a as DocId);
-                }
-                a
-            })
-        }),
-    );
-}
-
-criterion_group!(benches, bench_linear_lookup, bench_jumpy_lookup);
-criterion_main!(benches);
@@ -1,50 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use rand::rngs::StdRng;
-use rand::{Rng, SeedableRng};
-use tantivy::query::QueryParser;
-use tantivy::schema::{Schema, STRING};
-use tantivy::{Document, Index};
-
-fn bench_union(criterion: &mut Criterion) {
-    criterion.bench_function_over_inputs(
-        "union_docset_fulladvance",
-        |bench, (ratio_left, ratio_right)| {
-            let mut schema_builder = Schema::builder();
-            let field = schema_builder.add_text_field("val", STRING);
-            let schema = schema_builder.build();
-            let index = Index::create_in_ram(schema);
-            let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
-            let mut stdrng = StdRng::from_seed([0u8; 32]);
-            for _ in 0u32..100_000u32 {
-                let mut doc = Document::default();
-                if stdrng.gen_bool(*ratio_left) {
-                    doc.add_text(field, "left");
-                }
-                if stdrng.gen_bool(*ratio_right) {
-                    doc.add_text(field, "right");
-                }
-                index_writer.add_document(doc);
-            }
-            index_writer.commit().unwrap();
-            let reader = index.reader().unwrap();
-            let searcher = reader.searcher();
-
-            let query = QueryParser::for_index(&index, vec![field])
-                .parse_query("left right")
-                .unwrap();
-
-            bench.iter(move || {
-                let weight = query.weight(&searcher, false).unwrap();
-                let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
-                let mut sum_docs = 0u64;
-                scorer.for_each(&mut |doc_id, _score| {
-                    sum_docs += doc_id as u64;
-                });
-            });
-        },
-        vec![(0.2, 0.1), (0.2, 0.02)],
-    );
-}
-
-criterion_group!(benches, bench_union);
-criterion_main!(benches);
@@ -1,72 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion, ParameterizedBenchmark};
-use rand::rngs::StdRng;
-use rand::Rng;
-use rand::SeedableRng;
-use tantivy::forbench::compression::{compressed_block_size, BlockDecoder};
-use tantivy::forbench::compression::{BlockEncoder, VIntEncoder};
-use tantivy::forbench::compression::{VIntDecoder, COMPRESSION_BLOCK_SIZE};
-
-fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
-    let seed: [u8; 32] = [seed_val; 32];
-    let mut rng = StdRng::from_seed(seed);
-    (0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
-}
-
-pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
-    generate_array_with_seed(n, ratio, 4)
-}
-
-fn bench_compress(criterion: &mut Criterion) {
-    criterion.bench(
-        "compress_sorted",
-        ParameterizedBenchmark::new(
-            "bitpack",
-            |bench, ratio| {
-                let mut encoder = BlockEncoder::new();
-                let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
-                bench.iter(|| {
-                    encoder.compress_block_sorted(&data, 0u32);
-                });
-            },
-            vec![0.1],
-        )
-        .with_function("vint", |bench, ratio| {
-            let mut encoder = BlockEncoder::new();
-            let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
-            bench.iter(|| {
-                encoder.compress_vint_sorted(&data, 0u32);
-            });
-        }),
-    );
-}
-
-fn bench_uncompress(criterion: &mut Criterion) {
-    criterion.bench(
-        "uncompress_sorted",
-        ParameterizedBenchmark::new(
-            "bitpack",
-            |bench, ratio| {
-                let mut encoder = BlockEncoder::new();
-                let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
-                let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
-                let mut decoder = BlockDecoder::new();
-                bench.iter(|| {
-                    decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
-                });
-            },
-            vec![0.1],
-        )
-        .with_function("vint", |bench, ratio| {
-            let mut encoder = BlockEncoder::new();
-            let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
-            let compressed = encoder.compress_vint_sorted(&data, 0u32);
-            let mut decoder = BlockDecoder::new();
-            bench.iter(move || {
-                decoder.uncompress_vint_sorted(compressed, 0u32, COMPRESSION_BLOCK_SIZE);
-            });
-        }),
-    );
-}
-
-criterion_group!(benches, bench_compress, bench_uncompress);
-criterion_main!(benches);
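Note: the deleted benchmark above compares bitpacked blocks against variable-length integers ("vint") on sorted doc ids. As a rough, self-contained sketch of the vint idea being measured -- delta-encode the sorted values, then write each delta seven bits per byte, with the high bit marking the final byte -- this is an illustration only, not tantivy's actual implementation:

fn compress_vint_sorted(vals: &[u32], mut previous: u32) -> Vec<u8> {
    let mut out = Vec::new();
    for &val in vals {
        let mut delta = val - previous; // sorted input => no underflow
        previous = val;
        loop {
            let byte = (delta & 0x7f) as u8;
            delta >>= 7;
            if delta == 0 {
                out.push(byte | 0x80); // high bit marks the last byte of a delta
                break;
            }
            out.push(byte);
        }
    }
    out
}

fn uncompress_vint_sorted(data: &[u8], mut previous: u32) -> Vec<u32> {
    let mut vals = Vec::new();
    let (mut delta, mut shift) = (0u32, 0u32);
    for &byte in data {
        delta |= ((byte & 0x7f) as u32) << shift;
        shift += 7;
        if byte & 0x80 != 0 {
            previous += delta; // undo the delta encoding
            vals.push(previous);
            delta = 0;
            shift = 0;
        }
    }
    vals
}

fn main() {
    let docs = [3u32, 17, 18, 187, 1024];
    let compressed = compress_vint_sorted(&docs, 0);
    assert_eq!(uncompress_vint_sorted(&compressed, 0), docs);
    println!("{} doc ids -> {} bytes", docs.len(), compressed.len());
}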
@@ -19,12 +19,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::ReloadPolicy;
-use tempfile::TempDir;
+use tempdir::TempDir;
 
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_example_dir")?;
 
     // # Defining the schema
     //
@@ -18,12 +18,11 @@ use tantivy::collector::FacetCollector;
 use tantivy::query::AllQuery;
 use tantivy::schema::*;
 use tantivy::Index;
-use tempfile::TempDir;
 
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_facet_example_dir")?;
     let mut schema_builder = Schema::builder();
 
     schema_builder.add_text_field("name", TEXT | STORED);
@@ -75,3 +74,5 @@ fn main() -> tantivy::Result<()> {
 
     Ok(())
 }
+
+use tempdir::TempDir;
@@ -14,12 +14,12 @@ use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::Index;
 use tantivy::{Snippet, SnippetGenerator};
-use tempfile::TempDir;
+use tempdir::TempDir;
 
 fn main() -> tantivy::Result<()> {
     // Let's create a temporary directory for the
     // sake of this example
-    let index_path = TempDir::new()?;
+    let index_path = TempDir::new("tantivy_example_dir")?;
 
     // # Defining the schema
     let mut schema_builder = Schema::builder();
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-cargo bench --features forbench
@@ -591,7 +591,7 @@ mod tests {
     query_field: Field,
     schema: Schema,
     mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
-) -> (Index, Box<dyn Query>) {
+) -> (Index, Box<Query>) {
     let index = Index::create_in_ram(schema);
 
     let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -86,7 +86,6 @@ where
         }
     }
 
-    #[inline(always)]
     pub fn get(&self, idx: u64) -> u64 {
         if self.num_bits == 0 {
             return 0u64;
@@ -2,7 +2,7 @@ use std::fmt;
 use std::u64;
 
 #[derive(Clone, Copy, Eq, PartialEq)]
-pub struct TinySet(u64);
+pub(crate) struct TinySet(u64);
 
 impl fmt::Debug for TinySet {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
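Note: TinySet, whose visibility is tightened in the hunk above, is a set of integers in 0..64 packed into a single u64. A minimal sketch of the idea, assuming only what the surrounding hunks show (pop_lowest amounts to a trailing-zeros count plus clearing the lowest set bit):

// A 64-element bitset in one machine word.
#[derive(Clone, Copy)]
struct TinySet(u64);

impl TinySet {
    fn empty() -> TinySet { TinySet(0) }
    fn insert(self, el: u32) -> TinySet { TinySet(self.0 | (1u64 << el)) }
    // Remove and return the smallest element, if any.
    fn pop_lowest(&mut self) -> Option<u32> {
        if self.0 == 0 {
            None
        } else {
            let lowest = self.0.trailing_zeros();
            self.0 &= self.0 - 1; // clear the lowest set bit
            Some(lowest)
        }
    }
}

fn main() {
    let mut set = TinySet::empty().insert(10).insert(14).insert(21);
    assert_eq!(set.pop_lowest(), Some(10));
    assert_eq!(set.pop_lowest(), Some(14));
    assert_eq!(set.pop_lowest(), Some(21));
    assert_eq!(set.pop_lowest(), None);
}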
@@ -353,3 +353,43 @@ mod tests {
         }
     }
 }
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use super::BitSet;
+    use super::TinySet;
+    use test;
+
+    #[bench]
+    fn bench_tinyset_pop(b: &mut test::Bencher) {
+        b.iter(|| {
+            let mut tinyset = TinySet::singleton(test::black_box(31u32));
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+        });
+    }
+
+    #[bench]
+    fn bench_tinyset_sum(b: &mut test::Bencher) {
+        let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
+        b.iter(|| {
+            assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
+        });
+    }
+
+    #[bench]
+    fn bench_tinyarr_sum(b: &mut test::Bencher) {
+        let v = [10u32, 14u32, 21u32];
+        b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
+    }
+
+    #[bench]
+    fn bench_bitset_initialize(b: &mut test::Bencher) {
+        b.iter(|| BitSet::with_max_value(1_000_000));
+    }
+}
@@ -6,7 +6,7 @@ mod serialize;
 mod vint;
 
 pub use self::bitset::BitSet;
-pub use self::bitset::TinySet;
+pub(crate) use self::bitset::TinySet;
 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
 pub use self::counting_writer::CountingWriter;
 pub use self::serialize::{BinarySerializable, FixedSize};
@@ -124,24 +124,26 @@ pub fn f64_to_u64(val: f64) -> u64 {
 /// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
 #[inline(always)]
 pub fn u64_to_f64(val: u64) -> f64 {
-    f64::from_bits(if val & HIGHEST_BIT != 0 {
-        val ^ HIGHEST_BIT
-    } else {
-        !val
-    })
+    f64::from_bits(
+        if val & HIGHEST_BIT != 0 {
+            val ^ HIGHEST_BIT
+        } else {
+            !val
+        }
+    )
 }
 
 #[cfg(test)]
 pub(crate) mod test {
 
     pub use super::serialize::test::fixed_size_test;
-    use super::{compute_num_bits, f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
+    use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
    use std::f64;
 
     fn test_i64_converter_helper(val: i64) {
         assert_eq!(u64_to_i64(i64_to_u64(val)), val);
     }
 
     fn test_f64_converter_helper(val: f64) {
         assert_eq!(u64_to_f64(f64_to_u64(val)), val);
     }
@@ -170,8 +172,7 @@ pub(crate) mod test {
 
     #[test]
     fn test_f64_order() {
-        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
-            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number
         assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
         assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
         assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
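Note: these hunks touch the order-preserving f64 <-> u64 mapping behind the changelog's "Added f64 field. Internally reuse u64 code the same way i64 does". A self-contained sketch consistent with the u64_to_f64 body visible above: positive floats get the sign bit set, negative floats are bitwise-complemented, so plain u64 comparison reproduces f64 ordering:

const HIGHEST_BIT: u64 = 1u64 << 63;

fn f64_to_u64(val: f64) -> u64 {
    let bits = val.to_bits();
    if bits & HIGHEST_BIT == 0 {
        bits | HIGHEST_BIT // positive (or +0): set the sign bit
    } else {
        !bits // negative: flip all bits to reverse the order
    }
}

fn u64_to_f64(val: u64) -> f64 {
    f64::from_bits(if val & HIGHEST_BIT != 0 {
        val ^ HIGHEST_BIT
    } else {
        !val
    })
}

fn main() {
    assert_eq!(u64_to_f64(f64_to_u64(-1.5)), -1.5);
    assert!(f64_to_u64(-1.0) < f64_to_u64(1.0)); // sign order preserved
    assert!(f64_to_u64(1.0) < f64_to_u64(1.5)); // magnitude order preserved
}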
@@ -459,13 +459,13 @@ mod tests {
 
     use super::*;
     use std::path::PathBuf;
-    use tempfile::TempDir;
+    use tempdir::TempDir;
 
     #[test]
     fn test_index_on_commit_reload_policy_mmap() {
         let schema = throw_away_schema();
         let field = schema.get_field("num_likes").unwrap();
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("index").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
         let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -504,7 +504,7 @@ mod tests {
     fn test_index_on_commit_reload_policy_different_directories() {
         let schema = throw_away_schema();
         let field = schema.get_field("num_likes").unwrap();
-        let tempdir = TempDir::new().unwrap();
+        let tempdir = TempDir::new("index").unwrap();
         let tempdir_path = PathBuf::from(tempdir.path());
         let write_index = Index::create_in_dir(&tempdir_path, schema).unwrap();
         let read_index = Index::open_in_dir(&tempdir_path).unwrap();
@@ -48,14 +48,14 @@ impl RetryPolicy {
 ///
 /// It is transparently associated to a lock file, that gets deleted
 /// on `Drop.` The lock is released automatically on `Drop`.
-pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
+pub struct DirectoryLock(Box<dyn Drop + Send + Sync + 'static>);
 
 struct DirectoryLockGuard {
     directory: Box<dyn Directory>,
     path: PathBuf,
 }
 
-impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
+impl<T: Drop + Send + Sync + 'static> From<Box<T>> for DirectoryLock {
     fn from(underlying: Box<T>) -> Self {
         DirectoryLock(underlying)
     }
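Note: the `dyn Drop` bound on the right-hand (older) side of the hunk above is redundant: a Box already runs its content's destructor when it is dropped, so `Box<dyn Send + Sync + 'static>` suffices to hold an RAII guard. A sketch of the pattern; `LockFileGuard` is hypothetical, not tantivy's actual guard type:

use std::path::PathBuf;

pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);

// Hypothetical guard: deletes its lock file when dropped.
struct LockFileGuard {
    path: PathBuf,
}

impl Drop for LockFileGuard {
    fn drop(&mut self) {
        let _ = std::fs::remove_file(&self.path); // best-effort cleanup
    }
}

fn main() {
    let path = PathBuf::from("/tmp/example.lock");
    std::fs::write(&path, b"").unwrap();
    let lock = DirectoryLock(Box::new(LockFileGuard { path }));
    drop(lock); // the guard's destructor removes the lock file here
}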
@@ -263,11 +263,11 @@ mod tests_mmap_specific {
 use std::collections::HashSet;
 use std::io::Write;
 use std::path::{Path, PathBuf};
-use tempfile::TempDir;
+use tempdir::TempDir;
 
 #[test]
 fn test_managed_directory() {
-    let tempdir = TempDir::new().unwrap();
+    let tempdir = TempDir::new("tantivy-test").unwrap();
     let tempdir_path = PathBuf::from(tempdir.path());
 
     let test_path1: &'static Path = Path::new("some_path_for_test");
@@ -304,7 +304,7 @@ mod tests_mmap_specific {
 fn test_managed_directory_gc_while_mmapped() {
     let test_path1: &'static Path = Path::new("some_path_for_test");
 
-    let tempdir = TempDir::new().unwrap();
+    let tempdir = TempDir::new("index").unwrap();
     let tempdir_path = PathBuf::from(tempdir.path());
     let living_files = HashSet::new();
 
@@ -36,7 +36,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
-use tempfile::TempDir;
+use tempdir::TempDir;
 
 /// Create a default io error given a string.
 pub(crate) fn make_io_err(msg: String) -> io::Error {
@@ -294,7 +294,7 @@ impl MmapDirectory {
     /// This is mostly useful to test the MmapDirectory itself.
     /// For your unit tests, prefer the RAMDirectory.
     pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
-        let tempdir = TempDir::new().map_err(OpenDirectoryError::IoError)?;
+        let tempdir = TempDir::new("index").map_err(OpenDirectoryError::IoError)?;
         let tempdir_path = PathBuf::from(tempdir.path());
         MmapDirectory::new(tempdir_path, Some(tempdir))
     }
@@ -642,7 +642,7 @@ mod tests {
     fn test_watch_wrapper() {
         let counter: Arc<AtomicUsize> = Default::default();
         let counter_clone = counter.clone();
-        let tmp_dir = tempfile::TempDir::new().unwrap();
+        let tmp_dir: TempDir = tempdir::TempDir::new("test_watch_wrapper").unwrap();
         let tmp_dirpath = tmp_dir.path().to_owned();
         let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
         let tmp_file = tmp_dirpath.join("coucou");
@@ -177,7 +177,7 @@ impl Directory for RAMDirectory {
     fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
         fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new(
             io::ErrorKind::Other,
-            msg.unwrap_or_else(|| "Undefined".to_string())
+            msg.unwrap_or("Undefined".to_string())
         )));
         let path_buf = PathBuf::from(path);
 
@@ -431,3 +431,111 @@ mod tests {
     }
 
 }
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::tests::FIELD;
+    use super::tests::{generate_permutation, SCHEMA};
+    use super::*;
+    use common::CompositeFile;
+    use directory::{Directory, RAMDirectory, WritePtr};
+    use fastfield::FastFieldReader;
+    use std::collections::HashMap;
+    use std::path::Path;
+    use test::{self, Bencher};
+
+    #[bench]
+    fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let n = test::black_box(7000u32);
+            let mut a = 0u64;
+            for i in (0u32..n / 7).map(|v| v * 7) {
+                a ^= permutation[i as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let n = test::black_box(1000u32);
+            let mut a = 0u64;
+            for _ in 0u32..n {
+                a = permutation[a as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
+        let path = Path::new("test");
+        let permutation = generate_permutation();
+        let mut directory: RAMDirectory = RAMDirectory::create();
+        {
+            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
+            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
+            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
+            }
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
+            serializer.close().unwrap();
+        }
+        let source = directory.open_read(&path).unwrap();
+        {
+            let fast_fields_composite = CompositeFile::open(&source).unwrap();
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
+
+            b.iter(|| {
+                let n = test::black_box(7000u32);
+                let mut a = 0u64;
+                for i in (0u32..n / 7).map(|val| val * 7) {
+                    a ^= fast_field_reader.get(i);
+                }
+                a
+            });
+        }
+    }
+
+    #[bench]
+    fn bench_intfastfield_fflookup(b: &mut Bencher) {
+        let path = Path::new("test");
+        let permutation = generate_permutation();
+        let mut directory: RAMDirectory = RAMDirectory::create();
+        {
+            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
+            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
+            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
+            }
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
+            serializer.close().unwrap();
+        }
+        let source = directory.open_read(&path).unwrap();
+        {
+            let fast_fields_composite = CompositeFile::open(&source).unwrap();
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
+
+            b.iter(|| {
+                let n = test::black_box(1000u32);
+                let mut a = 0u32;
+                for _ in 0u32..n {
+                    a = fast_field_reader.get(a) as u32;
+                }
+                a
+            });
+        }
+    }
+
+}
@@ -67,12 +67,10 @@ impl<Item: FastValue> FastFieldReader<Item> {
     ///
     /// May panic if `doc` is greater than the segment
     // `maxdoc`.
-    #[inline(always)]
     pub fn get(&self, doc: DocId) -> Item {
         self.get_u64(u64::from(doc))
     }
 
-    #[inline(always)]
     pub(crate) fn get_u64(&self, doc: u64) -> Item {
         Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
     }
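Note: the read path shown above is `min_value + bit_unpacker.get(doc)`: values are stored as offsets from the segment minimum, bit-packed at a fixed width. A rough sketch of the decode step, assuming a little-endian buffer padded with a few spare bytes at the end (not tantivy's exact layout, and only valid for widths up to 57 bits):

struct BitUnpacker {
    data: Vec<u8>,
    num_bits: u64,
}

impl BitUnpacker {
    fn get(&self, idx: u64) -> u64 {
        if self.num_bits == 0 {
            return 0; // all values equal the minimum
        }
        let bit_pos = idx * self.num_bits;
        let byte = (bit_pos / 8) as usize;
        let mut buf = [0u8; 8];
        buf.copy_from_slice(&self.data[byte..byte + 8]); // relies on end padding
        let mask = (1u64 << self.num_bits) - 1;
        (u64::from_le_bytes(buf) >> (bit_pos % 8)) & mask
    }
}

fn main() {
    // The 4-bit values [3, 7, 1] packed by hand: 0x73, 0x01, then padding.
    let unpacker = BitUnpacker { data: vec![0x73, 0x01, 0, 0, 0, 0, 0, 0, 0], num_bits: 4 };
    let min_value = 100u64;
    let get = |doc: u64| min_value + unpacker.get(doc);
    assert_eq!(get(0), 103);
    assert_eq!(get(1), 107);
    assert_eq!(get(2), 101);
}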
@@ -31,9 +31,7 @@ impl FastFieldsWriter {
             _ => 0u64,
         };
         match *field_entry.field_type() {
-            FieldType::I64(ref int_options)
-            | FieldType::U64(ref int_options)
-            | FieldType::F64(ref int_options) => {
+            FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => {
                 match int_options.get_fastfield_cardinality() {
                     Some(Cardinality::SingleValue) => {
                         let mut fast_field_writer = IntFastFieldWriter::new(field);
@@ -761,6 +761,7 @@ mod tests {
     use crate::Index;
     use crate::ReloadPolicy;
     use crate::Term;
+    use fail;
 
     #[test]
     fn test_operations_group() {
src/lib.rs (21 changed lines)
@@ -1,9 +1,9 @@
 #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
-#![recursion_limit = "100"]
 #![cfg_attr(all(feature = "unstable", test), feature(test))]
 #![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
 #![doc(test(attr(allow(unused_variables), deny(warnings))))]
 #![warn(missing_docs)]
+#![recursion_limit = "80"]
 
 //! # `tantivy`
 //!
@@ -12,7 +12,7 @@
 //!
 //! ```rust
-//! # extern crate tempfile;
+//! # extern crate tempdir;
 //! #
 //! #[macro_use]
 //! extern crate tantivy;
@@ -20,7 +20,7 @@
 //! // ...
 //!
 //! # use std::path::Path;
-//! # use tempfile::TempDir;
+//! # use tempdir::TempDir;
 //! # use tantivy::Index;
 //! # use tantivy::schema::*;
 //! # use tantivy::{Score, DocAddress};
@@ -30,7 +30,7 @@
 //! # fn main() {
 //! # // Let's create a temporary directory for the
 //! # // sake of this example
-//! # if let Ok(dir) = TempDir::new() {
+//! # if let Ok(dir) = TempDir::new("tantivy_example_dir") {
 //! #     run_example(dir.path()).unwrap();
 //! #     dir.close().unwrap();
 //! # }
@@ -249,6 +249,7 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
 
 #[cfg(test)]
 mod tests {
 
     use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
     use crate::core::SegmentReader;
     use crate::docset::DocSet;
@@ -894,15 +895,3 @@ mod tests {
         }
     }
 }
-
-#[cfg(feature = "forbench")]
-pub mod forbench {
-    pub mod compression {
-        pub use crate::postings::compression::*;
-    }
-
-    pub mod bitset {
-        pub use crate::common::BitSet;
-        pub use crate::common::TinySet;
-    }
-}
@@ -160,9 +160,9 @@ impl VIntEncoder for BlockEncoder {
 }
 
 impl VIntDecoder for BlockDecoder {
-    fn uncompress_vint_sorted(
+    fn uncompress_vint_sorted<'a>(
         &mut self,
-        compressed_data: &[u8],
+        compressed_data: &'a [u8],
         offset: u32,
         num_els: usize,
     ) -> usize {
@@ -170,7 +170,7 @@ impl VIntDecoder for BlockDecoder {
         vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
     }
 
-    fn uncompress_vint_unsorted(&mut self, compressed_data: &[u8], num_els: usize) -> usize {
+    fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize {
         self.output_len = num_els;
         vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
     }
@@ -268,17 +268,78 @@ pub mod tests {
             }
         }
     }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use super::*;
+    use rand::SeedableRng;
+    use rand::{Rng, XorShiftRng};
+    use test::Bencher;
+
+    fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
+        let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val];
+        let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
+        (0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
+    }
+
+    pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
+        generate_array_with_seed(n, ratio, 4)
+    }
+
+    #[bench]
+    fn bench_compress(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        b.iter(|| {
+            encoder.compress_block_sorted(&data, 0u32);
+        });
+    }
+
+    #[bench]
+    fn bench_uncompress(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
+        let mut decoder = BlockDecoder::new();
+        b.iter(|| {
+            decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
+        });
+    }
+
     #[test]
     fn test_all_docs_compression_numbits() {
-        for expected_num_bits in 0u8..33u8 {
+        for expected_num_bits in 0u8.. {
             let mut data = [0u32; 128];
             if expected_num_bits > 0 {
-                data[0] = (1u64 << (expected_num_bits as u64) - 1u64) as u32;
+                data[0] = (1u64 << (expected_num_bits as usize) - 1) as u32;
             }
             let mut encoder = BlockEncoder::new();
             let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
             assert_eq!(compressed.len(), compressed_block_size(num_bits));
         }
     }
+
+    const NUM_INTS_BENCH_VINT: usize = 10;
+
+    #[bench]
+    fn bench_compress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        b.iter(|| {
+            encoder.compress_vint_sorted(&data, 0u32);
+        });
+    }
+
+    #[bench]
+    fn bench_uncompress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let compressed = encoder.compress_vint_sorted(&data, 0u32);
+        let mut decoder = BlockDecoder::new();
+        b.iter(|| {
+            decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
+        });
+    }
 }
@@ -3,7 +3,6 @@ Postings module (also called inverted index)
 */
 
 mod block_search;
-
 pub(crate) mod compression;
 /// Postings module
 ///
@@ -218,3 +218,49 @@ mod tests {
     }
 
 }
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use super::BitSet;
+    use super::BitSetDocSet;
+    use test;
+    use tests;
+    use DocSet;
+
+    #[bench]
+    fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
+        use tests;
+        let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
+        b.iter(|| {
+            let mut bitset = BitSet::with_max_value(1_000_000);
+            for el in els.iter().cloned() {
+                bitset.insert(el);
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
+        use tests;
+        let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in els {
+            bitset.insert(el);
+        }
+        b.iter(|| bitset.clone());
+    }
+
+    #[bench]
+    fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
+        let els = tests::sample(1_000_000u32, 0.01);
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in els {
+            bitset.insert(el);
+        }
+        b.iter(|| {
+            let mut docset = BitSetDocSet::from(bitset.clone());
+            while docset.advance() {}
+        });
+    }
+}
@@ -1,4 +1,3 @@
-use crate::error::TantivyError::InvalidArgument;
 use crate::query::{AutomatonWeight, Query, Weight};
 use crate::schema::Term;
 use crate::Result;
@@ -6,16 +5,11 @@ use crate::Searcher;
 use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
 use once_cell::sync::Lazy;
 use std::collections::HashMap;
-use std::ops::Range;
-
-/// A range of Levenshtein distances that we will build DFAs for our terms
-/// The computation is exponential, so best keep it to low single digits
-const VALID_LEVENSHTEIN_DISTANCE_RANGE: Range<u8> = (0..3);
 
 static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Lazy::new(|| {
     let mut lev_builder_cache = HashMap::new();
     // TODO make population lazy on a `(distance, val)` basis
-    for distance in VALID_LEVENSHTEIN_DISTANCE_RANGE {
+    for distance in 0..3 {
         for &transposition in &[false, true] {
             let lev_automaton_builder = LevenshteinAutomatonBuilder::new(distance, transposition);
             lev_builder_cache.insert((distance, transposition), lev_automaton_builder);
@@ -106,18 +100,10 @@ impl FuzzyTermQuery {
     }
 
     fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
-        // LEV_BUILDER is a HashMap, whose `get` method returns an Option
-        match LEV_BUILDER.get(&(self.distance, false)) {
-            // Unwrap the option and build the Ok(AutomatonWeight)
-            Some(automaton_builder) => {
-                let automaton = automaton_builder.build_dfa(self.term.text());
-                Ok(AutomatonWeight::new(self.term.field(), automaton))
-            }
-            None => Err(InvalidArgument(format!(
-                "Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
-                self.distance, VALID_LEVENSHTEIN_DISTANCE_RANGE
-            ))),
-        }
+        let automaton = LEV_BUILDER.get(&(self.distance, false))
+            .unwrap() // TODO return an error
+            .build_dfa(self.term.text());
+        Ok(AutomatonWeight::new(self.term.field(), automaton))
     }
 }
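Note: the removed side of the last hunk validates the requested distance against the same 0..3 range that LEV_BUILDER is populated for, instead of unwrapping. A self-contained sketch of that lookup-or-error pattern; `Builder` and `Dfa` are stand-ins for the levenshtein_automata types:

use std::collections::HashMap;
use std::ops::Range;

const VALID_DISTANCES: Range<u8> = 0..3;

struct Builder(u8);
struct Dfa;

impl Builder {
    fn build_dfa(&self, _text: &str) -> Dfa {
        Dfa
    }
}

fn specialized_weight(
    cache: &HashMap<(u8, bool), Builder>,
    distance: u8,
    text: &str,
) -> Result<Dfa, String> {
    match cache.get(&(distance, false)) {
        Some(builder) => Ok(builder.build_dfa(text)),
        None => Err(format!(
            "Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
            distance, VALID_DISTANCES
        )),
    }
}

fn main() {
    // Populate the cache for the valid distances only, as LEV_BUILDER does.
    let cache: HashMap<(u8, bool), Builder> =
        VALID_DISTANCES.map(|d| ((d, false), Builder(d))).collect();
    assert!(specialized_weight(&cache, 1, "tantivy").is_ok());
    assert!(specialized_weight(&cache, 5, "tantivy").is_err());
}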
@@ -18,6 +18,7 @@ pub enum LogicalLiteral {
     All,
 }
 
+#[derive(Clone)]
 pub enum LogicalAST {
     Clause(Vec<(Occur, LogicalAST)>),
     Leaf(Box<LogicalLiteral>),
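Note: both versions of the parser below fold operator chains like `a AND b OR c` into disjunctive normal form: a list of OR-separated groups, each group a list of AND-ed sub-queries. A sketch of that aggregation with strings standing in for UserInputAST leaves:

#[derive(Clone, Copy)]
enum BinaryOperand {
    Or,
    And,
}

fn aggregate(left: &str, others: Vec<(BinaryOperand, &str)>) -> Vec<Vec<String>> {
    let mut dnf: Vec<Vec<String>> = vec![vec![left.to_string()]];
    for (op, operand) in others {
        match op {
            // AND extends the current conjunction...
            BinaryOperand::And => dnf.last_mut().unwrap().push(operand.to_string()),
            // ...while OR starts a new one.
            BinaryOperand::Or => dnf.push(vec![operand.to_string()]),
        }
    }
    dnf
}

fn main() {
    // "a AND b OR c" parses as (a AND b) OR c.
    let dnf = aggregate("a", vec![(BinaryOperand::And, "b"), (BinaryOperand::Or, "c")]);
    assert_eq!(dnf, vec![vec!["a".to_string(), "b".to_string()], vec!["c".to_string()]]);
}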
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use super::query_grammar;
|
||||||
use super::user_input_ast::*;
|
use super::user_input_ast::*;
|
||||||
use crate::query::occur::Occur;
|
use crate::query::occur::Occur;
|
||||||
use crate::query::query_parser::user_input_ast::UserInputBound;
|
use crate::query::query_parser::user_input_ast::UserInputBound;
|
||||||
@@ -12,25 +13,22 @@ parser! {
|
|||||||
(
|
(
|
||||||
letter(),
|
letter(),
|
||||||
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
|
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
|
||||||
).skip(char(':')).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parser! {
|
parser! {
|
||||||
fn word[I]()(I) -> String
|
fn word[I]()(I) -> String
|
||||||
where [I: Stream<Item = char>] {
|
where [I: Stream<Item = char>] {
|
||||||
(
|
many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
|
||||||
satisfy(|c: char| !c.is_whitespace() && !['-', '`', ':', '{', '}', '"', '[', ']', '(',')'].contains(&c) ),
|
.and_then(|s: String| {
|
||||||
many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c)))
|
match s.as_str() {
|
||||||
)
|
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
|
||||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
"AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
|
||||||
.and_then(|s: String|
|
"NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
|
||||||
match s.as_str() {
|
_ => Ok(s)
|
||||||
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
|
}
|
||||||
"AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
|
})
|
||||||
"NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
|
|
||||||
_ => Ok(s)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,13 +37,12 @@ parser! {
|
|||||||
where [I: Stream<Item = char>]
|
where [I: Stream<Item = char>]
|
||||||
{
|
{
|
||||||
let term_val = || {
|
let term_val = || {
|
||||||
let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
|
let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
|
||||||
phrase.or(word())
|
phrase.or(word())
|
||||||
};
|
};
|
||||||
let term_val_with_field = negative_number().or(term_val());
|
let term_val_with_field = negative_number().or(term_val());
|
||||||
let term_query =
|
let term_query =
|
||||||
(field(), term_val_with_field)
|
(field(), char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral {
|
||||||
.map(|(field_name, phrase)| UserInputLiteral {
|
|
||||||
field_name: Some(field_name),
|
field_name: Some(field_name),
|
||||||
phrase,
|
phrase,
|
||||||
});
|
});
|
||||||
@@ -63,15 +60,8 @@ parser! {
|
|||||||
fn negative_number[I]()(I) -> String
|
fn negative_number[I]()(I) -> String
|
||||||
where [I: Stream<Item = char>]
|
where [I: Stream<Item = char>]
|
||||||
{
|
{
|
||||||
(char('-'), many1(satisfy(char::is_numeric)),
|
(char('-'), many1(satisfy(char::is_numeric)))
|
||||||
optional((char('.'), many1(satisfy(char::is_numeric)))))
|
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||||
.map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
|
|
||||||
if let Some(('.', s3)) = s3 {
|
|
||||||
format!("{}{}.{}", s1, s2, s3)
|
|
||||||
} else {
|
|
||||||
format!("{}{}", s1, s2)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,23 +75,27 @@ parser! {
|
|||||||
parser! {
|
parser! {
|
||||||
fn range[I]()(I) -> UserInputLeaf
|
fn range[I]()(I) -> UserInputLeaf
|
||||||
where [I: Stream<Item = char>] {
|
where [I: Stream<Item = char>] {
|
||||||
let range_term_val = || {
|
let term_val = || {
|
||||||
word().or(negative_number()).or(char('*').with(value("*".to_string())))
|
word().or(negative_number()).or(char('*').map(|_| "*".to_string()))
|
||||||
|
};
|
||||||
|
let lower_bound = {
|
||||||
|
let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w));
|
||||||
|
let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w));
|
||||||
|
attempt(excl).or(incl)
|
||||||
|
};
|
||||||
|
let upper_bound = {
|
||||||
|
let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w));
|
||||||
|
let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w));
|
||||||
|
attempt(excl).or(incl)
|
||||||
};
|
};
|
||||||
let lower_bound = (one_of("{[".chars()), range_term_val())
|
|
||||||
.map(|(boundary_char, lower_bound): (char, String)|
|
|
||||||
if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
|
|
||||||
else { UserInputBound::Inclusive(lower_bound) });
|
|
||||||
let upper_bound = (range_term_val(), one_of("}]".chars()))
|
|
||||||
.map(|(higher_bound, boundary_char): (String, char)|
|
|
||||||
if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
|
|
||||||
else { UserInputBound::Inclusive(higher_bound) });
|
|
||||||
(
|
(
|
||||||
optional(field()),
|
optional((field(), char(':')).map(|x| x.0)),
|
||||||
lower_bound
|
lower_bound,
|
||||||
.skip((spaces(), string("TO"), spaces())),
|
spaces(),
|
||||||
|
string("TO"),
|
||||||
|
spaces(),
|
||||||
upper_bound,
|
upper_bound,
|
||||||
).map(|(field, lower, upper)| UserInputLeaf::Range {
|
).map(|(field, lower, _, _, _, upper)| UserInputLeaf::Range {
|
||||||
field,
|
field,
|
||||||
lower,
|
lower,
|
||||||
upper
|
upper
|
||||||
@@ -109,28 +103,25 @@ parser! {
     }
 }

-fn negate(expr: UserInputAST) -> UserInputAST {
-    expr.unary(Occur::MustNot)
-}
-
-fn must(expr: UserInputAST) -> UserInputAST {
-    expr.unary(Occur::Must)
-}
-
 parser! {
     fn leaf[I]()(I) -> UserInputAST
     where [I: Stream<Item = char>] {
-        char('-').with(leaf()).map(negate)
-            .or(char('+').with(leaf()).map(must))
-            .or(char('(').with(ast()).skip(char(')')))
-            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
-            .or(attempt(string("NOT").skip(spaces1()).with(leaf()).map(negate)))
-            .or(attempt(range().map(UserInputAST::from)))
-            .or(literal().map(UserInputAST::from))
+        (char('-'), leaf()).map(|(_, expr)| expr.unary(Occur::MustNot) )
+            .or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) ))
+            .or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr))
+            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) ))
+            .or(attempt(
+                (string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot))
+                )
+            )
+            .or(attempt(
+                range().map(UserInputAST::from)
+                )
+            )
+            .or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf))))
     }
 }

-#[derive(Clone, Copy)]
 enum BinaryOperand {
     Or,
     And,
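
Note: the rewrite above inlines the `negate`/`must` helpers as closures; the prefix semantics are unchanged. A self-contained sketch, with `Occur` standing in for tantivy's enum of the same name:

// Sketch only: '-' wraps the following leaf in MustNot, '+' in Must.
#[derive(Debug, PartialEq)]
enum Occur {
    Must,
    MustNot,
}

fn prefix_occur(prefix: char) -> Option<Occur> {
    match prefix {
        '+' => Some(Occur::Must),    // "+abc": clause must match
        '-' => Some(Occur::MustNot), // "-abc": clause must not match
        _ => None,
    }
}

fn main() {
    assert_eq!(prefix_occur('-'), Some(Occur::MustNot));
    assert_eq!(prefix_occur('+'), Some(Occur::Must));
    assert_eq!(prefix_occur('a'), None);
}
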
@@ -138,62 +129,84 @@ enum BinaryOperand {

 parser! {
     fn binary_operand[I]()(I) -> BinaryOperand
-    where [I: Stream<Item = char>]
-    {
-        string("AND").with(value(BinaryOperand::And))
-            .or(string("OR").with(value(BinaryOperand::Or)))
+    where [I: Stream<Item = char>] {
+        (spaces1(),
+            (
+                string("AND").map(|_| BinaryOperand::And)
+                    .or(string("OR").map(|_| BinaryOperand::Or))
+            ),
+        spaces1()).map(|(_, op,_)| op)
     }
 }

-fn aggregate_binary_expressions(
-    left: UserInputAST,
-    others: Vec<(BinaryOperand, UserInputAST)>,
-) -> UserInputAST {
-    let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
-    for (operator, operand_ast) in others {
-        match operator {
-            BinaryOperand::And => {
-                if let Some(last) = dnf.last_mut() {
-                    last.push(operand_ast);
-                }
-            }
-            BinaryOperand::Or => {
-                dnf.push(vec![operand_ast]);
-            }
+enum Element {
+    SingleEl(UserInputAST),
+    NormalDisjunctive(Vec<Vec<UserInputAST>>),
+}
+
+impl Element {
+    pub fn into_dnf(self) -> Vec<Vec<UserInputAST>> {
+        match self {
+            Element::NormalDisjunctive(conjunctions) => conjunctions,
+            Element::SingleEl(el) => vec![vec![el]],
         }
     }
-    if dnf.len() == 1 {
-        UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
-    } else {
-        let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
-        UserInputAST::or(conjunctions)
-    }
-}

-parser! {
-    pub fn ast[I]()(I) -> UserInputAST
-    where [I: Stream<Item = char>]
-    {
-        let operand_leaf = (binary_operand().skip(spaces()), leaf().skip(spaces()));
-        let boolean_expr = (leaf().skip(spaces().silent()), many1(operand_leaf)).map(
-            |(left, right)| aggregate_binary_expressions(left,right));
-        let whitespace_separated_leaves = many1(leaf().skip(spaces().silent()))
-            .map(|subqueries: Vec<UserInputAST>|
-                if subqueries.len() == 1 {
-                    subqueries.into_iter().next().unwrap()
-                } else {
-                    UserInputAST::Clause(subqueries.into_iter().collect())
-                });
-        let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
-        spaces().with(expr).skip(spaces())
-    }
 }

 parser! {
     pub fn parse_to_ast[I]()(I) -> UserInputAST
     where [I: Stream<Item = char>]
     {
-        spaces().with(optional(ast()).skip(eof())).map(|opt_ast| opt_ast.unwrap_or_else(UserInputAST::empty_query))
+        (
+            attempt(
+                chainl1(
+                    leaf().map(Element::SingleEl),
+                    binary_operand().map(|op: BinaryOperand|
+                        move |left: Element, right: Element| {
+                            let mut dnf = left.into_dnf();
+                            if let Element::SingleEl(el) = right {
+                                match op {
+                                    BinaryOperand::And => {
+                                        if let Some(last) = dnf.last_mut() {
+                                            last.push(el);
+                                        }
+                                    }
+                                    BinaryOperand::Or => {
+                                        dnf.push(vec!(el));
+                                    }
+                                }
+                            } else {
+                                unreachable!("Please report.")
+                            }
+                            Element::NormalDisjunctive(dnf)
+                        }
+                    )
+                )
+                .map(query_grammar::Element::into_dnf)
+                .map(|fnd| {
+                    if fnd.len() == 1 {
+                        UserInputAST::and(fnd.into_iter().next().unwrap()) //< safe
+                    } else {
+                        let conjunctions = fnd
+                            .into_iter()
+                            .map(UserInputAST::and)
+                            .collect();
+                        UserInputAST::or(conjunctions)
+                    }
+                })
+            )
+            .or(
+                sep_by(leaf(), spaces())
+                    .map(|subqueries: Vec<UserInputAST>| {
+                        if subqueries.len() == 1 {
+                            subqueries.into_iter().next().unwrap()
+                        } else {
+                            UserInputAST::Clause(subqueries.into_iter().collect())
+                        }
+                    })
+            )
+        )

     }
 }

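
Note: the removed `aggregate_binary_expressions` and the added `chainl1` closure implement the same fold of an AND/OR operand chain into disjunctive normal form: AND extends the current conjunction, OR opens a new one. A standalone sketch with strings in place of AST leaves:

#[derive(Clone, Copy)]
enum Op {
    And,
    Or,
}

// Fold "first op1 leaf1 op2 leaf2 ..." into a list of conjunctions (DNF).
fn fold_dnf(first: &str, rest: &[(Op, &str)]) -> Vec<Vec<String>> {
    let mut dnf = vec![vec![first.to_string()]];
    for &(op, leaf) in rest {
        match op {
            Op::And => dnf.last_mut().unwrap().push(leaf.to_string()), // extend conjunction
            Op::Or => dnf.push(vec![leaf.to_string()]),                // start a new one
        }
    }
    dnf
}

fn main() {
    // "a AND b OR c" folds to (a AND b) OR (c).
    let dnf = fold_dnf("a", &[(Op::And, "b"), (Op::Or, "c")]);
    assert_eq!(
        dnf,
        vec![vec!["a".to_string(), "b".to_string()], vec!["c".to_string()]]
    );
}
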
@@ -212,18 +225,6 @@ mod test {
         assert!(parse_to_ast().parse(query).is_err());
     }

-    #[test]
-    fn test_parse_empty_to_ast() {
-        test_parse_query_to_ast_helper("", "<emptyclause>");
-    }
-
-    #[test]
-    fn test_parse_query_to_ast_hyphen() {
-        test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
-        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
-        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
-    }
-
     #[test]
     fn test_parse_query_to_ast_not_op() {
         assert_eq!(
@@ -258,24 +259,8 @@ mod test {
         );
     }

-    #[test]
-    fn test_parse_query_to_triming_spaces() {
-        test_parse_query_to_ast_helper(" abc", "\"abc\"");
-        test_parse_query_to_ast_helper("abc ", "\"abc\"");
-        test_parse_query_to_ast_helper("( a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("a OR abc ", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc )", "(?(\"a\") ?(\"abc\"))");
-        test_parse_query_to_ast_helper("(a OR abc) ", "(?(\"a\") ?(\"abc\"))");
-    }
-
     #[test]
     fn test_parse_query_to_ast() {
-        test_parse_query_to_ast_helper("abc", "\"abc\"");
-        test_parse_query_to_ast_helper("a b", "(\"a\" \"b\")");
-        test_parse_query_to_ast_helper("+(a b)", "+((\"a\" \"b\"))");
-        test_parse_query_to_ast_helper("+d", "+(\"d\")");
         test_parse_query_to_ast_helper("+(a b) +d", "(+((\"a\" \"b\")) +(\"d\"))");
         test_parse_query_to_ast_helper("(+a +b) d", "((+(\"a\") +(\"b\")) \"d\")");
         test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
@@ -690,7 +690,7 @@ mod test {
     }

     #[test]
-    pub fn test_parse_query_to_ast_single_term() {
+    pub fn test_parse_query_to_ast_disjunction() {
         test_parse_query_to_logical_ast_helper(
             "title:toto",
             "Term([0, 0, 0, 0, 116, 111, 116, 111])",
@@ -714,10 +714,6 @@ mod test {
             .unwrap(),
             QueryParserError::AllButQueryForbidden
         );
-    }
-
-    #[test]
-    pub fn test_parse_query_to_ast_two_terms() {
         test_parse_query_to_logical_ast_helper(
             "title:a b",
             "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \
@@ -730,10 +726,6 @@ mod test {
             (1, Term([0, 0, 0, 0, 98]))]\"",
             false,
         );
-    }
-
-    #[test]
-    pub fn test_parse_query_to_ast_ranges() {
         test_parse_query_to_logical_ast_helper(
             "title:[a TO b]",
             "(Included(Term([0, 0, 0, 0, 97])) TO \
@@ -765,19 +757,6 @@ mod test {
             "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO Unbounded)",
             false,
         );
-        test_parse_query_to_logical_ast_helper(
-            "signed:{-5 TO 3}",
-            "(Excluded(Term([0, 0, 0, 2, 127, 255, 255, 255, 255, 255, 255, 251])) TO \
-             Excluded(Term([0, 0, 0, 2, 128, 0, 0, 0, 0, 0, 0, 3])))",
-            false,
-        );
-        test_parse_query_to_logical_ast_helper(
-            "float:{-1.5 TO 1.5}",
-            "(Excluded(Term([0, 0, 0, 10, 64, 7, 255, 255, 255, 255, 255, 255])) TO \
-             Excluded(Term([0, 0, 0, 10, 191, 248, 0, 0, 0, 0, 0, 0])))",
-            false,
-        );
-
         test_parse_query_to_logical_ast_helper("*", "*", false);
     }

@@ -914,15 +893,4 @@ mod test {
             true,
         );
     }
-
-    #[test]
-    pub fn test_query_parser_hyphen() {
-        test_parse_query_to_logical_ast_helper(
-            "title:www-form-encoded",
-            "\"[(0, Term([0, 0, 0, 0, 119, 119, 119])), \
-             (1, Term([0, 0, 0, 0, 102, 111, 114, 109])), \
-             (2, Term([0, 0, 0, 0, 101, 110, 99, 111, 100, 101, 100]))]\"",
-            false,
-        );
-    }
 }
src/query/query_parser/stemmer.rs (new file, 44 lines)
@@ -0,0 +1,44 @@
+use std::sync::Arc;
+use stemmer;
+
+
+pub struct StemmerTokenStream<TailTokenStream>
+    where TailTokenStream: TokenStream {
+    tail: TailTokenStream,
+    stemmer: Arc<stemmer::Stemmer>,
+}
+
+impl<TailTokenStream> TokenStream for StemmerTokenStream<TailTokenStream>
+    where TailTokenStream: TokenStream {
+
+    fn token(&self) -> &Token {
+        self.tail.token()
+    }
+
+    fn token_mut(&mut self) -> &mut Token {
+        self.tail.token_mut()
+    }
+
+    fn advance(&mut self) -> bool {
+        if self.tail.advance() {
+            // self.tail.token_mut().term.make_ascii_lowercase();
+            let new_str = self.stemmer.stem_str(&self.token().term);
+            true
+        }
+        else {
+            false
+        }
+    }
+
+}
+
+impl<TailTokenStream> StemmerTokenStream<TailTokenStream>
+    where TailTokenStream: TokenStream {
+
+    fn wrap(stemmer: Arc<stemmer::Stemmer>, tail: TailTokenStream) -> StemmerTokenStream<TailTokenStream> {
+        StemmerTokenStream {
+            tail,
+            stemmer,
+        }
+    }
+}
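
Note: as committed, `advance` computes `new_str` and then discards it, so the stemmed form never reaches the token. A write-back would presumably look like the sketch below; `Token` here is a simplified stand-in with just a `term: String` field, and the closure stands in for the Snowball stemmer:

// Simplified stand-in for tantivy's Token.
struct Token {
    term: String,
}

// Sketch: replace the token's term with its stemmed form; the real stream
// would do this from advance() using the wrapped stemmer.
fn stem_in_place<F: Fn(&str) -> String>(token: &mut Token, stem: F) {
    token.term = stem(&token.term);
}

fn main() {
    let mut token = Token { term: "encoded".to_string() };
    // A toy "stemmer" that trims a trailing "ed"; real stemming is smarter.
    stem_in_place(&mut token, |s| s.trim_end_matches("ed").to_string());
    assert_eq!(token.term, "encod");
}
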
@@ -80,6 +80,9 @@ impl UserInputBound {
 pub enum UserInputAST {
     Clause(Vec<UserInputAST>),
     Unary(Occur, Box<UserInputAST>),
+    // Not(Box<UserInputAST>),
+    // Should(Box<UserInputAST>),
+    // Must(Box<UserInputAST>),
     Leaf(Box<UserInputLeaf>),
 }

@@ -89,7 +92,7 @@ impl UserInputAST {
     }

     fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
-        assert_ne!(occur, Occur::MustNot);
+        assert!(occur != Occur::MustNot);
         assert!(!asts.is_empty());
         if asts.len() == 1 {
             asts.into_iter().next().unwrap() //< safe
@@ -102,10 +105,6 @@ impl UserInputAST {
         }
     }

-    pub fn empty_query() -> UserInputAST {
-        UserInputAST::Clause(Vec::default())
-    }
-
     pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
         UserInputAST::compose(Occur::Must, asts)
     }
@@ -115,6 +114,42 @@ impl UserInputAST {
     }
 }

+/*
+impl UserInputAST {
+
+    fn compose_occur(self, occur: Occur) -> UserInputAST {
+        match self {
+            UserInputAST::Not(other) => {
+                let new_occur = compose_occur(Occur::MustNot, occur);
+                other.simplify()
+            }
+            _ => {
+                self
+            }
+        }
+    }
+
+    pub fn simplify(self) -> UserInputAST {
+        match self {
+            UserInputAST::Clause(els) => {
+                if els.len() == 1 {
+                    return els.into_iter().next().unwrap();
+                } else {
+                    return self;
+                }
+            }
+            UserInputAST::Not(els) => {
+                if els.len() == 1 {
+                    return els.into_iter().next().unwrap();
+                } else {
+                    return self;
+                }
+            }
+        }
+    }
+}
+*/

 impl From<UserInputLiteral> for UserInputLeaf {
     fn from(literal: UserInputLiteral) -> UserInputLeaf {
         UserInputLeaf::Literal(literal)
@@ -460,10 +460,7 @@ mod tests {
         let count_multiples =
             |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();

-        assert_eq!(
-            count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)),
-            9
-        );
+        assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9);
         assert_eq!(
             count_multiples(RangeQuery::new_f64_bounds(
                 float_field,
@@ -411,3 +411,52 @@ mod tests {
     }

 }

+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use query::score_combiner::DoNothingCombiner;
+    use query::ConstScorer;
+    use query::Union;
+    use query::VecDocSet;
+    use test::Bencher;
+    use tests;
+    use DocId;
+    use DocSet;
+
+    #[bench]
+    fn bench_union_3_high(bench: &mut Bencher) {
+        let union_docset: Vec<Vec<DocId>> = vec![
+            tests::sample_with_seed(100_000, 0.1, 0),
+            tests::sample_with_seed(100_000, 0.2, 1),
+        ];
+        bench.iter(|| {
+            let mut v = Union::<_, DoNothingCombiner>::from(
+                union_docset
+                    .iter()
+                    .map(|doc_ids| VecDocSet::from(doc_ids.clone()))
+                    .map(ConstScorer::new)
+                    .collect::<Vec<_>>(),
+            );
+            while v.advance() {}
+        });
+    }
+    #[bench]
+    fn bench_union_3_low(bench: &mut Bencher) {
+        let union_docset: Vec<Vec<DocId>> = vec![
+            tests::sample_with_seed(100_000, 0.01, 0),
+            tests::sample_with_seed(100_000, 0.05, 1),
+            tests::sample_with_seed(100_000, 0.001, 2),
+        ];
+        bench.iter(|| {
+            let mut v = Union::<_, DoNothingCombiner>::from(
+                union_docset
+                    .iter()
+                    .map(|doc_ids| VecDocSet::from(doc_ids.clone()))
+                    .map(ConstScorer::new)
+                    .collect::<Vec<_>>(),
+            );
+            while v.advance() {}
+        });
+    }
+}
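
Note: these benchmarks use `test::Bencher`, which only exists on nightly Rust; that is why the module is gated behind the `unstable` feature. The usual crate-root gating for this pattern looks roughly like the sketch below (assumed, not shown in this diff):

// Sketch: enable the nightly-only bench harness when the `unstable`
// feature is on; stable builds compile the crate without it.
#![cfg_attr(feature = "unstable", feature(test))]

#[cfg(feature = "unstable")]
extern crate test;
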
@@ -120,7 +120,9 @@ impl Facet {

     /// Extract path from the `Facet`.
     pub fn to_path(&self) -> Vec<&str> {
-        self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
+        self.encoded_str()
+            .split(|c| c == FACET_SEP_CHAR)
+            .collect()
     }
 }

@@ -108,9 +108,7 @@ impl FieldEntry {
     /// Returns true iff the field is a int (signed or unsigned) fast field
     pub fn is_int_fast(&self) -> bool {
         match self.field_type {
-            FieldType::U64(ref options)
-            | FieldType::I64(ref options)
-            | FieldType::F64(ref options) => options.is_fast(),
+            FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) => options.is_fast(),
             _ => false,
         }
     }
@@ -83,9 +83,9 @@ impl FieldType {
     pub fn is_indexed(&self) -> bool {
         match *self {
             FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(),
-            FieldType::U64(ref int_options)
-            | FieldType::I64(ref int_options)
-            | FieldType::F64(ref int_options) => int_options.is_indexed(),
+            FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => {
+                int_options.is_indexed()
+            }
             FieldType::Date(ref date_options) => date_options.is_indexed(),
             FieldType::HierarchicalFacet => true,
             FieldType::Bytes => false,
@@ -125,12 +125,9 @@ impl FieldType {
         match *json {
             JsonValue::String(ref field_text) => match *self {
                 FieldType::Str(_) => Ok(Value::Str(field_text.clone())),
-                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {
-                    Err(ValueParsingError::TypeError(format!(
-                        "Expected an integer, got {:?}",
-                        json
-                    )))
-                }
+                FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => Err(
+                    ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)),
+                ),
                 FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))),
                 FieldType::Bytes => decode(field_text).map(Value::Bytes).map_err(|_| {
                     ValueParsingError::InvalidBase64(format!(
@@ -155,7 +152,7 @@ impl FieldType {
                     let msg = format!("Expected a u64 int, got {:?}", json);
                     Err(ValueParsingError::OverflowError(msg))
                 }
-            }
+            },
             FieldType::F64(_) => {
                 if let Some(field_val_f64) = field_val_num.as_f64() {
                     Ok(Value::F64(field_val_f64))
@@ -261,6 +261,24 @@ impl Schema {
         NamedFieldDocument(field_map)
     }

+    /// Converts a named doc into a document.
+    pub fn from_named_doc(
+        &self,
+        named_doc: NamedFieldDocument,
+    ) -> Result<Document, DocParsingError> {
+        let mut doc = Document::default();
+        for (field_name, field_values) in named_doc.0 {
+            if let Some(field) = self.get_field(&field_name) {
+                for field_value in field_values {
+                    doc.add(FieldValue::new(field, field_value));
+                }
+            } else {
+                return Err(DocParsingError::NoSuchFieldInSchema(field_name.clone()));
+            }
+        }
+        Ok(doc)
+    }
+
     /// Encode the schema in JSON.
     ///
     /// Encoding a document cannot fail.
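
Note: the new `from_named_doc` is the inverse of `to_named_doc`: each field name is resolved through the schema, and an unknown name fails the whole conversion. A simplified, self-contained model of that logic (maps and strings stand in for the schema and field values):

use std::collections::BTreeMap;

// Field names resolve to field ids; a "named doc" maps names to value lists.
fn from_named_doc(
    schema: &BTreeMap<String, u32>,
    named_doc: BTreeMap<String, Vec<String>>,
) -> Result<Vec<(u32, String)>, String> {
    let mut doc = Vec::new();
    for (field_name, field_values) in named_doc {
        let field = *schema
            .get(&field_name)
            .ok_or_else(|| format!("no field {:?} in schema", field_name))?;
        for value in field_values {
            doc.push((field, value));
        }
    }
    Ok(doc)
}

fn main() {
    let schema: BTreeMap<String, u32> = [("title".to_string(), 0)].into_iter().collect();
    let named: BTreeMap<String, Vec<String>> =
        [("title".to_string(), vec!["hello".to_string()])].into_iter().collect();
    assert_eq!(from_named_doc(&schema, named).unwrap(), vec![(0, "hello".to_string())]);
}
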
@@ -279,7 +297,6 @@ impl Schema {
             };
             DocParsingError::NotJSON(doc_json_sample)
         })?;
-
         let mut doc = Document::default();
         for (field_name, json_value) in json_obj.iter() {
             match self.get_field(field_name) {
@@ -2,7 +2,7 @@ use crate::schema::Facet;
 use crate::DateTime;
 use serde::de::Visitor;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
-use std::{cmp::Ordering, fmt};
+use std::{fmt, cmp::Ordering};

 /// Value represents the value of a any field.
 /// It is an enum over all over all of the possible field type.
@@ -27,7 +27,7 @@ pub enum Value {
 impl Eq for Value {}
 impl Ord for Value {
     fn cmp(&self, other: &Self) -> Ordering {
-        match (self, other) {
+        match (self,other) {
             (Value::Str(l), Value::Str(r)) => l.cmp(r),
             (Value::U64(l), Value::U64(r)) => l.cmp(r),
             (Value::I64(l), Value::I64(r)) => l.cmp(r),
@@ -35,7 +35,7 @@ impl Ord for Value {
             (Value::Facet(l), Value::Facet(r)) => l.cmp(r),
             (Value::Bytes(l), Value::Bytes(r)) => l.cmp(r),
             (Value::F64(l), Value::F64(r)) => {
-                match (l.is_nan(), r.is_nan()) {
+                match (l.is_nan(),r.is_nan()) {
                     (false, false) => l.partial_cmp(r).unwrap(), // only fail on NaN
                     (true, true) => Ordering::Equal,
                     (true, false) => Ordering::Less, // we define NaN as less than -∞
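
Note: the `F64` arm above turns `partial_cmp` into a total order by pinning down the NaN cases: NaN sorts below every other value, including negative infinity. Extracted as a standalone function (the `Greater` arm falls outside the context shown, but is implied by symmetry):

use std::cmp::Ordering;

// Total order over f64: NaN == NaN and NaN < everything else (even -inf),
// so the order is total even though partial_cmp alone is not.
fn total_cmp_f64(l: f64, r: f64) -> Ordering {
    match (l.is_nan(), r.is_nan()) {
        (false, false) => l.partial_cmp(&r).unwrap(), // only fails on NaN
        (true, true) => Ordering::Equal,
        (true, false) => Ordering::Less,
        (false, true) => Ordering::Greater,
    }
}

fn main() {
    assert_eq!(total_cmp_f64(f64::NAN, f64::NEG_INFINITY), Ordering::Less);
    assert_eq!(total_cmp_f64(1.5, 1.0), Ordering::Greater);
    assert_eq!(total_cmp_f64(f64::NAN, f64::NAN), Ordering::Equal);
}
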
@@ -155,7 +155,7 @@ impl Value {
             Value::F64(ref value) => *value,
             _ => panic!("This is not a f64 field."),
         }
     }

     /// Returns the Date-value, provided the value is of the `Date` type.
     ///
@@ -219,7 +219,7 @@ impl From<Vec<u8>> for Value {

 mod binary_serialize {
     use super::Value;
-    use crate::common::{f64_to_u64, u64_to_f64, BinarySerializable};
+    use crate::common::{BinarySerializable, f64_to_u64, u64_to_f64};
     use crate::schema::Facet;
     use chrono::{TimeZone, Utc};
     use std::io::{self, Read, Write};
@@ -8,7 +8,7 @@ use tantivy::{Index, Term};

 #[test]
 fn test_failpoints_managed_directory_gc_if_delete_fails() {
-    let _scenario = fail::FailScenario::setup();
+    let scenario = fail::FailScenario::setup();

     let test_path: &'static Path = Path::new("some_path_for_test");
