mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 08:42:54 +00:00
Compare commits
28 Commits
githubacti
...
slog
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c81b8171f | ||
|
|
838c476733 | ||
|
|
5f574348d1 | ||
|
|
19a02b2c30 | ||
|
|
c339b05789 | ||
|
|
2d3c657f9d | ||
|
|
07f9b828ae | ||
|
|
70bae7ce4c | ||
|
|
ac2a7273e6 | ||
|
|
4ce9517a82 | ||
|
|
73024a8af3 | ||
|
|
e70e605fc3 | ||
|
|
439d6956a9 | ||
|
|
6530bf0eae | ||
|
|
151498cbe7 | ||
|
|
3a72b1cb98 | ||
|
|
2737822620 | ||
|
|
06c12ae221 | ||
|
|
4e4400af7f | ||
|
|
3f1ecf53ab | ||
|
|
0b583b8130 | ||
|
|
31d18dca1c | ||
|
|
5e06e7de5a | ||
|
|
8af53cbd36 | ||
|
|
4914076e8f | ||
|
|
e04f47e922 | ||
|
|
f355695581 | ||
|
|
cbacdf0de8 |
28
.github/workflows/ci.yml
vendored
28
.github/workflows/ci.yml
vendored
@@ -1,28 +0,0 @@
|
|||||||
name: Tantivy CI
|
|
||||||
|
|
||||||
on: [push]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
test:
|
|
||||||
name: Test Suite
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
profile: minimal
|
|
||||||
toolchain: stable
|
|
||||||
override: true
|
|
||||||
- uses: actions-rs/cargo@v1
|
|
||||||
with:
|
|
||||||
command: test
|
|
||||||
- uses: actions-rs/cargo@v1
|
|
||||||
with:
|
|
||||||
command: fmt
|
|
||||||
args: --all -- --check
|
|
||||||
- run: rustup component add clippy
|
|
||||||
- uses: actions-rs/cargo@v1
|
|
||||||
with:
|
|
||||||
command: clippy
|
|
||||||
args: -- -D warnings
|
|
||||||
|
|
||||||
66
.github/workflows/coveralls.yml
vendored
66
.github/workflows/coveralls.yml
vendored
@@ -1,66 +0,0 @@
|
|||||||
on: [push]
|
|
||||||
|
|
||||||
name: Code coverage with grcov
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
grcov:
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
os:
|
|
||||||
- ubuntu-latest
|
|
||||||
#- macOS-latest
|
|
||||||
#- windows-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Install toolchain
|
|
||||||
uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
toolchain: nightly
|
|
||||||
override: true
|
|
||||||
profile: minimal
|
|
||||||
|
|
||||||
- name: Execute tests
|
|
||||||
uses: actions-rs/cargo@v1
|
|
||||||
with:
|
|
||||||
command: test
|
|
||||||
args: --all --lib
|
|
||||||
env:
|
|
||||||
CARGO_INCREMENTAL: 0
|
|
||||||
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests"
|
|
||||||
|
|
||||||
# Note that `actions-rs/grcov` Action can install `grcov` too,
|
|
||||||
# but can't use faster installation methods yet.
|
|
||||||
# As a temporary experiment `actions-rs/install` Action plugged in here.
|
|
||||||
# Consider **NOT** to copy that into your workflow,
|
|
||||||
# but use `actions-rs/grcov` only
|
|
||||||
- name: Pre-installing grcov
|
|
||||||
uses: actions-rs/install@v0.1
|
|
||||||
with:
|
|
||||||
crate: grcov
|
|
||||||
use-tool-cache: true
|
|
||||||
|
|
||||||
- name: Gather coverage data
|
|
||||||
id: coverage
|
|
||||||
uses: actions-rs/grcov@v0.1
|
|
||||||
with:
|
|
||||||
coveralls-token: ${{ secrets.COVERALLS_TOKEN }}
|
|
||||||
|
|
||||||
- name: Coveralls upload
|
|
||||||
uses: coverallsapp/github-action@master
|
|
||||||
with:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
parallel: true
|
|
||||||
path-to-lcov: ${{ steps.coverage.outputs.report }}
|
|
||||||
|
|
||||||
grcov_finalize:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: grcov
|
|
||||||
steps:
|
|
||||||
- name: Coveralls finalization
|
|
||||||
uses: coverallsapp/github-action@master
|
|
||||||
with:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
parallel-finished: true
|
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -12,3 +12,4 @@ cpp/simdcomp/bitpackingbenchmark
|
|||||||
*.bk
|
*.bk
|
||||||
.idea
|
.idea
|
||||||
trace.dat
|
trace.dat
|
||||||
|
cargo-timing*
|
||||||
|
|||||||
11
CHANGELOG.md
11
CHANGELOG.md
@@ -1,3 +1,14 @@
|
|||||||
|
Tantivy 0.14.0
|
||||||
|
=========================
|
||||||
|
- Removed dependency on atomicwrites #833. (Implemented by @pmasurel upon suggestion and research from @asafigan.)
|
||||||
|
- Migrated tantivy error from the now deprecated `failure` crate to `thiserror` #760. (@hirevo)
|
||||||
|
- Switched to structured logging (via the `slog` crate). (@pmasurel)
|
||||||
|
|
||||||
|
Tantivy 0.13.1
|
||||||
|
===================
|
||||||
|
Made `Query` and `Collector` `Send + Sync`.
|
||||||
|
Updated misc dependency versions.
|
||||||
|
|
||||||
Tantivy 0.13.0
|
Tantivy 0.13.0
|
||||||
======================
|
======================
|
||||||
Tantivy 0.13 introduces a change in the index format that will require
|
Tantivy 0.13 introduces a change in the index format that will require
|
||||||
|
|||||||
45
Cargo.toml
45
Cargo.toml
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.13.0"
|
version = "0.14.0-dev"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
@@ -13,21 +13,21 @@ keywords = ["search", "information", "retrieval"]
|
|||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = "0.12.0"
|
base64 = "0.12"
|
||||||
byteorder = "1.0"
|
byteorder = "1"
|
||||||
crc32fast = "1.2.0"
|
crc32fast = "1"
|
||||||
once_cell = "1.0"
|
once_cell = "1"
|
||||||
regex ={version = "1.3.0", default-features = false, features = ["std"]}
|
regex ={version = "1", default-features = false, features = ["std"]}
|
||||||
tantivy-fst = "0.3"
|
tantivy-fst = "0.3"
|
||||||
memmap = {version = "0.7", optional=true}
|
memmap = {version = "0.7", optional=true}
|
||||||
lz4 = {version="1.20", optional=true}
|
lz4 = {version="1", optional=true}
|
||||||
snap = "1"
|
snap = "1"
|
||||||
atomicwrites = {version="0.2.2", optional=true}
|
tempfile = {version="3", optional=true}
|
||||||
tempfile = "3.0"
|
slog = "2.5"
|
||||||
log = "0.4"
|
slog-stdlog = "4"
|
||||||
serde = {version="1.0", features=["derive"]}
|
serde = {version="1", features=["derive"]}
|
||||||
serde_json = "1.0"
|
serde_json = "1"
|
||||||
num_cpus = "1.2"
|
num_cpus = "1"
|
||||||
fs2={version="0.4", optional=true}
|
fs2={version="0.4", optional=true}
|
||||||
levenshtein_automata = "0.2"
|
levenshtein_automata = "0.2"
|
||||||
notify = {version="4", optional=true}
|
notify = {version="4", optional=true}
|
||||||
@@ -35,20 +35,20 @@ uuid = { version = "0.8", features = ["v4", "serde"] }
|
|||||||
crossbeam = "0.7"
|
crossbeam = "0.7"
|
||||||
futures = {version = "0.3", features=["thread-pool"] }
|
futures = {version = "0.3", features=["thread-pool"] }
|
||||||
owning_ref = "0.4"
|
owning_ref = "0.4"
|
||||||
stable_deref_trait = "1.0.0"
|
tantivy-query-grammar = { version="0.14.0-dev", path="./query-grammar" }
|
||||||
rust-stemmers = "1.2"
|
stable_deref_trait = "1"
|
||||||
downcast-rs = { version="1.0" }
|
rust-stemmers = "1"
|
||||||
tantivy-query-grammar = { version="0.13", path="./query-grammar" }
|
downcast-rs = "1"
|
||||||
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
|
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
|
||||||
census = "0.4"
|
census = "0.4"
|
||||||
fnv = "1.0.6"
|
fnv = "1"
|
||||||
owned-read = "0.4"
|
owned-read = "0.4"
|
||||||
failure = "0.1"
|
thiserror = "1.0"
|
||||||
htmlescape = "0.3.1"
|
htmlescape = "0.3"
|
||||||
fail = "0.4"
|
fail = "0.4"
|
||||||
murmurhash32 = "0.2"
|
murmurhash32 = "0.2"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
smallvec = "1.0"
|
smallvec = "1"
|
||||||
rayon = "1"
|
rayon = "1"
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
@@ -75,12 +75,11 @@ overflow-checks = true
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["mmap"]
|
default = ["mmap"]
|
||||||
mmap = ["atomicwrites", "fs2", "memmap", "notify"]
|
mmap = ["fs2", "tempfile", "memmap", "notify"]
|
||||||
lz4-compression = ["lz4"]
|
lz4-compression = ["lz4"]
|
||||||
failpoints = ["fail/failpoints"]
|
failpoints = ["fail/failpoints"]
|
||||||
unstable = [] # useful for benches.
|
unstable = [] # useful for benches.
|
||||||
wasm-bindgen = ["uuid/wasm-bindgen"]
|
wasm-bindgen = ["uuid/wasm-bindgen"]
|
||||||
scoref64 = [] # scores are f64 instead of f32. was introduced to debug blockwand.
|
|
||||||
|
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["query-grammar"]
|
members = ["query-grammar"]
|
||||||
|
|||||||
@@ -34,11 +34,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
|
|||||||
The following [benchmark](https://tantivy-search.github.io/bench/) breaks down
|
The following [benchmark](https://tantivy-search.github.io/bench/) breaks down
|
||||||
performance for different types of queries / collection.
|
performance for different types of queries / collection.
|
||||||
|
|
||||||
|
|
||||||
In general, Tantivy tends to be
|
|
||||||
- slower than Lucene on union with a Top-K due to Block-WAND optimization.
|
|
||||||
- faster than Lucene on intersection and phrase queries.
|
|
||||||
|
|
||||||
Your mileage WILL vary depending on the nature of queries and their load.
|
Your mileage WILL vary depending on the nature of queries and their load.
|
||||||
|
|
||||||
# Features
|
# Features
|
||||||
|
|||||||
@@ -112,18 +112,6 @@ fn main() -> tantivy::Result<()> {
|
|||||||
limbs and branches that arch over the pool"
|
limbs and branches that arch over the pool"
|
||||||
));
|
));
|
||||||
|
|
||||||
index_writer.add_document(doc!(
|
|
||||||
title => "Of Mice and Men",
|
|
||||||
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
|
||||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
|
||||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
|
||||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
|
||||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
|
||||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
|
||||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
|
||||||
limbs and branches that arch over the pool"
|
|
||||||
));
|
|
||||||
|
|
||||||
// Multivalued field just need to be repeated.
|
// Multivalued field just need to be repeated.
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
title => "Frankenstein",
|
title => "Frankenstein",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-query-grammar"
|
name = "tantivy-query-grammar"
|
||||||
version = "0.13.0"
|
version = "0.14.0-dev"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ mod test {
|
|||||||
use crate::Occur;
|
use crate::Occur;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_Occur_compose() {
|
fn test_occur_compose() {
|
||||||
assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
|
assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
|
||||||
assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
|
assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|||||||
@@ -9,8 +9,10 @@ use combine::{
|
|||||||
|
|
||||||
fn field<'a>() -> impl Parser<&'a str, Output = String> {
|
fn field<'a>() -> impl Parser<&'a str, Output = String> {
|
||||||
(
|
(
|
||||||
letter(),
|
(letter().or(char('_'))),
|
||||||
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
|
many(satisfy(|c: char| {
|
||||||
|
c.is_alphanumeric() || c == '_' || c == '-'
|
||||||
|
})),
|
||||||
)
|
)
|
||||||
.skip(char(':'))
|
.skip(char(':'))
|
||||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||||
@@ -279,6 +281,8 @@ pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
|
||||||
|
type TestParseResult = Result<(), StringStreamError>;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use combine::parser::Parser;
|
use combine::parser::Parser;
|
||||||
|
|
||||||
@@ -296,9 +300,10 @@ mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_occur_symbol() {
|
fn test_occur_symbol() -> TestParseResult {
|
||||||
assert_eq!(super::occur_symbol().parse("-"), Ok((Occur::MustNot, "")));
|
assert_eq!(super::occur_symbol().parse("-")?, (Occur::MustNot, ""));
|
||||||
assert_eq!(super::occur_symbol().parse("+"), Ok((Occur::Must, "")));
|
assert_eq!(super::occur_symbol().parse("+")?, (Occur::Must, ""));
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -410,6 +415,25 @@ mod test {
|
|||||||
assert_eq!(format!("{:?}", ast), "\"abc\"");
|
assert_eq!(format!("{:?}", ast), "\"abc\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_field_name() -> TestParseResult {
|
||||||
|
assert_eq!(
|
||||||
|
super::field().parse("my-field-name:a")?,
|
||||||
|
("my-field-name".to_string(), "a")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
super::field().parse("my_field_name:a")?,
|
||||||
|
("my_field_name".to_string(), "a")
|
||||||
|
);
|
||||||
|
assert!(super::field().parse(":a").is_err());
|
||||||
|
assert!(super::field().parse("-my_field:a").is_err());
|
||||||
|
assert_eq!(
|
||||||
|
super::field().parse("_my_field:a")?,
|
||||||
|
("_my_field".to_string(), "a")
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_range_parser() {
|
fn test_range_parser() {
|
||||||
// testing the range() parser separately
|
// testing the range() parser separately
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ pub trait CustomScorer<TScore>: Sync {
|
|||||||
|
|
||||||
impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
|
impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
|
||||||
where
|
where
|
||||||
TCustomScorer: CustomScorer<TScore>,
|
TCustomScorer: CustomScorer<TScore> + Send + Sync,
|
||||||
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
||||||
{
|
{
|
||||||
type Fruit = Vec<(TScore, DocAddress)>;
|
type Fruit = Vec<(TScore, DocAddress)>;
|
||||||
|
|||||||
@@ -472,7 +472,7 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let num_facets: usize = 3 * 4 * 5;
|
let num_facets: usize = 3 * 4 * 5;
|
||||||
let facets: Vec<Facet> = (0..num_facets)
|
let facets: Vec<Facet> = (0..num_facets)
|
||||||
.map(|mut n| {
|
.map(|mut n| {
|
||||||
@@ -531,7 +531,7 @@ mod tests {
|
|||||||
let facet_field = schema_builder.add_facet_field("facets");
|
let facet_field = schema_builder.add_facet_field("facets");
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
facet_field => Facet::from_text(&"/subjects/A/a"),
|
facet_field => Facet::from_text(&"/subjects/A/a"),
|
||||||
facet_field => Facet::from_text(&"/subjects/B/a"),
|
facet_field => Facet::from_text(&"/subjects/B/a"),
|
||||||
@@ -550,12 +550,12 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_doc_search_by_facet() {
|
fn test_doc_search_by_facet() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let facet_field = schema_builder.add_facet_field("facet");
|
let facet_field = schema_builder.add_facet_field("facet");
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
facet_field => Facet::from_text(&"/A/A"),
|
facet_field => Facet::from_text(&"/A/A"),
|
||||||
));
|
));
|
||||||
@@ -568,8 +568,8 @@ mod tests {
|
|||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
facet_field => Facet::from_text(&"/D/C/A"),
|
facet_field => Facet::from_text(&"/D/C/A"),
|
||||||
));
|
));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
assert_eq!(searcher.num_docs(), 4);
|
assert_eq!(searcher.num_docs(), 4);
|
||||||
|
|
||||||
@@ -586,17 +586,17 @@ mod tests {
|
|||||||
assert_eq!(count_facet("/A/C"), 1);
|
assert_eq!(count_facet("/A/C"), 1);
|
||||||
assert_eq!(count_facet("/A/C/A"), 1);
|
assert_eq!(count_facet("/A/C/A"), 1);
|
||||||
assert_eq!(count_facet("/C/A"), 0);
|
assert_eq!(count_facet("/C/A"), 0);
|
||||||
|
|
||||||
|
let query_parser = QueryParser::for_index(&index, vec![]);
|
||||||
{
|
{
|
||||||
let query_parser = QueryParser::for_index(&index, vec![]);
|
let query = query_parser.parse_query("facet:/A/B")?;
|
||||||
{
|
assert_eq!(1, searcher.search(&query, &Count).unwrap());
|
||||||
let query = query_parser.parse_query("facet:/A/B").unwrap();
|
|
||||||
assert_eq!(1, searcher.search(&query, &Count).unwrap());
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let query = query_parser.parse_query("facet:/A").unwrap();
|
|
||||||
assert_eq!(3, searcher.search(&query, &Count).unwrap());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
let query = query_parser.parse_query("facet:/A")?;
|
||||||
|
assert_eq!(3, searcher.search(&query, &Count)?);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -631,7 +631,7 @@ mod tests {
|
|||||||
.collect();
|
.collect();
|
||||||
docs[..].shuffle(&mut thread_rng());
|
docs[..].shuffle(&mut thread_rng());
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for doc in docs {
|
for doc in docs {
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
}
|
}
|
||||||
@@ -684,7 +684,7 @@ mod bench {
|
|||||||
// 40425 docs
|
// 40425 docs
|
||||||
docs[..].shuffle(&mut thread_rng());
|
docs[..].shuffle(&mut thread_rng());
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for doc in docs {
|
for doc in docs {
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
{
|
{
|
||||||
for i in 0u64..10u64 {
|
for i in 0u64..10u64 {
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ impl<T> Fruit for T where T: Send + downcast_rs::Downcast {}
|
|||||||
/// The collection logic itself is in the `SegmentCollector`.
|
/// The collection logic itself is in the `SegmentCollector`.
|
||||||
///
|
///
|
||||||
/// Segments are not guaranteed to be visited in any specific order.
|
/// Segments are not guaranteed to be visited in any specific order.
|
||||||
pub trait Collector: Sync {
|
pub trait Collector: Sync + Send {
|
||||||
/// `Fruit` is the type for the result of our collection.
|
/// `Fruit` is the type for the result of our collection.
|
||||||
/// e.g. `usize` for the `Count` collector.
|
/// e.g. `usize` for the `Count` collector.
|
||||||
type Fruit: Fruit;
|
type Fruit: Fruit;
|
||||||
|
|||||||
@@ -259,7 +259,7 @@ mod tests {
|
|||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text=>"abc"));
|
index_writer.add_document(doc!(text=>"abc"));
|
||||||
index_writer.add_document(doc!(text=>"abc abc abc"));
|
index_writer.add_document(doc!(text=>"abc abc abc"));
|
||||||
index_writer.add_document(doc!(text=>"abc abc"));
|
index_writer.add_document(doc!(text=>"abc abc"));
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ use std::fmt;
|
|||||||
/// let schema = schema_builder.build();
|
/// let schema = schema_builder.build();
|
||||||
/// let index = Index::create_in_ram(schema);
|
/// let index = Index::create_in_ram(schema);
|
||||||
///
|
///
|
||||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
||||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||||
@@ -123,7 +123,7 @@ impl TopDocs {
|
|||||||
/// let schema = schema_builder.build();
|
/// let schema = schema_builder.build();
|
||||||
/// let index = Index::create_in_ram(schema);
|
/// let index = Index::create_in_ram(schema);
|
||||||
///
|
///
|
||||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
||||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||||
@@ -163,7 +163,7 @@ impl TopDocs {
|
|||||||
/// # let schema = schema_builder.build();
|
/// # let schema = schema_builder.build();
|
||||||
/// #
|
/// #
|
||||||
/// # let index = Index::create_in_ram(schema);
|
/// # let index = Index::create_in_ram(schema);
|
||||||
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
||||||
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
|
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
|
||||||
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
|
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
|
||||||
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
|
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
|
||||||
@@ -264,7 +264,7 @@ impl TopDocs {
|
|||||||
/// fn create_index() -> tantivy::Result<Index> {
|
/// fn create_index() -> tantivy::Result<Index> {
|
||||||
/// let schema = create_schema();
|
/// let schema = create_schema();
|
||||||
/// let index = Index::create_in_ram(schema);
|
/// let index = Index::create_in_ram(schema);
|
||||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
||||||
/// let product_name = index.schema().get_field("product_name").unwrap();
|
/// let product_name = index.schema().get_field("product_name").unwrap();
|
||||||
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
||||||
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
|
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
|
||||||
@@ -324,7 +324,7 @@ impl TopDocs {
|
|||||||
where
|
where
|
||||||
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
||||||
TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
|
TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
|
||||||
TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker>,
|
TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker> + Send + Sync,
|
||||||
{
|
{
|
||||||
TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
|
TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
|
||||||
}
|
}
|
||||||
@@ -371,7 +371,7 @@ impl TopDocs {
|
|||||||
/// # fn main() -> tantivy::Result<()> {
|
/// # fn main() -> tantivy::Result<()> {
|
||||||
/// # let schema = create_schema();
|
/// # let schema = create_schema();
|
||||||
/// # let index = Index::create_in_ram(schema);
|
/// # let index = Index::create_in_ram(schema);
|
||||||
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
||||||
/// # let product_name = index.schema().get_field("product_name").unwrap();
|
/// # let product_name = index.schema().get_field("product_name").unwrap();
|
||||||
/// #
|
/// #
|
||||||
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
||||||
@@ -438,7 +438,7 @@ impl TopDocs {
|
|||||||
where
|
where
|
||||||
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
||||||
TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
|
TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
|
||||||
TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer>,
|
TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer> + Send + Sync,
|
||||||
{
|
{
|
||||||
CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
|
CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
|
||||||
}
|
}
|
||||||
@@ -561,7 +561,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."));
|
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."));
|
||||||
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"));
|
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"));
|
||||||
index_writer.add_document(doc!(text_field=>"I like Droopy"));
|
index_writer.add_document(doc!(text_field=>"I like Droopy"));
|
||||||
@@ -821,7 +821,7 @@ mod tests {
|
|||||||
) -> (Index, Box<dyn Query>) {
|
) -> (Index, Box<dyn Query>) {
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
||||||
doc_adder(&mut index_writer);
|
doc_adder(&mut index_writer);
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit().unwrap();
|
||||||
let query_parser = QueryParser::for_index(&index, vec![query_field]);
|
let query_parser = QueryParser::for_index(&index, vec![query_field]);
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ pub trait ScoreTweaker<TScore>: Sync {
|
|||||||
|
|
||||||
impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
|
impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
|
||||||
where
|
where
|
||||||
TScoreTweaker: ScoreTweaker<TScore>,
|
TScoreTweaker: ScoreTweaker<TScore> + Send + Sync,
|
||||||
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
||||||
{
|
{
|
||||||
type Fruit = Vec<(TScore, DocAddress)>;
|
type Fruit = Vec<(TScore, DocAddress)>;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use crossbeam::channel;
|
use crossbeam::channel;
|
||||||
use rayon::{ThreadPool, ThreadPoolBuilder};
|
use rayon::{ThreadPool, ThreadPoolBuilder};
|
||||||
|
use slog::{error, Logger};
|
||||||
|
|
||||||
/// Search executor whether search request are single thread or multithread.
|
/// Search executor whether search request are single thread or multithread.
|
||||||
///
|
///
|
||||||
@@ -43,6 +44,7 @@ impl Executor {
|
|||||||
&self,
|
&self,
|
||||||
f: F,
|
f: F,
|
||||||
args: AIterator,
|
args: AIterator,
|
||||||
|
logger: Logger,
|
||||||
) -> crate::Result<Vec<R>> {
|
) -> crate::Result<Vec<R>> {
|
||||||
match self {
|
match self {
|
||||||
Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
|
Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
|
||||||
@@ -57,7 +59,7 @@ impl Executor {
|
|||||||
let (idx, arg) = arg_with_idx;
|
let (idx, arg) = arg_with_idx;
|
||||||
let fruit = f(arg);
|
let fruit = f(arg);
|
||||||
if let Err(err) = fruit_sender.send((idx, fruit)) {
|
if let Err(err) = fruit_sender.send((idx, fruit)) {
|
||||||
error!("Failed to send search task. It probably means all search threads have panicked. {:?}", err);
|
error!(logger, "Failed to send search task. It probably means all search threads have panicked. {:?}", err);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -87,17 +89,21 @@ impl Executor {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
|
use slog::{o, Discard, Logger};
|
||||||
|
|
||||||
use super::Executor;
|
use super::Executor;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[should_panic(expected = "panic should propagate")]
|
#[should_panic(expected = "panic should propagate")]
|
||||||
fn test_panic_propagates_single_thread() {
|
fn test_panic_propagates_single_thread() {
|
||||||
|
let logger = Logger::root(Discard, o!());
|
||||||
let _result: Vec<usize> = Executor::single_thread()
|
let _result: Vec<usize> = Executor::single_thread()
|
||||||
.map(
|
.map(
|
||||||
|_| {
|
|_| {
|
||||||
panic!("panic should propagate");
|
panic!("panic should propagate");
|
||||||
},
|
},
|
||||||
vec![0].into_iter(),
|
vec![0].into_iter(),
|
||||||
|
logger,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
@@ -105,6 +111,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
#[should_panic] //< unfortunately the panic message is not propagated
|
#[should_panic] //< unfortunately the panic message is not propagated
|
||||||
fn test_panic_propagates_multi_thread() {
|
fn test_panic_propagates_multi_thread() {
|
||||||
|
let logger = Logger::root(Discard, o!());
|
||||||
let _result: Vec<usize> = Executor::multi_thread(1, "search-test")
|
let _result: Vec<usize> = Executor::multi_thread(1, "search-test")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(
|
.map(
|
||||||
@@ -112,14 +119,16 @@ mod tests {
|
|||||||
panic!("panic should propagate");
|
panic!("panic should propagate");
|
||||||
},
|
},
|
||||||
vec![0].into_iter(),
|
vec![0].into_iter(),
|
||||||
|
logger,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_map_singlethread() {
|
fn test_map_singlethread() {
|
||||||
|
let logger = Logger::root(Discard, o!());
|
||||||
let result: Vec<usize> = Executor::single_thread()
|
let result: Vec<usize> = Executor::single_thread()
|
||||||
.map(|i| Ok(i * 2), 0..1_000)
|
.map(|i| Ok(i * 2), 0..1_000, logger)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(result.len(), 1_000);
|
assert_eq!(result.len(), 1_000);
|
||||||
for i in 0..1_000 {
|
for i in 0..1_000 {
|
||||||
@@ -129,9 +138,10 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_map_multithread() {
|
fn test_map_multithread() {
|
||||||
|
let logger = Logger::root(Discard, o!());
|
||||||
let result: Vec<usize> = Executor::multi_thread(3, "search-test")
|
let result: Vec<usize> = Executor::multi_thread(3, "search-test")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|i| Ok(i * 2), 0..10)
|
.map(|i| Ok(i * 2), 0..10, logger)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(result.len(), 10);
|
assert_eq!(result.len(), 10);
|
||||||
for i in 0..10 {
|
for i in 0..10 {
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ use crate::schema::FieldType;
|
|||||||
use crate::schema::Schema;
|
use crate::schema::Schema;
|
||||||
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
|
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
|
||||||
use crate::IndexWriter;
|
use crate::IndexWriter;
|
||||||
|
use slog::Logger;
|
||||||
use std::borrow::BorrowMut;
|
use std::borrow::BorrowMut;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
@@ -57,7 +58,14 @@ pub struct Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
/// Examines the director to see if it contains an index
|
|
||||||
|
pub(crate) fn logger(&self) -> &Logger {
|
||||||
|
self.directory.logger()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Examines the directory to see if it contains an index.
|
||||||
|
///
|
||||||
|
/// Effectively, it only checks for the presence of the `meta.json` file.
|
||||||
pub fn exists<Dir: Directory>(dir: &Dir) -> bool {
|
pub fn exists<Dir: Directory>(dir: &Dir) -> bool {
|
||||||
dir.exists(&META_FILEPATH)
|
dir.exists(&META_FILEPATH)
|
||||||
}
|
}
|
||||||
@@ -140,16 +148,18 @@ impl Index {
|
|||||||
Index::create(mmap_directory, schema)
|
Index::create(mmap_directory, schema)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new index given an implementation of the trait `Directory`
|
/// Creates a new index given an implementation of the trait `Directory`.
|
||||||
|
///
|
||||||
|
/// If a directory previously existed, it will be erased.
|
||||||
pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> crate::Result<Index> {
|
pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> crate::Result<Index> {
|
||||||
let directory = ManagedDirectory::wrap(dir)?;
|
let directory = ManagedDirectory::wrap(dir)?;
|
||||||
Index::from_directory(directory, schema)
|
Index::new_from_directory(directory, schema)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new index from a directory.
|
/// Create a new index from a directory.
|
||||||
///
|
///
|
||||||
/// This will overwrite existing meta.json
|
/// This will overwrite existing meta.json
|
||||||
fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> crate::Result<Index> {
|
fn new_from_directory(mut directory: ManagedDirectory, schema: Schema) -> crate::Result<Index> {
|
||||||
save_new_metas(schema.clone(), directory.borrow_mut())?;
|
save_new_metas(schema.clone(), directory.borrow_mut())?;
|
||||||
let metas = IndexMeta::with_schema(schema);
|
let metas = IndexMeta::with_schema(schema);
|
||||||
Index::create_from_metas(directory, &metas, SegmentMetaInventory::default())
|
Index::create_from_metas(directory, &metas, SegmentMetaInventory::default())
|
||||||
@@ -240,6 +250,8 @@ impl Index {
|
|||||||
|
|
||||||
/// Open the index using the provided directory
|
/// Open the index using the provided directory
|
||||||
pub fn open<D: Directory>(directory: D) -> crate::Result<Index> {
|
pub fn open<D: Directory>(directory: D) -> crate::Result<Index> {
|
||||||
|
let logger: &Logger = directory.logger();
|
||||||
|
slog::info!(logger, "index-open"; "directory" => format!("{:?}", directory));
|
||||||
let directory = ManagedDirectory::wrap(directory)?;
|
let directory = ManagedDirectory::wrap(directory)?;
|
||||||
let inventory = SegmentMetaInventory::default();
|
let inventory = SegmentMetaInventory::default();
|
||||||
let metas = load_metas(&directory, &inventory)?;
|
let metas = load_metas(&directory, &inventory)?;
|
||||||
@@ -300,6 +312,15 @@ impl Index {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper to create an index writer for tests.
|
||||||
|
///
|
||||||
|
/// That index writer simply has a single thread and a heap of 10 MB.
|
||||||
|
/// Using a single thread gives us a deterministic allocation of DocId.
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn writer_for_tests(&self) -> crate::Result<IndexWriter> {
|
||||||
|
self.writer_with_num_threads(1, 10_000_000)
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a multithreaded writer
|
/// Creates a multithreaded writer
|
||||||
///
|
///
|
||||||
/// Tantivy will automatically define the number of threads to use.
|
/// Tantivy will automatically define the number of threads to use.
|
||||||
@@ -502,7 +523,7 @@ mod tests {
|
|||||||
let schema = throw_away_schema();
|
let schema = throw_away_schema();
|
||||||
let field = schema.get_field("num_likes").unwrap();
|
let field = schema.get_field("num_likes").unwrap();
|
||||||
let mut index = Index::create_from_tempdir(schema).unwrap();
|
let mut index = Index::create_from_tempdir(schema).unwrap();
|
||||||
let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut writer = index.writer_for_tests().unwrap();
|
||||||
writer.commit().unwrap();
|
writer.commit().unwrap();
|
||||||
let reader = index
|
let reader = index
|
||||||
.reader_builder()
|
.reader_builder()
|
||||||
@@ -539,23 +560,33 @@ mod tests {
|
|||||||
test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
|
test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
|
fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
|
||||||
let mut reader_index = reader.index();
|
let mut reader_index = reader.index();
|
||||||
let (sender, receiver) = crossbeam::channel::unbounded();
|
let (sender, receiver) = crossbeam::channel::unbounded();
|
||||||
let _watch_handle = reader_index.directory_mut().watch(Box::new(move || {
|
let _watch_handle = reader_index.directory_mut().watch(Box::new(move || {
|
||||||
let _ = sender.send(());
|
let _ = sender.send(());
|
||||||
}));
|
}));
|
||||||
let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut writer = index.writer_for_tests().unwrap();
|
||||||
assert_eq!(reader.searcher().num_docs(), 0);
|
assert_eq!(reader.searcher().num_docs(), 0);
|
||||||
writer.add_document(doc!(field=>1u64));
|
writer.add_document(doc!(field=>1u64));
|
||||||
writer.commit().unwrap();
|
writer.commit().unwrap();
|
||||||
assert!(receiver.recv().is_ok());
|
// We need a loop here because it is possible for notify to send more than
|
||||||
assert_eq!(reader.searcher().num_docs(), 1);
|
// one modify event. It was observed on CI on MacOS.
|
||||||
|
loop {
|
||||||
|
assert!(receiver.recv().is_ok());
|
||||||
|
if reader.searcher().num_docs() == 1 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
writer.add_document(doc!(field=>2u64));
|
writer.add_document(doc!(field=>2u64));
|
||||||
writer.commit().unwrap();
|
writer.commit().unwrap();
|
||||||
assert!(receiver.recv().is_ok());
|
// ... Same as above
|
||||||
assert_eq!(reader.searcher().num_docs(), 2);
|
loop {
|
||||||
|
assert!(receiver.recv().is_ok());
|
||||||
|
if reader.searcher().num_docs() == 2 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This test will not pass on windows, because windows
|
// This test will not pass on windows, because windows
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ use crate::directory::ReadOnlySource;
|
|||||||
use crate::positions::PositionReader;
|
use crate::positions::PositionReader;
|
||||||
use crate::postings::TermInfo;
|
use crate::postings::TermInfo;
|
||||||
use crate::postings::{BlockSegmentPostings, SegmentPostings};
|
use crate::postings::{BlockSegmentPostings, SegmentPostings};
|
||||||
use crate::schema::FieldType;
|
|
||||||
use crate::schema::IndexRecordOption;
|
use crate::schema::IndexRecordOption;
|
||||||
use crate::schema::Term;
|
use crate::schema::Term;
|
||||||
use crate::termdict::TermDictionary;
|
use crate::termdict::TermDictionary;
|
||||||
@@ -54,10 +53,7 @@ impl InvertedIndexReader {
|
|||||||
|
|
||||||
/// Creates an empty `InvertedIndexReader` object, which
|
/// Creates an empty `InvertedIndexReader` object, which
|
||||||
/// contains no terms at all.
|
/// contains no terms at all.
|
||||||
pub fn empty(field_type: &FieldType) -> InvertedIndexReader {
|
pub fn empty(record_option: IndexRecordOption) -> InvertedIndexReader {
|
||||||
let record_option = field_type
|
|
||||||
.get_index_record_option()
|
|
||||||
.unwrap_or(IndexRecordOption::Basic);
|
|
||||||
InvertedIndexReader {
|
InvertedIndexReader {
|
||||||
termdict: TermDictionary::empty(),
|
termdict: TermDictionary::empty(),
|
||||||
postings_source: ReadOnlySource::empty(),
|
postings_source: ReadOnlySource::empty(),
|
||||||
|
|||||||
@@ -143,6 +143,7 @@ impl Searcher {
|
|||||||
collector.collect_segment(weight.as_ref(), segment_ord as u32, segment_reader)
|
collector.collect_segment(weight.as_ref(), segment_ord as u32, segment_reader)
|
||||||
},
|
},
|
||||||
segment_readers.iter().enumerate(),
|
segment_readers.iter().enumerate(),
|
||||||
|
self.index.logger().clone(),
|
||||||
)?;
|
)?;
|
||||||
collector.merge_fruits(fruits)
|
collector.merge_fruits(fruits)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,6 +21,12 @@ use std::sync::atomic;
|
|||||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||||
pub struct SegmentId(Uuid);
|
pub struct SegmentId(Uuid);
|
||||||
|
|
||||||
|
impl ToString for SegmentId {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
self.short_uuid_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
|
static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
|
||||||
|
|
||||||
|
|||||||
@@ -9,14 +9,15 @@ use crate::fastfield::DeleteBitSet;
|
|||||||
use crate::fastfield::FacetReader;
|
use crate::fastfield::FacetReader;
|
||||||
use crate::fastfield::FastFieldReaders;
|
use crate::fastfield::FastFieldReaders;
|
||||||
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
|
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
|
||||||
use crate::schema::Field;
|
|
||||||
use crate::schema::FieldType;
|
use crate::schema::FieldType;
|
||||||
use crate::schema::Schema;
|
use crate::schema::Schema;
|
||||||
|
use crate::schema::{Field, IndexRecordOption};
|
||||||
use crate::space_usage::SegmentSpaceUsage;
|
use crate::space_usage::SegmentSpaceUsage;
|
||||||
use crate::store::StoreReader;
|
use crate::store::StoreReader;
|
||||||
use crate::termdict::TermDictionary;
|
use crate::termdict::TermDictionary;
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
use fail::fail_point;
|
use fail::fail_point;
|
||||||
|
use slog::{warn, Logger};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -53,6 +54,7 @@ pub struct SegmentReader {
|
|||||||
store_source: ReadOnlySource,
|
store_source: ReadOnlySource,
|
||||||
delete_bitset_opt: Option<DeleteBitSet>,
|
delete_bitset_opt: Option<DeleteBitSet>,
|
||||||
schema: Schema,
|
schema: Schema,
|
||||||
|
logger: Logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SegmentReader {
|
impl SegmentReader {
|
||||||
@@ -125,17 +127,15 @@ impl SegmentReader {
|
|||||||
///
|
///
|
||||||
/// They are simply stored as a fast field, serialized in
|
/// They are simply stored as a fast field, serialized in
|
||||||
/// the `.fieldnorm` file of the segment.
|
/// the `.fieldnorm` file of the segment.
|
||||||
pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
|
pub fn get_fieldnorms_reader(&self, field: Field) -> crate::Result<FieldNormReader> {
|
||||||
if let Some(fieldnorm_reader) = self.fieldnorm_readers.get_field(field) {
|
self.fieldnorm_readers.get_field(field).ok_or_else(|| {
|
||||||
fieldnorm_reader
|
|
||||||
} else {
|
|
||||||
let field_name = self.schema.get_field_name(field);
|
let field_name = self.schema.get_field_name(field);
|
||||||
let err_msg = format!(
|
let err_msg = format!(
|
||||||
"Field norm not found for field {:?}. Was it market as indexed during indexing.",
|
"Field norm not found for field {:?}. Was it market as indexed during indexing.",
|
||||||
field_name
|
field_name
|
||||||
);
|
);
|
||||||
panic!(err_msg);
|
crate::TantivyError::SchemaError(err_msg)
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Accessor to the segment's `StoreReader`.
|
/// Accessor to the segment's `StoreReader`.
|
||||||
@@ -202,6 +202,7 @@ impl SegmentReader {
|
|||||||
positions_composite,
|
positions_composite,
|
||||||
positions_idx_composite,
|
positions_idx_composite,
|
||||||
schema,
|
schema,
|
||||||
|
logger: segment.index().logger().clone(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -212,6 +213,11 @@ impl SegmentReader {
|
|||||||
/// The field reader is in charge of iterating through the
|
/// The field reader is in charge of iterating through the
|
||||||
/// term dictionary associated to a specific field,
|
/// term dictionary associated to a specific field,
|
||||||
/// and opening the posting list associated to any term.
|
/// and opening the posting list associated to any term.
|
||||||
|
///
|
||||||
|
/// If the field is not marked as indexed, a warning is logged and an empty `InvertedIndexReader`
|
||||||
|
/// is returned.
|
||||||
|
/// Similarly if the field is marked as indexed but no term has been indexed for the given
|
||||||
|
/// field, an empty `InvertedIndexReader` is returned (but no warning is logged).
|
||||||
pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
|
pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
|
||||||
if let Some(inv_idx_reader) = self
|
if let Some(inv_idx_reader) = self
|
||||||
.inv_idx_reader_cache
|
.inv_idx_reader_cache
|
||||||
@@ -226,21 +232,25 @@ impl SegmentReader {
|
|||||||
let record_option_opt = field_type.get_index_record_option();
|
let record_option_opt = field_type.get_index_record_option();
|
||||||
|
|
||||||
if record_option_opt.is_none() {
|
if record_option_opt.is_none() {
|
||||||
panic!("Field {:?} does not seem indexed.", field_entry.name());
|
warn!(
|
||||||
|
self.logger,
|
||||||
|
"Field {:?} does not seem indexed.",
|
||||||
|
field_entry.name()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let record_option = record_option_opt.unwrap();
|
|
||||||
|
|
||||||
let postings_source_opt = self.postings_composite.open_read(field);
|
let postings_source_opt = self.postings_composite.open_read(field);
|
||||||
|
|
||||||
if postings_source_opt.is_none() {
|
if postings_source_opt.is_none() || record_option_opt.is_none() {
|
||||||
// no documents in the segment contained this field.
|
// no documents in the segment contained this field.
|
||||||
// As a result, no data is associated to the inverted index.
|
// As a result, no data is associated to the inverted index.
|
||||||
//
|
//
|
||||||
// Returns an empty inverted index.
|
// Returns an empty inverted index.
|
||||||
return Arc::new(InvertedIndexReader::empty(field_type));
|
let record_option = record_option_opt.unwrap_or(IndexRecordOption::Basic);
|
||||||
|
return Arc::new(InvertedIndexReader::empty(record_option));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let record_option = record_option_opt.unwrap();
|
||||||
let postings_source = postings_source_opt.unwrap();
|
let postings_source = postings_source_opt.unwrap();
|
||||||
|
|
||||||
let termdict_source = self.termdict_composite.open_read(field).expect(
|
let termdict_source = self.termdict_composite.open_read(field).expect(
|
||||||
@@ -339,7 +349,7 @@ mod test {
|
|||||||
let name = schema.get_field("name").unwrap();
|
let name = schema.get_field("name").unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(name => "tantivy"));
|
index_writer.add_document(doc!(name => "tantivy"));
|
||||||
index_writer.add_document(doc!(name => "horse"));
|
index_writer.add_document(doc!(name => "horse"));
|
||||||
index_writer.add_document(doc!(name => "jockey"));
|
index_writer.add_document(doc!(name => "jockey"));
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use slog::{error, Logger};
|
||||||
|
|
||||||
use crate::directory::directory_lock::Lock;
|
use crate::directory::directory_lock::Lock;
|
||||||
use crate::directory::error::LockError;
|
use crate::directory::error::LockError;
|
||||||
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
|
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
|
||||||
@@ -64,7 +66,10 @@ impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
|
|||||||
impl Drop for DirectoryLockGuard {
|
impl Drop for DirectoryLockGuard {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if let Err(e) = self.directory.delete(&*self.path) {
|
if let Err(e) = self.directory.delete(&*self.path) {
|
||||||
error!("Failed to remove the lock file. {:?}", e);
|
error!(
|
||||||
|
self.directory.logger(),
|
||||||
|
"Failed to remove the lock file. {:?}", e
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -80,7 +85,7 @@ fn try_acquire_lock(
|
|||||||
) -> Result<DirectoryLock, TryAcquireLockError> {
|
) -> Result<DirectoryLock, TryAcquireLockError> {
|
||||||
let mut write = directory.open_write(filepath).map_err(|e| match e {
|
let mut write = directory.open_write(filepath).map_err(|e| match e {
|
||||||
OpenWriteError::FileAlreadyExists(_) => TryAcquireLockError::FileExists,
|
OpenWriteError::FileAlreadyExists(_) => TryAcquireLockError::FileExists,
|
||||||
OpenWriteError::IOError(io_error) => TryAcquireLockError::IOError(io_error.into()),
|
OpenWriteError::IOError { io_error, .. } => TryAcquireLockError::IOError(io_error),
|
||||||
})?;
|
})?;
|
||||||
write.flush().map_err(TryAcquireLockError::IOError)?;
|
write.flush().map_err(TryAcquireLockError::IOError)?;
|
||||||
Ok(DirectoryLock::from(Box::new(DirectoryLockGuard {
|
Ok(DirectoryLock::from(Box::new(DirectoryLockGuard {
|
||||||
@@ -209,6 +214,9 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
|
|||||||
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
|
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
|
||||||
/// `OnCommit` `ReloadPolicy` to work properly.
|
/// `OnCommit` `ReloadPolicy` to work properly.
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
|
||||||
|
|
||||||
|
/// Returns the `slog::Logger` configured for the `Directory`.
|
||||||
|
fn logger(&self) -> &Logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// DirectoryClone
|
/// DirectoryClone
|
||||||
|
|||||||
@@ -1,160 +1,60 @@
|
|||||||
use crate::Version;
|
use crate::Version;
|
||||||
use std::error::Error as StdError;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
/// Error while trying to acquire a directory lock.
|
/// Error while trying to acquire a directory lock.
|
||||||
#[derive(Debug, Fail)]
|
#[derive(Debug, Error)]
|
||||||
pub enum LockError {
|
pub enum LockError {
|
||||||
/// Failed to acquire a lock as it is already held by another
|
/// Failed to acquire a lock as it is already held by another
|
||||||
/// client.
|
/// client.
|
||||||
/// - In the context of a blocking lock, this means the lock was not released within some `timeout` period.
|
/// - In the context of a blocking lock, this means the lock was not released within some `timeout` period.
|
||||||
/// - In the context of a non-blocking lock, this means the lock was busy at the moment of the call.
|
/// - In the context of a non-blocking lock, this means the lock was busy at the moment of the call.
|
||||||
#[fail(
|
#[error("Could not acquire lock as it is already held, possibly by a different process.")]
|
||||||
display = "Could not acquire lock as it is already held, possibly by a different process."
|
|
||||||
)]
|
|
||||||
LockBusy,
|
LockBusy,
|
||||||
/// Trying to acquire a lock failed with an `IOError`
|
/// Trying to acquire a lock failed with an `IOError`
|
||||||
#[fail(display = "Failed to acquire the lock due to an io:Error.")]
|
#[error("Failed to acquire the lock due to an io:Error.")]
|
||||||
IOError(io::Error),
|
IOError(io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// General IO error with an optional path to the offending file.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct IOError {
|
|
||||||
path: Option<PathBuf>,
|
|
||||||
err: io::Error,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Into<io::Error> for IOError {
|
|
||||||
fn into(self) -> io::Error {
|
|
||||||
self.err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for IOError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self.path {
|
|
||||||
Some(ref path) => write!(f, "io error occurred on path '{:?}': '{}'", path, self.err),
|
|
||||||
None => write!(f, "io error occurred: '{}'", self.err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StdError for IOError {
|
|
||||||
fn description(&self) -> &str {
|
|
||||||
"io error occurred"
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cause(&self) -> Option<&dyn StdError> {
|
|
||||||
Some(&self.err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IOError {
|
|
||||||
pub(crate) fn with_path(path: PathBuf, err: io::Error) -> Self {
|
|
||||||
IOError {
|
|
||||||
path: Some(path),
|
|
||||||
err,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<io::Error> for IOError {
|
|
||||||
fn from(err: io::Error) -> IOError {
|
|
||||||
IOError { path: None, err }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Error that may occur when opening a directory
|
/// Error that may occur when opening a directory
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Error)]
|
||||||
pub enum OpenDirectoryError {
|
pub enum OpenDirectoryError {
|
||||||
/// The underlying directory does not exist.
|
/// The underlying directory does not exist.
|
||||||
|
#[error("Directory does not exist: '{0}'.")]
|
||||||
DoesNotExist(PathBuf),
|
DoesNotExist(PathBuf),
|
||||||
/// The path exists but is not a directory.
|
/// The path exists but is not a directory.
|
||||||
|
#[error("Path exists but is not a directory: '{0}'.")]
|
||||||
NotADirectory(PathBuf),
|
NotADirectory(PathBuf),
|
||||||
|
/// Failed to create a temp directory.
|
||||||
|
#[error("Failed to create a temporary directory: '{0}'.")]
|
||||||
|
FailedToCreateTempDir(io::Error),
|
||||||
/// IoError
|
/// IoError
|
||||||
IoError(io::Error),
|
#[error("IOError '{io_error:?}' while create directory in: '{directory_path:?}'.")]
|
||||||
}
|
IoError {
|
||||||
|
/// underlying io Error.
|
||||||
impl From<io::Error> for OpenDirectoryError {
|
io_error: io::Error,
|
||||||
fn from(io_err: io::Error) -> Self {
|
/// directory we tried to open.
|
||||||
OpenDirectoryError::IoError(io_err)
|
directory_path: PathBuf,
|
||||||
}
|
},
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for OpenDirectoryError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match *self {
|
|
||||||
OpenDirectoryError::DoesNotExist(ref path) => {
|
|
||||||
write!(f, "the underlying directory '{:?}' does not exist", path)
|
|
||||||
}
|
|
||||||
OpenDirectoryError::NotADirectory(ref path) => {
|
|
||||||
write!(f, "the path '{:?}' exists but is not a directory", path)
|
|
||||||
}
|
|
||||||
OpenDirectoryError::IoError(ref err) => write!(
|
|
||||||
f,
|
|
||||||
"IOError while trying to open/create the directory. {:?}",
|
|
||||||
err
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StdError for OpenDirectoryError {
|
|
||||||
fn description(&self) -> &str {
|
|
||||||
"error occurred while opening a directory"
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cause(&self) -> Option<&dyn StdError> {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Error that may occur when starting to write in a file
|
/// Error that may occur when starting to write in a file
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Error)]
|
||||||
pub enum OpenWriteError {
|
pub enum OpenWriteError {
|
||||||
/// Our directory is WORM, writing an existing file is forbidden.
|
/// Our directory is WORM, writing an existing file is forbidden.
|
||||||
/// Checkout the `Directory` documentation.
|
/// Checkout the `Directory` documentation.
|
||||||
|
#[error("File already exists: '{0}'")]
|
||||||
FileAlreadyExists(PathBuf),
|
FileAlreadyExists(PathBuf),
|
||||||
/// Any kind of IO error that happens when
|
/// Any kind of IO error that happens when
|
||||||
/// writing in the underlying IO device.
|
/// writing in the underlying IO device.
|
||||||
IOError(IOError),
|
#[error("IOError '{io_error:?}' while opening file for write: '{filepath}'.")]
|
||||||
}
|
IOError {
|
||||||
|
/// The underlying `io::Error`.
|
||||||
impl From<IOError> for OpenWriteError {
|
io_error: io::Error,
|
||||||
fn from(err: IOError) -> OpenWriteError {
|
/// File path of the file that tantivy failed to open for write.
|
||||||
OpenWriteError::IOError(err)
|
filepath: PathBuf,
|
||||||
}
|
},
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for OpenWriteError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match *self {
|
|
||||||
OpenWriteError::FileAlreadyExists(ref path) => {
|
|
||||||
write!(f, "the file '{:?}' already exists", path)
|
|
||||||
}
|
|
||||||
OpenWriteError::IOError(ref err) => write!(
|
|
||||||
f,
|
|
||||||
"an io error occurred while opening a file for writing: '{}'",
|
|
||||||
err
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StdError for OpenWriteError {
|
|
||||||
fn description(&self) -> &str {
|
|
||||||
"error occurred while opening a file for writing"
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cause(&self) -> Option<&dyn StdError> {
|
|
||||||
match *self {
|
|
||||||
OpenWriteError::FileAlreadyExists(_) => None,
|
|
||||||
OpenWriteError::IOError(ref err) => Some(err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Type of index incompatibility between the library and the index found on disk
|
/// Type of index incompatibility between the library and the index found on disk
|
||||||
@@ -217,55 +117,41 @@ impl fmt::Debug for Incompatibility {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Error that may occur when accessing a file read
|
/// Error that may occur when accessing a file read
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Error)]
|
||||||
pub enum OpenReadError {
|
pub enum OpenReadError {
|
||||||
/// The file does not exist.
|
/// The file does not exist.
|
||||||
|
#[error("Files does not exists: {0:?}")]
|
||||||
FileDoesNotExist(PathBuf),
|
FileDoesNotExist(PathBuf),
|
||||||
/// Any kind of IO error that happens when
|
/// Any kind of io::Error.
|
||||||
/// interacting with the underlying IO device.
|
#[error(
|
||||||
IOError(IOError),
|
"IOError: '{io_error:?}' happened while opening the following file for Read: {filepath}."
|
||||||
/// This library doesn't support the index version found on disk
|
)]
|
||||||
|
IOError {
|
||||||
|
/// The underlying `io::Error`.
|
||||||
|
io_error: io::Error,
|
||||||
|
/// File path of the file that tantivy failed to open for read.
|
||||||
|
filepath: PathBuf,
|
||||||
|
},
|
||||||
|
/// This library does not support the index version found in file footer.
|
||||||
|
#[error("Index version unsupported: {0:?}")]
|
||||||
IncompatibleIndex(Incompatibility),
|
IncompatibleIndex(Incompatibility),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<IOError> for OpenReadError {
|
|
||||||
fn from(err: IOError) -> OpenReadError {
|
|
||||||
OpenReadError::IOError(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for OpenReadError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match *self {
|
|
||||||
OpenReadError::FileDoesNotExist(ref path) => {
|
|
||||||
write!(f, "the file '{:?}' does not exist", path)
|
|
||||||
}
|
|
||||||
OpenReadError::IOError(ref err) => write!(
|
|
||||||
f,
|
|
||||||
"an io error occurred while opening a file for reading: '{}'",
|
|
||||||
err
|
|
||||||
),
|
|
||||||
OpenReadError::IncompatibleIndex(ref footer) => {
|
|
||||||
write!(f, "Incompatible index format: {:?}", footer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Error that may occur when trying to delete a file
|
/// Error that may occur when trying to delete a file
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Error)]
|
||||||
pub enum DeleteError {
|
pub enum DeleteError {
|
||||||
/// The file does not exist.
|
/// The file does not exist.
|
||||||
|
#[error("File does not exists: '{0}'.")]
|
||||||
FileDoesNotExist(PathBuf),
|
FileDoesNotExist(PathBuf),
|
||||||
/// Any kind of IO error that happens when
|
/// Any kind of IO error that happens when
|
||||||
/// interacting with the underlying IO device.
|
/// interacting with the underlying IO device.
|
||||||
IOError(IOError),
|
#[error("The following IO error happened while deleting file '{filepath}': '{io_error:?}'.")]
|
||||||
}
|
IOError {
|
||||||
|
/// The underlying `io::Error`.
|
||||||
impl From<IOError> for DeleteError {
|
io_error: io::Error,
|
||||||
fn from(err: IOError) -> DeleteError {
|
/// File path of the file that tantivy failed to delete.
|
||||||
DeleteError::IOError(err)
|
filepath: PathBuf,
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Incompatibility> for OpenReadError {
|
impl From<Incompatibility> for OpenReadError {
|
||||||
@@ -273,29 +159,3 @@ impl From<Incompatibility> for OpenReadError {
|
|||||||
OpenReadError::IncompatibleIndex(incompatibility)
|
OpenReadError::IncompatibleIndex(incompatibility)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for DeleteError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match *self {
|
|
||||||
DeleteError::FileDoesNotExist(ref path) => {
|
|
||||||
write!(f, "the file '{:?}' does not exist", path)
|
|
||||||
}
|
|
||||||
DeleteError::IOError(ref err) => {
|
|
||||||
write!(f, "an io error occurred while deleting a file: '{}'", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StdError for DeleteError {
|
|
||||||
fn description(&self) -> &str {
|
|
||||||
"error occurred while deleting a file"
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cause(&self) -> Option<&dyn StdError> {
|
|
||||||
match *self {
|
|
||||||
DeleteError::FileDoesNotExist(_) => None,
|
|
||||||
DeleteError::IOError(ref err) => Some(err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -271,7 +271,11 @@ mod tests {
|
|||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
let footer_proxy = FooterProxy::new(&mut vec);
|
let footer_proxy = FooterProxy::new(&mut vec);
|
||||||
assert!(footer_proxy.terminate().is_ok());
|
assert!(footer_proxy.terminate().is_ok());
|
||||||
assert_eq!(vec.len(), 167);
|
if crate::store::COMPRESSION == "lz4" {
|
||||||
|
assert_eq!(vec.len(), 158);
|
||||||
|
} else {
|
||||||
|
assert_eq!(vec.len(), 167);
|
||||||
|
}
|
||||||
let footer = Footer::deserialize(&mut &vec[..]).unwrap();
|
let footer = Footer::deserialize(&mut &vec[..]).unwrap();
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
footer.versioned_footer,
|
footer.versioned_footer,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use crate::core::MANAGED_FILEPATH;
|
use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
|
||||||
use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError};
|
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
|
||||||
use crate::directory::footer::{Footer, FooterProxy};
|
use crate::directory::footer::{Footer, FooterProxy};
|
||||||
use crate::directory::DirectoryLock;
|
use crate::directory::DirectoryLock;
|
||||||
use crate::directory::GarbageCollectionResult;
|
use crate::directory::GarbageCollectionResult;
|
||||||
@@ -11,9 +11,9 @@ use crate::error::DataCorruption;
|
|||||||
use crate::Directory;
|
use crate::Directory;
|
||||||
|
|
||||||
use crc32fast::Hasher;
|
use crc32fast::Hasher;
|
||||||
|
use slog::{debug, error, info};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Write;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::result;
|
use std::result;
|
||||||
use std::sync::RwLockWriteGuard;
|
use std::sync::RwLockWriteGuard;
|
||||||
@@ -56,9 +56,9 @@ fn save_managed_paths(
|
|||||||
directory: &mut dyn Directory,
|
directory: &mut dyn Directory,
|
||||||
wlock: &RwLockWriteGuard<'_, MetaInformation>,
|
wlock: &RwLockWriteGuard<'_, MetaInformation>,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let mut w = serde_json::to_vec(&wlock.managed_paths)?;
|
let mut managed_json = serde_json::to_string_pretty(&wlock.managed_paths)?;
|
||||||
writeln!(&mut w)?;
|
managed_json.push_str("\n");
|
||||||
directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
|
directory.atomic_write(&MANAGED_FILEPATH, managed_json.as_bytes())?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,7 +86,12 @@ impl ManagedDirectory {
|
|||||||
directory: Box::new(directory),
|
directory: Box::new(directory),
|
||||||
meta_informations: Arc::default(),
|
meta_informations: Arc::default(),
|
||||||
}),
|
}),
|
||||||
Err(OpenReadError::IOError(e)) => Err(From::from(e)),
|
Err(OpenReadError::IOError { io_error, filepath }) => {
|
||||||
|
Err(crate::TantivyError::OpenReadError(OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath,
|
||||||
|
}))
|
||||||
|
}
|
||||||
Err(OpenReadError::IncompatibleIndex(incompatibility)) => {
|
Err(OpenReadError::IncompatibleIndex(incompatibility)) => {
|
||||||
// For the moment, this should never happen: `meta.json`
|
// For the moment, this should never happen: `meta.json`
|
||||||
// does not have any footer, so incompatibility cannot be detected.
|
// does not have any footer, so incompatibility cannot be detected.
|
||||||
@@ -113,7 +118,7 @@ impl ManagedDirectory {
|
|||||||
&mut self,
|
&mut self,
|
||||||
get_living_files: L,
|
get_living_files: L,
|
||||||
) -> crate::Result<GarbageCollectionResult> {
|
) -> crate::Result<GarbageCollectionResult> {
|
||||||
info!("Garbage collect");
|
info!(self.directory.logger(), "gc"; "stage"=>"start");
|
||||||
let mut files_to_delete = vec![];
|
let mut files_to_delete = vec![];
|
||||||
|
|
||||||
// It is crucial to get the living files after acquiring the
|
// It is crucial to get the living files after acquiring the
|
||||||
@@ -148,7 +153,7 @@ impl ManagedDirectory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
error!("Failed to acquire lock for GC");
|
error!(self.logger(), "Failed to acquire lock for GC");
|
||||||
return Err(crate::TantivyError::from(err));
|
return Err(crate::TantivyError::from(err));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -160,7 +165,7 @@ impl ManagedDirectory {
|
|||||||
for file_to_delete in files_to_delete {
|
for file_to_delete in files_to_delete {
|
||||||
match self.delete(&file_to_delete) {
|
match self.delete(&file_to_delete) {
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
info!("Deleted {:?}", file_to_delete);
|
debug!(self.logger(), "deleted-success"; "file"=>format!("{:?}", file_to_delete));
|
||||||
deleted_files.push(file_to_delete);
|
deleted_files.push(file_to_delete);
|
||||||
}
|
}
|
||||||
Err(file_error) => {
|
Err(file_error) => {
|
||||||
@@ -168,12 +173,12 @@ impl ManagedDirectory {
|
|||||||
DeleteError::FileDoesNotExist(_) => {
|
DeleteError::FileDoesNotExist(_) => {
|
||||||
deleted_files.push(file_to_delete.clone());
|
deleted_files.push(file_to_delete.clone());
|
||||||
}
|
}
|
||||||
DeleteError::IOError(_) => {
|
DeleteError::IOError { .. } => {
|
||||||
failed_to_delete_files.push(file_to_delete.clone());
|
failed_to_delete_files.push(file_to_delete.clone());
|
||||||
if !cfg!(target_os = "windows") {
|
if !cfg!(target_os = "windows") {
|
||||||
// On windows, delete is expected to fail if the file
|
// On windows, delete is expected to fail if the file
|
||||||
// is mmapped.
|
// is mmapped.
|
||||||
error!("Failed to delete {:?}", file_to_delete);
|
error!(self.logger(), "delete-file-fail"; "path"=>file_to_delete.to_str().unwrap_or("<invalid-utf8>"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -195,6 +200,10 @@ impl ManagedDirectory {
|
|||||||
save_managed_paths(self.directory.as_mut(), &meta_informations_wlock)?;
|
save_managed_paths(self.directory.as_mut(), &meta_informations_wlock)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(self.directory.logger(), "gc"; "stage"=>"end",
|
||||||
|
"num-sucess-file-deletes"=>deleted_files.len(),
|
||||||
|
"num-failed-file-deletes"=>failed_to_delete_files.len());
|
||||||
|
|
||||||
Ok(GarbageCollectionResult {
|
Ok(GarbageCollectionResult {
|
||||||
deleted_files,
|
deleted_files,
|
||||||
failed_to_delete_files,
|
failed_to_delete_files,
|
||||||
@@ -231,8 +240,11 @@ impl ManagedDirectory {
|
|||||||
/// Verify checksum of a managed file
|
/// Verify checksum of a managed file
|
||||||
pub fn validate_checksum(&self, path: &Path) -> result::Result<bool, OpenReadError> {
|
pub fn validate_checksum(&self, path: &Path) -> result::Result<bool, OpenReadError> {
|
||||||
let reader = self.directory.open_read(path)?;
|
let reader = self.directory.open_read(path)?;
|
||||||
let (footer, data) = Footer::extract_footer(reader)
|
let (footer, data) =
|
||||||
.map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
|
Footer::extract_footer(reader).map_err(|io_error| OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_path_buf(),
|
||||||
|
})?;
|
||||||
let mut hasher = Hasher::new();
|
let mut hasher = Hasher::new();
|
||||||
hasher.update(data.as_slice());
|
hasher.update(data.as_slice());
|
||||||
let crc = hasher.finalize();
|
let crc = hasher.finalize();
|
||||||
@@ -245,35 +257,46 @@ impl ManagedDirectory {
|
|||||||
|
|
||||||
/// List files for which checksum does not match content
|
/// List files for which checksum does not match content
|
||||||
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
|
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
|
||||||
let mut hashset = HashSet::new();
|
let mut managed_paths = self
|
||||||
let managed_paths = self
|
|
||||||
.meta_informations
|
.meta_informations
|
||||||
.read()
|
.read()
|
||||||
.expect("Managed directory rlock poisoned in list damaged.")
|
.expect("Managed directory rlock poisoned in list damaged.")
|
||||||
.managed_paths
|
.managed_paths
|
||||||
.clone();
|
.clone();
|
||||||
|
|
||||||
for path in managed_paths.into_iter() {
|
managed_paths.remove(*META_FILEPATH);
|
||||||
|
|
||||||
|
let mut damaged_files = HashSet::new();
|
||||||
|
for path in managed_paths {
|
||||||
if !self.validate_checksum(&path)? {
|
if !self.validate_checksum(&path)? {
|
||||||
hashset.insert(path);
|
damaged_files.insert(path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(hashset)
|
Ok(damaged_files)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Directory for ManagedDirectory {
|
impl Directory for ManagedDirectory {
|
||||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
||||||
|
slog::debug!(self.logger(), "open-read"; "path" => path.to_str().unwrap_or("<invalid-utf8>"));
|
||||||
let read_only_source = self.directory.open_read(path)?;
|
let read_only_source = self.directory.open_read(path)?;
|
||||||
let (footer, reader) = Footer::extract_footer(read_only_source)
|
let (footer, reader) = Footer::extract_footer(read_only_source).map_err(|io_error| {
|
||||||
.map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
|
OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_path_buf(),
|
||||||
|
}
|
||||||
|
})?;
|
||||||
footer.is_compatible()?;
|
footer.is_compatible()?;
|
||||||
Ok(reader)
|
Ok(reader)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open_write(&mut self, path: &Path) -> result::Result<WritePtr, OpenWriteError> {
|
fn open_write(&mut self, path: &Path) -> result::Result<WritePtr, OpenWriteError> {
|
||||||
|
slog::debug!(self.logger(), "open-write"; "path" => path.to_str().unwrap_or("<invalid-utf8>"));
|
||||||
self.register_file_as_managed(path)
|
self.register_file_as_managed(path)
|
||||||
.map_err(|e| IOError::with_path(path.to_owned(), e))?;
|
.map_err(|io_error| OpenWriteError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_path_buf(),
|
||||||
|
})?;
|
||||||
Ok(io::BufWriter::new(Box::new(FooterProxy::new(
|
Ok(io::BufWriter::new(Box::new(FooterProxy::new(
|
||||||
self.directory
|
self.directory
|
||||||
.open_write(path)?
|
.open_write(path)?
|
||||||
@@ -283,9 +306,11 @@ impl Directory for ManagedDirectory {
|
|||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
|
fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
|
||||||
|
let content_str = std::str::from_utf8(content).unwrap_or("<content-not-utf-8>");
|
||||||
|
slog::debug!(self.logger(), "Atomic write"; "path" => format!("{:?}", path), "content_length"=>content_str);
|
||||||
self.register_file_as_managed(path)?;
|
self.register_file_as_managed(path)?;
|
||||||
self.directory.atomic_write(path, data)
|
self.directory.atomic_write(path, content)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn atomic_read(&self, path: &Path) -> result::Result<Vec<u8>, OpenReadError> {
|
fn atomic_read(&self, path: &Path) -> result::Result<Vec<u8>, OpenReadError> {
|
||||||
@@ -307,6 +332,10 @@ impl Directory for ManagedDirectory {
|
|||||||
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
self.directory.watch(watch_callback)
|
self.directory.watch(watch_callback)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn logger(&self) -> &slog::Logger {
|
||||||
|
self.directory.logger()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for ManagedDirectory {
|
impl Clone for ManagedDirectory {
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
use crate::core::META_FILEPATH;
|
use crate::core::META_FILEPATH;
|
||||||
use crate::directory::error::LockError;
|
use crate::directory::error::LockError;
|
||||||
use crate::directory::error::{
|
use crate::directory::error::{DeleteError, OpenDirectoryError, OpenReadError, OpenWriteError};
|
||||||
DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError,
|
|
||||||
};
|
|
||||||
use crate::directory::read_only_source::BoxedData;
|
use crate::directory::read_only_source::BoxedData;
|
||||||
use crate::directory::AntiCallToken;
|
use crate::directory::AntiCallToken;
|
||||||
use crate::directory::Directory;
|
use crate::directory::Directory;
|
||||||
@@ -19,6 +17,8 @@ use notify::RawEvent;
|
|||||||
use notify::RecursiveMode;
|
use notify::RecursiveMode;
|
||||||
use notify::Watcher;
|
use notify::Watcher;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use slog::{debug, o, Drain, Logger};
|
||||||
|
use slog_stdlog::StdLog;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::From;
|
use std::convert::From;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
@@ -36,11 +36,6 @@ use std::sync::Weak;
|
|||||||
use std::thread;
|
use std::thread;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
/// Create a default io error given a string.
|
|
||||||
pub(crate) fn make_io_err(msg: String) -> io::Error {
|
|
||||||
io::Error::new(io::ErrorKind::Other, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns None iff the file exists, can be read, but is empty (and hence
|
/// Returns None iff the file exists, can be read, but is empty (and hence
|
||||||
/// cannot be mmapped)
|
/// cannot be mmapped)
|
||||||
fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
|
fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
|
||||||
@@ -48,13 +43,17 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
|
|||||||
if e.kind() == io::ErrorKind::NotFound {
|
if e.kind() == io::ErrorKind::NotFound {
|
||||||
OpenReadError::FileDoesNotExist(full_path.to_owned())
|
OpenReadError::FileDoesNotExist(full_path.to_owned())
|
||||||
} else {
|
} else {
|
||||||
OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e))
|
OpenReadError::IOError {
|
||||||
|
io_error: e,
|
||||||
|
filepath: full_path.to_owned(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let meta_data = file
|
let meta_data = file.metadata().map_err(|e| OpenReadError::IOError {
|
||||||
.metadata()
|
io_error: e,
|
||||||
.map_err(|e| IOError::with_path(full_path.to_owned(), e))?;
|
filepath: full_path.to_owned(),
|
||||||
|
})?;
|
||||||
if meta_data.len() == 0 {
|
if meta_data.len() == 0 {
|
||||||
// if the file size is 0, it will not be possible
|
// if the file size is 0, it will not be possible
|
||||||
// to mmap the file, so we return None
|
// to mmap the file, so we return None
|
||||||
@@ -64,7 +63,10 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
|
|||||||
unsafe {
|
unsafe {
|
||||||
memmap::Mmap::map(&file)
|
memmap::Mmap::map(&file)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.map_err(|e| From::from(IOError::with_path(full_path.to_owned(), e)))
|
.map_err(|e| OpenReadError::IOError {
|
||||||
|
io_error: e,
|
||||||
|
filepath: full_path.to_owned(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,7 +146,7 @@ struct WatcherWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl WatcherWrapper {
|
impl WatcherWrapper {
|
||||||
pub fn new(path: &Path) -> Result<Self, OpenDirectoryError> {
|
pub(crate) fn new(path: &Path, logger: Logger) -> Result<Self, OpenDirectoryError> {
|
||||||
let (tx, watcher_recv): (Sender<RawEvent>, Receiver<RawEvent>) = channel();
|
let (tx, watcher_recv): (Sender<RawEvent>, Receiver<RawEvent>) = channel();
|
||||||
// We need to initialize the
|
// We need to initialize the
|
||||||
let watcher = notify::raw_watcher(tx)
|
let watcher = notify::raw_watcher(tx)
|
||||||
@@ -158,7 +160,8 @@ impl WatcherWrapper {
|
|||||||
panic!("Unknown error while starting watching directory {:?}", path);
|
panic!("Unknown error while starting watching directory {:?}", path);
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
let watcher_router: Arc<WatchCallbackList> = Default::default();
|
let watcher_router: Arc<WatchCallbackList> =
|
||||||
|
Arc::new(WatchCallbackList::with_logger(logger));
|
||||||
let watcher_router_clone = watcher_router.clone();
|
let watcher_router_clone = watcher_router.clone();
|
||||||
thread::Builder::new()
|
thread::Builder::new()
|
||||||
.name("meta-file-watch-thread".to_string())
|
.name("meta-file-watch-thread".to_string())
|
||||||
@@ -183,6 +186,10 @@ impl WatcherWrapper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
})
|
||||||
|
.map_err(|io_error| OpenDirectoryError::IoError {
|
||||||
|
io_error,
|
||||||
|
directory_path: path.to_path_buf(),
|
||||||
})?;
|
})?;
|
||||||
Ok(WatcherWrapper {
|
Ok(WatcherWrapper {
|
||||||
_watcher: Mutex::new(watcher),
|
_watcher: Mutex::new(watcher),
|
||||||
@@ -217,15 +224,21 @@ struct MmapDirectoryInner {
|
|||||||
mmap_cache: RwLock<MmapCache>,
|
mmap_cache: RwLock<MmapCache>,
|
||||||
_temp_directory: Option<TempDir>,
|
_temp_directory: Option<TempDir>,
|
||||||
watcher: RwLock<Option<WatcherWrapper>>,
|
watcher: RwLock<Option<WatcherWrapper>>,
|
||||||
|
logger: Logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MmapDirectoryInner {
|
impl MmapDirectoryInner {
|
||||||
fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectoryInner {
|
fn new(
|
||||||
|
root_path: PathBuf,
|
||||||
|
temp_directory: Option<TempDir>,
|
||||||
|
logger: Logger,
|
||||||
|
) -> MmapDirectoryInner {
|
||||||
MmapDirectoryInner {
|
MmapDirectoryInner {
|
||||||
root_path,
|
root_path,
|
||||||
mmap_cache: Default::default(),
|
mmap_cache: Default::default(),
|
||||||
_temp_directory: temp_directory,
|
_temp_directory: temp_directory,
|
||||||
watcher: RwLock::new(None),
|
watcher: RwLock::new(None),
|
||||||
|
logger,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,7 +250,7 @@ impl MmapDirectoryInner {
|
|||||||
// The downside is that we might create a watch wrapper that is not useful.
|
// The downside is that we might create a watch wrapper that is not useful.
|
||||||
let need_initialization = self.watcher.read().unwrap().is_none();
|
let need_initialization = self.watcher.read().unwrap().is_none();
|
||||||
if need_initialization {
|
if need_initialization {
|
||||||
let watch_wrapper = WatcherWrapper::new(&self.root_path)?;
|
let watch_wrapper = WatcherWrapper::new(&self.root_path, self.logger.clone())?;
|
||||||
let mut watch_wlock = self.watcher.write().unwrap();
|
let mut watch_wlock = self.watcher.write().unwrap();
|
||||||
// the watcher could have been initialized when we released the lock, and
|
// the watcher could have been initialized when we released the lock, and
|
||||||
// we do not want to lose the watched files that were set.
|
// we do not want to lose the watched files that were set.
|
||||||
@@ -260,8 +273,8 @@ impl fmt::Debug for MmapDirectory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MmapDirectory {
|
impl MmapDirectory {
|
||||||
fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectory {
|
fn new(root_path: PathBuf, temp_directory: Option<TempDir>, logger: Logger) -> MmapDirectory {
|
||||||
let inner = MmapDirectoryInner::new(root_path, temp_directory);
|
let inner = MmapDirectoryInner::new(root_path, temp_directory, logger);
|
||||||
MmapDirectory {
|
MmapDirectory {
|
||||||
inner: Arc::new(inner),
|
inner: Arc::new(inner),
|
||||||
}
|
}
|
||||||
@@ -272,16 +285,19 @@ impl MmapDirectory {
|
|||||||
/// This is mostly useful to test the MmapDirectory itself.
|
/// This is mostly useful to test the MmapDirectory itself.
|
||||||
/// For your unit tests, prefer the RAMDirectory.
|
/// For your unit tests, prefer the RAMDirectory.
|
||||||
pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
|
pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
|
||||||
let tempdir = TempDir::new().map_err(OpenDirectoryError::IoError)?;
|
let tempdir = TempDir::new().map_err(OpenDirectoryError::FailedToCreateTempDir)?;
|
||||||
let tempdir_path = PathBuf::from(tempdir.path());
|
let logger = Logger::root(StdLog.fuse(), o!());
|
||||||
Ok(MmapDirectory::new(tempdir_path, Some(tempdir)))
|
Ok(MmapDirectory::new(tempdir.path().to_owned(), Some(tempdir), logger))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Opens a MmapDirectory in a directory.
|
/// Opens a MmapDirectory in a directory.
|
||||||
///
|
///
|
||||||
/// Returns an error if the `directory_path` does not
|
/// Returns an error if the `directory_path` does not
|
||||||
/// exist or if it is not a directory.
|
/// exist or if it is not a directory.
|
||||||
pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<MmapDirectory, OpenDirectoryError> {
|
pub fn open_with_logger<P: AsRef<Path>>(
|
||||||
|
directory_path: P,
|
||||||
|
logger: Logger,
|
||||||
|
) -> Result<MmapDirectory, OpenDirectoryError> {
|
||||||
let directory_path: &Path = directory_path.as_ref();
|
let directory_path: &Path = directory_path.as_ref();
|
||||||
if !directory_path.exists() {
|
if !directory_path.exists() {
|
||||||
Err(OpenDirectoryError::DoesNotExist(PathBuf::from(
|
Err(OpenDirectoryError::DoesNotExist(PathBuf::from(
|
||||||
@@ -292,10 +308,20 @@ impl MmapDirectory {
|
|||||||
directory_path,
|
directory_path,
|
||||||
)))
|
)))
|
||||||
} else {
|
} else {
|
||||||
Ok(MmapDirectory::new(PathBuf::from(directory_path), None))
|
Ok(MmapDirectory::new(
|
||||||
|
PathBuf::from(directory_path),
|
||||||
|
None,
|
||||||
|
logger,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Opens an `MmapDirectory` at the given path, using a default `StdLog`-backed logger.
|
||||||
|
pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<MmapDirectory, OpenDirectoryError> {
|
||||||
|
let logger = Logger::root(StdLog.fuse(), o!());
|
||||||
|
Self::open_with_logger(directory_path, logger)
|
||||||
|
}
|
||||||
|
|
||||||
/// Joins a relative_path to the directory `root_path`
|
/// Joins a relative_path to the directory `root_path`
|
||||||
/// to create a proper complete `filepath`.
|
/// to create a proper complete `filepath`.
|
||||||
fn resolve_path(&self, relative_path: &Path) -> PathBuf {
|
fn resolve_path(&self, relative_path: &Path) -> PathBuf {
|
||||||
@@ -355,11 +381,12 @@ impl MmapDirectory {
|
|||||||
struct ReleaseLockFile {
|
struct ReleaseLockFile {
|
||||||
_file: File,
|
_file: File,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
|
logger: Logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for ReleaseLockFile {
|
impl Drop for ReleaseLockFile {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
debug!("Releasing lock {:?}", self.path);
|
debug!(self.logger, "Releasing lock {:?}", self.path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -398,16 +425,18 @@ impl TerminatingWrite for SafeFileWriter {
|
|||||||
|
|
||||||
impl Directory for MmapDirectory {
|
impl Directory for MmapDirectory {
|
||||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
||||||
debug!("Open Read {:?}", path);
|
|
||||||
let full_path = self.resolve_path(path);
|
let full_path = self.resolve_path(path);
|
||||||
|
|
||||||
let mut mmap_cache = self.inner.mmap_cache.write().map_err(|_| {
|
let mut mmap_cache = self.inner.mmap_cache.write().map_err(|_| {
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
"Failed to acquired write lock \
|
"Failed to acquired write lock \
|
||||||
on mmap cache while reading {:?}",
|
on mmap cache while reading {:?}",
|
||||||
path
|
path
|
||||||
);
|
);
|
||||||
IOError::with_path(path.to_owned(), make_io_err(msg))
|
let io_error = io::Error::new(io::ErrorKind::Other, msg);
|
||||||
|
OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
}
|
||||||
})?;
|
})?;
|
||||||
Ok(mmap_cache
|
Ok(mmap_cache
|
||||||
.get_mmap(&full_path)?
|
.get_mmap(&full_path)?
|
||||||
@@ -420,14 +449,18 @@ impl Directory for MmapDirectory {
|
|||||||
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
|
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
|
||||||
let full_path = self.resolve_path(path);
|
let full_path = self.resolve_path(path);
|
||||||
match fs::remove_file(&full_path) {
|
match fs::remove_file(&full_path) {
|
||||||
Ok(_) => self
|
Ok(_) => self.sync_directory().map_err(|e| DeleteError::IOError {
|
||||||
.sync_directory()
|
io_error: e,
|
||||||
.map_err(|e| IOError::with_path(path.to_owned(), e).into()),
|
filepath: path.to_path_buf(),
|
||||||
|
}),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if e.kind() == io::ErrorKind::NotFound {
|
if e.kind() == io::ErrorKind::NotFound {
|
||||||
Err(DeleteError::FileDoesNotExist(path.to_owned()))
|
Err(DeleteError::FileDoesNotExist(path.to_owned()))
|
||||||
} else {
|
} else {
|
||||||
Err(IOError::with_path(path.to_owned(), e).into())
|
Err(DeleteError::IOError {
|
||||||
|
io_error: e,
|
||||||
|
filepath: path.to_path_buf(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -439,9 +472,7 @@ impl Directory for MmapDirectory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
|
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
|
||||||
debug!("Open Write {:?}", path);
|
|
||||||
let full_path = self.resolve_path(path);
|
let full_path = self.resolve_path(path);
|
||||||
|
|
||||||
let open_res = OpenOptions::new()
|
let open_res = OpenOptions::new()
|
||||||
.write(true)
|
.write(true)
|
||||||
.create_new(true)
|
.create_new(true)
|
||||||
@@ -451,18 +482,25 @@ impl Directory for MmapDirectory {
|
|||||||
if err.kind() == io::ErrorKind::AlreadyExists {
|
if err.kind() == io::ErrorKind::AlreadyExists {
|
||||||
OpenWriteError::FileAlreadyExists(path.to_owned())
|
OpenWriteError::FileAlreadyExists(path.to_owned())
|
||||||
} else {
|
} else {
|
||||||
IOError::with_path(path.to_owned(), err).into()
|
OpenWriteError::IOError {
|
||||||
|
io_error: err,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// making sure the file is created.
|
// making sure the file is created.
|
||||||
file.flush()
|
file.flush().map_err(|io_error| OpenWriteError::IOError {
|
||||||
.map_err(|e| IOError::with_path(path.to_owned(), e))?;
|
io_error,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
})?;
|
||||||
|
|
||||||
// Apparently, on some filesystems, syncing the parent
|
// Apparently, on some filesystems, syncing the parent
|
||||||
// directory is required.
|
// directory is required.
|
||||||
self.sync_directory()
|
self.sync_directory().map_err(|e| OpenWriteError::IOError {
|
||||||
.map_err(|e| IOError::with_path(path.to_owned(), e))?;
|
io_error: e,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
})?;
|
||||||
|
|
||||||
let writer = SafeFileWriter::new(file);
|
let writer = SafeFileWriter::new(file);
|
||||||
Ok(BufWriter::new(Box::new(writer)))
|
Ok(BufWriter::new(Box::new(writer)))
|
||||||
@@ -474,24 +512,31 @@ impl Directory for MmapDirectory {
|
|||||||
match File::open(&full_path) {
|
match File::open(&full_path) {
|
||||||
Ok(mut file) => {
|
Ok(mut file) => {
|
||||||
file.read_to_end(&mut buffer)
|
file.read_to_end(&mut buffer)
|
||||||
.map_err(|e| IOError::with_path(path.to_owned(), e))?;
|
.map_err(|io_error| OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
})?;
|
||||||
Ok(buffer)
|
Ok(buffer)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(io_error) => {
|
||||||
if e.kind() == io::ErrorKind::NotFound {
|
if io_error.kind() == io::ErrorKind::NotFound {
|
||||||
Err(OpenReadError::FileDoesNotExist(path.to_owned()))
|
Err(OpenReadError::FileDoesNotExist(path.to_owned()))
|
||||||
} else {
|
} else {
|
||||||
Err(IOError::with_path(path.to_owned(), e).into())
|
Err(OpenReadError::IOError {
|
||||||
|
io_error,
|
||||||
|
filepath: path.to_owned(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
|
fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
|
||||||
debug!("Atomic Write {:?}", path);
|
let mut tempfile = tempfile::Builder::new().tempfile_in(&self.inner.root_path)?;
|
||||||
|
tempfile.write_all(content)?;
|
||||||
|
tempfile.flush()?;
|
||||||
let full_path = self.resolve_path(path);
|
let full_path = self.resolve_path(path);
|
||||||
let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
|
tempfile.into_temp_path().persist(full_path)?;
|
||||||
meta_file.write(|f| f.write_all(data))?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -508,16 +553,22 @@ impl Directory for MmapDirectory {
|
|||||||
} else {
|
} else {
|
||||||
file.try_lock_exclusive().map_err(|_| LockError::LockBusy)?
|
file.try_lock_exclusive().map_err(|_| LockError::LockBusy)?
|
||||||
}
|
}
|
||||||
|
let logger = self.inner.logger.clone();
|
||||||
// dropping the file handle will release the lock.
|
// dropping the file handle will release the lock.
|
||||||
Ok(DirectoryLock::from(Box::new(ReleaseLockFile {
|
Ok(DirectoryLock::from(Box::new(ReleaseLockFile {
|
||||||
path: lock.filepath.clone(),
|
path: lock.filepath.clone(),
|
||||||
_file: file,
|
_file: file,
|
||||||
|
logger,
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
self.inner.watch(watch_callback)
|
self.inner.watch(watch_callback)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn logger(&self) -> &Logger {
|
||||||
|
&self.inner.logger
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -627,7 +678,8 @@ mod tests {
|
|||||||
let counter_clone = counter.clone();
|
let counter_clone = counter.clone();
|
||||||
let tmp_dir = tempfile::TempDir::new().unwrap();
|
let tmp_dir = tempfile::TempDir::new().unwrap();
|
||||||
let tmp_dirpath = tmp_dir.path().to_owned();
|
let tmp_dirpath = tmp_dir.path().to_owned();
|
||||||
let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
|
let logger = Logger::root(slog::Discard, o!());
|
||||||
|
let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath, logger).unwrap();
|
||||||
let tmp_file = tmp_dirpath.join(*META_FILEPATH);
|
let tmp_file = tmp_dirpath.join(*META_FILEPATH);
|
||||||
let _handle = watch_wrapper.watch(Box::new(move || {
|
let _handle = watch_wrapper.watch(Box::new(move || {
|
||||||
counter_clone.fetch_add(1, Ordering::SeqCst);
|
counter_clone.fetch_add(1, Ordering::SeqCst);
|
||||||
@@ -652,7 +704,7 @@ mod tests {
|
|||||||
{
|
{
|
||||||
let index = Index::create(mmap_directory.clone(), schema).unwrap();
|
let index = Index::create(mmap_directory.clone(), schema).unwrap();
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let mut log_merge_policy = LogMergePolicy::default();
|
let mut log_merge_policy = LogMergePolicy::default();
|
||||||
log_merge_policy.set_min_merge_size(3);
|
log_merge_policy.set_min_merge_size(3);
|
||||||
index_writer.set_merge_policy(Box::new(log_merge_policy));
|
index_writer.set_merge_policy(Box::new(log_merge_policy));
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ pub use self::directory::{Directory, DirectoryClone};
|
|||||||
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
|
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
|
||||||
pub use self::ram_directory::RAMDirectory;
|
pub use self::ram_directory::RAMDirectory;
|
||||||
pub use self::read_only_source::ReadOnlySource;
|
pub use self::read_only_source::ReadOnlySource;
|
||||||
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
|
pub(crate) use self::watch_event_router::WatchCallbackList;
|
||||||
|
pub use self::watch_event_router::{WatchCallback, WatchHandle};
|
||||||
use std::io::{self, BufWriter, Write};
|
use std::io::{self, BufWriter, Write};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
/// Outcome of the Garbage collection
|
/// Outcome of the Garbage collection
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ use crate::directory::WatchCallbackList;
|
|||||||
use crate::directory::{Directory, ReadOnlySource, WatchCallback, WatchHandle};
|
use crate::directory::{Directory, ReadOnlySource, WatchCallback, WatchHandle};
|
||||||
use crate::directory::{TerminatingWrite, WritePtr};
|
use crate::directory::{TerminatingWrite, WritePtr};
|
||||||
use fail::fail_point;
|
use fail::fail_point;
|
||||||
|
use slog::{o, Drain, Logger};
|
||||||
|
use slog_stdlog::StdLog;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
|
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
|
||||||
@@ -66,7 +68,7 @@ impl Write for VecWriter {
|
|||||||
|
|
||||||
fn flush(&mut self) -> io::Result<()> {
|
fn flush(&mut self) -> io::Result<()> {
|
||||||
self.is_flushed = true;
|
self.is_flushed = true;
|
||||||
let mut fs = self.shared_directory.fs.write().unwrap();
|
let mut fs = self.shared_directory.fs.inner_directory.write().unwrap();
|
||||||
fs.write(self.path.clone(), self.data.get_ref());
|
fs.write(self.path.clone(), self.data.get_ref());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -78,13 +80,19 @@ impl TerminatingWrite for VecWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct InnerDirectory {
|
struct InnerDirectory {
|
||||||
fs: HashMap<PathBuf, ReadOnlySource>,
|
fs: HashMap<PathBuf, ReadOnlySource>,
|
||||||
watch_router: WatchCallbackList,
|
watch_router: WatchCallbackList,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl InnerDirectory {
|
impl InnerDirectory {
|
||||||
|
fn with_logger(logger: Logger) -> Self {
|
||||||
|
InnerDirectory {
|
||||||
|
fs: Default::default(),
|
||||||
|
watch_router: WatchCallbackList::with_logger(logger.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn write(&mut self, path: PathBuf, data: &[u8]) -> bool {
|
fn write(&mut self, path: PathBuf, data: &[u8]) -> bool {
|
||||||
let data = ReadOnlySource::new(Vec::from(data));
|
let data = ReadOnlySource::new(Vec::from(data));
|
||||||
self.fs.insert(path, data).is_some()
|
self.fs.insert(path, data).is_some()
|
||||||
@@ -117,20 +125,32 @@ impl InnerDirectory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for RAMDirectory {
|
||||||
|
fn default() -> RAMDirectory {
|
||||||
|
let logger = Logger::root(StdLog.fuse(), o!());
|
||||||
|
Self::with_logger(logger)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Debug for RAMDirectory {
|
impl fmt::Debug for RAMDirectory {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "RAMDirectory")
|
write!(f, "RAMDirectory")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Inner {
|
||||||
|
inner_directory: RwLock<InnerDirectory>,
|
||||||
|
logger: Logger,
|
||||||
|
}
|
||||||
|
|
||||||
/// A Directory storing everything in anonymous memory.
|
/// A Directory storing everything in anonymous memory.
|
||||||
///
|
///
|
||||||
/// It is mainly meant for unit testing.
|
/// It is mainly meant for unit testing.
|
||||||
/// Writes are only made visible upon flushing.
|
/// Writes are only made visible upon flushing.
|
||||||
///
|
///
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone)]
|
||||||
pub struct RAMDirectory {
|
pub struct RAMDirectory {
|
||||||
fs: Arc<RwLock<InnerDirectory>>,
|
fs: Arc<Inner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RAMDirectory {
|
impl RAMDirectory {
|
||||||
@@ -139,10 +159,21 @@ impl RAMDirectory {
|
|||||||
Self::default()
|
Self::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a `RAMDirectory` with a custom logger.
|
||||||
|
pub fn with_logger(logger: Logger) -> RAMDirectory {
|
||||||
|
let inner_directory = InnerDirectory::with_logger(logger.clone()).into();
|
||||||
|
RAMDirectory {
|
||||||
|
fs: Arc::new(Inner {
|
||||||
|
inner_directory,
|
||||||
|
logger,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the sum of the size of the different files
|
/// Returns the sum of the size of the different files
|
||||||
/// in the RAMDirectory.
|
/// in the RAMDirectory.
|
||||||
pub fn total_mem_usage(&self) -> usize {
|
pub fn total_mem_usage(&self) -> usize {
|
||||||
self.fs.read().unwrap().total_mem_usage()
|
self.fs.inner_directory.read().unwrap().total_mem_usage()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Write a copy of all of the files saved in the RAMDirectory in the target `Directory`.
|
/// Write a copy of all of the files saved in the RAMDirectory in the target `Directory`.
|
||||||
@@ -152,7 +183,7 @@ impl RAMDirectory {
|
|||||||
///
|
///
|
||||||
/// If an error is encounterred, files may be persisted partially.
|
/// If an error is encounterred, files may be persisted partially.
|
||||||
pub fn persist(&self, dest: &mut dyn Directory) -> crate::Result<()> {
|
pub fn persist(&self, dest: &mut dyn Directory) -> crate::Result<()> {
|
||||||
let wlock = self.fs.write().unwrap();
|
let wlock = self.fs.inner_directory.write().unwrap();
|
||||||
for (path, source) in wlock.fs.iter() {
|
for (path, source) in wlock.fs.iter() {
|
||||||
let mut dest_wrt = dest.open_write(path)?;
|
let mut dest_wrt = dest.open_write(path)?;
|
||||||
dest_wrt.write_all(source.as_slice())?;
|
dest_wrt.write_all(source.as_slice())?;
|
||||||
@@ -164,24 +195,25 @@ impl RAMDirectory {
|
|||||||
|
|
||||||
impl Directory for RAMDirectory {
|
impl Directory for RAMDirectory {
|
||||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
||||||
self.fs.read().unwrap().open_read(path)
|
self.fs.inner_directory.read().unwrap().open_read(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
|
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
|
||||||
fail_point!("RAMDirectory::delete", |_| {
|
fail_point!("RAMDirectory::delete", |_| {
|
||||||
use crate::directory::error::IOError;
|
Err(DeleteError::IOError {
|
||||||
let io_error = IOError::from(io::Error::from(io::ErrorKind::Other));
|
io_error: io::Error::from(io::ErrorKind::Other),
|
||||||
Err(DeleteError::from(io_error))
|
filepath: path.to_path_buf(),
|
||||||
|
})
|
||||||
});
|
});
|
||||||
self.fs.write().unwrap().delete(path)
|
self.fs.inner_directory.write().unwrap().delete(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exists(&self, path: &Path) -> bool {
|
fn exists(&self, path: &Path) -> bool {
|
||||||
self.fs.read().unwrap().exists(path)
|
self.fs.inner_directory.read().unwrap().exists(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
|
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
|
||||||
let mut fs = self.fs.write().unwrap();
|
let mut fs = self.fs.inner_directory.write().unwrap();
|
||||||
let path_buf = PathBuf::from(path);
|
let path_buf = PathBuf::from(path);
|
||||||
let vec_writer = VecWriter::new(path_buf.clone(), self.clone());
|
let vec_writer = VecWriter::new(path_buf.clone(), self.clone());
|
||||||
let exists = fs.write(path_buf.clone(), &[]);
|
let exists = fs.write(path_buf.clone(), &[]);
|
||||||
@@ -205,19 +237,38 @@ impl Directory for RAMDirectory {
|
|||||||
let path_buf = PathBuf::from(path);
|
let path_buf = PathBuf::from(path);
|
||||||
|
|
||||||
// Reserve the path to prevent calls to .write() to succeed.
|
// Reserve the path to prevent calls to .write() to succeed.
|
||||||
self.fs.write().unwrap().write(path_buf.clone(), &[]);
|
self.fs
|
||||||
|
.inner_directory
|
||||||
|
.write()
|
||||||
|
.unwrap()
|
||||||
|
.write(path_buf.clone(), &[]);
|
||||||
|
|
||||||
let mut vec_writer = VecWriter::new(path_buf, self.clone());
|
let mut vec_writer = VecWriter::new(path_buf, self.clone());
|
||||||
vec_writer.write_all(data)?;
|
vec_writer.write_all(data)?;
|
||||||
vec_writer.flush()?;
|
vec_writer.flush()?;
|
||||||
if path == Path::new(&*META_FILEPATH) {
|
if path == Path::new(&*META_FILEPATH) {
|
||||||
let _ = self.fs.write().unwrap().watch_router.broadcast();
|
let _ = self
|
||||||
|
.fs
|
||||||
|
.inner_directory
|
||||||
|
.write()
|
||||||
|
.unwrap()
|
||||||
|
.watch_router
|
||||||
|
.broadcast();
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
Ok(self.fs.write().unwrap().watch(watch_callback))
|
Ok(self
|
||||||
|
.fs
|
||||||
|
.inner_directory
|
||||||
|
.write()
|
||||||
|
.unwrap()
|
||||||
|
.watch(watch_callback))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn logger(&self) -> &Logger {
|
||||||
|
&self.fs.logger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -211,19 +211,19 @@ fn test_watch(directory: &mut dyn Directory) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
for i in 0..10 {
|
for i in 0..10 {
|
||||||
assert_eq!(i, counter.load(SeqCst));
|
assert!(i <= counter.load(SeqCst));
|
||||||
assert!(directory
|
assert!(directory
|
||||||
.atomic_write(Path::new("meta.json"), b"random_test_data_2")
|
.atomic_write(Path::new("meta.json"), b"random_test_data_2")
|
||||||
.is_ok());
|
.is_ok());
|
||||||
assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
|
assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
|
||||||
assert_eq!(i + 1, counter.load(SeqCst));
|
assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
|
||||||
}
|
}
|
||||||
mem::drop(watch_handle);
|
mem::drop(watch_handle);
|
||||||
assert!(directory
|
assert!(directory
|
||||||
.atomic_write(Path::new("meta.json"), b"random_test_data")
|
.atomic_write(Path::new("meta.json"), b"random_test_data")
|
||||||
.is_ok());
|
.is_ok());
|
||||||
assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
|
assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
|
||||||
assert_eq!(10, counter.load(SeqCst));
|
assert!(10 <= counter.load(SeqCst));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_lock_non_blocking(directory: &mut dyn Directory) {
|
fn test_lock_non_blocking(directory: &mut dyn Directory) {
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use futures::channel::oneshot;
|
use futures::channel::oneshot;
|
||||||
use futures::{Future, TryFutureExt};
|
use futures::{Future, TryFutureExt};
|
||||||
|
use slog::{error, Logger};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::RwLock;
|
use std::sync::RwLock;
|
||||||
use std::sync::Weak;
|
use std::sync::Weak;
|
||||||
@@ -11,9 +12,9 @@ pub type WatchCallback = Box<dyn Fn() + Sync + Send>;
|
|||||||
///
|
///
|
||||||
/// It registers callbacks (See `.subscribe(...)`) and
|
/// It registers callbacks (See `.subscribe(...)`) and
|
||||||
/// calls them upon calls to `.broadcast(...)`.
|
/// calls them upon calls to `.broadcast(...)`.
|
||||||
#[derive(Default)]
|
pub(crate) struct WatchCallbackList {
|
||||||
pub struct WatchCallbackList {
|
|
||||||
router: RwLock<Vec<Weak<WatchCallback>>>,
|
router: RwLock<Vec<Weak<WatchCallback>>>,
|
||||||
|
logger: Logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Controls how long a directory should watch for a file change.
|
/// Controls how long a directory should watch for a file change.
|
||||||
@@ -32,6 +33,13 @@ impl WatchHandle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl WatchCallbackList {
|
impl WatchCallbackList {
|
||||||
|
pub fn with_logger(logger: Logger) -> Self {
|
||||||
|
WatchCallbackList {
|
||||||
|
logger,
|
||||||
|
router: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Subscribes a new callback and returns a handle that controls the lifetime of the callback.
|
/// Subscribes a new callback and returns a handle that controls the lifetime of the callback.
|
||||||
pub fn subscribe(&self, watch_callback: WatchCallback) -> WatchHandle {
|
pub fn subscribe(&self, watch_callback: WatchCallback) -> WatchHandle {
|
||||||
let watch_callback_arc = Arc::new(watch_callback);
|
let watch_callback_arc = Arc::new(watch_callback);
|
||||||
@@ -74,8 +82,8 @@ impl WatchCallbackList {
|
|||||||
});
|
});
|
||||||
if let Err(err) = spawn_res {
|
if let Err(err) = spawn_res {
|
||||||
error!(
|
error!(
|
||||||
"Failed to spawn thread to call watch callbacks. Cause: {:?}",
|
self.logger,
|
||||||
err
|
"Failed to spawn thread to call watch callbacks. Cause: {:?}", err
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
@@ -86,13 +94,18 @@ impl WatchCallbackList {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use crate::directory::WatchCallbackList;
|
use crate::directory::WatchCallbackList;
|
||||||
use futures::executor::block_on;
|
use futures::executor::block_on;
|
||||||
|
use slog::{o, Discard, Logger};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
fn default_watch_callback_list() -> WatchCallbackList {
|
||||||
|
WatchCallbackList::with_logger(Logger::root(Discard, o!()))
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_watch_event_router_simple() {
|
fn test_watch_event_router_simple() {
|
||||||
let watch_event_router = WatchCallbackList::default();
|
let watch_event_router = default_watch_callback_list();
|
||||||
let counter: Arc<AtomicUsize> = Default::default();
|
let counter: Arc<AtomicUsize> = Default::default();
|
||||||
let counter_clone = counter.clone();
|
let counter_clone = counter.clone();
|
||||||
let inc_callback = Box::new(move || {
|
let inc_callback = Box::new(move || {
|
||||||
@@ -119,7 +132,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_watch_event_router_multiple_callback_same_key() {
|
fn test_watch_event_router_multiple_callback_same_key() {
|
||||||
let watch_event_router = WatchCallbackList::default();
|
let watch_event_router = default_watch_callback_list();
|
||||||
let counter: Arc<AtomicUsize> = Default::default();
|
let counter: Arc<AtomicUsize> = Default::default();
|
||||||
let inc_callback = |inc: usize| {
|
let inc_callback = |inc: usize| {
|
||||||
let counter_clone = counter.clone();
|
let counter_clone = counter.clone();
|
||||||
@@ -148,7 +161,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_watch_event_router_multiple_callback_different_key() {
|
fn test_watch_event_router_multiple_callback_different_key() {
|
||||||
let watch_event_router = WatchCallbackList::default();
|
let watch_event_router = default_watch_callback_list();
|
||||||
let counter: Arc<AtomicUsize> = Default::default();
|
let counter: Arc<AtomicUsize> = Default::default();
|
||||||
let counter_clone = counter.clone();
|
let counter_clone = counter.clone();
|
||||||
let inc_callback = Box::new(move || {
|
let inc_callback = Box::new(move || {
|
||||||
|
|||||||
98
src/error.rs
98
src/error.rs
@@ -2,11 +2,13 @@
|
|||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use crate::directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
|
|
||||||
use crate::directory::error::{Incompatibility, LockError};
|
use crate::directory::error::{Incompatibility, LockError};
|
||||||
use crate::fastfield::FastFieldNotAvailableError;
|
use crate::fastfield::FastFieldNotAvailableError;
|
||||||
use crate::query;
|
use crate::query;
|
||||||
use crate::schema;
|
use crate::{
|
||||||
|
directory::error::{OpenDirectoryError, OpenReadError, OpenWriteError},
|
||||||
|
schema,
|
||||||
|
};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::PoisonError;
|
use std::sync::PoisonError;
|
||||||
@@ -43,44 +45,47 @@ impl fmt::Debug for DataCorruption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The library's failure based error enum
|
/// The library's error enum
|
||||||
#[derive(Debug, Fail)]
|
#[derive(Debug, Error)]
|
||||||
pub enum TantivyError {
|
pub enum TantivyError {
|
||||||
/// Path does not exist.
|
/// Failed to open the directory.
|
||||||
#[fail(display = "Path does not exist: '{:?}'", _0)]
|
#[error("Failed to open the directory: '{0:?}'")]
|
||||||
PathDoesNotExist(PathBuf),
|
OpenDirectoryError(#[from] OpenDirectoryError),
|
||||||
/// File already exists, this is a problem when we try to write into a new file.
|
/// Failed to open a file for read.
|
||||||
#[fail(display = "File already exists: '{:?}'", _0)]
|
#[error("Failed to open file for read: '{0:?}'")]
|
||||||
FileAlreadyExists(PathBuf),
|
OpenReadError(#[from] OpenReadError),
|
||||||
|
/// Failed to open a file for write.
|
||||||
|
#[error("Failed to open file for write: '{0:?}'")]
|
||||||
|
OpenWriteError(#[from] OpenWriteError),
|
||||||
/// Index already exists in this directory
|
/// Index already exists in this directory
|
||||||
#[fail(display = "Index already exists")]
|
#[error("Index already exists")]
|
||||||
IndexAlreadyExists,
|
IndexAlreadyExists,
|
||||||
/// Failed to acquire file lock
|
/// Failed to acquire file lock
|
||||||
#[fail(display = "Failed to acquire Lockfile: {:?}. {:?}", _0, _1)]
|
#[error("Failed to acquire Lockfile: {0:?}. {1:?}")]
|
||||||
LockFailure(LockError, Option<String>),
|
LockFailure(LockError, Option<String>),
|
||||||
/// IO Error.
|
/// IO Error.
|
||||||
#[fail(display = "An IO error occurred: '{}'", _0)]
|
#[error("An IO error occurred: '{0}'")]
|
||||||
IOError(#[cause] IOError),
|
IOError(#[from] io::Error),
|
||||||
/// Data corruption.
|
/// Data corruption.
|
||||||
#[fail(display = "{:?}", _0)]
|
#[error("Data corrupted: '{0:?}'")]
|
||||||
DataCorruption(DataCorruption),
|
DataCorruption(DataCorruption),
|
||||||
/// A thread holding the locked panicked and poisoned the lock.
|
/// A thread holding the locked panicked and poisoned the lock.
|
||||||
#[fail(display = "A thread holding the locked panicked and poisoned the lock")]
|
#[error("A thread holding the locked panicked and poisoned the lock")]
|
||||||
Poisoned,
|
Poisoned,
|
||||||
/// Invalid argument was passed by the user.
|
/// Invalid argument was passed by the user.
|
||||||
#[fail(display = "An invalid argument was passed: '{}'", _0)]
|
#[error("An invalid argument was passed: '{0}'")]
|
||||||
InvalidArgument(String),
|
InvalidArgument(String),
|
||||||
/// An Error happened in one of the thread.
|
/// An Error happened in one of the thread.
|
||||||
#[fail(display = "An error occurred in a thread: '{}'", _0)]
|
#[error("An error occurred in a thread: '{0}'")]
|
||||||
ErrorInThread(String),
|
ErrorInThread(String),
|
||||||
/// An Error appeared related to the schema.
|
/// An Error appeared related to the schema.
|
||||||
#[fail(display = "Schema error: '{}'", _0)]
|
#[error("Schema error: '{0}'")]
|
||||||
SchemaError(String),
|
SchemaError(String),
|
||||||
/// System error. (e.g.: We failed spawning a new thread)
|
/// System error. (e.g.: We failed spawning a new thread)
|
||||||
#[fail(display = "System error.'{}'", _0)]
|
#[error("System error.'{0}'")]
|
||||||
SystemError(String),
|
SystemError(String),
|
||||||
/// Index incompatible with current version of tantivy
|
/// Index incompatible with current version of tantivy
|
||||||
#[fail(display = "{:?}", _0)]
|
#[error("{0:?}")]
|
||||||
IncompatibleIndex(Incompatibility),
|
IncompatibleIndex(Incompatibility),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,31 +94,17 @@ impl From<DataCorruption> for TantivyError {
|
|||||||
TantivyError::DataCorruption(data_corruption)
|
TantivyError::DataCorruption(data_corruption)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<FastFieldNotAvailableError> for TantivyError {
|
impl From<FastFieldNotAvailableError> for TantivyError {
|
||||||
fn from(fastfield_error: FastFieldNotAvailableError) -> TantivyError {
|
fn from(fastfield_error: FastFieldNotAvailableError) -> TantivyError {
|
||||||
TantivyError::SchemaError(format!("{}", fastfield_error))
|
TantivyError::SchemaError(format!("{}", fastfield_error))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<LockError> for TantivyError {
|
impl From<LockError> for TantivyError {
|
||||||
fn from(lock_error: LockError) -> TantivyError {
|
fn from(lock_error: LockError) -> TantivyError {
|
||||||
TantivyError::LockFailure(lock_error, None)
|
TantivyError::LockFailure(lock_error, None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<IOError> for TantivyError {
|
|
||||||
fn from(io_error: IOError) -> TantivyError {
|
|
||||||
TantivyError::IOError(io_error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<io::Error> for TantivyError {
|
|
||||||
fn from(io_error: io::Error) -> TantivyError {
|
|
||||||
TantivyError::IOError(io_error.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<query::QueryParserError> for TantivyError {
|
impl From<query::QueryParserError> for TantivyError {
|
||||||
fn from(parsing_error: query::QueryParserError) -> TantivyError {
|
fn from(parsing_error: query::QueryParserError) -> TantivyError {
|
||||||
TantivyError::InvalidArgument(format!("Query is invalid. {:?}", parsing_error))
|
TantivyError::InvalidArgument(format!("Query is invalid. {:?}", parsing_error))
|
||||||
@@ -126,49 +117,12 @@ impl<Guard> From<PoisonError<Guard>> for TantivyError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<OpenReadError> for TantivyError {
|
|
||||||
fn from(error: OpenReadError) -> TantivyError {
|
|
||||||
match error {
|
|
||||||
OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath),
|
|
||||||
OpenReadError::IOError(io_error) => TantivyError::IOError(io_error),
|
|
||||||
OpenReadError::IncompatibleIndex(incompatibility) => {
|
|
||||||
TantivyError::IncompatibleIndex(incompatibility)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<schema::DocParsingError> for TantivyError {
|
impl From<schema::DocParsingError> for TantivyError {
|
||||||
fn from(error: schema::DocParsingError) -> TantivyError {
|
fn from(error: schema::DocParsingError) -> TantivyError {
|
||||||
TantivyError::InvalidArgument(format!("Failed to parse document {:?}", error))
|
TantivyError::InvalidArgument(format!("Failed to parse document {:?}", error))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<OpenWriteError> for TantivyError {
|
|
||||||
fn from(error: OpenWriteError) -> TantivyError {
|
|
||||||
match error {
|
|
||||||
OpenWriteError::FileAlreadyExists(filepath) => {
|
|
||||||
TantivyError::FileAlreadyExists(filepath)
|
|
||||||
}
|
|
||||||
OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<OpenDirectoryError> for TantivyError {
|
|
||||||
fn from(error: OpenDirectoryError) -> TantivyError {
|
|
||||||
match error {
|
|
||||||
OpenDirectoryError::DoesNotExist(directory_path) => {
|
|
||||||
TantivyError::PathDoesNotExist(directory_path)
|
|
||||||
}
|
|
||||||
OpenDirectoryError::NotADirectory(directory_path) => {
|
|
||||||
TantivyError::InvalidArgument(format!("{:?} is not a directory", directory_path))
|
|
||||||
}
|
|
||||||
OpenDirectoryError::IoError(err) => TantivyError::IOError(IOError::from(err)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<serde_json::Error> for TantivyError {
|
impl From<serde_json::Error> for TantivyError {
|
||||||
fn from(error: serde_json::Error) -> TantivyError {
|
fn from(error: serde_json::Error) -> TantivyError {
|
||||||
let io_err = io::Error::from(error);
|
let io_err = io::Error::from(error);
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ mod tests {
|
|||||||
let field = schema_builder.add_bytes_field("bytesfield");
|
let field = schema_builder.add_bytes_field("bytesfield");
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(field=>vec![0u8, 1, 2, 3]));
|
index_writer.add_document(doc!(field=>vec![0u8, 1, 2, 3]));
|
||||||
index_writer.add_document(doc!(field=>vec![]));
|
index_writer.add_document(doc!(field=>vec![]));
|
||||||
index_writer.add_document(doc!(field=>vec![255u8]));
|
index_writer.add_document(doc!(field=>vec![255u8]));
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ use std::result;
|
|||||||
/// `FastFieldNotAvailableError` is returned when the
|
/// `FastFieldNotAvailableError` is returned when the
|
||||||
/// user requested for a fast field reader, and the field was not
|
/// user requested for a fast field reader, and the field was not
|
||||||
/// defined in the schema as a fast field.
|
/// defined in the schema as a fast field.
|
||||||
#[derive(Debug, Fail)]
|
#[derive(Debug, Error)]
|
||||||
#[fail(display = "Fast field not available: '{:?}'", field_name)]
|
#[error("Fast field not available: '{field_name:?}'")]
|
||||||
pub struct FastFieldNotAvailableError {
|
pub struct FastFieldNotAvailableError {
|
||||||
field_name: String,
|
field_name: String,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -474,7 +474,7 @@ mod tests {
|
|||||||
let date_field = schema_builder.add_date_field("date", FAST);
|
let date_field = schema_builder.add_date_field("date", FAST);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||||
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()));
|
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit().unwrap();
|
||||||
@@ -511,7 +511,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
date_field => crate::DateTime::from_u64(1i64.to_u64()),
|
date_field => crate::DateTime::from_u64(1i64.to_u64()),
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(field=>1u64, field=>3u64));
|
index_writer.add_document(doc!(field=>1u64, field=>3u64));
|
||||||
index_writer.add_document(doc!());
|
index_writer.add_document(doc!());
|
||||||
index_writer.add_document(doc!(field=>4u64));
|
index_writer.add_document(doc!(field=>4u64));
|
||||||
@@ -64,7 +64,7 @@ mod tests {
|
|||||||
schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored());
|
schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored());
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let first_time_stamp = chrono::Utc::now();
|
let first_time_stamp = chrono::Utc::now();
|
||||||
index_writer.add_document(
|
index_writer.add_document(
|
||||||
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
|
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
|
||||||
@@ -186,7 +186,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
|
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
|
||||||
index_writer.add_document(doc!());
|
index_writer.add_document(doc!());
|
||||||
index_writer.add_document(doc!(field=> -4i64));
|
index_writer.add_document(doc!(field=> -4i64));
|
||||||
@@ -221,7 +221,7 @@ mod tests {
|
|||||||
let field = schema_builder.add_facet_field("facetfield");
|
let field = schema_builder.add_facet_field("facetfield");
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for i in 0..100_000 {
|
for i in 0..100_000 {
|
||||||
index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())));
|
index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index
|
let mut index_writer = index
|
||||||
.writer_with_num_threads(1, 30_000_000)
|
.writer_for_tests()
|
||||||
.expect("Failed to create index writer.");
|
.expect("Failed to create index writer.");
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
facet_field => Facet::from("/category/cat2"),
|
facet_field => Facet::from("/category/cat2"),
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ use crate::Opstamp;
|
|||||||
use crossbeam::channel;
|
use crossbeam::channel;
|
||||||
use futures::executor::block_on;
|
use futures::executor::block_on;
|
||||||
use futures::future::Future;
|
use futures::future::Future;
|
||||||
|
use slog::{error, info, Logger};
|
||||||
use smallvec::smallvec;
|
use smallvec::smallvec;
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
@@ -195,20 +196,21 @@ fn index_documents(
|
|||||||
grouped_document_iterator: &mut dyn Iterator<Item = OperationGroup>,
|
grouped_document_iterator: &mut dyn Iterator<Item = OperationGroup>,
|
||||||
segment_updater: &mut SegmentUpdater,
|
segment_updater: &mut SegmentUpdater,
|
||||||
mut delete_cursor: DeleteCursor,
|
mut delete_cursor: DeleteCursor,
|
||||||
|
logger: &Logger,
|
||||||
) -> crate::Result<bool> {
|
) -> crate::Result<bool> {
|
||||||
let schema = segment.schema();
|
let schema = segment.schema();
|
||||||
|
|
||||||
|
info!(logger, "segment-index"; "stage"=>"start");
|
||||||
let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone(), &schema)?;
|
let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone(), &schema)?;
|
||||||
|
let mut buffer_limit_reached = false;
|
||||||
for document_group in grouped_document_iterator {
|
for document_group in grouped_document_iterator {
|
||||||
for doc in document_group {
|
for doc in document_group {
|
||||||
segment_writer.add_document(doc, &schema)?;
|
segment_writer.add_document(doc, &schema)?;
|
||||||
}
|
}
|
||||||
let mem_usage = segment_writer.mem_usage();
|
let mem_usage = segment_writer.mem_usage();
|
||||||
if mem_usage >= memory_budget - MARGIN_IN_BYTES {
|
if mem_usage >= memory_budget - MARGIN_IN_BYTES {
|
||||||
info!(
|
buffer_limit_reached = true;
|
||||||
"Buffer limit reached, flushing segment with maxdoc={}.",
|
|
||||||
segment_writer.max_doc()
|
|
||||||
);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -228,6 +230,14 @@ fn index_documents(
|
|||||||
let segment_with_max_doc = segment.with_max_doc(max_doc);
|
let segment_with_max_doc = segment.with_max_doc(max_doc);
|
||||||
|
|
||||||
let last_docstamp: Opstamp = *(doc_opstamps.last().unwrap());
|
let last_docstamp: Opstamp = *(doc_opstamps.last().unwrap());
|
||||||
|
info!(
|
||||||
|
logger,
|
||||||
|
"segment-index";
|
||||||
|
"stage" => "serialize",
|
||||||
|
"cause" => if buffer_limit_reached { "buffer-limit" } else { "commit" },
|
||||||
|
"maxdoc" => max_doc,
|
||||||
|
"last_docstamp" => last_docstamp
|
||||||
|
);
|
||||||
|
|
||||||
let delete_bitset_opt = apply_deletes(
|
let delete_bitset_opt = apply_deletes(
|
||||||
&segment_with_max_doc,
|
&segment_with_max_doc,
|
||||||
@@ -241,7 +251,18 @@ fn index_documents(
|
|||||||
delete_cursor,
|
delete_cursor,
|
||||||
delete_bitset_opt,
|
delete_bitset_opt,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
logger,
|
||||||
|
"segment-index";
|
||||||
|
"stage" => "publish",
|
||||||
|
);
|
||||||
block_on(segment_updater.schedule_add_segment(segment_entry))?;
|
block_on(segment_updater.schedule_add_segment(segment_entry))?;
|
||||||
|
info!(
|
||||||
|
logger,
|
||||||
|
"segment-index";
|
||||||
|
"stage" => "end",
|
||||||
|
);
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -344,6 +365,10 @@ impl IndexWriter {
|
|||||||
Ok(index_writer)
|
Ok(index_writer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn logger(&self) -> &Logger {
|
||||||
|
self.index.logger()
|
||||||
|
}
|
||||||
|
|
||||||
fn drop_sender(&mut self) {
|
fn drop_sender(&mut self) {
|
||||||
let (sender, _receiver) = channel::bounded(1);
|
let (sender, _receiver) = channel::bounded(1);
|
||||||
self.operation_sender = sender;
|
self.operation_sender = sender;
|
||||||
@@ -352,6 +377,8 @@ impl IndexWriter {
|
|||||||
/// If there are some merging threads, blocks until they all finish their work and
|
/// If there are some merging threads, blocks until they all finish their work and
|
||||||
/// then drop the `IndexWriter`.
|
/// then drop the `IndexWriter`.
|
||||||
pub fn wait_merging_threads(mut self) -> crate::Result<()> {
|
pub fn wait_merging_threads(mut self) -> crate::Result<()> {
|
||||||
|
info!(self.logger(), "wait-merge-threads"; "stage"=>"start");
|
||||||
|
|
||||||
// this will stop the indexing thread,
|
// this will stop the indexing thread,
|
||||||
// dropping the last reference to the segment_updater.
|
// dropping the last reference to the segment_updater.
|
||||||
self.drop_sender();
|
self.drop_sender();
|
||||||
@@ -372,9 +399,9 @@ impl IndexWriter {
|
|||||||
.map_err(|_| TantivyError::ErrorInThread("Failed to join merging thread.".into()));
|
.map_err(|_| TantivyError::ErrorInThread("Failed to join merging thread.".into()));
|
||||||
|
|
||||||
if let Err(ref e) = result {
|
if let Err(ref e) = result {
|
||||||
error!("Some merging thread failed {:?}", e);
|
error!(self.logger(), "some merge thread failed"; "cause"=>e.to_string());
|
||||||
}
|
}
|
||||||
|
info!(self.logger(), "wait-merge-threads"; "stage"=>"stop");
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -434,12 +461,16 @@ impl IndexWriter {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let segment = index.new_segment();
|
let segment = index.new_segment();
|
||||||
|
let segment_id = segment.id();
|
||||||
index_documents(
|
index_documents(
|
||||||
mem_budget,
|
mem_budget,
|
||||||
segment,
|
segment,
|
||||||
&mut document_iterator,
|
&mut document_iterator,
|
||||||
&mut segment_updater,
|
&mut segment_updater,
|
||||||
delete_cursor.clone(),
|
delete_cursor.clone(),
|
||||||
|
&index
|
||||||
|
.logger()
|
||||||
|
.new(slog::o!("segment"=>segment_id.to_string())),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
@@ -553,7 +584,10 @@ impl IndexWriter {
|
|||||||
///
|
///
|
||||||
/// The opstamp at the last commit is returned.
|
/// The opstamp at the last commit is returned.
|
||||||
pub fn rollback(&mut self) -> crate::Result<Opstamp> {
|
pub fn rollback(&mut self) -> crate::Result<Opstamp> {
|
||||||
info!("Rolling back to opstamp {}", self.committed_opstamp);
|
info!(
|
||||||
|
self.logger(),
|
||||||
|
"Rolling back to opstamp {}", self.committed_opstamp
|
||||||
|
);
|
||||||
// marks the segment updater as killed. From now on, all
|
// marks the segment updater as killed. From now on, all
|
||||||
// segment updates will be ignored.
|
// segment updates will be ignored.
|
||||||
self.segment_updater.kill();
|
self.segment_updater.kill();
|
||||||
@@ -610,6 +644,8 @@ impl IndexWriter {
|
|||||||
/// using this API.
|
/// using this API.
|
||||||
/// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
|
/// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
|
||||||
pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
|
pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
|
||||||
|
let logger = self.logger().clone();
|
||||||
|
|
||||||
// Here, because we join all of the worker threads,
|
// Here, because we join all of the worker threads,
|
||||||
// all of the segment update for this commit have been
|
// all of the segment update for this commit have been
|
||||||
// sent.
|
// sent.
|
||||||
@@ -620,7 +656,10 @@ impl IndexWriter {
|
|||||||
//
|
//
|
||||||
// This will move uncommitted segments to the state of
|
// This will move uncommitted segments to the state of
|
||||||
// committed segments.
|
// committed segments.
|
||||||
info!("Preparing commit");
|
|
||||||
|
let commit_opstamp = self.stamper.stamp();
|
||||||
|
|
||||||
|
info!(logger, "prepare-commit"; "opstamp" => commit_opstamp);
|
||||||
|
|
||||||
// this will drop the current document channel
|
// this will drop the current document channel
|
||||||
// and recreate a new one.
|
// and recreate a new one.
|
||||||
@@ -636,9 +675,8 @@ impl IndexWriter {
|
|||||||
self.add_indexing_worker()?;
|
self.add_indexing_worker()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let commit_opstamp = self.stamper.stamp();
|
|
||||||
let prepared_commit = PreparedCommit::new(self, commit_opstamp);
|
let prepared_commit = PreparedCommit::new(self, commit_opstamp);
|
||||||
info!("Prepared commit {}", commit_opstamp);
|
info!(logger, "Prepared commit {}", commit_opstamp);
|
||||||
Ok(prepared_commit)
|
Ok(prepared_commit)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -800,7 +838,7 @@ mod tests {
|
|||||||
let mut schema_builder = schema::Schema::builder();
|
let mut schema_builder = schema::Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let index_writer = index.writer_for_tests().unwrap();
|
||||||
let operations = vec![
|
let operations = vec![
|
||||||
UserOperation::Add(doc!(text_field=>"a")),
|
UserOperation::Add(doc!(text_field=>"a")),
|
||||||
UserOperation::Add(doc!(text_field=>"b")),
|
UserOperation::Add(doc!(text_field=>"b")),
|
||||||
@@ -815,7 +853,7 @@ mod tests {
|
|||||||
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field => "hello1"));
|
index_writer.add_document(doc!(text_field => "hello1"));
|
||||||
index_writer.add_document(doc!(text_field => "hello2"));
|
index_writer.add_document(doc!(text_field => "hello2"));
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
@@ -864,7 +902,7 @@ mod tests {
|
|||||||
.reload_policy(ReloadPolicy::Manual)
|
.reload_policy(ReloadPolicy::Manual)
|
||||||
.try_into()
|
.try_into()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let a_term = Term::from_field_text(text_field, "a");
|
let a_term = Term::from_field_text(text_field, "a");
|
||||||
let b_term = Term::from_field_text(text_field, "b");
|
let b_term = Term::from_field_text(text_field, "b");
|
||||||
let operations = vec![
|
let operations = vec![
|
||||||
@@ -926,8 +964,8 @@ mod tests {
|
|||||||
fn test_lockfile_already_exists_error_msg() {
|
fn test_lockfile_already_exists_error_msg() {
|
||||||
let schema_builder = schema::Schema::builder();
|
let schema_builder = schema::Schema::builder();
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let _index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let _index_writer = index.writer_for_tests().unwrap();
|
||||||
match index.writer_with_num_threads(1, 3_000_000) {
|
match index.writer_for_tests() {
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let err_msg = err.to_string();
|
let err_msg = err.to_string();
|
||||||
assert!(err_msg.contains("already an `IndexWriter`"));
|
assert!(err_msg.contains("already an `IndexWriter`"));
|
||||||
@@ -1261,7 +1299,7 @@ mod tests {
|
|||||||
let idfield = schema_builder.add_text_field("id", STRING);
|
let idfield = schema_builder.add_text_field("id", STRING);
|
||||||
schema_builder.add_text_field("optfield", STRING);
|
schema_builder.add_text_field("optfield", STRING);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(idfield=>"myid"));
|
index_writer.add_document(doc!(idfield=>"myid"));
|
||||||
let commit = index_writer.commit();
|
let commit = index_writer.commit();
|
||||||
assert!(commit.is_ok());
|
assert!(commit.is_ok());
|
||||||
|
|||||||
@@ -25,14 +25,14 @@ use std::cmp;
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
|
fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result<u64> {
|
||||||
let mut total_tokens = 0u64;
|
let mut total_tokens = 0u64;
|
||||||
let mut count: [usize; 256] = [0; 256];
|
let mut count: [usize; 256] = [0; 256];
|
||||||
for reader in readers {
|
for reader in readers {
|
||||||
if reader.has_deletes() {
|
if reader.has_deletes() {
|
||||||
// if there are deletes, then we use an approximation
|
// if there are deletes, then we use an approximation
|
||||||
// using the fieldnorm
|
// using the fieldnorm
|
||||||
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
|
let fieldnorms_reader = reader.get_fieldnorms_reader(field)?;
|
||||||
for doc in reader.doc_ids_alive() {
|
for doc in reader.doc_ids_alive() {
|
||||||
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
|
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
|
||||||
count[fieldnorm_id as usize] += 1;
|
count[fieldnorm_id as usize] += 1;
|
||||||
@@ -41,7 +41,7 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
|
|||||||
total_tokens += reader.inverted_index(field).total_num_tokens();
|
total_tokens += reader.inverted_index(field).total_num_tokens();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
total_tokens
|
Ok(total_tokens
|
||||||
+ count
|
+ count
|
||||||
.iter()
|
.iter()
|
||||||
.cloned()
|
.cloned()
|
||||||
@@ -49,7 +49,7 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
|
|||||||
.map(|(fieldnorm_ord, count)| {
|
.map(|(fieldnorm_ord, count)| {
|
||||||
count as u64 * u64::from(FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8))
|
count as u64 * u64::from(FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8))
|
||||||
})
|
})
|
||||||
.sum::<u64>()
|
.sum::<u64>())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IndexMerger {
|
pub struct IndexMerger {
|
||||||
@@ -175,7 +175,7 @@ impl IndexMerger {
|
|||||||
for field in fields {
|
for field in fields {
|
||||||
fieldnorms_data.clear();
|
fieldnorms_data.clear();
|
||||||
for reader in &self.readers {
|
for reader in &self.readers {
|
||||||
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
|
let fieldnorms_reader = reader.get_fieldnorms_reader(field)?;
|
||||||
for doc_id in reader.doc_ids_alive() {
|
for doc_id in reader.doc_ids_alive() {
|
||||||
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id);
|
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id);
|
||||||
fieldnorms_data.push(fieldnorm_id);
|
fieldnorms_data.push(fieldnorm_id);
|
||||||
@@ -541,7 +541,7 @@ impl IndexMerger {
|
|||||||
// The total number of tokens will only be exact when there has been no deletes.
|
// The total number of tokens will only be exact when there has been no deletes.
|
||||||
//
|
//
|
||||||
// Otherwise, we approximate by removing deleted documents proportionally.
|
// Otherwise, we approximate by removing deleted documents proportionally.
|
||||||
let total_num_tokens: u64 = compute_total_num_tokens(&self.readers, indexed_field);
|
let total_num_tokens: u64 = compute_total_num_tokens(&self.readers, indexed_field)?;
|
||||||
|
|
||||||
// Create the total list of doc ids
|
// Create the total list of doc ids
|
||||||
// by stacking the doc ids from the different segment.
|
// by stacking the doc ids from the different segment.
|
||||||
@@ -751,7 +751,7 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
{
|
{
|
||||||
@@ -803,7 +803,7 @@ mod tests {
|
|||||||
let segment_ids = index
|
let segment_ids = index
|
||||||
.searchable_segment_ids()
|
.searchable_segment_ids()
|
||||||
.expect("Searchable segments failed.");
|
.expect("Searchable segments failed.");
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
block_on(index_writer.merge(&segment_ids)).expect("Merging failed");
|
block_on(index_writer.merge(&segment_ids)).expect("Merging failed");
|
||||||
index_writer.wait_merging_threads().unwrap();
|
index_writer.wait_merging_threads().unwrap();
|
||||||
}
|
}
|
||||||
@@ -904,7 +904,7 @@ mod tests {
|
|||||||
let score_field = schema_builder.add_u64_field("score", score_fieldtype);
|
let score_field = schema_builder.add_u64_field("score", score_fieldtype);
|
||||||
let bytes_score_field = schema_builder.add_bytes_field("score_bytes");
|
let bytes_score_field = schema_builder.add_bytes_field("score_bytes");
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader().unwrap();
|
||||||
let search_term = |searcher: &Searcher, term: Term| {
|
let search_term = |searcher: &Searcher, term: Term| {
|
||||||
let collector = FastFieldTestCollector::for_field(score_field);
|
let collector = FastFieldTestCollector::for_field(score_field);
|
||||||
@@ -1211,7 +1211,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader().unwrap();
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| {
|
let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| {
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
for facet in doc_facets {
|
for facet in doc_facets {
|
||||||
@@ -1276,7 +1276,7 @@ mod tests {
|
|||||||
let segment_ids = index
|
let segment_ids = index
|
||||||
.searchable_segment_ids()
|
.searchable_segment_ids()
|
||||||
.expect("Searchable segments failed.");
|
.expect("Searchable segments failed.");
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
block_on(index_writer.merge(&segment_ids)).expect("Merging failed");
|
block_on(index_writer.merge(&segment_ids)).expect("Merging failed");
|
||||||
index_writer.wait_merging_threads().unwrap();
|
index_writer.wait_merging_threads().unwrap();
|
||||||
reader.reload().unwrap();
|
reader.reload().unwrap();
|
||||||
@@ -1295,7 +1295,7 @@ mod tests {
|
|||||||
|
|
||||||
// Deleting one term
|
// Deleting one term
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let facet = Facet::from_path(vec!["top", "a", "firstdoc"]);
|
let facet = Facet::from_path(vec!["top", "a", "firstdoc"]);
|
||||||
let facet_term = Term::from_facet(facet_field, &facet);
|
let facet_term = Term::from_facet(facet_field, &facet);
|
||||||
index_writer.delete_term(facet_term);
|
index_writer.delete_term(facet_term);
|
||||||
@@ -1320,7 +1320,7 @@ mod tests {
|
|||||||
let mut schema_builder = schema::Schema::builder();
|
let mut schema_builder = schema::Schema::builder();
|
||||||
let int_field = schema_builder.add_u64_field("intvals", INDEXED);
|
let int_field = schema_builder.add_u64_field("intvals", INDEXED);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(int_field => 1u64));
|
index_writer.add_document(doc!(int_field => 1u64));
|
||||||
index_writer.commit().expect("commit failed");
|
index_writer.commit().expect("commit failed");
|
||||||
index_writer.add_document(doc!(int_field => 1u64));
|
index_writer.add_document(doc!(int_field => 1u64));
|
||||||
@@ -1349,7 +1349,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader().unwrap();
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
doc.add_u64(int_field, 1);
|
doc.add_u64(int_field, 1);
|
||||||
index_writer.add_document(doc.clone());
|
index_writer.add_document(doc.clone());
|
||||||
@@ -1388,7 +1388,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let index_doc = |index_writer: &mut IndexWriter, int_vals: &[u64]| {
|
let index_doc = |index_writer: &mut IndexWriter, int_vals: &[u64]| {
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
for &val in int_vals {
|
for &val in int_vals {
|
||||||
@@ -1462,7 +1462,7 @@ mod tests {
|
|||||||
let segment_ids = index
|
let segment_ids = index
|
||||||
.searchable_segment_ids()
|
.searchable_segment_ids()
|
||||||
.expect("Searchable segments failed.");
|
.expect("Searchable segments failed.");
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
|
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
|
||||||
assert!(index_writer.wait_merging_threads().is_ok());
|
assert!(index_writer.wait_merging_threads().is_ok());
|
||||||
}
|
}
|
||||||
@@ -1516,7 +1516,7 @@ mod tests {
|
|||||||
|
|
||||||
let index = Index::create_in_ram(builder.build());
|
let index = Index::create_in_ram(builder.build());
|
||||||
|
|
||||||
let mut writer = index.writer_with_num_threads(1, 3_000_000)?;
|
let mut writer = index.writer_for_tests()?;
|
||||||
|
|
||||||
// Make sure we'll attempt to merge every created segment
|
// Make sure we'll attempt to merge every created segment
|
||||||
let mut policy = crate::indexer::LogMergePolicy::default();
|
let mut policy = crate::indexer::LogMergePolicy::default();
|
||||||
@@ -1548,7 +1548,7 @@ mod tests {
|
|||||||
let mut builder = schema::SchemaBuilder::new();
|
let mut builder = schema::SchemaBuilder::new();
|
||||||
let text = builder.add_text_field("text", TEXT);
|
let text = builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_in_ram(builder.build());
|
let index = Index::create_in_ram(builder.build());
|
||||||
let mut writer = index.writer_with_num_threads(1, 3_000_000)?;
|
let mut writer = index.writer_for_tests()?;
|
||||||
let happy_term = Term::from_field_text(text, "happy");
|
let happy_term = Term::from_field_text(text, "happy");
|
||||||
let term_query = TermQuery::new(happy_term, IndexRecordOption::WithFreqs);
|
let term_query = TermQuery::new(happy_term, IndexRecordOption::WithFreqs);
|
||||||
for _ in 0..62 {
|
for _ in 0..62 {
|
||||||
|
|||||||
@@ -29,8 +29,9 @@ pub use self::segment_writer::SegmentWriter;
|
|||||||
/// Alias for the default merge policy, which is the `LogMergePolicy`.
|
/// Alias for the default merge policy, which is the `LogMergePolicy`.
|
||||||
pub type DefaultMergePolicy = LogMergePolicy;
|
pub type DefaultMergePolicy = LogMergePolicy;
|
||||||
|
|
||||||
|
#[cfg(feature = "mmap")]
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests_mmap {
|
||||||
use crate::schema::{self, Schema};
|
use crate::schema::{self, Schema};
|
||||||
use crate::{Index, Term};
|
use crate::{Index, Term};
|
||||||
|
|
||||||
@@ -39,7 +40,7 @@ mod tests {
|
|||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
let text_field = schema_builder.add_text_field("text", schema::TEXT);
|
||||||
let index = Index::create_from_tempdir(schema_builder.build()).unwrap();
|
let index = Index::create_from_tempdir(schema_builder.build()).unwrap();
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
// there must be one deleted document in the segment
|
// there must be one deleted document in the segment
|
||||||
index_writer.add_document(doc!(text_field=>"b"));
|
index_writer.add_document(doc!(text_field=>"b"));
|
||||||
index_writer.delete_term(Term::from_field_text(text_field, "b"));
|
index_writer.delete_term(Term::from_field_text(text_field, "b"));
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use super::IndexWriter;
|
use super::IndexWriter;
|
||||||
use crate::Opstamp;
|
use crate::Opstamp;
|
||||||
use futures::executor::block_on;
|
use futures::executor::block_on;
|
||||||
|
use slog::info;
|
||||||
|
|
||||||
/// A prepared commit
|
/// A prepared commit
|
||||||
pub struct PreparedCommit<'a> {
|
pub struct PreparedCommit<'a> {
|
||||||
@@ -31,7 +32,7 @@ impl<'a> PreparedCommit<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn commit(self) -> crate::Result<Opstamp> {
|
pub fn commit(self) -> crate::Result<Opstamp> {
|
||||||
info!("committing {}", self.opstamp);
|
info!(self.index_writer.logger(), "committing {}", self.opstamp);
|
||||||
let _ = block_on(
|
let _ = block_on(
|
||||||
self.index_writer
|
self.index_writer
|
||||||
.segment_updater()
|
.segment_updater()
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use slog::{warn, Logger};
|
||||||
|
|
||||||
use super::segment_register::SegmentRegister;
|
use super::segment_register::SegmentRegister;
|
||||||
use crate::core::SegmentId;
|
use crate::core::SegmentId;
|
||||||
use crate::core::SegmentMeta;
|
use crate::core::SegmentMeta;
|
||||||
@@ -42,9 +44,9 @@ impl SegmentRegisters {
|
|||||||
///
|
///
|
||||||
/// It guarantees the atomicity of the
|
/// It guarantees the atomicity of the
|
||||||
/// changes (merges especially)
|
/// changes (merges especially)
|
||||||
#[derive(Default)]
|
|
||||||
pub struct SegmentManager {
|
pub struct SegmentManager {
|
||||||
registers: RwLock<SegmentRegisters>,
|
registers: RwLock<SegmentRegisters>,
|
||||||
|
logger: Logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for SegmentManager {
|
impl Debug for SegmentManager {
|
||||||
@@ -77,12 +79,14 @@ impl SegmentManager {
|
|||||||
pub fn from_segments(
|
pub fn from_segments(
|
||||||
segment_metas: Vec<SegmentMeta>,
|
segment_metas: Vec<SegmentMeta>,
|
||||||
delete_cursor: &DeleteCursor,
|
delete_cursor: &DeleteCursor,
|
||||||
|
logger: Logger,
|
||||||
) -> SegmentManager {
|
) -> SegmentManager {
|
||||||
SegmentManager {
|
SegmentManager {
|
||||||
registers: RwLock::new(SegmentRegisters {
|
registers: RwLock::new(SegmentRegisters {
|
||||||
uncommitted: SegmentRegister::default(),
|
uncommitted: SegmentRegister::default(),
|
||||||
committed: SegmentRegister::new(segment_metas, delete_cursor),
|
committed: SegmentRegister::new(segment_metas, delete_cursor),
|
||||||
}),
|
}),
|
||||||
|
logger,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -186,7 +190,7 @@ impl SegmentManager {
|
|||||||
let segments_status = registers_lock
|
let segments_status = registers_lock
|
||||||
.segments_status(before_merge_segment_ids)
|
.segments_status(before_merge_segment_ids)
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
warn!("couldn't find segment in SegmentManager");
|
warn!(self.logger, "couldn't find segment in SegmentManager");
|
||||||
crate::TantivyError::InvalidArgument(
|
crate::TantivyError::InvalidArgument(
|
||||||
"The segments that were merged could not be found in the SegmentManager. \
|
"The segments that were merged could not be found in the SegmentManager. \
|
||||||
This is not necessarily a bug, and can happen after a rollback for instance."
|
This is not necessarily a bug, and can happen after a rollback for instance."
|
||||||
|
|||||||
@@ -23,9 +23,9 @@ use futures::channel::oneshot;
|
|||||||
use futures::executor::{ThreadPool, ThreadPoolBuilder};
|
use futures::executor::{ThreadPool, ThreadPoolBuilder};
|
||||||
use futures::future::Future;
|
use futures::future::Future;
|
||||||
use futures::future::TryFutureExt;
|
use futures::future::TryFutureExt;
|
||||||
|
use slog::{debug, error, info, warn};
|
||||||
use std::borrow::BorrowMut;
|
use std::borrow::BorrowMut;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io::Write;
|
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
@@ -65,12 +65,11 @@ pub fn save_new_metas(schema: Schema, directory: &mut dyn Directory) -> crate::R
|
|||||||
///
|
///
|
||||||
/// This method is not part of tantivy's public API
|
/// This method is not part of tantivy's public API
|
||||||
fn save_metas(metas: &IndexMeta, directory: &mut dyn Directory) -> crate::Result<()> {
|
fn save_metas(metas: &IndexMeta, directory: &mut dyn Directory) -> crate::Result<()> {
|
||||||
info!("save metas");
|
let mut meta_json = serde_json::to_string_pretty(metas)?;
|
||||||
let mut buffer = serde_json::to_vec_pretty(metas)?;
|
|
||||||
// Just adding a new line at the end of the buffer.
|
// Just adding a new line at the end of the buffer.
|
||||||
writeln!(&mut buffer)?;
|
meta_json.push_str("\n");
|
||||||
directory.atomic_write(&META_FILEPATH, &buffer[..])?;
|
debug!(directory.logger(), "save meta"; "content"=>&meta_json);
|
||||||
debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));
|
directory.atomic_write(&META_FILEPATH, meta_json.as_bytes())?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,7 +96,6 @@ impl Deref for SegmentUpdater {
|
|||||||
async fn garbage_collect_files(
|
async fn garbage_collect_files(
|
||||||
segment_updater: SegmentUpdater,
|
segment_updater: SegmentUpdater,
|
||||||
) -> crate::Result<GarbageCollectionResult> {
|
) -> crate::Result<GarbageCollectionResult> {
|
||||||
info!("Running garbage collection");
|
|
||||||
let mut index = segment_updater.index.clone();
|
let mut index = segment_updater.index.clone();
|
||||||
index
|
index
|
||||||
.directory_mut()
|
.directory_mut()
|
||||||
@@ -107,14 +105,12 @@ async fn garbage_collect_files(
|
|||||||
/// Merges a list of segments the list of segment givens in the `segment_entries`.
|
/// Merges a list of segments the list of segment givens in the `segment_entries`.
|
||||||
/// This function happens in the calling thread and is computationally expensive.
|
/// This function happens in the calling thread and is computationally expensive.
|
||||||
fn merge(
|
fn merge(
|
||||||
|
merged_segment: Segment,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
mut segment_entries: Vec<SegmentEntry>,
|
mut segment_entries: Vec<SegmentEntry>,
|
||||||
target_opstamp: Opstamp,
|
target_opstamp: Opstamp,
|
||||||
) -> crate::Result<SegmentEntry> {
|
) -> crate::Result<SegmentEntry> {
|
||||||
// first we need to apply deletes to our segment.
|
// First we apply all of the delete to the merged segment, up to the target opstamp.
|
||||||
let merged_segment = index.new_segment();
|
|
||||||
|
|
||||||
// First we apply all of the delet to the merged segment, up to the target opstamp.
|
|
||||||
for segment_entry in &mut segment_entries {
|
for segment_entry in &mut segment_entries {
|
||||||
let segment = index.segment(segment_entry.meta().clone());
|
let segment = index.segment(segment_entry.meta().clone());
|
||||||
advance_deletes(segment, segment_entry, target_opstamp)?;
|
advance_deletes(segment, segment_entry, target_opstamp)?;
|
||||||
@@ -167,7 +163,8 @@ impl SegmentUpdater {
|
|||||||
delete_cursor: &DeleteCursor,
|
delete_cursor: &DeleteCursor,
|
||||||
) -> crate::Result<SegmentUpdater> {
|
) -> crate::Result<SegmentUpdater> {
|
||||||
let segments = index.searchable_segment_metas()?;
|
let segments = index.searchable_segment_metas()?;
|
||||||
let segment_manager = SegmentManager::from_segments(segments, delete_cursor);
|
let segment_manager =
|
||||||
|
SegmentManager::from_segments(segments, delete_cursor, index.logger().clone());
|
||||||
let pool = ThreadPoolBuilder::new()
|
let pool = ThreadPoolBuilder::new()
|
||||||
.name_prefix("segment_updater")
|
.name_prefix("segment_updater")
|
||||||
.pool_size(1)
|
.pool_size(1)
|
||||||
@@ -387,7 +384,18 @@ impl SegmentUpdater {
|
|||||||
.segment_manager
|
.segment_manager
|
||||||
.start_merge(merge_operation.segment_ids())?;
|
.start_merge(merge_operation.segment_ids())?;
|
||||||
|
|
||||||
info!("Starting merge - {:?}", merge_operation.segment_ids());
|
let segment_ids_str: String = merge_operation
|
||||||
|
.segment_ids()
|
||||||
|
.iter()
|
||||||
|
.map(|segment_id| segment_id.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(",");
|
||||||
|
|
||||||
|
let merged_segment = self.index.new_segment();
|
||||||
|
let logger = self.index.logger().new(slog::o!("segments"=>segment_ids_str, "merged-segment"=>merged_segment.id().to_string()));
|
||||||
|
|
||||||
|
let num_merges: usize = self.merge_operations.list().len();
|
||||||
|
slog::info!(&logger, "merge"; "stage"=>"start", "num-merges" => num_merges);
|
||||||
|
|
||||||
let (merging_future_send, merging_future_recv) =
|
let (merging_future_send, merging_future_recv) =
|
||||||
oneshot::channel::<crate::Result<SegmentMeta>>();
|
oneshot::channel::<crate::Result<SegmentMeta>>();
|
||||||
@@ -398,22 +406,20 @@ impl SegmentUpdater {
|
|||||||
// as well as which segment is currently in merge and therefore should not be
|
// as well as which segment is currently in merge and therefore should not be
|
||||||
// candidate for another merge.
|
// candidate for another merge.
|
||||||
match merge(
|
match merge(
|
||||||
|
merged_segment,
|
||||||
&segment_updater.index,
|
&segment_updater.index,
|
||||||
segment_entries,
|
segment_entries,
|
||||||
merge_operation.target_opstamp(),
|
merge_operation.target_opstamp(),
|
||||||
) {
|
) {
|
||||||
Ok(after_merge_segment_entry) => {
|
Ok(after_merge_segment_entry) => {
|
||||||
|
info!(&logger, "merge"; "stage" => "end");
|
||||||
let segment_meta = segment_updater
|
let segment_meta = segment_updater
|
||||||
.end_merge(merge_operation, after_merge_segment_entry)
|
.end_merge(merge_operation, after_merge_segment_entry)
|
||||||
.await;
|
.await;
|
||||||
let _send_result = merging_future_send.send(segment_meta);
|
let _send_result = merging_future_send.send(segment_meta);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(
|
error!(&logger, "merge"; "stage" => "fail", "cause"=>e.to_string());
|
||||||
"Merge of {:?} was cancelled: {:?}",
|
|
||||||
merge_operation.segment_ids().to_vec(),
|
|
||||||
e
|
|
||||||
);
|
|
||||||
// ... cancel merge
|
// ... cancel merge
|
||||||
if cfg!(test) {
|
if cfg!(test) {
|
||||||
panic!("Merge failed.");
|
panic!("Merge failed.");
|
||||||
@@ -454,11 +460,12 @@ impl SegmentUpdater {
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
merge_candidates.extend(committed_merge_candidates.into_iter());
|
merge_candidates.extend(committed_merge_candidates.into_iter());
|
||||||
|
|
||||||
|
let logger = self.index.logger();
|
||||||
for merge_operation in merge_candidates {
|
for merge_operation in merge_candidates {
|
||||||
if let Err(err) = self.start_merge(merge_operation) {
|
if let Err(err) = self.start_merge(merge_operation) {
|
||||||
warn!(
|
warn!(
|
||||||
"Starting the merge failed for the following reason. This is not fatal. {}",
|
logger,
|
||||||
err
|
"merge-start-fail (not fatal, not necessarily a problem)"; "reason" => format!("{}", err),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -471,8 +478,11 @@ impl SegmentUpdater {
|
|||||||
) -> impl Future<Output = crate::Result<SegmentMeta>> {
|
) -> impl Future<Output = crate::Result<SegmentMeta>> {
|
||||||
let segment_updater = self.clone();
|
let segment_updater = self.clone();
|
||||||
let after_merge_segment_meta = after_merge_segment_entry.meta().clone();
|
let after_merge_segment_meta = after_merge_segment_entry.meta().clone();
|
||||||
|
let logger = self.index.logger().new(
|
||||||
|
slog::o!("segment"=>after_merge_segment_meta.id().to_string(),
|
||||||
|
"delete-opstamp"=>after_merge_segment_meta.delete_opstamp()),
|
||||||
|
);
|
||||||
let end_merge_future = self.schedule_future(async move {
|
let end_merge_future = self.schedule_future(async move {
|
||||||
info!("End merge {:?}", after_merge_segment_entry.meta());
|
|
||||||
{
|
{
|
||||||
let mut delete_cursor = after_merge_segment_entry.delete_cursor().clone();
|
let mut delete_cursor = after_merge_segment_entry.delete_cursor().clone();
|
||||||
if let Some(delete_operation) = delete_cursor.get() {
|
if let Some(delete_operation) = delete_cursor.get() {
|
||||||
@@ -486,6 +496,7 @@ impl SegmentUpdater {
|
|||||||
committed_opstamp,
|
committed_opstamp,
|
||||||
) {
|
) {
|
||||||
error!(
|
error!(
|
||||||
|
logger,
|
||||||
"Merge of {:?} was cancelled (advancing deletes failed): {:?}",
|
"Merge of {:?} was cancelled (advancing deletes failed): {:?}",
|
||||||
merge_operation.segment_ids(),
|
merge_operation.segment_ids(),
|
||||||
e
|
e
|
||||||
@@ -555,7 +566,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
|
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.set_merge_policy(Box::new(MergeWheneverPossible));
|
index_writer.set_merge_policy(Box::new(MergeWheneverPossible));
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -608,7 +619,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
|
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
for _ in 0..100 {
|
for _ in 0..100 {
|
||||||
@@ -679,7 +690,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
|
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
for _ in 0..100 {
|
for _ in 0..100 {
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use super::operation::AddOperation;
|
use super::operation::AddOperation;
|
||||||
use crate::core::Segment;
|
|
||||||
use crate::core::SerializableSegment;
|
use crate::core::SerializableSegment;
|
||||||
use crate::fastfield::FastFieldsWriter;
|
use crate::fastfield::FastFieldsWriter;
|
||||||
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
|
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
|
||||||
@@ -15,9 +14,9 @@ use crate::tokenizer::{BoxTokenStream, PreTokenizedStream};
|
|||||||
use crate::tokenizer::{FacetTokenizer, TextAnalyzer};
|
use crate::tokenizer::{FacetTokenizer, TextAnalyzer};
|
||||||
use crate::tokenizer::{TokenStreamChain, Tokenizer};
|
use crate::tokenizer::{TokenStreamChain, Tokenizer};
|
||||||
use crate::Opstamp;
|
use crate::Opstamp;
|
||||||
|
use crate::{core::Segment, tokenizer::MAX_TOKEN_LEN};
|
||||||
use crate::{DocId, SegmentComponent};
|
use crate::{DocId, SegmentComponent};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::str;
|
|
||||||
|
|
||||||
/// Computes the initial size of the hash table.
|
/// Computes the initial size of the hash table.
|
||||||
///
|
///
|
||||||
@@ -48,6 +47,7 @@ pub struct SegmentWriter {
|
|||||||
fieldnorms_writer: FieldNormsWriter,
|
fieldnorms_writer: FieldNormsWriter,
|
||||||
doc_opstamps: Vec<Opstamp>,
|
doc_opstamps: Vec<Opstamp>,
|
||||||
tokenizers: Vec<Option<TextAnalyzer>>,
|
tokenizers: Vec<Option<TextAnalyzer>>,
|
||||||
|
term_buffer: Term,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SegmentWriter {
|
impl SegmentWriter {
|
||||||
@@ -91,6 +91,7 @@ impl SegmentWriter {
|
|||||||
fast_field_writers: FastFieldsWriter::from_schema(schema),
|
fast_field_writers: FastFieldsWriter::from_schema(schema),
|
||||||
doc_opstamps: Vec::with_capacity(1_000),
|
doc_opstamps: Vec::with_capacity(1_000),
|
||||||
tokenizers,
|
tokenizers,
|
||||||
|
term_buffer: Term::new(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,24 +129,29 @@ impl SegmentWriter {
|
|||||||
if !field_options.is_indexed() {
|
if !field_options.is_indexed() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
let (term_buffer, multifield_postings) =
|
||||||
|
(&mut self.term_buffer, &mut self.multifield_postings);
|
||||||
match *field_options.field_type() {
|
match *field_options.field_type() {
|
||||||
FieldType::HierarchicalFacet => {
|
FieldType::HierarchicalFacet => {
|
||||||
let facets: Vec<&str> = field_values
|
term_buffer.set_field(field);
|
||||||
.iter()
|
let facets =
|
||||||
.flat_map(|field_value| match *field_value.value() {
|
field_values
|
||||||
Value::Facet(ref facet) => Some(facet.encoded_str()),
|
.iter()
|
||||||
_ => {
|
.flat_map(|field_value| match *field_value.value() {
|
||||||
panic!("Expected hierarchical facet");
|
Value::Facet(ref facet) => Some(facet.encoded_str()),
|
||||||
}
|
_ => {
|
||||||
})
|
panic!("Expected hierarchical facet");
|
||||||
.collect();
|
}
|
||||||
let mut term = Term::for_field(field); // we set the Term
|
});
|
||||||
for fake_str in facets {
|
for fake_str in facets {
|
||||||
let mut unordered_term_id_opt = None;
|
let mut unordered_term_id_opt = None;
|
||||||
FacetTokenizer.token_stream(fake_str).process(&mut |token| {
|
FacetTokenizer.token_stream(fake_str).process(&mut |token| {
|
||||||
term.set_text(&token.text);
|
if token.text.len() > MAX_TOKEN_LEN {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
term_buffer.set_text(&token.text);
|
||||||
let unordered_term_id =
|
let unordered_term_id =
|
||||||
self.multifield_postings.subscribe(doc_id, &term);
|
multifield_postings.subscribe(doc_id, &term_buffer);
|
||||||
unordered_term_id_opt = Some(unordered_term_id);
|
unordered_term_id_opt = Some(unordered_term_id);
|
||||||
});
|
});
|
||||||
if let Some(unordered_term_id) = unordered_term_id_opt {
|
if let Some(unordered_term_id) = unordered_term_id_opt {
|
||||||
@@ -168,7 +174,6 @@ impl SegmentWriter {
|
|||||||
if let Some(last_token) = tok_str.tokens.last() {
|
if let Some(last_token) = tok_str.tokens.last() {
|
||||||
total_offset += last_token.offset_to;
|
total_offset += last_token.offset_to;
|
||||||
}
|
}
|
||||||
|
|
||||||
token_streams
|
token_streams
|
||||||
.push(PreTokenizedStream::from(tok_str.clone()).into());
|
.push(PreTokenizedStream::from(tok_str.clone()).into());
|
||||||
}
|
}
|
||||||
@@ -178,7 +183,6 @@ impl SegmentWriter {
|
|||||||
{
|
{
|
||||||
offsets.push(total_offset);
|
offsets.push(total_offset);
|
||||||
total_offset += text.len();
|
total_offset += text.len();
|
||||||
|
|
||||||
token_streams.push(tokenizer.token_stream(text));
|
token_streams.push(tokenizer.token_stream(text));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -190,8 +194,12 @@ impl SegmentWriter {
|
|||||||
0
|
0
|
||||||
} else {
|
} else {
|
||||||
let mut token_stream = TokenStreamChain::new(offsets, token_streams);
|
let mut token_stream = TokenStreamChain::new(offsets, token_streams);
|
||||||
self.multifield_postings
|
multifield_postings.index_text(
|
||||||
.index_text(doc_id, field, &mut token_stream)
|
doc_id,
|
||||||
|
field,
|
||||||
|
&mut token_stream,
|
||||||
|
term_buffer,
|
||||||
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
self.fieldnorms_writer.record(doc_id, field, num_tokens);
|
self.fieldnorms_writer.record(doc_id, field, num_tokens);
|
||||||
@@ -199,44 +207,36 @@ impl SegmentWriter {
|
|||||||
FieldType::U64(ref int_option) => {
|
FieldType::U64(ref int_option) => {
|
||||||
if int_option.is_indexed() {
|
if int_option.is_indexed() {
|
||||||
for field_value in field_values {
|
for field_value in field_values {
|
||||||
let term = Term::from_field_u64(
|
term_buffer.set_field(field_value.field());
|
||||||
field_value.field(),
|
term_buffer.set_u64(field_value.value().u64_value());
|
||||||
field_value.value().u64_value(),
|
multifield_postings.subscribe(doc_id, &term_buffer);
|
||||||
);
|
|
||||||
self.multifield_postings.subscribe(doc_id, &term);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FieldType::Date(ref int_option) => {
|
FieldType::Date(ref int_option) => {
|
||||||
if int_option.is_indexed() {
|
if int_option.is_indexed() {
|
||||||
for field_value in field_values {
|
for field_value in field_values {
|
||||||
let term = Term::from_field_i64(
|
term_buffer.set_field(field_value.field());
|
||||||
field_value.field(),
|
term_buffer.set_i64(field_value.value().date_value().timestamp());
|
||||||
field_value.value().date_value().timestamp(),
|
multifield_postings.subscribe(doc_id, &term_buffer);
|
||||||
);
|
|
||||||
self.multifield_postings.subscribe(doc_id, &term);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FieldType::I64(ref int_option) => {
|
FieldType::I64(ref int_option) => {
|
||||||
if int_option.is_indexed() {
|
if int_option.is_indexed() {
|
||||||
for field_value in field_values {
|
for field_value in field_values {
|
||||||
let term = Term::from_field_i64(
|
term_buffer.set_field(field_value.field());
|
||||||
field_value.field(),
|
term_buffer.set_i64(field_value.value().i64_value());
|
||||||
field_value.value().i64_value(),
|
multifield_postings.subscribe(doc_id, &term_buffer);
|
||||||
);
|
|
||||||
self.multifield_postings.subscribe(doc_id, &term);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FieldType::F64(ref int_option) => {
|
FieldType::F64(ref int_option) => {
|
||||||
if int_option.is_indexed() {
|
if int_option.is_indexed() {
|
||||||
for field_value in field_values {
|
for field_value in field_values {
|
||||||
let term = Term::from_field_f64(
|
term_buffer.set_field(field_value.field());
|
||||||
field_value.field(),
|
term_buffer.set_f64(field_value.value().f64_value());
|
||||||
field_value.value().f64_value(),
|
multifield_postings.subscribe(doc_id, &term_buffer);
|
||||||
);
|
|
||||||
self.multifield_postings.subscribe(doc_id, &term);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
382
src/lib.rs
382
src/lib.rs
@@ -102,10 +102,7 @@
|
|||||||
extern crate serde_json;
|
extern crate serde_json;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate thiserror;
|
||||||
|
|
||||||
#[macro_use]
|
|
||||||
extern crate failure;
|
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
#[cfg(all(test, feature = "unstable"))]
|
||||||
extern crate test;
|
extern crate test;
|
||||||
@@ -148,6 +145,7 @@ pub mod schema;
|
|||||||
pub mod space_usage;
|
pub mod space_usage;
|
||||||
pub mod store;
|
pub mod store;
|
||||||
pub mod termdict;
|
pub mod termdict;
|
||||||
|
pub use slog;
|
||||||
|
|
||||||
mod reader;
|
mod reader;
|
||||||
|
|
||||||
@@ -245,18 +243,10 @@ pub type DocId = u32;
|
|||||||
/// with opstamp `n+1`.
|
/// with opstamp `n+1`.
|
||||||
pub type Opstamp = u64;
|
pub type Opstamp = u64;
|
||||||
|
|
||||||
/// A Score that represents the relevance of the document to the query
|
|
||||||
///
|
|
||||||
/// This is modelled internally as a `f64`, because tantivy was compiled with the `scoref64`
|
|
||||||
/// feature. The larger the number, the more relevant the document is to the search query.
|
|
||||||
#[cfg(feature = "scoref64")]
|
|
||||||
pub type Score = f64;
|
|
||||||
|
|
||||||
/// A Score that represents the relevance of the document to the query
|
/// A Score that represents the relevance of the document to the query
|
||||||
///
|
///
|
||||||
/// This is modelled internally as a `f32`. The larger the number, the more relevant
|
/// This is modelled internally as a `f32`. The larger the number, the more relevant
|
||||||
/// the document to the search query.
|
/// the document to the search query.
|
||||||
#[cfg(not(feature = "scoref64"))]
|
|
||||||
pub type Score = f32;
|
pub type Score = f32;
|
||||||
|
|
||||||
/// A `SegmentLocalId` identifies a segment.
|
/// A `SegmentLocalId` identifies a segment.
|
||||||
@@ -296,7 +286,6 @@ mod tests {
|
|||||||
use crate::schema::*;
|
use crate::schema::*;
|
||||||
use crate::DocAddress;
|
use crate::DocAddress;
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
use crate::IndexWriter;
|
|
||||||
use crate::Postings;
|
use crate::Postings;
|
||||||
use crate::ReloadPolicy;
|
use crate::ReloadPolicy;
|
||||||
use rand::distributions::Bernoulli;
|
use rand::distributions::Bernoulli;
|
||||||
@@ -361,14 +350,14 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(feature = "mmap")]
|
#[cfg(feature = "mmap")]
|
||||||
fn test_indexing() {
|
fn test_indexing() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_from_tempdir(schema).unwrap();
|
let index = Index::create_from_tempdir(schema).unwrap();
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
{
|
{
|
||||||
let doc = doc!(text_field=>"af b");
|
let doc = doc!(text_field=>"af b");
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
@@ -383,29 +372,30 @@ mod tests {
|
|||||||
}
|
}
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_docfreq1() {
|
fn test_docfreq1() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
{
|
{
|
||||||
index_writer.add_document(doc!(text_field=>"a b c"));
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
index_writer.add_document(doc!(text_field=>"a"));
|
index_writer.add_document(doc!(text_field=>"a"));
|
||||||
index_writer.add_document(doc!(text_field=>"a a"));
|
index_writer.add_document(doc!(text_field=>"a a"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
index_writer.add_document(doc!(text_field=>"c"));
|
index_writer.add_document(doc!(text_field=>"c"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let term_a = Term::from_field_text(text_field, "a");
|
let term_a = Term::from_field_text(text_field, "a");
|
||||||
assert_eq!(searcher.doc_freq(&term_a), 3);
|
assert_eq!(searcher.doc_freq(&term_a), 3);
|
||||||
@@ -416,67 +406,50 @@ mod tests {
|
|||||||
let term_d = Term::from_field_text(text_field, "d");
|
let term_d = Term::from_field_text(text_field, "d");
|
||||||
assert_eq!(searcher.doc_freq(&term_d), 0);
|
assert_eq!(searcher.doc_freq(&term_d), 0);
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_fieldnorm_no_docs_with_field() {
|
fn test_fieldnorm_no_docs_with_field() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let title_field = schema_builder.add_text_field("title", TEXT);
|
let title_field = schema_builder.add_text_field("title", TEXT);
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
|
index_writer.commit()?;
|
||||||
|
let index_reader = index.reader()?;
|
||||||
|
let searcher = index_reader.searcher();
|
||||||
|
let reader = searcher.segment_reader(0);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let fieldnorm_reader = reader.get_fieldnorms_reader(text_field)?;
|
||||||
{
|
assert_eq!(fieldnorm_reader.fieldnorm(0), 3);
|
||||||
let doc = doc!(text_field=>"a b c");
|
|
||||||
index_writer.add_document(doc);
|
|
||||||
}
|
|
||||||
index_writer.commit().unwrap();
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let index_reader = index.reader().unwrap();
|
let fieldnorm_reader = reader.get_fieldnorms_reader(title_field)?;
|
||||||
let searcher = index_reader.searcher();
|
assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0);
|
||||||
let reader = searcher.segment_reader(0);
|
|
||||||
{
|
|
||||||
let fieldnorm_reader = reader.get_fieldnorms_reader(text_field);
|
|
||||||
assert_eq!(fieldnorm_reader.fieldnorm(0), 3);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let fieldnorm_reader = reader.get_fieldnorms_reader(title_field);
|
|
||||||
assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_fieldnorm() {
|
fn test_fieldnorm() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
{
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
{
|
index_writer.add_document(doc!());
|
||||||
let doc = doc!(text_field=>"a b c");
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
index_writer.add_document(doc);
|
index_writer.commit()?;
|
||||||
}
|
let reader = index.reader()?;
|
||||||
{
|
let searcher = reader.searcher();
|
||||||
let doc = doc!();
|
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
||||||
index_writer.add_document(doc);
|
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?;
|
||||||
}
|
assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
|
||||||
{
|
assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
|
||||||
let doc = doc!(text_field=>"a b");
|
assert_eq!(fieldnorms_reader.fieldnorm(2), 2);
|
||||||
index_writer.add_document(doc);
|
Ok(())
|
||||||
}
|
|
||||||
index_writer.commit().unwrap();
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let reader = index.reader().unwrap();
|
|
||||||
let searcher = reader.searcher();
|
|
||||||
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
|
||||||
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field);
|
|
||||||
assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
|
|
||||||
assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
|
|
||||||
assert_eq!(fieldnorms_reader.fieldnorm(2), 2);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool {
|
fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool {
|
||||||
@@ -491,7 +464,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_delete_postings1() {
|
fn test_delete_postings1() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let term_abcd = Term::from_field_text(text_field, "abcd");
|
let term_abcd = Term::from_field_text(text_field, "abcd");
|
||||||
@@ -507,7 +480,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
// 0
|
// 0
|
||||||
index_writer.add_document(doc!(text_field=>"a b"));
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
// 1
|
// 1
|
||||||
@@ -523,10 +496,10 @@ mod tests {
|
|||||||
index_writer.add_document(doc!(text_field=>" b c"));
|
index_writer.add_document(doc!(text_field=>" b c"));
|
||||||
// 5
|
// 5
|
||||||
index_writer.add_document(doc!(text_field=>" a"));
|
index_writer.add_document(doc!(text_field=>" a"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
reader.reload().unwrap();
|
reader.reload()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let segment_reader = searcher.segment_reader(0);
|
let segment_reader = searcher.segment_reader(0);
|
||||||
let inverted_index = segment_reader.inverted_index(text_field);
|
let inverted_index = segment_reader.inverted_index(text_field);
|
||||||
@@ -554,15 +527,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
// 0
|
// 0
|
||||||
index_writer.add_document(doc!(text_field=>"a b"));
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
// 1
|
// 1
|
||||||
index_writer.delete_term(Term::from_field_text(text_field, "c"));
|
index_writer.delete_term(Term::from_field_text(text_field, "c"));
|
||||||
index_writer.rollback().unwrap();
|
index_writer.rollback()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
reader.reload().unwrap();
|
reader.reload()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let seg_reader = searcher.segment_reader(0);
|
let seg_reader = searcher.segment_reader(0);
|
||||||
let inverted_index = seg_reader.inverted_index(term_abcd.field());
|
let inverted_index = seg_reader.inverted_index(term_abcd.field());
|
||||||
@@ -591,15 +564,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.add_document(doc!(text_field=>"a b"));
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
index_writer.delete_term(Term::from_field_text(text_field, "c"));
|
index_writer.delete_term(Term::from_field_text(text_field, "c"));
|
||||||
index_writer.rollback().unwrap();
|
index_writer.rollback()?;
|
||||||
index_writer.delete_term(Term::from_field_text(text_field, "a"));
|
index_writer.delete_term(Term::from_field_text(text_field, "a"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
reader.reload().unwrap();
|
reader.reload()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let segment_reader = searcher.segment_reader(0);
|
let segment_reader = searcher.segment_reader(0);
|
||||||
let inverted_index = segment_reader.inverted_index(term_abcd.field());
|
let inverted_index = segment_reader.inverted_index(term_abcd.field());
|
||||||
@@ -631,19 +604,20 @@ mod tests {
|
|||||||
assert!(!advance_undeleted(&mut postings, segment_reader));
|
assert!(!advance_undeleted(&mut postings, segment_reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_indexed_u64() {
|
fn test_indexed_u64() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let field = schema_builder.add_u64_field("value", INDEXED);
|
let field = schema_builder.add_u64_field("value", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.add_document(doc!(field=>1u64));
|
index_writer.add_document(doc!(field=>1u64));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let term = Term::from_field_u64(field, 1u64);
|
let term = Term::from_field_u64(field, 1u64);
|
||||||
let mut postings = searcher
|
let mut postings = searcher
|
||||||
@@ -653,20 +627,21 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(postings.doc(), 0);
|
assert_eq!(postings.doc(), 0);
|
||||||
assert_eq!(postings.advance(), TERMINATED);
|
assert_eq!(postings.advance(), TERMINATED);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_indexed_i64() {
|
fn test_indexed_i64() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let value_field = schema_builder.add_i64_field("value", INDEXED);
|
let value_field = schema_builder.add_i64_field("value", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let negative_val = -1i64;
|
let negative_val = -1i64;
|
||||||
index_writer.add_document(doc!(value_field => negative_val));
|
index_writer.add_document(doc!(value_field => negative_val));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let term = Term::from_field_i64(value_field, negative_val);
|
let term = Term::from_field_i64(value_field, negative_val);
|
||||||
let mut postings = searcher
|
let mut postings = searcher
|
||||||
@@ -676,20 +651,21 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(postings.doc(), 0);
|
assert_eq!(postings.doc(), 0);
|
||||||
assert_eq!(postings.advance(), TERMINATED);
|
assert_eq!(postings.advance(), TERMINATED);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_indexed_f64() {
|
fn test_indexed_f64() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let value_field = schema_builder.add_f64_field("value", INDEXED);
|
let value_field = schema_builder.add_f64_field("value", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let val = std::f64::consts::PI;
|
let val = std::f64::consts::PI;
|
||||||
index_writer.add_document(doc!(value_field => val));
|
index_writer.add_document(doc!(value_field => val));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let term = Term::from_field_f64(value_field, val);
|
let term = Term::from_field_f64(value_field, val);
|
||||||
let mut postings = searcher
|
let mut postings = searcher
|
||||||
@@ -699,26 +675,29 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(postings.doc(), 0);
|
assert_eq!(postings.doc(), 0);
|
||||||
assert_eq!(postings.advance(), TERMINATED);
|
assert_eq!(postings.advance(), TERMINATED);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_indexedfield_not_in_documents() {
|
fn test_indexedfield_not_in_documents() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let absent_field = schema_builder.add_text_field("text", TEXT);
|
let absent_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.add_document(doc!(text_field=>"a"));
|
index_writer.add_document(doc!(text_field=>"a"));
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let segment_reader = searcher.segment_reader(0);
|
let segment_reader = searcher.segment_reader(0);
|
||||||
segment_reader.inverted_index(absent_field); //< should not panic
|
let inverted_index = segment_reader.inverted_index(absent_field); //< should not panic
|
||||||
|
assert_eq!(inverted_index.terms().num_terms(), 0);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_delete_postings2() {
|
fn test_delete_postings2() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
@@ -726,53 +705,40 @@ mod tests {
|
|||||||
let reader = index
|
let reader = index
|
||||||
.reader_builder()
|
.reader_builder()
|
||||||
.reload_policy(ReloadPolicy::Manual)
|
.reload_policy(ReloadPolicy::Manual)
|
||||||
.try_into()
|
.try_into()?;
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
|
index_writer.add_document(doc!(text_field=>"63"));
|
||||||
let add_document = |index_writer: &mut IndexWriter, val: &'static str| {
|
index_writer.add_document(doc!(text_field=>"70"));
|
||||||
let doc = doc!(text_field=>val);
|
index_writer.add_document(doc!(text_field=>"34"));
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc!(text_field=>"1"));
|
||||||
};
|
index_writer.add_document(doc!(text_field=>"38"));
|
||||||
|
index_writer.add_document(doc!(text_field=>"33"));
|
||||||
let remove_document = |index_writer: &mut IndexWriter, val: &'static str| {
|
index_writer.add_document(doc!(text_field=>"40"));
|
||||||
let delterm = Term::from_field_text(text_field, val);
|
index_writer.add_document(doc!(text_field=>"17"));
|
||||||
index_writer.delete_term(delterm);
|
index_writer.delete_term(Term::from_field_text(text_field, "38"));
|
||||||
};
|
index_writer.delete_term(Term::from_field_text(text_field, "34"));
|
||||||
|
index_writer.commit()?;
|
||||||
add_document(&mut index_writer, "63");
|
reader.reload()?;
|
||||||
add_document(&mut index_writer, "70");
|
assert_eq!(reader.searcher().num_docs(), 6);
|
||||||
add_document(&mut index_writer, "34");
|
Ok(())
|
||||||
add_document(&mut index_writer, "1");
|
|
||||||
add_document(&mut index_writer, "38");
|
|
||||||
add_document(&mut index_writer, "33");
|
|
||||||
add_document(&mut index_writer, "40");
|
|
||||||
add_document(&mut index_writer, "17");
|
|
||||||
remove_document(&mut index_writer, "38");
|
|
||||||
remove_document(&mut index_writer, "34");
|
|
||||||
index_writer.commit().unwrap();
|
|
||||||
reader.reload().unwrap();
|
|
||||||
let searcher = reader.searcher();
|
|
||||||
assert_eq!(searcher.num_docs(), 6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_termfreq() {
|
fn test_termfreq() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let doc = doc!(text_field=>"af af af bc bc");
|
index_writer.add_document(doc!(text_field=>"af af af bc bc"));
|
||||||
index_writer.add_document(doc);
|
index_writer.commit()?;
|
||||||
index_writer.commit().unwrap();
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let index_reader = index.reader().unwrap();
|
let index_reader = index.reader()?;
|
||||||
let searcher = index_reader.searcher();
|
let searcher = index_reader.searcher();
|
||||||
let reader = searcher.segment_reader(0);
|
let reader = searcher.segment_reader(0);
|
||||||
let inverted_index = reader.inverted_index(text_field);
|
let inverted_index = reader.inverted_index(text_field);
|
||||||
@@ -788,63 +754,63 @@ mod tests {
|
|||||||
assert_eq!(postings.term_freq(), 3);
|
assert_eq!(postings.term_freq(), 3);
|
||||||
assert_eq!(postings.advance(), TERMINATED);
|
assert_eq!(postings.advance(), TERMINATED);
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_searcher_1() {
|
fn test_searcher_1() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
{
|
// writing the segment
|
||||||
// writing the segment
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
index_writer.add_document(doc!(text_field=>"af af af b"));
|
||||||
index_writer.add_document(doc!(text_field=>"af af af b"));
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
index_writer.add_document(doc!(text_field=>"a b c"));
|
index_writer.add_document(doc!(text_field=>"a b c d"));
|
||||||
index_writer.add_document(doc!(text_field=>"a b c d"));
|
index_writer.commit()?;
|
||||||
index_writer.commit().unwrap();
|
|
||||||
}
|
reader.reload()?;
|
||||||
{
|
let searcher = reader.searcher();
|
||||||
reader.reload().unwrap();
|
let get_doc_ids = |terms: Vec<Term>| {
|
||||||
let searcher = reader.searcher();
|
let query = BooleanQuery::new_multiterms_query(terms);
|
||||||
let get_doc_ids = |terms: Vec<Term>| {
|
searcher
|
||||||
let query = BooleanQuery::new_multiterms_query(terms);
|
.search(&query, &TEST_COLLECTOR_WITH_SCORE)
|
||||||
let topdocs = searcher.search(&query, &TEST_COLLECTOR_WITH_SCORE).unwrap();
|
.map(|topdocs| topdocs.docs().to_vec())
|
||||||
topdocs.docs().to_vec()
|
};
|
||||||
};
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
|
||||||
get_doc_ids(vec![Term::from_field_text(text_field, "a")]),
|
vec![DocAddress(0, 1), DocAddress(0, 2)]
|
||||||
vec![DocAddress(0, 1), DocAddress(0, 2)]
|
);
|
||||||
);
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
|
||||||
get_doc_ids(vec![Term::from_field_text(text_field, "af")]),
|
vec![DocAddress(0, 0)]
|
||||||
vec![DocAddress(0, 0)]
|
);
|
||||||
);
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
|
||||||
get_doc_ids(vec![Term::from_field_text(text_field, "b")]),
|
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
|
||||||
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
|
);
|
||||||
);
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![Term::from_field_text(text_field, "c")])?,
|
||||||
get_doc_ids(vec![Term::from_field_text(text_field, "c")]),
|
vec![DocAddress(0, 1), DocAddress(0, 2)]
|
||||||
vec![DocAddress(0, 1), DocAddress(0, 2)]
|
);
|
||||||
);
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![Term::from_field_text(text_field, "d")])?,
|
||||||
get_doc_ids(vec![Term::from_field_text(text_field, "d")]),
|
vec![DocAddress(0, 2)]
|
||||||
vec![DocAddress(0, 2)]
|
);
|
||||||
);
|
assert_eq!(
|
||||||
assert_eq!(
|
get_doc_ids(vec![
|
||||||
get_doc_ids(vec![
|
Term::from_field_text(text_field, "b"),
|
||||||
Term::from_field_text(text_field, "b"),
|
Term::from_field_text(text_field, "a"),
|
||||||
Term::from_field_text(text_field, "a"),
|
])?,
|
||||||
]),
|
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
|
||||||
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
|
);
|
||||||
);
|
Ok(())
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_searcher_2() {
|
fn test_searcher_2() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
@@ -852,19 +818,17 @@ mod tests {
|
|||||||
let reader = index
|
let reader = index
|
||||||
.reader_builder()
|
.reader_builder()
|
||||||
.reload_policy(ReloadPolicy::Manual)
|
.reload_policy(ReloadPolicy::Manual)
|
||||||
.try_into()
|
.try_into()?;
|
||||||
.unwrap();
|
|
||||||
assert_eq!(reader.searcher().num_docs(), 0u64);
|
assert_eq!(reader.searcher().num_docs(), 0u64);
|
||||||
{
|
// writing the segment
|
||||||
// writing the segment
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
index_writer.add_document(doc!(text_field=>"af b"));
|
||||||
index_writer.add_document(doc!(text_field=>"af b"));
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
index_writer.add_document(doc!(text_field=>"a b c"));
|
index_writer.add_document(doc!(text_field=>"a b c d"));
|
||||||
index_writer.add_document(doc!(text_field=>"a b c d"));
|
index_writer.commit()?;
|
||||||
index_writer.commit().unwrap();
|
reader.reload()?;
|
||||||
}
|
|
||||||
reader.reload().unwrap();
|
|
||||||
assert_eq!(reader.searcher().num_docs(), 3u64);
|
assert_eq!(reader.searcher().num_docs(), 3u64);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -886,7 +850,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_wrong_fast_field_type() {
|
fn test_wrong_fast_field_type() -> crate::Result<()> {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
|
let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
|
||||||
let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
|
let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
|
||||||
@@ -896,14 +860,14 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
{
|
{
|
||||||
let document =
|
let document =
|
||||||
doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
|
doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
|
||||||
index_writer.add_document(document);
|
index_writer.add_document(document);
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
}
|
}
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader()?;
|
||||||
let searcher = reader.searcher();
|
let searcher = reader.searcher();
|
||||||
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
||||||
{
|
{
|
||||||
@@ -942,11 +906,12 @@ mod tests {
|
|||||||
let fast_field_reader = fast_field_reader_opt.unwrap();
|
let fast_field_reader = fast_field_reader_opt.unwrap();
|
||||||
assert_eq!(fast_field_reader.get(0), 4f64)
|
assert_eq!(fast_field_reader.get(0), 4f64)
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// motivated by #729
|
// motivated by #729
|
||||||
#[test]
|
#[test]
|
||||||
fn test_update_via_delete_insert() {
|
fn test_update_via_delete_insert() -> crate::Result<()> {
|
||||||
use crate::collector::Count;
|
use crate::collector::Count;
|
||||||
use crate::indexer::NoMergePolicy;
|
use crate::indexer::NoMergePolicy;
|
||||||
use crate::query::AllQuery;
|
use crate::query::AllQuery;
|
||||||
@@ -960,17 +925,17 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
let index_reader = index.reader().unwrap();
|
let index_reader = index.reader()?;
|
||||||
|
|
||||||
let mut index_writer = index.writer(3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||||
|
|
||||||
for doc_id in 0u64..DOC_COUNT {
|
for doc_id in 0u64..DOC_COUNT {
|
||||||
index_writer.add_document(doc!(id => doc_id));
|
index_writer.add_document(doc!(id => doc_id));
|
||||||
}
|
}
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
|
|
||||||
index_reader.reload().unwrap();
|
index_reader.reload()?;
|
||||||
let searcher = index_reader.searcher();
|
let searcher = index_reader.searcher();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -981,12 +946,11 @@ mod tests {
|
|||||||
// update the 10 elements by deleting and re-adding
|
// update the 10 elements by deleting and re-adding
|
||||||
for doc_id in 0u64..DOC_COUNT {
|
for doc_id in 0u64..DOC_COUNT {
|
||||||
index_writer.delete_term(Term::from_field_u64(id, doc_id));
|
index_writer.delete_term(Term::from_field_u64(id, doc_id));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit()?;
|
||||||
index_reader.reload().unwrap();
|
index_reader.reload()?;
|
||||||
let doc = doc!(id => doc_id);
|
index_writer.add_document(doc!(id => doc_id));
|
||||||
index_writer.add_document(doc);
|
index_writer.commit()?;
|
||||||
index_writer.commit().unwrap();
|
index_reader.reload()?;
|
||||||
index_reader.reload().unwrap();
|
|
||||||
let searcher = index_reader.searcher();
|
let searcher = index_reader.searcher();
|
||||||
// The number of document should be stable.
|
// The number of document should be stable.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -995,7 +959,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
index_reader.reload().unwrap();
|
index_reader.reload()?;
|
||||||
let searcher = index_reader.searcher();
|
let searcher = index_reader.searcher();
|
||||||
let segment_ids: Vec<SegmentId> = searcher
|
let segment_ids: Vec<SegmentId> = searcher
|
||||||
.segment_readers()
|
.segment_readers()
|
||||||
@@ -1004,12 +968,18 @@ mod tests {
|
|||||||
.collect();
|
.collect();
|
||||||
block_on(index_writer.merge(&segment_ids)).unwrap();
|
block_on(index_writer.merge(&segment_ids)).unwrap();
|
||||||
|
|
||||||
index_reader.reload().unwrap();
|
index_reader.reload()?;
|
||||||
let searcher = index_reader.searcher();
|
let searcher = index_reader.searcher();
|
||||||
|
assert_eq!(searcher.search(&AllQuery, &Count)?, DOC_COUNT as usize);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
assert_eq!(
|
#[test]
|
||||||
searcher.search(&AllQuery, &Count).unwrap(),
|
fn test_validate_checksum() -> crate::Result<()> {
|
||||||
DOC_COUNT as usize
|
let index_path = tempfile::tempdir().expect("dir");
|
||||||
);
|
let schema = Schema::builder().build();
|
||||||
|
let index = Index::create_in_dir(&index_path, schema)?;
|
||||||
|
assert!(index.validate_checksum()?.is_empty());
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -455,7 +455,7 @@ mod tests {
|
|||||||
let int_field = schema_builder.add_u64_field("id", INDEXED);
|
let int_field = schema_builder.add_u64_field("id", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let mut last_doc = 0u32;
|
let mut last_doc = 0u32;
|
||||||
for &doc in docs {
|
for &doc in docs {
|
||||||
for _ in last_doc..doc {
|
for _ in last_doc..doc {
|
||||||
@@ -496,7 +496,7 @@ mod tests {
|
|||||||
let int_field = schema_builder.add_u64_field("id", INDEXED);
|
let int_field = schema_builder.add_u64_field("id", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
// create two postings list, one containg even number,
|
// create two postings list, one containg even number,
|
||||||
// the other containing odd numbers.
|
// the other containing odd numbers.
|
||||||
for i in 0..6 {
|
for i in 0..6 {
|
||||||
|
|||||||
@@ -310,6 +310,7 @@ pub mod tests {
|
|||||||
mod bench {
|
mod bench {
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::TERMINATED;
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use rand::SeedableRng;
|
use rand::SeedableRng;
|
||||||
@@ -340,7 +341,7 @@ mod bench {
|
|||||||
let mut encoder = BlockEncoder::new();
|
let mut encoder = BlockEncoder::new();
|
||||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||||
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
||||||
let mut decoder = BlockDecoder::new();
|
let mut decoder = BlockDecoder::default();
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
||||||
});
|
});
|
||||||
@@ -375,9 +376,9 @@ mod bench {
|
|||||||
let mut encoder = BlockEncoder::new();
|
let mut encoder = BlockEncoder::new();
|
||||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||||
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
||||||
let mut decoder = BlockDecoder::new();
|
let mut decoder = BlockDecoder::default();
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
|
decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT, TERMINATED);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ pub mod tests {
|
|||||||
let title = schema_builder.add_text_field("title", TEXT);
|
let title = schema_builder.add_text_field("title", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
index_writer.add_document(doc!(title => r#"abc abc abc"#));
|
index_writer.add_document(doc!(title => r#"abc abc abc"#));
|
||||||
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
|
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
|
||||||
for _ in 0..1_000 {
|
for _ in 0..1_000 {
|
||||||
@@ -176,7 +176,7 @@ pub mod tests {
|
|||||||
.tokenizers()
|
.tokenizers()
|
||||||
.register("simple_no_truncation", SimpleTokenizer);
|
.register("simple_no_truncation", SimpleTokenizer);
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader().unwrap();
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
index_writer.set_merge_policy(Box::new(NoMergePolicy));
|
||||||
{
|
{
|
||||||
index_writer.add_document(doc!(text_field=>exceeding_token_text));
|
index_writer.add_document(doc!(text_field=>exceeding_token_text));
|
||||||
@@ -205,7 +205,7 @@ pub mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn test_position_and_fieldnorm1() {
|
pub fn test_position_and_fieldnorm1() -> crate::Result<()> {
|
||||||
let mut positions = Vec::new();
|
let mut positions = Vec::new();
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
@@ -217,42 +217,38 @@ pub mod tests {
|
|||||||
let mut segment_writer =
|
let mut segment_writer =
|
||||||
SegmentWriter::for_segment(3_000_000, segment.clone(), &schema).unwrap();
|
SegmentWriter::for_segment(3_000_000, segment.clone(), &schema).unwrap();
|
||||||
{
|
{
|
||||||
let mut doc = Document::default();
|
|
||||||
// checking that position works if the field has two values
|
// checking that position works if the field has two values
|
||||||
doc.add_text(text_field, "a b a c a d a a.");
|
|
||||||
doc.add_text(text_field, "d d d d a");
|
|
||||||
let op = AddOperation {
|
let op = AddOperation {
|
||||||
opstamp: 0u64,
|
opstamp: 0u64,
|
||||||
document: doc,
|
document: doc!(
|
||||||
|
text_field => "a b a c a d a a.",
|
||||||
|
text_field => "d d d d a"
|
||||||
|
),
|
||||||
};
|
};
|
||||||
segment_writer.add_document(op, &schema).unwrap();
|
segment_writer.add_document(op, &schema)?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let mut doc = Document::default();
|
|
||||||
doc.add_text(text_field, "b a");
|
|
||||||
let op = AddOperation {
|
let op = AddOperation {
|
||||||
opstamp: 1u64,
|
opstamp: 1u64,
|
||||||
document: doc,
|
document: doc!(text_field => "b a"),
|
||||||
};
|
};
|
||||||
segment_writer.add_document(op, &schema).unwrap();
|
segment_writer.add_document(op, &schema).unwrap();
|
||||||
}
|
}
|
||||||
for i in 2..1000 {
|
for i in 2..1000 {
|
||||||
let mut doc = Document::default();
|
let mut text: String = iter::repeat("e ").take(i).collect();
|
||||||
let mut text = iter::repeat("e ").take(i).collect::<String>();
|
|
||||||
text.push_str(" a");
|
text.push_str(" a");
|
||||||
doc.add_text(text_field, &text);
|
|
||||||
let op = AddOperation {
|
let op = AddOperation {
|
||||||
opstamp: 2u64,
|
opstamp: 2u64,
|
||||||
document: doc,
|
document: doc!(text_field => text),
|
||||||
};
|
};
|
||||||
segment_writer.add_document(op, &schema).unwrap();
|
segment_writer.add_document(op, &schema).unwrap();
|
||||||
}
|
}
|
||||||
segment_writer.finalize().unwrap();
|
segment_writer.finalize()?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let segment_reader = SegmentReader::open(&segment).unwrap();
|
let segment_reader = SegmentReader::open(&segment)?;
|
||||||
{
|
{
|
||||||
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field);
|
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field)?;
|
||||||
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
|
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
|
||||||
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
|
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
|
||||||
for i in 2..1000 {
|
for i in 2..1000 {
|
||||||
@@ -312,6 +308,7 @@ pub mod tests {
|
|||||||
assert_eq!(postings_e.doc(), TERMINATED);
|
assert_eq!(postings_e.doc(), TERMINATED);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -322,7 +319,7 @@ pub mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field => "g b b d c g c"));
|
index_writer.add_document(doc!(text_field => "g b b d c g c"));
|
||||||
index_writer.add_document(doc!(text_field => "g a b b a d c g c"));
|
index_writer.add_document(doc!(text_field => "g a b b a d c g c"));
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
@@ -354,7 +351,7 @@ pub mod tests {
|
|||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for i in 0u64..num_docs as u64 {
|
for i in 0u64..num_docs as u64 {
|
||||||
let doc = doc!(value_field => 2u64, value_field => i % 2u64);
|
let doc = doc!(value_field => 2u64, value_field => i % 2u64);
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
@@ -425,7 +422,7 @@ pub mod tests {
|
|||||||
|
|
||||||
// delete some of the documents
|
// delete some of the documents
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.delete_term(term_0);
|
index_writer.delete_term(term_0);
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
}
|
}
|
||||||
@@ -479,7 +476,7 @@ pub mod tests {
|
|||||||
|
|
||||||
// delete everything else
|
// delete everything else
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.delete_term(term_1);
|
index_writer.delete_term(term_1);
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
}
|
}
|
||||||
@@ -522,7 +519,7 @@ pub mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let posting_list_size = 1_000_000;
|
let posting_list_size = 1_000_000;
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for _ in 0..posting_list_size {
|
for _ in 0..posting_list_size {
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
if rng.gen_bool(1f64 / 15f64) {
|
if rng.gen_bool(1f64 / 15f64) {
|
||||||
@@ -730,7 +727,7 @@ mod bench {
|
|||||||
let mut s = 0u32;
|
let mut s = 0u32;
|
||||||
while segment_postings.doc() != TERMINATED {
|
while segment_postings.doc() != TERMINATED {
|
||||||
s += (segment_postings.doc() & n) % 1024;
|
s += (segment_postings.doc() & n) % 1024;
|
||||||
segment_postings.advance()
|
segment_postings.advance();
|
||||||
}
|
}
|
||||||
s
|
s
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ impl MultiFieldPostingsWriter {
|
|||||||
doc: DocId,
|
doc: DocId,
|
||||||
field: Field,
|
field: Field,
|
||||||
token_stream: &mut dyn TokenStream,
|
token_stream: &mut dyn TokenStream,
|
||||||
|
term_buffer: &mut Term,
|
||||||
) -> u32 {
|
) -> u32 {
|
||||||
let postings_writer =
|
let postings_writer =
|
||||||
self.per_field_postings_writers[field.field_id() as usize].deref_mut();
|
self.per_field_postings_writers[field.field_id() as usize].deref_mut();
|
||||||
@@ -114,6 +115,7 @@ impl MultiFieldPostingsWriter {
|
|||||||
field,
|
field,
|
||||||
token_stream,
|
token_stream,
|
||||||
&mut self.heap,
|
&mut self.heap,
|
||||||
|
term_buffer,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -220,21 +222,22 @@ pub trait PostingsWriter {
|
|||||||
field: Field,
|
field: Field,
|
||||||
token_stream: &mut dyn TokenStream,
|
token_stream: &mut dyn TokenStream,
|
||||||
heap: &mut MemoryArena,
|
heap: &mut MemoryArena,
|
||||||
|
term_buffer: &mut Term,
|
||||||
) -> u32 {
|
) -> u32 {
|
||||||
let mut term = Term::for_field(field);
|
term_buffer.set_field(field);
|
||||||
let mut sink = |token: &Token| {
|
let mut sink = |token: &Token| {
|
||||||
// We skip all tokens with a len greater than u16.
|
// We skip all tokens with a len greater than u16.
|
||||||
if token.text.len() <= MAX_TOKEN_LEN {
|
if token.text.len() > MAX_TOKEN_LEN {
|
||||||
term.set_text(token.text.as_str());
|
return;
|
||||||
self.subscribe(term_index, doc_id, token.position as u32, &term, heap);
|
|
||||||
} else {
|
|
||||||
info!(
|
|
||||||
"A token exceeding MAX_TOKEN_LEN ({}>{}) was dropped. Search for \
|
|
||||||
MAX_TOKEN_LEN in the documentation for more information.",
|
|
||||||
token.text.len(),
|
|
||||||
MAX_TOKEN_LEN
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
term_buffer.set_text(token.text.as_str());
|
||||||
|
self.subscribe(
|
||||||
|
term_index,
|
||||||
|
doc_id,
|
||||||
|
token.position as u32,
|
||||||
|
&term_buffer,
|
||||||
|
heap,
|
||||||
|
);
|
||||||
};
|
};
|
||||||
token_stream.process(&mut sink)
|
token_stream.process(&mut sink)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ impl SegmentPostings {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|&fieldnorm| fieldnorm as u64)
|
.map(|&fieldnorm| fieldnorm as u64)
|
||||||
.sum::<u64>();
|
.sum::<u64>();
|
||||||
total_num_tokens as Score / fieldnorms.len() as f32
|
total_num_tokens as Score / fieldnorms.len() as Score
|
||||||
})
|
})
|
||||||
.unwrap_or(0.0);
|
.unwrap_or(0.0);
|
||||||
let mut postings_serializer = PostingsSerializer::new(
|
let mut postings_serializer = PostingsSerializer::new(
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ mod tests {
|
|||||||
let field = schema_builder.add_text_field("text", TEXT);
|
let field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(field=>"aaa"));
|
index_writer.add_document(doc!(field=>"aaa"));
|
||||||
index_writer.add_document(doc!(field=>"bbb"));
|
index_writer.add_document(doc!(field=>"bbb"));
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit().unwrap();
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ use crate::query::{BitSetDocSet, Explanation};
|
|||||||
use crate::query::{Scorer, Weight};
|
use crate::query::{Scorer, Weight};
|
||||||
use crate::schema::{Field, IndexRecordOption};
|
use crate::schema::{Field, IndexRecordOption};
|
||||||
use crate::termdict::{TermDictionary, TermStreamer};
|
use crate::termdict::{TermDictionary, TermStreamer};
|
||||||
use crate::Result;
|
|
||||||
use crate::TantivyError;
|
use crate::TantivyError;
|
||||||
use crate::{DocId, Score};
|
use crate::{DocId, Score};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -40,7 +39,7 @@ impl<A> Weight for AutomatonWeight<A>
|
|||||||
where
|
where
|
||||||
A: Automaton + Send + Sync + 'static,
|
A: Automaton + Send + Sync + 'static,
|
||||||
{
|
{
|
||||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result<Box<dyn Scorer>> {
|
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||||
let max_doc = reader.max_doc();
|
let max_doc = reader.max_doc();
|
||||||
let mut doc_bitset = BitSet::with_max_value(max_doc);
|
let mut doc_bitset = BitSet::with_max_value(max_doc);
|
||||||
let inverted_index = reader.inverted_index(self.field);
|
let inverted_index = reader.inverted_index(self.field);
|
||||||
@@ -66,7 +65,7 @@ where
|
|||||||
Ok(Box::new(const_scorer))
|
Ok(Box::new(const_scorer))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||||
let mut scorer = self.scorer(reader, 1.0)?;
|
let mut scorer = self.scorer(reader, 1.0)?;
|
||||||
if scorer.seek(doc) == doc {
|
if scorer.seek(doc) == doc {
|
||||||
Ok(Explanation::new("AutomatonScorer", 1.0))
|
Ok(Explanation::new("AutomatonScorer", 1.0))
|
||||||
@@ -91,7 +90,7 @@ mod tests {
|
|||||||
let mut schema = Schema::builder();
|
let mut schema = Schema::builder();
|
||||||
let title = schema.add_text_field("title", STRING);
|
let title = schema.add_text_field("title", STRING);
|
||||||
let index = Index::create_in_ram(schema.build());
|
let index = Index::create_in_ram(schema.build());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(title=>"abc"));
|
index_writer.add_document(doc!(title=>"abc"));
|
||||||
index_writer.add_document(doc!(title=>"bcd"));
|
index_writer.add_document(doc!(title=>"bcd"));
|
||||||
index_writer.add_document(doc!(title=>"abcd"));
|
index_writer.add_document(doc!(title=>"abcd"));
|
||||||
|
|||||||
@@ -4,19 +4,6 @@ use crate::{DocId, DocSet, Score, TERMINATED};
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::ops::DerefMut;
|
use std::ops::DerefMut;
|
||||||
|
|
||||||
fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
|
|
||||||
if let Some(first) = it.next() {
|
|
||||||
let mut prev = first;
|
|
||||||
for doc in it {
|
|
||||||
if doc < prev {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
prev = doc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Takes a term_scorers sorted by their current doc() and a threshold and returns
|
/// Takes a term_scorers sorted by their current doc() and a threshold and returns
|
||||||
/// Returns (pivot_len, pivot_ord) defined as follows:
|
/// Returns (pivot_len, pivot_ord) defined as follows:
|
||||||
/// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
|
/// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
|
||||||
@@ -55,37 +42,12 @@ fn find_pivot_doc(
|
|||||||
Some((before_pivot_len, pivot_len, pivot_doc))
|
Some((before_pivot_len, pivot_len, pivot_doc))
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TermScorerWithMaxScore<'a> {
|
|
||||||
scorer: &'a mut TermScorer,
|
|
||||||
max_score: Score,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
|
|
||||||
fn from(scorer: &'a mut TermScorer) -> Self {
|
|
||||||
let max_score = scorer.max_score();
|
|
||||||
TermScorerWithMaxScore { scorer, max_score }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Deref for TermScorerWithMaxScore<'a> {
|
|
||||||
type Target = TermScorer;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
self.scorer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
|
|
||||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
||||||
self.scorer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Before and after calling this method, scorers need to be sorted by their `.doc()`.
|
// Before and after calling this method, scorers need to be sorted by their `.doc()`.
|
||||||
fn block_max_was_too_low_advance_one_scorer(
|
fn block_max_was_too_low_advance_one_scorer(
|
||||||
scorers: &mut Vec<TermScorerWithMaxScore>,
|
scorers: &mut Vec<TermScorerWithMaxScore>,
|
||||||
pivot_len: usize,
|
pivot_len: usize,
|
||||||
) {
|
) {
|
||||||
|
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
|
||||||
let mut scorer_to_seek = pivot_len - 1;
|
let mut scorer_to_seek = pivot_len - 1;
|
||||||
let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
|
let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
|
||||||
for scorer_ord in (0..pivot_len - 1).rev() {
|
for scorer_ord in (0..pivot_len - 1).rev() {
|
||||||
@@ -102,6 +64,7 @@ fn block_max_was_too_low_advance_one_scorer(
|
|||||||
}
|
}
|
||||||
scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
|
scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
|
||||||
restore_ordering(scorers, scorer_to_seek);
|
restore_ordering(scorers, scorer_to_seek);
|
||||||
|
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
|
// Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
|
||||||
@@ -177,64 +140,99 @@ pub fn block_wand(
|
|||||||
.map(TermScorerWithMaxScore::from)
|
.map(TermScorerWithMaxScore::from)
|
||||||
.collect();
|
.collect();
|
||||||
scorers.sort_by_key(|scorer| scorer.doc());
|
scorers.sort_by_key(|scorer| scorer.doc());
|
||||||
loop {
|
// At this point we need to ensure that the scorers are sorted!
|
||||||
// At this point we need to ensure that the scorers are sorted!
|
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
|
||||||
|
while let Some((before_pivot_len, pivot_len, pivot_doc)) =
|
||||||
|
find_pivot_doc(&scorers[..], threshold)
|
||||||
|
{
|
||||||
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
|
debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
|
||||||
if let Some((before_pivot_len, pivot_len, pivot_doc)) =
|
debug_assert_ne!(pivot_doc, TERMINATED);
|
||||||
find_pivot_doc(&scorers[..], threshold)
|
debug_assert!(before_pivot_len < pivot_len);
|
||||||
{
|
|
||||||
debug_assert_ne!(pivot_doc, TERMINATED);
|
|
||||||
debug_assert!(before_pivot_len < pivot_len);
|
|
||||||
|
|
||||||
let block_max_score_upperbound: Score = scorers[..pivot_len]
|
let block_max_score_upperbound: Score = scorers[..pivot_len]
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.map(|scorer| {
|
.map(|scorer| {
|
||||||
scorer.shallow_seek(pivot_doc);
|
scorer.shallow_seek(pivot_doc);
|
||||||
scorer.block_max_score()
|
scorer.block_max_score()
|
||||||
})
|
})
|
||||||
.sum();
|
.sum();
|
||||||
|
|
||||||
// Beware after shallow advance, skip readers can be in advance compared to
|
// Beware after shallow advance, skip readers can be in advance compared to
|
||||||
// the segment posting lists.
|
// the segment posting lists.
|
||||||
//
|
//
|
||||||
// `block_segment_postings.load_block()` need to be called separately.
|
// `block_segment_postings.load_block()` need to be called separately.
|
||||||
if block_max_score_upperbound <= threshold {
|
if block_max_score_upperbound <= threshold {
|
||||||
// Block max condition was not reached
|
// Block max condition was not reached
|
||||||
// We could get away by simply advancing the scorers to DocId + 1 but it would
|
// We could get away by simply advancing the scorers to DocId + 1 but it would
|
||||||
// be inefficient. The optimization requires proper explanation and was
|
// be inefficient. The optimization requires proper explanation and was
|
||||||
// isolated in a different function.
|
// isolated in a different function.
|
||||||
block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
|
block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
// Block max condition is observed.
|
|
||||||
//
|
|
||||||
// Let's try and advance all scorers before the pivot to the pivot.
|
|
||||||
if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
|
|
||||||
// At least of the scorer does not contain the pivot.
|
|
||||||
//
|
|
||||||
// Let's stop scoring this pivot and go through the pivot selection again.
|
|
||||||
// Note that the current pivot is not necessarily a bad candidate and it
|
|
||||||
// may be picked again.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this point, all scorers are positioned on the doc.
|
|
||||||
let score = scorers[..pivot_len]
|
|
||||||
.iter_mut()
|
|
||||||
.map(|scorer| scorer.score())
|
|
||||||
.sum();
|
|
||||||
if score > threshold {
|
|
||||||
threshold = callback(pivot_doc, score);
|
|
||||||
}
|
|
||||||
// let's advance all of the scorers that are currently positioned on the pivot.
|
|
||||||
advance_all_scorers_on_pivot(&mut scorers, pivot_len);
|
|
||||||
} else {
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Block max condition is observed.
|
||||||
|
//
|
||||||
|
// Let's try and advance all scorers before the pivot to the pivot.
|
||||||
|
if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
|
||||||
|
// At least of the scorer does not contain the pivot.
|
||||||
|
//
|
||||||
|
// Let's stop scoring this pivot and go through the pivot selection again.
|
||||||
|
// Note that the current pivot is not necessarily a bad candidate and it
|
||||||
|
// may be picked again.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At this point, all scorers are positioned on the doc.
|
||||||
|
let score = scorers[..pivot_len]
|
||||||
|
.iter_mut()
|
||||||
|
.map(|scorer| scorer.score())
|
||||||
|
.sum();
|
||||||
|
if score > threshold {
|
||||||
|
threshold = callback(pivot_doc, score);
|
||||||
|
}
|
||||||
|
// let's advance all of the scorers that are currently positioned on the pivot.
|
||||||
|
advance_all_scorers_on_pivot(&mut scorers, pivot_len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct TermScorerWithMaxScore<'a> {
|
||||||
|
scorer: &'a mut TermScorer,
|
||||||
|
max_score: Score,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
|
||||||
|
fn from(scorer: &'a mut TermScorer) -> Self {
|
||||||
|
let max_score = scorer.max_score();
|
||||||
|
TermScorerWithMaxScore { scorer, max_score }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Deref for TermScorerWithMaxScore<'a> {
|
||||||
|
type Target = TermScorer;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.scorer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
|
||||||
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
|
self.scorer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
|
||||||
|
if let Some(first) = it.next() {
|
||||||
|
let mut prev = first;
|
||||||
|
for doc in it {
|
||||||
|
if doc < prev {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
prev = doc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::query::score_combiner::SumCombiner;
|
use crate::query::score_combiner::SumCombiner;
|
||||||
@@ -248,17 +246,21 @@ mod tests {
|
|||||||
use std::iter;
|
use std::iter;
|
||||||
|
|
||||||
struct Float(Score);
|
struct Float(Score);
|
||||||
|
|
||||||
impl Eq for Float {}
|
impl Eq for Float {}
|
||||||
|
|
||||||
impl PartialEq for Float {
|
impl PartialEq for Float {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.cmp(&other) == Ordering::Equal
|
self.cmp(&other) == Ordering::Equal
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialOrd for Float {
|
impl PartialOrd for Float {
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
Some(self.cmp(other))
|
Some(self.cmp(other))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ord for Float {
|
impl Ord for Float {
|
||||||
fn cmp(&self, other: &Self) -> Ordering {
|
fn cmp(&self, other: &Self) -> Ordering {
|
||||||
other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
|
other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
{
|
{
|
||||||
index_writer.add_document(doc!(text_field => "a b c"));
|
index_writer.add_document(doc!(text_field => "a b c"));
|
||||||
index_writer.add_document(doc!(text_field => "a c"));
|
index_writer.add_document(doc!(text_field => "a c"));
|
||||||
@@ -224,7 +224,7 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field => "a b c"));
|
index_writer.add_document(doc!(text_field => "a b c"));
|
||||||
index_writer.add_document(doc!(text_field => "a c"));
|
index_writer.add_document(doc!(text_field => "a c"));
|
||||||
index_writer.add_document(doc!(text_field => "b c"));
|
index_writer.add_document(doc!(text_field => "b c"));
|
||||||
|
|||||||
@@ -144,7 +144,7 @@ mod tests {
|
|||||||
fn test_boost_query_explain() {
|
fn test_boost_query_explain() {
|
||||||
let schema = Schema::builder().build();
|
let schema = Schema::builder().build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(Document::new());
|
index_writer.add_document(Document::new());
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
let reader = index.reader().unwrap();
|
let reader = index.reader().unwrap();
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ mod test {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
country_field => "japan",
|
country_field => "japan",
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ pub mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for &text in texts {
|
for &text in texts {
|
||||||
let doc = doc!(text_field=>text);
|
let doc = doc!(text_field=>text);
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
@@ -135,7 +135,7 @@ pub mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"a b c"));
|
index_writer.add_document(doc!(text_field=>"a b c"));
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
}
|
}
|
||||||
@@ -186,7 +186,7 @@ pub mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"b"));
|
index_writer.add_document(doc!(text_field=>"b"));
|
||||||
index_writer.add_document(doc!(text_field=>"a b"));
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
index_writer.add_document(doc!(text_field=>"b a"));
|
index_writer.add_document(doc!(text_field=>"b a"));
|
||||||
@@ -217,7 +217,7 @@ pub mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"a b c d e f g h"));
|
index_writer.add_document(doc!(text_field=>"a b c d e f g h"));
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ use crate::query::Weight;
|
|||||||
use crate::query::{EmptyScorer, Explanation};
|
use crate::query::{EmptyScorer, Explanation};
|
||||||
use crate::schema::IndexRecordOption;
|
use crate::schema::IndexRecordOption;
|
||||||
use crate::schema::Term;
|
use crate::schema::Term;
|
||||||
|
use crate::Score;
|
||||||
use crate::{DocId, DocSet};
|
use crate::{DocId, DocSet};
|
||||||
use crate::{Result, Score};
|
|
||||||
|
|
||||||
pub struct PhraseWeight {
|
pub struct PhraseWeight {
|
||||||
phrase_terms: Vec<(usize, Term)>,
|
phrase_terms: Vec<(usize, Term)>,
|
||||||
@@ -32,7 +32,7 @@ impl PhraseWeight {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fieldnorm_reader(&self, reader: &SegmentReader) -> FieldNormReader {
|
fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result<FieldNormReader> {
|
||||||
let field = self.phrase_terms[0].1.field();
|
let field = self.phrase_terms[0].1.field();
|
||||||
reader.get_fieldnorms_reader(field)
|
reader.get_fieldnorms_reader(field)
|
||||||
}
|
}
|
||||||
@@ -41,9 +41,9 @@ impl PhraseWeight {
|
|||||||
&self,
|
&self,
|
||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
boost: Score,
|
boost: Score,
|
||||||
) -> Result<Option<PhraseScorer<SegmentPostings>>> {
|
) -> crate::Result<Option<PhraseScorer<SegmentPostings>>> {
|
||||||
let similarity_weight = self.similarity_weight.boost_by(boost);
|
let similarity_weight = self.similarity_weight.boost_by(boost);
|
||||||
let fieldnorm_reader = self.fieldnorm_reader(reader);
|
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
|
||||||
if reader.has_deletes() {
|
if reader.has_deletes() {
|
||||||
let mut term_postings_list = Vec::new();
|
let mut term_postings_list = Vec::new();
|
||||||
for &(offset, ref term) in &self.phrase_terms {
|
for &(offset, ref term) in &self.phrase_terms {
|
||||||
@@ -85,7 +85,7 @@ impl PhraseWeight {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Weight for PhraseWeight {
|
impl Weight for PhraseWeight {
|
||||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result<Box<dyn Scorer>> {
|
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||||
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
|
if let Some(scorer) = self.phrase_scorer(reader, boost)? {
|
||||||
Ok(Box::new(scorer))
|
Ok(Box::new(scorer))
|
||||||
} else {
|
} else {
|
||||||
@@ -93,7 +93,7 @@ impl Weight for PhraseWeight {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||||
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
|
let scorer_opt = self.phrase_scorer(reader, 1.0)?;
|
||||||
if scorer_opt.is_none() {
|
if scorer_opt.is_none() {
|
||||||
return Err(does_not_match(doc));
|
return Err(does_not_match(doc));
|
||||||
@@ -102,7 +102,7 @@ impl Weight for PhraseWeight {
|
|||||||
if scorer.seek(doc) != doc {
|
if scorer.seek(doc) != doc {
|
||||||
return Err(does_not_match(doc));
|
return Err(does_not_match(doc));
|
||||||
}
|
}
|
||||||
let fieldnorm_reader = self.fieldnorm_reader(reader);
|
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
|
||||||
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
|
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
|
||||||
let phrase_count = scorer.phrase_count();
|
let phrase_count = scorer.phrase_count();
|
||||||
let mut explanation = Explanation::new("Phrase Scorer", scorer.score());
|
let mut explanation = Explanation::new("Phrase Scorer", scorer.score());
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ use std::fmt;
|
|||||||
///
|
///
|
||||||
/// When implementing a new type of `Query`, it is normal to implement a
|
/// When implementing a new type of `Query`, it is normal to implement a
|
||||||
/// dedicated `Query`, `Weight` and `Scorer`.
|
/// dedicated `Query`, `Weight` and `Scorer`.
|
||||||
pub trait Query: QueryClone + downcast_rs::Downcast + fmt::Debug {
|
pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
|
||||||
/// Create the weight associated to a query.
|
/// Create the weight associated to a query.
|
||||||
///
|
///
|
||||||
/// If scoring is not required, setting `scoring_enabled` to `false`
|
/// If scoring is not required, setting `scoring_enabled` to `false`
|
||||||
|
|||||||
@@ -21,51 +21,48 @@ use std::str::FromStr;
|
|||||||
use tantivy_query_grammar::{UserInputAST, UserInputBound, UserInputLeaf};
|
use tantivy_query_grammar::{UserInputAST, UserInputBound, UserInputLeaf};
|
||||||
|
|
||||||
/// Possible error that may happen when parsing a query.
|
/// Possible error that may happen when parsing a query.
|
||||||
#[derive(Debug, PartialEq, Eq, Fail)]
|
#[derive(Debug, PartialEq, Eq, Error)]
|
||||||
pub enum QueryParserError {
|
pub enum QueryParserError {
|
||||||
/// Error in the query syntax
|
/// Error in the query syntax
|
||||||
#[fail(display = "Syntax Error")]
|
#[error("Syntax Error")]
|
||||||
SyntaxError,
|
SyntaxError,
|
||||||
/// `FieldDoesNotExist(field_name: String)`
|
/// `FieldDoesNotExist(field_name: String)`
|
||||||
/// The query references a field that is not in the schema
|
/// The query references a field that is not in the schema
|
||||||
#[fail(display = "File does not exists: '{:?}'", _0)]
|
#[error("File does not exists: '{0:?}'")]
|
||||||
FieldDoesNotExist(String),
|
FieldDoesNotExist(String),
|
||||||
/// The query contains a term for a `u64` or `i64`-field, but the value
|
/// The query contains a term for a `u64` or `i64`-field, but the value
|
||||||
/// is neither.
|
/// is neither.
|
||||||
#[fail(display = "Expected a valid integer: '{:?}'", _0)]
|
#[error("Expected a valid integer: '{0:?}'")]
|
||||||
ExpectedInt(ParseIntError),
|
ExpectedInt(ParseIntError),
|
||||||
/// The query contains a term for a `f64`-field, but the value
|
/// The query contains a term for a `f64`-field, but the value
|
||||||
/// is not a f64.
|
/// is not a f64.
|
||||||
#[fail(display = "Invalid query: Only excluding terms given")]
|
#[error("Invalid query: Only excluding terms given")]
|
||||||
ExpectedFloat(ParseFloatError),
|
ExpectedFloat(ParseFloatError),
|
||||||
/// It is forbidden queries that are only "excluding". (e.g. -title:pop)
|
/// It is forbidden queries that are only "excluding". (e.g. -title:pop)
|
||||||
#[fail(display = "Invalid query: Only excluding terms given")]
|
#[error("Invalid query: Only excluding terms given")]
|
||||||
AllButQueryForbidden,
|
AllButQueryForbidden,
|
||||||
/// If no default field is declared, running a query without any
|
/// If no default field is declared, running a query without any
|
||||||
/// field specified is forbbidden.
|
/// field specified is forbbidden.
|
||||||
#[fail(display = "No default field declared and no field specified in query")]
|
#[error("No default field declared and no field specified in query")]
|
||||||
NoDefaultFieldDeclared,
|
NoDefaultFieldDeclared,
|
||||||
/// The field searched for is not declared
|
/// The field searched for is not declared
|
||||||
/// as indexed in the schema.
|
/// as indexed in the schema.
|
||||||
#[fail(display = "The field '{:?}' is not declared as indexed", _0)]
|
#[error("The field '{0:?}' is not declared as indexed")]
|
||||||
FieldNotIndexed(String),
|
FieldNotIndexed(String),
|
||||||
/// A phrase query was requested for a field that does not
|
/// A phrase query was requested for a field that does not
|
||||||
/// have any positions indexed.
|
/// have any positions indexed.
|
||||||
#[fail(display = "The field '{:?}' does not have positions indexed", _0)]
|
#[error("The field '{0:?}' does not have positions indexed")]
|
||||||
FieldDoesNotHavePositionsIndexed(String),
|
FieldDoesNotHavePositionsIndexed(String),
|
||||||
/// The tokenizer for the given field is unknown
|
/// The tokenizer for the given field is unknown
|
||||||
/// The two argument strings are the name of the field, the name of the tokenizer
|
/// The two argument strings are the name of the field, the name of the tokenizer
|
||||||
#[fail(
|
#[error("The tokenizer '{0:?}' for the field '{1:?}' is unknown")]
|
||||||
display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
|
|
||||||
_0, _1
|
|
||||||
)]
|
|
||||||
UnknownTokenizer(String, String),
|
UnknownTokenizer(String, String),
|
||||||
/// The query contains a range query with a phrase as one of the bounds.
|
/// The query contains a range query with a phrase as one of the bounds.
|
||||||
/// Only terms can be used as bounds.
|
/// Only terms can be used as bounds.
|
||||||
#[fail(display = "A range query cannot have a phrase as one of the bounds")]
|
#[error("A range query cannot have a phrase as one of the bounds")]
|
||||||
RangeMustNotHavePhrase,
|
RangeMustNotHavePhrase,
|
||||||
/// The format for the date field is not RFC 3339 compliant.
|
/// The format for the date field is not RFC 3339 compliant.
|
||||||
#[fail(display = "The date field has an invalid format")]
|
#[error("The date field has an invalid format")]
|
||||||
DateFormatError(chrono::ParseError),
|
DateFormatError(chrono::ParseError),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ use crate::query::{Query, Scorer, Weight};
|
|||||||
use crate::schema::Type;
|
use crate::schema::Type;
|
||||||
use crate::schema::{Field, IndexRecordOption, Term};
|
use crate::schema::{Field, IndexRecordOption, Term};
|
||||||
use crate::termdict::{TermDictionary, TermStreamer};
|
use crate::termdict::{TermDictionary, TermStreamer};
|
||||||
use crate::Result;
|
|
||||||
use crate::{DocId, Score};
|
use crate::{DocId, Score};
|
||||||
use std::collections::Bound;
|
use std::collections::Bound;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
@@ -48,7 +47,7 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
|
|||||||
/// let schema = schema_builder.build();
|
/// let schema = schema_builder.build();
|
||||||
///
|
///
|
||||||
/// let index = Index::create_in_ram(schema);
|
/// let index = Index::create_in_ram(schema);
|
||||||
/// let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
|
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
||||||
/// for year in 1950u64..2017u64 {
|
/// for year in 1950u64..2017u64 {
|
||||||
/// let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
/// let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||||
/// for _ in 0..num_docs_within_year {
|
/// for _ in 0..num_docs_within_year {
|
||||||
@@ -246,7 +245,11 @@ impl RangeQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Query for RangeQuery {
|
impl Query for RangeQuery {
|
||||||
fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<dyn Weight>> {
|
fn weight(
|
||||||
|
&self,
|
||||||
|
searcher: &Searcher,
|
||||||
|
_scoring_enabled: bool,
|
||||||
|
) -> crate::Result<Box<dyn Weight>> {
|
||||||
let schema = searcher.schema();
|
let schema = searcher.schema();
|
||||||
let value_type = schema.get_field_entry(self.field).field_type().value_type();
|
let value_type = schema.get_field_entry(self.field).field_type().value_type();
|
||||||
if value_type != self.value_type {
|
if value_type != self.value_type {
|
||||||
@@ -289,7 +292,7 @@ impl RangeWeight {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Weight for RangeWeight {
|
impl Weight for RangeWeight {
|
||||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result<Box<dyn Scorer>> {
|
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||||
let max_doc = reader.max_doc();
|
let max_doc = reader.max_doc();
|
||||||
let mut doc_bitset = BitSet::with_max_value(max_doc);
|
let mut doc_bitset = BitSet::with_max_value(max_doc);
|
||||||
|
|
||||||
@@ -315,7 +318,7 @@ impl Weight for RangeWeight {
|
|||||||
Ok(Box::new(ConstScorer::new(doc_bitset, boost)))
|
Ok(Box::new(ConstScorer::new(doc_bitset, boost)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||||
let mut scorer = self.scorer(reader, 1.0)?;
|
let mut scorer = self.scorer(reader, 1.0)?;
|
||||||
if scorer.seek(doc) != doc {
|
if scorer.seek(doc) != doc {
|
||||||
return Err(does_not_match(doc));
|
return Err(does_not_match(doc));
|
||||||
@@ -342,7 +345,7 @@ mod tests {
|
|||||||
|
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
for year in 1950u64..2017u64 {
|
for year in 1950u64..2017u64 {
|
||||||
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||||
for _ in 0..num_docs_within_year {
|
for _ in 0..num_docs_within_year {
|
||||||
@@ -485,7 +488,7 @@ mod tests {
|
|||||||
schema_builder.add_i64_field("year", INDEXED);
|
schema_builder.add_i64_field("year", INDEXED);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
let title = schema.get_field("title").unwrap();
|
let title = schema.get_field("title").unwrap();
|
||||||
let year = schema.get_field("year").unwrap();
|
let year = schema.get_field("year").unwrap();
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ mod test {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
country_field => "japan",
|
country_field => "japan",
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
let doc = doc!(text_field => "a");
|
let doc = doc!(text_field => "a");
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
assert!(index_writer.commit().is_ok());
|
assert!(index_writer.commit().is_ok());
|
||||||
@@ -50,7 +50,7 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
let mut index_writer = index.writer_for_tests()?;
|
||||||
for _ in 0..COMPRESSION_BLOCK_SIZE {
|
for _ in 0..COMPRESSION_BLOCK_SIZE {
|
||||||
let doc = doc!(text_field => "a");
|
let doc = doc!(text_field => "a");
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
@@ -86,7 +86,7 @@ mod tests {
|
|||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(
|
index_writer.add_document(doc!(
|
||||||
left_field => "left1 left2 left2 left2f2 left2f2 left3 abcde abcde abcde abcde abcde abcde abcde abcde abcde abcewde abcde abcde",
|
left_field => "left1 left2 left2 left2f2 left2f2 left3 abcde abcde abcde abcde abcde abcde abcde abcde abcde abcewde abcde abcde",
|
||||||
right_field => "right1 right2",
|
right_field => "right1 right2",
|
||||||
@@ -136,7 +136,7 @@ mod tests {
|
|||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 5_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"a b"));
|
index_writer.add_document(doc!(text_field=>"a b"));
|
||||||
index_writer.add_document(doc!(text_field=>"a c"));
|
index_writer.add_document(doc!(text_field=>"a c"));
|
||||||
index_writer.delete_term(Term::from_field_text(text_field, "b"));
|
index_writer.delete_term(Term::from_field_text(text_field, "b"));
|
||||||
@@ -153,7 +153,7 @@ mod tests {
|
|||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let schema = schema_builder.build();
|
let schema = schema_builder.build();
|
||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field=>"a"));
|
index_writer.add_document(doc!(text_field=>"a"));
|
||||||
index_writer.add_document(doc!(text_field=>"a"));
|
index_writer.add_document(doc!(text_field=>"a"));
|
||||||
index_writer.commit()?;
|
index_writer.commit()?;
|
||||||
|
|||||||
@@ -4,11 +4,10 @@ use crate::docset::DocSet;
|
|||||||
use crate::postings::SegmentPostings;
|
use crate::postings::SegmentPostings;
|
||||||
use crate::query::bm25::BM25Weight;
|
use crate::query::bm25::BM25Weight;
|
||||||
use crate::query::explanation::does_not_match;
|
use crate::query::explanation::does_not_match;
|
||||||
use crate::query::weight::{for_each_pruning_scorer, for_each_scorer};
|
use crate::query::weight::for_each_scorer;
|
||||||
use crate::query::Weight;
|
use crate::query::Weight;
|
||||||
use crate::query::{Explanation, Scorer};
|
use crate::query::{Explanation, Scorer};
|
||||||
use crate::schema::IndexRecordOption;
|
use crate::schema::IndexRecordOption;
|
||||||
use crate::Result;
|
|
||||||
use crate::Term;
|
use crate::Term;
|
||||||
use crate::{DocId, Score};
|
use crate::{DocId, Score};
|
||||||
|
|
||||||
@@ -19,12 +18,12 @@ pub struct TermWeight {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Weight for TermWeight {
|
impl Weight for TermWeight {
|
||||||
fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result<Box<dyn Scorer>> {
|
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
|
||||||
let term_scorer = self.specialized_scorer(reader, boost)?;
|
let term_scorer = self.specialized_scorer(reader, boost)?;
|
||||||
Ok(Box::new(term_scorer))
|
Ok(Box::new(term_scorer))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
|
||||||
let mut scorer = self.specialized_scorer(reader, 1.0)?;
|
let mut scorer = self.specialized_scorer(reader, 1.0)?;
|
||||||
if scorer.seek(doc) != doc {
|
if scorer.seek(doc) != doc {
|
||||||
return Err(does_not_match(doc));
|
return Err(does_not_match(doc));
|
||||||
@@ -32,7 +31,7 @@ impl Weight for TermWeight {
|
|||||||
Ok(scorer.explain())
|
Ok(scorer.explain())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count(&self, reader: &SegmentReader) -> Result<u32> {
|
fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
|
||||||
if let Some(delete_bitset) = reader.delete_bitset() {
|
if let Some(delete_bitset) = reader.delete_bitset() {
|
||||||
Ok(self.scorer(reader, 1.0)?.count(delete_bitset))
|
Ok(self.scorer(reader, 1.0)?.count(delete_bitset))
|
||||||
} else {
|
} else {
|
||||||
@@ -73,8 +72,8 @@ impl Weight for TermWeight {
|
|||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
callback: &mut dyn FnMut(DocId, Score) -> Score,
|
callback: &mut dyn FnMut(DocId, Score) -> Score,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
let mut scorer = self.scorer(reader, 1.0)?;
|
let scorer = self.specialized_scorer(reader, 1.0)?;
|
||||||
for_each_pruning_scorer(&mut scorer, threshold, callback);
|
crate::query::boolean_query::block_wand(vec![scorer], threshold, callback);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -96,10 +95,10 @@ impl TermWeight {
|
|||||||
&self,
|
&self,
|
||||||
reader: &SegmentReader,
|
reader: &SegmentReader,
|
||||||
boost: Score,
|
boost: Score,
|
||||||
) -> Result<TermScorer> {
|
) -> crate::Result<TermScorer> {
|
||||||
let field = self.term.field();
|
let field = self.term.field();
|
||||||
let inverted_index = reader.inverted_index(field);
|
let inverted_index = reader.inverted_index(field);
|
||||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
|
let fieldnorm_reader = reader.get_fieldnorms_reader(field)?;
|
||||||
let similarity_weight = self.similarity_weight.boost_by(boost);
|
let similarity_weight = self.similarity_weight.boost_by(boost);
|
||||||
let postings_opt: Option<SegmentPostings> =
|
let postings_opt: Option<SegmentPostings> =
|
||||||
inverted_index.read_postings(&self.term, self.index_record_option);
|
inverted_index.read_postings(&self.term, self.index_record_option);
|
||||||
|
|||||||
@@ -398,9 +398,9 @@ mod bench {
|
|||||||
|
|
||||||
use crate::query::score_combiner::DoNothingCombiner;
|
use crate::query::score_combiner::DoNothingCombiner;
|
||||||
use crate::query::{ConstScorer, Union, VecDocSet};
|
use crate::query::{ConstScorer, Union, VecDocSet};
|
||||||
use crate::tests;
|
|
||||||
use crate::DocId;
|
use crate::DocId;
|
||||||
use crate::DocSet;
|
use crate::DocSet;
|
||||||
|
use crate::{tests, TERMINATED};
|
||||||
use test::Bencher;
|
use test::Bencher;
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
@@ -414,10 +414,12 @@ mod bench {
|
|||||||
union_docset
|
union_docset
|
||||||
.iter()
|
.iter()
|
||||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
||||||
.map(ConstScorer::new)
|
.map(|docset| ConstScorer::new(docset, 1.0))
|
||||||
.collect::<Vec<_>>(),
|
.collect::<Vec<_>>(),
|
||||||
);
|
);
|
||||||
while v.advance() {}
|
while v.doc() != TERMINATED {
|
||||||
|
v.advance();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
#[bench]
|
#[bench]
|
||||||
@@ -432,10 +434,12 @@ mod bench {
|
|||||||
union_docset
|
union_docset
|
||||||
.iter()
|
.iter()
|
||||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
||||||
.map(ConstScorer::new)
|
.map(|docset| ConstScorer::new(docset, 1.0))
|
||||||
.collect::<Vec<_>>(),
|
.collect::<Vec<_>>(),
|
||||||
);
|
);
|
||||||
while v.advance() {}
|
while v.doc() != TERMINATED {
|
||||||
|
v.advance();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
mod pool;
|
mod pool;
|
||||||
|
|
||||||
|
use slog::error;
|
||||||
|
|
||||||
pub use self::pool::LeasedItem;
|
pub use self::pool::LeasedItem;
|
||||||
use self::pool::Pool;
|
use self::pool::Pool;
|
||||||
use crate::core::Segment;
|
use crate::core::Segment;
|
||||||
@@ -62,6 +64,7 @@ impl IndexReaderBuilder {
|
|||||||
/// to open different segment readers. It may take hundreds of milliseconds
|
/// to open different segment readers. It may take hundreds of milliseconds
|
||||||
/// of time and it may return an error.
|
/// of time and it may return an error.
|
||||||
pub fn try_into(self) -> crate::Result<IndexReader> {
|
pub fn try_into(self) -> crate::Result<IndexReader> {
|
||||||
|
let logger = self.index.logger().clone();
|
||||||
let inner_reader = InnerIndexReader {
|
let inner_reader = InnerIndexReader {
|
||||||
index: self.index,
|
index: self.index,
|
||||||
num_searchers: self.num_searchers,
|
num_searchers: self.num_searchers,
|
||||||
@@ -80,8 +83,8 @@ impl IndexReaderBuilder {
|
|||||||
let callback = move || {
|
let callback = move || {
|
||||||
if let Err(err) = inner_reader_arc_clone.reload() {
|
if let Err(err) = inner_reader_arc_clone.reload() {
|
||||||
error!(
|
error!(
|
||||||
"Error while loading searcher after commit was detected. {:?}",
|
logger,
|
||||||
err
|
"Error while loading searcher after commit was detected. {:?}", err
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -138,9 +141,11 @@ impl InnerIndexReader {
|
|||||||
.collect::<crate::Result<_>>()?
|
.collect::<crate::Result<_>>()?
|
||||||
};
|
};
|
||||||
let schema = self.index.schema();
|
let schema = self.index.schema();
|
||||||
let searchers = (0..self.num_searchers)
|
let searchers = std::iter::repeat_with(|| {
|
||||||
.map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone()))
|
Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())
|
||||||
.collect();
|
})
|
||||||
|
.take(self.num_searchers)
|
||||||
|
.collect();
|
||||||
self.searcher_pool.publish_new_generation(searchers);
|
self.searcher_pool.publish_new_generation(searchers);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,9 +74,8 @@ impl Document {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Add a text field.
|
/// Add a text field.
|
||||||
pub fn add_text(&mut self, field: Field, text: &str) {
|
pub fn add_text<S: ToString>(&mut self, field: Field, text: S) {
|
||||||
let value = Value::Str(String::from(text));
|
self.add(FieldValue::new(field, Value::Str(text.to_string())));
|
||||||
self.add(FieldValue::new(field, value));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a pre-tokenized text field.
|
/// Add a pre-tokenized text field.
|
||||||
@@ -110,8 +109,8 @@ impl Document {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Add a bytes field
|
/// Add a bytes field
|
||||||
pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
|
pub fn add_bytes<T: Into<Vec<u8>>>(&mut self, field: Field, value: T) {
|
||||||
self.add(FieldValue::new(field, Value::Bytes(value)))
|
self.add(FieldValue::new(field, Value::Bytes(value.into())))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a field value
|
/// Add a field value
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use crate::schema::IntOptions;
|
|
||||||
use crate::schema::TextOptions;
|
use crate::schema::TextOptions;
|
||||||
|
use crate::schema::{is_valid_field_name, IntOptions};
|
||||||
|
|
||||||
use crate::schema::FieldType;
|
use crate::schema::FieldType;
|
||||||
use serde::de::{self, MapAccess, Visitor};
|
use serde::de::{self, MapAccess, Visitor};
|
||||||
@@ -24,6 +24,7 @@ impl FieldEntry {
|
|||||||
/// Creates a new text field entry in the schema, given
|
/// Creates a new text field entry in the schema, given
|
||||||
/// a name, and some options.
|
/// a name, and some options.
|
||||||
pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry {
|
pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::Str(text_options),
|
field_type: FieldType::Str(text_options),
|
||||||
@@ -33,6 +34,7 @@ impl FieldEntry {
|
|||||||
/// Creates a new u64 field entry in the schema, given
|
/// Creates a new u64 field entry in the schema, given
|
||||||
/// a name, and some options.
|
/// a name, and some options.
|
||||||
pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::U64(field_type),
|
field_type: FieldType::U64(field_type),
|
||||||
@@ -42,6 +44,7 @@ impl FieldEntry {
|
|||||||
/// Creates a new i64 field entry in the schema, given
|
/// Creates a new i64 field entry in the schema, given
|
||||||
/// a name, and some options.
|
/// a name, and some options.
|
||||||
pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::I64(field_type),
|
field_type: FieldType::I64(field_type),
|
||||||
@@ -51,6 +54,7 @@ impl FieldEntry {
|
|||||||
/// Creates a new f64 field entry in the schema, given
|
/// Creates a new f64 field entry in the schema, given
|
||||||
/// a name, and some options.
|
/// a name, and some options.
|
||||||
pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::F64(field_type),
|
field_type: FieldType::F64(field_type),
|
||||||
@@ -60,6 +64,7 @@ impl FieldEntry {
|
|||||||
/// Creates a new date field entry in the schema, given
|
/// Creates a new date field entry in the schema, given
|
||||||
/// a name, and some options.
|
/// a name, and some options.
|
||||||
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
|
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::Date(field_type),
|
field_type: FieldType::Date(field_type),
|
||||||
@@ -68,6 +73,7 @@ impl FieldEntry {
|
|||||||
|
|
||||||
/// Creates a field entry for a facet.
|
/// Creates a field entry for a facet.
|
||||||
pub fn new_facet(field_name: String) -> FieldEntry {
|
pub fn new_facet(field_name: String) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::HierarchicalFacet,
|
field_type: FieldType::HierarchicalFacet,
|
||||||
@@ -76,6 +82,7 @@ impl FieldEntry {
|
|||||||
|
|
||||||
/// Creates a field entry for a bytes field
|
/// Creates a field entry for a bytes field
|
||||||
pub fn new_bytes(field_name: String) -> FieldEntry {
|
pub fn new_bytes(field_name: String) -> FieldEntry {
|
||||||
|
assert!(is_valid_field_name(&field_name));
|
||||||
FieldEntry {
|
FieldEntry {
|
||||||
name: field_name,
|
name: field_name,
|
||||||
field_type: FieldType::Bytes,
|
field_type: FieldType::Bytes,
|
||||||
@@ -268,6 +275,12 @@ mod tests {
|
|||||||
use crate::schema::TEXT;
|
use crate::schema::TEXT;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[should_panic]
|
||||||
|
fn test_invalid_field_name_should_panic() {
|
||||||
|
FieldEntry::new_text("-hello".to_string(), TEXT);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_json_serialization() {
|
fn test_json_serialization() {
|
||||||
let field_value = FieldEntry::new_text(String::from("title"), TEXT);
|
let field_value = FieldEntry::new_text(String::from("title"), TEXT);
|
||||||
|
|||||||
@@ -149,14 +149,16 @@ pub use self::int_options::IntOptions;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
|
||||||
|
/// Regular expression representing the restriction on valid field names.
|
||||||
|
pub const FIELD_NAME_PATTERN: &str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#;
|
||||||
|
|
||||||
/// Validator for a potential `field_name`.
|
/// Validator for a potential `field_name`.
|
||||||
/// Returns true iff the name can be used as a field name.
|
/// Returns true iff the name can be used as a field name.
|
||||||
///
|
///
|
||||||
/// A field name must start with a letter `[a-zA-Z]`.
|
/// A field name must start with a letter `[a-zA-Z]`.
|
||||||
/// The other characters can be any alphanumeric character `[a-zA-Z0-9]` or `_`.
|
/// The other characters can be any alphanumeric character `[a-zA-Z0-9]` or `_`.
|
||||||
pub fn is_valid_field_name(field_name: &str) -> bool {
|
pub fn is_valid_field_name(field_name: &str) -> bool {
|
||||||
static FIELD_NAME_PTN: Lazy<Regex> =
|
static FIELD_NAME_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(FIELD_NAME_PATTERN).unwrap());
|
||||||
Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
|
|
||||||
FIELD_NAME_PTN.is_match(field_name)
|
FIELD_NAME_PTN.is_match(field_name)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,6 +172,11 @@ mod tests {
|
|||||||
assert!(is_valid_field_name("text"));
|
assert!(is_valid_field_name("text"));
|
||||||
assert!(is_valid_field_name("text0"));
|
assert!(is_valid_field_name("text0"));
|
||||||
assert!(!is_valid_field_name("0text"));
|
assert!(!is_valid_field_name("0text"));
|
||||||
|
assert!(is_valid_field_name("field-name"));
|
||||||
|
assert!(is_valid_field_name("field_name"));
|
||||||
|
assert!(!is_valid_field_name("field!name"));
|
||||||
|
assert!(!is_valid_field_name("-fieldname"));
|
||||||
|
assert!(is_valid_field_name("_fieldname"));
|
||||||
assert!(!is_valid_field_name(""));
|
assert!(!is_valid_field_name(""));
|
||||||
assert!(!is_valid_field_name("シャボン玉"));
|
assert!(!is_valid_field_name("シャボン玉"));
|
||||||
assert!(is_valid_field_name("my_text_field"));
|
assert!(is_valid_field_name("my_text_field"));
|
||||||
|
|||||||
@@ -381,19 +381,16 @@ impl<'de> Deserialize<'de> for Schema {
|
|||||||
|
|
||||||
/// Error that may happen when deserializing
|
/// Error that may happen when deserializing
|
||||||
/// a document from JSON.
|
/// a document from JSON.
|
||||||
#[derive(Debug, Fail, PartialEq)]
|
#[derive(Debug, Error, PartialEq)]
|
||||||
pub enum DocParsingError {
|
pub enum DocParsingError {
|
||||||
/// The payload given is not valid JSON.
|
/// The payload given is not valid JSON.
|
||||||
#[fail(display = "The provided string is not valid JSON")]
|
#[error("The provided string is not valid JSON")]
|
||||||
NotJSON(String),
|
NotJSON(String),
|
||||||
/// One of the value node could not be parsed.
|
/// One of the value node could not be parsed.
|
||||||
#[fail(display = "The field '{:?}' could not be parsed: {:?}", _0, _1)]
|
#[error("The field '{0:?}' could not be parsed: {1:?}")]
|
||||||
ValueError(String, ValueParsingError),
|
ValueError(String, ValueParsingError),
|
||||||
/// The json-document contains a field that is not declared in the schema.
|
/// The json-document contains a field that is not declared in the schema.
|
||||||
#[fail(
|
#[error("The document contains a field that is not declared in the schema: {0:?}")]
|
||||||
display = "The document contains a field that is not declared in the schema: {:?}",
|
|
||||||
_0
|
|
||||||
)]
|
|
||||||
NoSuchFieldInSchema(String),
|
NoSuchFieldInSchema(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ use super::Field;
|
|||||||
use crate::common;
|
use crate::common;
|
||||||
use crate::schema::Facet;
|
use crate::schema::Facet;
|
||||||
use crate::DateTime;
|
use crate::DateTime;
|
||||||
use byteorder::{BigEndian, ByteOrder};
|
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
/// Size (in bytes) of the buffer of a int field.
|
/// Size (in bytes) of the buffer of a int field.
|
||||||
@@ -19,6 +18,10 @@ where
|
|||||||
B: AsRef<[u8]>;
|
B: AsRef<[u8]>;
|
||||||
|
|
||||||
impl Term {
|
impl Term {
|
||||||
|
pub(crate) fn new() -> Term {
|
||||||
|
Term(Vec::with_capacity(100))
|
||||||
|
}
|
||||||
|
|
||||||
/// Builds a term given a field, and a i64-value
|
/// Builds a term given a field, and a i64-value
|
||||||
///
|
///
|
||||||
/// Assuming the term has a field id of 1, and a i64 value of 3234,
|
/// Assuming the term has a field id of 1, and a i64 value of 3234,
|
||||||
@@ -93,6 +96,12 @@ impl Term {
|
|||||||
term
|
term
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
|
||||||
|
let mut term = Term::for_field(field);
|
||||||
|
term.set_bytes(bytes);
|
||||||
|
term
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a new Term for a given field.
|
/// Creates a new Term for a given field.
|
||||||
pub(crate) fn for_field(field: Field) -> Term {
|
pub(crate) fn for_field(field: Field) -> Term {
|
||||||
let mut term = Term(Vec::with_capacity(100));
|
let mut term = Term(Vec::with_capacity(100));
|
||||||
@@ -100,12 +109,10 @@ impl Term {
|
|||||||
term
|
term
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the field.
|
pub(crate) fn set_field(&mut self, field: Field) {
|
||||||
pub fn set_field(&mut self, field: Field) {
|
self.0.clear();
|
||||||
if self.0.len() < 4 {
|
self.0
|
||||||
self.0.resize(4, 0u8);
|
.extend_from_slice(&field.field_id().to_be_bytes()[..]);
|
||||||
}
|
|
||||||
BigEndian::write_u32(&mut self.0[0..4], field.field_id());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets a u64 value in the term.
|
/// Sets a u64 value in the term.
|
||||||
@@ -116,7 +123,7 @@ impl Term {
|
|||||||
/// the natural order of the values.
|
/// the natural order of the values.
|
||||||
pub fn set_u64(&mut self, val: u64) {
|
pub fn set_u64(&mut self, val: u64) {
|
||||||
self.0.resize(INT_TERM_LEN, 0u8);
|
self.0.resize(INT_TERM_LEN, 0u8);
|
||||||
BigEndian::write_u64(&mut self.0[4..], val);
|
self.0[4..12].copy_from_slice(val.to_be_bytes().as_ref());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets a `i64` value in the term.
|
/// Sets a `i64` value in the term.
|
||||||
@@ -134,12 +141,6 @@ impl Term {
|
|||||||
self.0.extend(bytes);
|
self.0.extend(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
|
|
||||||
let mut term = Term::for_field(field);
|
|
||||||
term.set_bytes(bytes);
|
|
||||||
term
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the texts only, keeping the field untouched.
|
/// Set the texts only, keeping the field untouched.
|
||||||
pub fn set_text(&mut self, text: &str) {
|
pub fn set_text(&mut self, text: &str) {
|
||||||
self.set_bytes(text.as_bytes());
|
self.set_bytes(text.as_bytes());
|
||||||
@@ -157,7 +158,9 @@ where
|
|||||||
|
|
||||||
/// Returns the field.
|
/// Returns the field.
|
||||||
pub fn field(&self) -> Field {
|
pub fn field(&self) -> Field {
|
||||||
Field::from_field_id(BigEndian::read_u32(&self.0.as_ref()[..4]))
|
let mut field_id_bytes = [0u8; 4];
|
||||||
|
field_id_bytes.copy_from_slice(&self.0.as_ref()[..4]);
|
||||||
|
Field::from_field_id(u32::from_be_bytes(field_id_bytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the `u64` value stored in a term.
|
/// Returns the `u64` value stored in a term.
|
||||||
@@ -166,7 +169,9 @@ where
|
|||||||
/// ... or returns an invalid value
|
/// ... or returns an invalid value
|
||||||
/// if the term is not a `u64` field.
|
/// if the term is not a `u64` field.
|
||||||
pub fn get_u64(&self) -> u64 {
|
pub fn get_u64(&self) -> u64 {
|
||||||
BigEndian::read_u64(&self.0.as_ref()[4..])
|
let mut field_id_bytes = [0u8; 8];
|
||||||
|
field_id_bytes.copy_from_slice(self.value_bytes());
|
||||||
|
u64::from_be_bytes(field_id_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the `i64` value stored in a term.
|
/// Returns the `i64` value stored in a term.
|
||||||
@@ -175,7 +180,7 @@ where
|
|||||||
/// ... or returns an invalid value
|
/// ... or returns an invalid value
|
||||||
/// if the term is not a `i64` field.
|
/// if the term is not a `i64` field.
|
||||||
pub fn get_i64(&self) -> i64 {
|
pub fn get_i64(&self) -> i64 {
|
||||||
common::u64_to_i64(BigEndian::read_u64(&self.0.as_ref()[4..]))
|
common::u64_to_i64(self.get_u64())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the `f64` value stored in a term.
|
/// Returns the `f64` value stored in a term.
|
||||||
@@ -184,7 +189,7 @@ where
|
|||||||
/// ... or returns an invalid value
|
/// ... or returns an invalid value
|
||||||
/// if the term is not a `f64` field.
|
/// if the term is not a `f64` field.
|
||||||
pub fn get_f64(&self) -> f64 {
|
pub fn get_f64(&self) -> f64 {
|
||||||
common::u64_to_f64(BigEndian::read_u64(&self.0.as_ref()[4..]))
|
common::u64_to_f64(self.get_u64())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the text associated with the term.
|
/// Returns the text associated with the term.
|
||||||
|
|||||||
@@ -221,6 +221,12 @@ impl<'a> From<&'a str> for Value {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a [u8]> for Value {
|
||||||
|
fn from(bytes: &'a [u8]) -> Value {
|
||||||
|
Value::Bytes(bytes.to_vec())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> From<Facet> for Value {
|
impl<'a> From<Facet> for Value {
|
||||||
fn from(facet: Facet) -> Value {
|
fn from(facet: Facet) -> Value {
|
||||||
Value::Facet(facet)
|
Value::Facet(facet)
|
||||||
|
|||||||
@@ -221,7 +221,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
|
|||||||
/// # let text_field = schema_builder.add_text_field("text", TEXT);
|
/// # let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
/// # let schema = schema_builder.build();
|
/// # let schema = schema_builder.build();
|
||||||
/// # let index = Index::create_in_ram(schema);
|
/// # let index = Index::create_in_ram(schema);
|
||||||
/// # let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
|
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
||||||
/// # let doc = doc!(text_field => r#"Comme je descendais des Fleuves impassibles,
|
/// # let doc = doc!(text_field => r#"Comme je descendais des Fleuves impassibles,
|
||||||
/// # Je ne me sentis plus guidé par les haleurs :
|
/// # Je ne me sentis plus guidé par les haleurs :
|
||||||
/// # Des Peaux-Rouges criards les avaient pris pour cibles,
|
/// # Des Peaux-Rouges criards les avaient pris pour cibles,
|
||||||
@@ -506,7 +506,7 @@ Survey in 2016, 2017, and 2018."#;
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(text_field => "a"));
|
index_writer.add_document(doc!(text_field => "a"));
|
||||||
index_writer.add_document(doc!(text_field => "a"));
|
index_writer.add_document(doc!(text_field => "a"));
|
||||||
index_writer.add_document(doc!(text_field => "a b"));
|
index_writer.add_document(doc!(text_field => "a b"));
|
||||||
@@ -562,7 +562,7 @@ Survey in 2016, 2017, and 2018."#;
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
{
|
{
|
||||||
// writing the segment
|
// writing the segment
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
{
|
{
|
||||||
let doc = doc ! (text_field => TEST_TEXT);
|
let doc = doc ! (text_field => TEST_TEXT);
|
||||||
index_writer.add_document(doc);
|
index_writer.add_document(doc);
|
||||||
|
|||||||
@@ -336,7 +336,7 @@ mod test {
|
|||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(name => 1u64));
|
index_writer.add_document(doc!(name => 1u64));
|
||||||
index_writer.add_document(doc!(name => 2u64));
|
index_writer.add_document(doc!(name => 2u64));
|
||||||
index_writer.add_document(doc!(name => 10u64));
|
index_writer.add_document(doc!(name => 10u64));
|
||||||
@@ -374,7 +374,7 @@ mod test {
|
|||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(name => "hi"));
|
index_writer.add_document(doc!(name => "hi"));
|
||||||
index_writer.add_document(doc!(name => "this is a test"));
|
index_writer.add_document(doc!(name => "this is a test"));
|
||||||
index_writer.add_document(
|
index_writer.add_document(
|
||||||
@@ -414,7 +414,7 @@ mod test {
|
|||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(name => "hi"));
|
index_writer.add_document(doc!(name => "hi"));
|
||||||
index_writer.add_document(doc!(name => "this is a test"));
|
index_writer.add_document(doc!(name => "this is a test"));
|
||||||
index_writer.add_document(
|
index_writer.add_document(
|
||||||
@@ -453,7 +453,7 @@ mod test {
|
|||||||
let index = Index::create_in_ram(schema.clone());
|
let index = Index::create_in_ram(schema.clone());
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
index_writer.add_document(doc!(name => 1u64));
|
index_writer.add_document(doc!(name => 1u64));
|
||||||
index_writer.add_document(doc!(name => 2u64));
|
index_writer.add_document(doc!(name => 2u64));
|
||||||
index_writer.add_document(doc!(name => 3u64));
|
index_writer.add_document(doc!(name => 3u64));
|
||||||
|
|||||||
@@ -68,19 +68,17 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, key: u64, dest: &T) -> io::Result<()> {
|
pub fn insert(&mut self, key: u64, dest: &T) -> io::Result<()> {
|
||||||
let mut layer_id = 0;
|
|
||||||
let mut skip_pointer = self.data_layer.insert(key, dest)?;
|
let mut skip_pointer = self.data_layer.insert(key, dest)?;
|
||||||
loop {
|
for layer_id in 0.. {
|
||||||
skip_pointer = match skip_pointer {
|
if let Some((skip_doc_id, skip_offset)) = skip_pointer {
|
||||||
Some((skip_doc_id, skip_offset)) => self
|
skip_pointer = self
|
||||||
.get_skip_layer(layer_id)
|
.get_skip_layer(layer_id)
|
||||||
.insert(skip_doc_id, &skip_offset)?,
|
.insert(skip_doc_id, &skip_offset)?;
|
||||||
None => {
|
} else {
|
||||||
return Ok(());
|
break;
|
||||||
}
|
}
|
||||||
};
|
|
||||||
layer_id += 1;
|
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
|
pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
|
||||||
|
|||||||
@@ -138,7 +138,7 @@ mod tests {
|
|||||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||||
let index = Index::create_in_ram(schema_builder.build());
|
let index = Index::create_in_ram(schema_builder.build());
|
||||||
{
|
{
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
let mut index_writer = index.writer_for_tests().unwrap();
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
|
|||||||
Reference in New Issue
Block a user