Compare commits

..

2 Commits

Author          SHA1          Message             Date
Paul Masurel    31543bdd90    fixing unit test    2019-01-29 11:41:48 +01:00
Paul Masurel    ea1402bd82    Downcast_ref        2019-01-29 09:54:49 +01:00
98 changed files with 1066 additions and 2528 deletions

View File

@@ -29,7 +29,7 @@ addons:
matrix: matrix:
include: include:
# Android # Android
- env: TARGET=aarch64-linux-android - env: TARGET=aarch64-linux-android DISABLE_TESTS=1
#- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1 #- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
#- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1 #- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
#- env: TARGET=i686-linux-android DISABLE_TESTS=1 #- env: TARGET=i686-linux-android DISABLE_TESTS=1

View File

@@ -1,33 +1,9 @@
Tantivy 0.9.1
=====================
Hotfix: The English stemmer was mistakenly used for all languages.
Tantivy 0.9.0 Tantivy 0.9.0
===================== =====================
*0.9.0 index format is not compatible with the
previous index format.*
- MAJOR BUGFIX :
Some `Mmap` objects were being leaked, and would never get released. (@fulmicoton)
- Removed most unsafe (@fulmicoton) - Removed most unsafe (@fulmicoton)
- Indexer memory footprint improved. (VInt comp, inlining the first block. (@fulmicoton) - Indexer memory footprint improved. (VInt comp, inlining the first block. (@fulmicoton)
- Stemming in other language possible (@pentlander) - Stemming in other language possible (@pentlander)
- Segments with no docs are deleted earlier (@barrotsteindev) - Segments with no docs are deleted earlier (@barrotsteindev)
- Added grouped add and delete operations.
They are guaranteed to happen together (i.e. they cannot be split by a commit).
In addition, adds are guaranteed to happen on the same segment. (@elbow-jason)
- Removed `INT_STORED` and `INT_INDEXED`. It is now possible to use `STORED` and `INDEXED`
for int fields. (@fulmicoton)
- Added DateTime field (@barrotsteindev)
- Added IndexReader. By default, index is reloaded automatically upon new commits (@fulmicoton)
Tantivy 0.8.2
=====================
Fixing build for x86_64 platforms. (#496)
No need to update from 0.8.1 if tantivy
is building on your platform.
Tantivy 0.8.1 Tantivy 0.8.1
===================== =====================
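
The entries above cover two API-visible changes: integer fields now take the same `STORED` / `INDEXED` flags as text fields, and the new `IndexReader` reloads searchers automatically after a commit. A minimal, hedged sketch of that 0.9-style flow, built only from calls that appear elsewhere in this compare (field names are illustrative):

```rust
#[macro_use]
extern crate tantivy;

use tantivy::schema::{Schema, INDEXED, STORED, TEXT};
use tantivy::{Index, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    // Since 0.9, int fields use the same flags as text fields (no more INT_INDEXED).
    let year = schema_builder.add_u64_field("year", INDEXED | STORED);
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer_with_num_threads(1, 6_000_000)?;
    writer.add_document(doc!(year => 1818u64, title => "Frankenstein"));
    writer.commit()?;

    // The IndexReader mentioned in the changelog: with OnCommit it picks up new
    // commits on its own, replacing the manual `load_searchers()` call.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommit)
        .try_into()?;
    assert_eq!(reader.searcher().num_docs(), 1);
    Ok(())
}
```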

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy" name = "tantivy"
version = "0.9.1" version = "0.9.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = ["database-implementations", "data-structures"] categories = ["database-implementations", "data-structures"]
@@ -16,8 +16,8 @@ base64 = "0.10.0"
byteorder = "1.0" byteorder = "1.0"
lazy_static = "1" lazy_static = "1"
regex = "1.0" regex = "1.0"
tantivy-fst = "0.1" fst = {version="0.3", default-features=false}
memmap = {version = "0.7", optional=true} fst-regex = { version="0.2" }
lz4 = {version="1.20", optional=true} lz4 = {version="1.20", optional=true}
snap = {version="0.2"} snap = {version="0.2"}
atomicwrites = {version="0.2.2", optional=true} atomicwrites = {version="0.2.2", optional=true}
@@ -32,17 +32,17 @@ num_cpus = "1.2"
fs2={version="0.4", optional=true} fs2={version="0.4", optional=true}
itertools = "0.8" itertools = "0.8"
levenshtein_automata = {version="0.1", features=["fst_automaton"]} levenshtein_automata = {version="0.1", features=["fst_automaton"]}
notify = {version="4", optional=true}
bit-set = "0.5" bit-set = "0.5"
uuid = { version = "0.7.2", features = ["v4", "serde"] } uuid = { version = "0.7", features = ["v4", "serde"] }
crossbeam = "0.5" crossbeam = "0.5"
futures = "0.1" futures = "0.1"
futures-cpupool = "0.1" futures-cpupool = "0.1"
owning_ref = "0.4" owning_ref = "0.4"
stable_deref_trait = "1.0.0" stable_deref_trait = "1.0.0"
rust-stemmers = "1.1" rust-stemmers = "1"
downcast-rs = { version="1.0" } downcast-rs = { version="1.0" }
bitpacking = "0.6" matches = "0.1"
bitpacking = "0.5"
census = "0.2" census = "0.2"
fnv = "1.0.6" fnv = "1.0.6"
owned-read = "0.4" owned-read = "0.4"
@@ -51,7 +51,6 @@ htmlescape = "0.3.1"
fail = "0.2" fail = "0.2"
scoped-pool = "1.0" scoped-pool = "1.0"
murmurhash32 = "0.2" murmurhash32 = "0.2"
chrono = "0.4"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
winapi = "0.2" winapi = "0.2"
@@ -59,8 +58,6 @@ winapi = "0.2"
[dev-dependencies] [dev-dependencies]
rand = "0.6" rand = "0.6"
maplit = "1" maplit = "1"
matches = "0.1.8"
time = "0.1.42"
[profile.release] [profile.release]
opt-level = 3 opt-level = 3
@@ -74,11 +71,12 @@ overflow-checks = true
[features] [features]
# by default no-fail is disabled. We manually enable it when running test. # by default no-fail is disabled. We manually enable it when running test.
default = ["mmap", "no_fail"] default = ["mmap", "no_fail"]
mmap = ["atomicwrites", "fs2", "memmap", "notify"] mmap = ["fst/mmap", "atomicwrites", "fs2"]
lz4-compression = ["lz4"] lz4-compression = ["lz4"]
no_fail = ["fail/no_fail"] no_fail = ["fail/no_fail"]
unstable = [] # useful for benches. unstable = [] # useful for benches.
wasm-bindgen = ["uuid/wasm-bindgen"]
[badges] [badges]
travis-ci = { repository = "tantivy-search/tantivy" } travis-ci = { repository = "tantivy-search/tantivy" }

View File

@@ -30,7 +30,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
# Features # Features
- Full-text search - Full-text search
- Configurable tokenizer. (stemming available for 17 latin languages. Third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)) and [Japanese](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)
- Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:) - Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:)
- Tiny startup time (<10ms), perfect for command line tools - Tiny startup time (<10ms), perfect for command line tools
- BM25 scoring (the same as lucene) - BM25 scoring (the same as lucene)
@@ -42,7 +41,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
- SIMD integer compression when the platform/CPU includes the SSE2 instruction set. - SIMD integer compression when the platform/CPU includes the SSE2 instruction set.
- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene) - Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
- `&[u8]` fast fields - `&[u8]` fast fields
- Text, i64, u64, dates and hierarchical facet fields
- LZ4 compressed document store - LZ4 compressed document store
- Range queries - Range queries
- Faceted search - Faceted search

View File

@@ -20,7 +20,6 @@ use tantivy::collector::TopDocs;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::Index;
use tantivy::ReloadPolicy;
use tempdir::TempDir; use tempdir::TempDir;
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
@@ -171,33 +170,24 @@ fn main() -> tantivy::Result<()> {
// //
// ### Searcher // ### Searcher
// //
// A reader is required to get search the index. // Let's search our index. Start by reloading
// It acts as a `Searcher` pool that reloads itself, // searchers in the index. This should be done
// depending on a `ReloadPolicy`. // after every `commit()`.
// index.load_searchers()?;
// For a search server you will typically create one reader for the entire lifetime of your
// program, and acquire a new searcher for every single request.
//
// In the code below, we rely on the 'ON_COMMIT' policy: the reader
// will reload the index automatically after each commit.
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()?;
// We now need to acquire a searcher. // We now need to acquire a searcher.
//
// A searcher points to snapshotted, immutable version of the index.
//
// Some search experience might require more than // Some search experience might require more than
// one query. Using the same searcher ensures that all of these queries will run on the // one query.
// same version of the index. //
// The searcher ensures that we get to work
// with a consistent version of the index.
// //
// Acquiring a `searcher` is very cheap. // Acquiring a `searcher` is very cheap.
// //
// You should acquire a searcher every time you start processing a request and // You should acquire a searcher every time you
// start processing a request and
// and release it right after your query is finished. // and release it right after your query is finished.
let searcher = reader.searcher(); let searcher = index.searcher();
// ### Query // ### Query
@@ -234,6 +224,7 @@ fn main() -> tantivy::Result<()> {
// Since the body field was not configured as stored, // Since the body field was not configured as stored,
// the document returned will only contain // the document returned will only contain
// a title. // a title.
for (_score, doc_address) in top_docs { for (_score, doc_address) in top_docs {
let retrieved_doc = searcher.doc(doc_address)?; let retrieved_doc = searcher.doc(doc_address)?;
println!("{}", schema.to_json(&retrieved_doc)); println!("{}", schema.to_json(&retrieved_doc));

View File

@@ -17,7 +17,7 @@ use tantivy::collector::{Collector, SegmentCollector};
use tantivy::fastfield::FastFieldReader; use tantivy::fastfield::FastFieldReader;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::Field; use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT}; use tantivy::schema::{Schema, FAST, INT_INDEXED, TEXT};
use tantivy::Index; use tantivy::Index;
use tantivy::SegmentReader; use tantivy::SegmentReader;
@@ -137,7 +137,7 @@ fn main() -> tantivy::Result<()> {
// products, and with a name, a description, and a price. // products, and with a name, a description, and a price.
let product_name = schema_builder.add_text_field("name", TEXT); let product_name = schema_builder.add_text_field("name", TEXT);
let product_description = schema_builder.add_text_field("description", TEXT); let product_description = schema_builder.add_text_field("description", TEXT);
let price = schema_builder.add_u64_field("price", INDEXED | FAST); let price = schema_builder.add_u64_field("price", INT_INDEXED | FAST);
let schema = schema_builder.build(); let schema = schema_builder.build();
// # Indexing documents // # Indexing documents
@@ -170,9 +170,9 @@ fn main() -> tantivy::Result<()> {
price => 5_200u64 price => 5_200u64
)); ));
index_writer.commit()?; index_writer.commit()?;
index.load_searchers()?;
let reader = index.reader()?; let searcher = index.searcher();
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![product_name, product_description]); let query_parser = QueryParser::for_index(&index, vec![product_name, product_description]);
// here we want to get a hit on the 'ken' in Frankenstein // here we want to get a hit on the 'ken' in Frankenstein

View File

@@ -91,9 +91,9 @@ fn main() -> tantivy::Result<()> {
increasing confidence in the success of my undertaking."# increasing confidence in the success of my undertaking."#
)); ));
index_writer.commit()?; index_writer.commit()?;
index.load_searchers()?;
let reader = index.reader()?; let searcher = index.searcher();
let searcher = reader.searcher();
// The query parser can interpret human queries. // The query parser can interpret human queries.
// Here, if the user does not specify which // Here, if the user does not specify which

View File

@@ -14,16 +14,12 @@ use tantivy::collector::TopDocs;
use tantivy::query::TermQuery; use tantivy::query::TermQuery;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::Index;
use tantivy::IndexReader;
// A simple helper function to fetch a single document // A simple helper function to fetch a single document
// given its id from our index. // given its id from our index.
// It will be helpful to check our work. // It will be helpful to check our work.
fn extract_doc_given_isbn( fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Option<Document>> {
reader: &IndexReader, let searcher = index.searcher();
isbn_term: &Term,
) -> tantivy::Result<Option<Document>> {
let searcher = reader.searcher();
// This is the simplest query you can think of. // This is the simplest query you can think of.
// It matches all of the documents containing a specific term. // It matches all of the documents containing a specific term.
@@ -89,12 +85,12 @@ fn main() -> tantivy::Result<()> {
isbn => "978-9176370711", isbn => "978-9176370711",
)); ));
index_writer.commit()?; index_writer.commit()?;
let reader = index.reader()?; index.load_searchers()?;
let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711"); let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");
// Oops our frankenstein doc seems mispelled // Oops our frankenstein doc seems mispelled
let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); let frankenstein_doc_misspelled = extract_doc_given_isbn(&index, &frankenstein_isbn)?.unwrap();
assert_eq!( assert_eq!(
schema.to_json(&frankenstein_doc_misspelled), schema.to_json(&frankenstein_doc_misspelled),
r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#, r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#,
@@ -133,10 +129,10 @@ fn main() -> tantivy::Result<()> {
// Everything happened as if the document was updated. // Everything happened as if the document was updated.
index_writer.commit()?; index_writer.commit()?;
// We reload our searcher to make our change available to clients. // We reload our searcher to make our change available to clients.
reader.reload()?; index.load_searchers()?;
// No more typo! // No more typo!
let frankenstein_new_doc = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); let frankenstein_new_doc = extract_doc_given_isbn(&index, &frankenstein_isbn)?.unwrap();
assert_eq!( assert_eq!(
schema.to_json(&frankenstein_new_doc), schema.to_json(&frankenstein_new_doc),
r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#, r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#,

View File

@@ -55,9 +55,9 @@ fn main() -> tantivy::Result<()> {
index_writer.commit()?; index_writer.commit()?;
let reader = index.reader()?; index.load_searchers()?;
let searcher = reader.searcher(); let searcher = index.searcher();
let mut facet_collector = FacetCollector::for_field(tags); let mut facet_collector = FacetCollector::for_field(tags);
facet_collector.add_facet("/pools"); facet_collector.add_facet("/pools");
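
The hunk stops right after `add_facet("/pools")`; the counting step that normally follows (and that the `FacetCollector` doc comments later in this compare spell out) looks roughly like the sketch below. The `"/pools"` path and the 5-element cutoff are illustrative:

```rust
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Facet, Field};
use tantivy::Searcher;

// Count documents under the "/pools" facet subtree and print the top children.
fn count_pools(searcher: &Searcher, tags: Field) -> tantivy::Result<()> {
    let mut facet_collector = FacetCollector::for_field(tags);
    facet_collector.add_facet("/pools");
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
    // Top 5 immediate children of "/pools" together with their document counts.
    let facets: Vec<(&Facet, u64)> = facet_counts.top_k("/pools", 5);
    for (facet, count) in facets {
        println!("{:?}: {}", facet, count);
    }
    Ok(())
}
```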

View File

@@ -1,43 +0,0 @@
// # Searching a range on an indexed int field.
//
// Below is an example of creating an indexed integer field in your schema
// You can use RangeQuery to get a Count of all occurrences in a given range.
#[macro_use]
extern crate tantivy;
use tantivy::collector::Count;
use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED};
use tantivy::Index;
use tantivy::Result;
fn run() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field
let mut schema_builder = Schema::builder();
// `INDEXED` is a short-hand to indicate that our field should be "searchable".
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let reader = index.reader()?;
{
let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
for year in 1950u64..2019u64 {
index_writer.add_document(doc!(year_field => year));
}
index_writer.commit()?;
// The index will be a range of years
}
reader.reload()?;
let searcher = reader.searcher();
// The end is excluded i.e. here we are searching up to 1969
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
// Uses a Count collector to sum the total number of docs in the range
let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(num_60s_books, 10);
Ok(())
}
fn main() {
run().unwrap()
}

View File

@@ -33,9 +33,9 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(title => "The modern Promotheus")); index_writer.add_document(doc!(title => "The modern Promotheus"));
index_writer.commit()?; index_writer.commit()?;
let reader = index.reader()?; index.load_searchers()?;
let searcher = reader.searcher(); let searcher = index.searcher();
// A tantivy index is actually a collection of segments. // A tantivy index is actually a collection of segments.
// Similarly, a searcher just wraps a list `segment_reader`. // Similarly, a searcher just wraps a list `segment_reader`.

View File

@@ -48,8 +48,9 @@ fn main() -> tantivy::Result<()> {
// ... // ...
index_writer.commit()?; index_writer.commit()?;
let reader = index.reader()?; index.load_searchers()?;
let searcher = reader.searcher();
let searcher = index.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, body]); let query_parser = QueryParser::for_index(&index, vec![title, body]);
let query = query_parser.parse_query("sycamore spring")?; let query = query_parser.parse_query("sycamore spring")?;

View File

@@ -96,9 +96,9 @@ fn main() -> tantivy::Result<()> {
index_writer.commit()?; index_writer.commit()?;
let reader = index.reader()?; index.load_searchers()?;
let searcher = reader.searcher(); let searcher = index.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, body]); let query_parser = QueryParser::for_index(&index, vec![title, body]);

View File

@@ -12,7 +12,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
schema_builder.add_text_field("title", TEXT | STORED); schema_builder.add_text_field("title", TEXT | STORED);
schema_builder.add_text_field("body", TEXT); schema_builder.add_text_field("body", TEXT);
schema_builder.add_u64_field("year", INDEXED); schema_builder.add_u64_field("year", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
// Let's assume we have a json-serialized document. // Let's assume we have a json-serialized document.
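
This example goes on to work from a JSON-serialized document; a short, hedged sketch of that step, assuming the `Schema::parse_document` helper (it is not visible in this hunk):

```rust
use tantivy::schema::{Schema, INDEXED, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT | STORED);
    schema_builder.add_text_field("body", TEXT);
    schema_builder.add_u64_field("year", INDEXED);
    let schema = schema_builder.build();

    // A JSON object using the field names declared above.
    let json = r#"{
        "title": "Frankenstein",
        "body": "You will rejoice to hear that no disaster has accompanied...",
        "year": 1818
    }"#;
    // Assumed helper: turns the JSON object into a tantivy `Document`.
    let doc = schema.parse_document(json).expect("document matches the schema");
    println!("{}", schema.to_json(&doc));
}
```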

View File

@@ -40,8 +40,8 @@ use SegmentReader;
/// index_writer.commit().unwrap(); /// index_writer.commit().unwrap();
/// } /// }
/// ///
/// let reader = index.reader()?; /// index.load_searchers()?;
/// let searcher = reader.searcher(); /// let searcher = index.searcher();
/// ///
/// { /// {
/// let query_parser = QueryParser::for_index(&index, vec![title]); /// let query_parser = QueryParser::for_index(&index, vec![title]);

View File

@@ -122,16 +122,17 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// facet => Facet::from("/lang/en"), /// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/biography") /// facet => Facet::from("/category/biography")
/// )); /// ));
/// index_writer.commit()?; /// index_writer.commit().unwrap();
/// } /// }
/// let reader = index.reader()?; ///
/// let searcher = reader.searcher(); /// index.load_searchers()?;
/// let searcher = index.searcher();
/// ///
/// { /// {
/// let mut facet_collector = FacetCollector::for_field(facet); /// let mut facet_collector = FacetCollector::for_field(facet);
/// facet_collector.add_facet("/lang"); /// facet_collector.add_facet("/lang");
/// facet_collector.add_facet("/category"); /// facet_collector.add_facet("/category");
/// let facet_counts = searcher.search(&AllQuery, &facet_collector)?; /// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
/// ///
/// // This lists all of the facet counts /// // This lists all of the facet counts
/// let facets: Vec<(&Facet, u64)> = facet_counts /// let facets: Vec<(&Facet, u64)> = facet_counts
@@ -146,7 +147,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// { /// {
/// let mut facet_collector = FacetCollector::for_field(facet); /// let mut facet_collector = FacetCollector::for_field(facet);
/// facet_collector.add_facet("/category/fiction"); /// facet_collector.add_facet("/category/fiction");
/// let facet_counts = searcher.search(&AllQuery, &facet_collector)?; /// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
/// ///
/// // This lists all of the facet counts /// // This lists all of the facet counts
/// let facets: Vec<(&Facet, u64)> = facet_counts /// let facets: Vec<(&Facet, u64)> = facet_counts
@@ -162,7 +163,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// { /// {
/// let mut facet_collector = FacetCollector::for_field(facet); /// let mut facet_collector = FacetCollector::for_field(facet);
/// facet_collector.add_facet("/category/fiction"); /// facet_collector.add_facet("/category/fiction");
/// let facet_counts = searcher.search(&AllQuery, &facet_collector)?; /// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
/// ///
/// // This lists all of the facet counts /// // This lists all of the facet counts
/// let facets: Vec<(&Facet, u64)> = facet_counts.top_k("/category/fiction", 1); /// let facets: Vec<(&Facet, u64)> = facet_counts.top_k("/category/fiction", 1);
@@ -482,8 +483,8 @@ mod tests {
index_writer.add_document(doc); index_writer.add_document(doc);
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field); let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet(Facet::from("/top1")); facet_collector.add_facet(Facet::from("/top1"));
let counts = searcher.search(&AllQuery, &facet_collector).unwrap(); let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
@@ -531,8 +532,8 @@ mod tests {
facet_field => Facet::from_text(&"/subjects/B/b"), facet_field => Facet::from_text(&"/subjects/B/b"),
)); ));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 1); assert_eq!(searcher.num_docs(), 1);
let mut facet_collector = FacetCollector::for_field(facet_field); let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/subjects"); facet_collector.add_facet("/subjects");
@@ -578,7 +579,9 @@ mod tests {
index_writer.add_document(doc); index_writer.add_document(doc);
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field); let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/facet"); facet_collector.add_facet("/facet");
@@ -632,7 +635,8 @@ mod bench {
index_writer.add_document(doc); index_writer.add_document(doc);
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
b.iter(|| { b.iter(|| {
let searcher = index.searcher(); let searcher = index.searcher();
let facet_collector = FacetCollector::for_field(facet_field); let facet_collector = FacetCollector::for_field(facet_field);

View File

@@ -101,7 +101,8 @@ mod tests {
assert_eq!(index_writer.commit().unwrap(), 10u64); assert_eq!(index_writer.commit().unwrap(), 10u64);
} }
let searcher = index.reader().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64); let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64);
let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64); let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64);

View File

@@ -53,9 +53,9 @@ use tantivy::collector::{Count, TopDocs};
# index_writer.add_document(doc!( # index_writer.add_document(doc!(
# title => "The Diary of Muadib", # title => "The Diary of Muadib",
# )); # ));
# index_writer.commit()?; # index_writer.commit().unwrap();
# let reader = index.reader()?; # index.load_searchers()?;
# let searcher = reader.searcher(); # let searcher = index.searcher();
# let query_parser = QueryParser::for_index(&index, vec![title]); # let query_parser = QueryParser::for_index(&index, vec![title]);
# let query = query_parser.parse_query("diary")?; # let query = query_parser.parse_query("diary")?;
let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) = let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) =

View File

@@ -36,8 +36,7 @@ impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
let typed_fruit: Vec<TCollector::Fruit> = children let typed_fruit: Vec<TCollector::Fruit> = children
.into_iter() .into_iter()
.map(|untyped_fruit| { .map(|untyped_fruit| {
untyped_fruit untyped_fruit.downcast::<TCollector::Fruit>()
.downcast::<TCollector::Fruit>()
.map(|boxed_but_typed| *boxed_but_typed) .map(|boxed_but_typed| *boxed_but_typed)
.map_err(|_| { .map_err(|_| {
TantivyError::InvalidArgument("Failed to cast child fruit.".to_string()) TantivyError::InvalidArgument("Failed to cast child fruit.".to_string())
@@ -88,10 +87,7 @@ pub struct FruitHandle<TFruit: Fruit> {
impl<TFruit: Fruit> FruitHandle<TFruit> { impl<TFruit: Fruit> FruitHandle<TFruit> {
pub fn extract(self, fruits: &mut MultiFruit) -> TFruit { pub fn extract(self, fruits: &mut MultiFruit) -> TFruit {
let boxed_fruit = fruits.sub_fruits[self.pos].take().expect(""); let boxed_fruit = fruits.sub_fruits[self.pos].take().expect("");
*boxed_fruit *boxed_fruit.downcast::<TFruit>().map_err(|_| ()).expect("Failed to downcast collector fruit.")
.downcast::<TFruit>()
.map_err(|_| ())
.expect("Failed to downcast collector fruit.")
} }
} }
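
Both methods above hinge on turning a type-erased boxed fruit back into its concrete type, which is what the `Downcast_ref` commit in this compare is about (tantivy pulls in the `downcast-rs` crate for it). Purely as an illustration of the same pattern with the standard library, not tantivy code:

```rust
use std::any::Any;

fn main() {
    let boxed: Box<dyn Any> = Box::new(42usize);

    // Borrowing variant: inspect the value without consuming the box.
    assert_eq!(boxed.downcast_ref::<usize>(), Some(&42usize));

    // Consuming variant, like `*boxed_but_typed` above: recover the owned value,
    // or get the original box back untouched if the type does not match.
    let value: usize = *boxed
        .downcast::<usize>()
        .expect("the erased type should be usize");
    assert_eq!(value, 42);
}
```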
@@ -134,8 +130,8 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
/// index_writer.commit().unwrap(); /// index_writer.commit().unwrap();
/// } /// }
/// ///
/// let reader = index.reader()?; /// index.load_searchers()?;
/// let searcher = reader.searcher(); /// let searcher = index.searcher();
/// ///
/// let mut collectors = MultiCollector::new(); /// let mut collectors = MultiCollector::new();
/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2)); /// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
@@ -278,7 +274,8 @@ mod tests {
index_writer.add_document(doc!(text=>"abc")); index_writer.add_document(doc!(text=>"abc"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let term = Term::from_field_text(text, "abc"); let term = Term::from_field_text(text, "abc");
let query = TermQuery::new(term, IndexRecordOption::Basic); let query = TermQuery::new(term, IndexRecordOption::Basic);
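
Putting the pieces of this file together, a `MultiCollector` is used by registering each collector, searching once, and then pulling each result back out through its `FruitHandle`. A hedged sketch against the API shown in this hunk:

```rust
use tantivy::collector::{Count, MultiCollector, TopDocs};
use tantivy::query::Query;
use tantivy::{DocAddress, Score, Searcher};

// Run two collectors in a single pass over the matching documents, then
// extract each collector's fruit through the handle it returned.
fn top_docs_and_count(
    searcher: &Searcher,
    query: &dyn Query,
) -> tantivy::Result<(usize, Vec<(Score, DocAddress)>)> {
    let mut collectors = MultiCollector::new();
    let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
    let count_handle = collectors.add_collector(Count);
    let mut fruits = searcher.search(query, &collectors)?;
    let top_docs = top_docs_handle.extract(&mut fruits);
    let count = count_handle.extract(&mut fruits);
    Ok((count, top_docs))
}
```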

View File

@@ -23,16 +23,15 @@ use SegmentReader;
/// # use tantivy::schema::{Schema, Field, FAST, TEXT}; /// # use tantivy::schema::{Schema, Field, FAST, TEXT};
/// # use tantivy::{Index, Result, DocAddress}; /// # use tantivy::{Index, Result, DocAddress};
/// # use tantivy::query::{Query, QueryParser}; /// # use tantivy::query::{Query, QueryParser};
/// use tantivy::Searcher;
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;
/// ///
/// # fn main() -> tantivy::Result<()> { /// # fn main() {
/// # let mut schema_builder = Schema::builder(); /// # let mut schema_builder = Schema::builder();
/// # let title = schema_builder.add_text_field("title", TEXT); /// # let title = schema_builder.add_text_field("title", TEXT);
/// # let rating = schema_builder.add_u64_field("rating", FAST); /// # let rating = schema_builder.add_u64_field("rating", FAST);
/// # let schema = schema_builder.build(); /// # let schema = schema_builder.build();
/// # let index = Index::create_in_ram(schema); /// # let index = Index::create_in_ram(schema);
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; /// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
/// # index_writer.add_document(doc!( /// # index_writer.add_document(doc!(
/// # title => "The Name of the Wind", /// # title => "The Name of the Wind",
/// # rating => 92u64, /// # rating => 92u64,
@@ -40,14 +39,13 @@ use SegmentReader;
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64)); /// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64)); /// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64)); /// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
/// # index_writer.commit()?; /// # index_writer.commit().unwrap();
/// # let reader = index.reader()?; /// # index.load_searchers().unwrap();
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?; /// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary").unwrap();
/// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?; /// # let top_docs = docs_sorted_by_rating(&index, &query, rating).unwrap();
/// # assert_eq!(top_docs, /// # assert_eq!(top_docs,
/// # vec![(97u64, DocAddress(0u32, 1)), /// # vec![(97u64, DocAddress(0u32, 1)),
/// # (80u64, DocAddress(0u32, 3))]); /// # (80u64, DocAddress(0u32, 3))]);
/// # Ok(())
/// # } /// # }
/// # /// #
/// /// Searches the document matching the given query, and /// /// Searches the document matching the given query, and
@@ -55,9 +53,7 @@ use SegmentReader;
/// /// given in argument. /// /// given in argument.
/// /// /// ///
/// /// `field` is required to be a FAST field. /// /// `field` is required to be a FAST field.
/// fn docs_sorted_by_rating(searcher: &Searcher, /// fn docs_sorted_by_rating(index: &Index, query: &Query, sort_by_field: Field)
/// query: &Query,
/// sort_by_field: Field)
/// -> Result<Vec<(u64, DocAddress)>> { /// -> Result<Vec<(u64, DocAddress)>> {
/// ///
/// // This is where we build our collector! /// // This is where we build our collector!
@@ -65,7 +61,8 @@ use SegmentReader;
/// ///
/// // ... and here is our documents. Not this is a simple vec. /// // ... and here is our documents. Not this is a simple vec.
/// // The `u64` in the pair is the value of our fast field for each documents. /// // The `u64` in the pair is the value of our fast field for each documents.
/// searcher.search(query, &top_docs_by_rating) /// index.searcher()
/// .search(query, &top_docs_by_rating)
/// } /// }
/// ``` /// ```
pub struct TopDocsByField<T> { pub struct TopDocsByField<T> {
@@ -79,12 +76,6 @@ impl<T: FastValue + PartialOrd + Clone> TopDocsByField<T> {
/// The given field name must be a fast field, otherwise the collector have an error while /// The given field name must be a fast field, otherwise the collector have an error while
/// collecting results. /// collecting results.
/// ///
/// This constructor is crate-private. Clients are supposed to
/// build a `TopDocsByField` object using the `TopDocs` API.
///
/// e.g.:
/// `TopDocs::with_limit(2).order_by_field(sort_by_field)`
///
/// # Panics /// # Panics
/// The method panics if limit is 0 /// The method panics if limit is 0
pub(crate) fn new(field: Field, limit: usize) -> TopDocsByField<T> { pub(crate) fn new(field: Field, limit: usize) -> TopDocsByField<T> {
@@ -180,7 +171,7 @@ mod tests {
size => 16u64, size => 16u64,
)); ));
}); });
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let top_collector = TopDocs::with_limit(4).order_by_field(size); let top_collector = TopDocs::with_limit(4).order_by_field(size);
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap(); let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap();
@@ -207,7 +198,7 @@ mod tests {
size => 12u64, size => 12u64,
)); ));
}); });
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(Field(2)); let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(Field(2));
let segment_reader = searcher.segment_reader(0u32); let segment_reader = searcher.segment_reader(0u32);
top_collector top_collector
@@ -227,7 +218,7 @@ mod tests {
size => 12u64, size => 12u64,
)); ));
}); });
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let segment = searcher.segment_reader(0); let segment = searcher.segment_reader(0);
let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(size); let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(size);
assert_matches!( assert_matches!(
@@ -250,6 +241,8 @@ mod tests {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
doc_adder(&mut index_writer); doc_adder(&mut index_writer);
index_writer.commit().unwrap(); index_writer.commit().unwrap();
index.load_searchers().unwrap();
let query_parser = QueryParser::for_index(&index, vec![query_field]); let query_parser = QueryParser::for_index(&index, vec![query_field]);
let query = query_parser.parse_query(query).unwrap(); let query = query_parser.parse_query(query).unwrap();
(index, query) (index, query)
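
Condensed, the collector that this doc comment builds is created through `TopDocs::with_limit(..).order_by_field(..)` and yields `(fast field value, DocAddress)` pairs sorted by that value. A sketch limited to the calls visible in this hunk:

```rust
use tantivy::collector::TopDocs;
use tantivy::query::Query;
use tantivy::schema::Field;
use tantivy::{DocAddress, Searcher};

// Top 4 documents matching `query`, ordered by the u64 FAST field `size`,
// highest value first.
fn top_by_size(
    searcher: &Searcher,
    query: &dyn Query,
    size: Field,
) -> tantivy::Result<Vec<(u64, DocAddress)>> {
    let top_collector = TopDocs::with_limit(4).order_by_field(size);
    searcher.search(query, &top_collector)
}
```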

View File

@@ -51,8 +51,8 @@ use SegmentReader;
/// index_writer.commit().unwrap(); /// index_writer.commit().unwrap();
/// } /// }
/// ///
/// let reader = index.reader()?; /// index.load_searchers()?;
/// let searcher = reader.searcher(); /// let searcher = index.searcher();
/// ///
/// let query_parser = QueryParser::for_index(&index, vec![title]); /// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary")?; /// let query = query_parser.parse_query("diary")?;
@@ -148,6 +148,7 @@ mod tests {
index_writer.add_document(doc!(text_field=>"I like Droopy")); index_writer.add_document(doc!(text_field=>"I like Droopy"));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
index index
} }
@@ -158,8 +159,6 @@ mod tests {
let query_parser = QueryParser::for_index(&index, vec![field]); let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap(); let text_query = query_parser.parse_query("droopy tax").unwrap();
let score_docs: Vec<(Score, DocAddress)> = index let score_docs: Vec<(Score, DocAddress)> = index
.reader()
.unwrap()
.searcher() .searcher()
.search(&text_query, &TopDocs::with_limit(4)) .search(&text_query, &TopDocs::with_limit(4))
.unwrap(); .unwrap();
@@ -180,8 +179,6 @@ mod tests {
let query_parser = QueryParser::for_index(&index, vec![field]); let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap(); let text_query = query_parser.parse_query("droopy tax").unwrap();
let score_docs: Vec<(Score, DocAddress)> = index let score_docs: Vec<(Score, DocAddress)> = index
.reader()
.unwrap()
.searcher() .searcher()
.search(&text_query, &TopDocs::with_limit(2)) .search(&text_query, &TopDocs::with_limit(2))
.unwrap(); .unwrap();

View File

@@ -64,7 +64,7 @@ pub struct BitUnpacker<Data>
where where
Data: Deref<Target = [u8]>, Data: Deref<Target = [u8]>,
{ {
num_bits: u64, num_bits: usize,
mask: u64, mask: u64,
data: Data, data: Data,
} }
@@ -80,13 +80,13 @@ where
(1u64 << num_bits) - 1u64 (1u64 << num_bits) - 1u64
}; };
BitUnpacker { BitUnpacker {
num_bits: u64::from(num_bits), num_bits: num_bits as usize,
mask, mask,
data, data,
} }
} }
pub fn get(&self, idx: u64) -> u64 { pub fn get(&self, idx: usize) -> u64 {
if self.num_bits == 0 { if self.num_bits == 0 {
return 0u64; return 0u64;
} }
@@ -97,10 +97,10 @@ where
let addr = addr_in_bits >> 3; let addr = addr_in_bits >> 3;
let bit_shift = addr_in_bits & 7; let bit_shift = addr_in_bits & 7;
debug_assert!( debug_assert!(
addr + 8 <= data.len() as u64, addr + 8 <= data.len(),
"The fast field field should have been padded with 7 bytes." "The fast field field should have been padded with 7 bytes."
); );
let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[(addr as usize)..]); let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[addr..]);
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
val_shifted & mask val_shifted & mask
} }
@@ -129,7 +129,7 @@ mod test {
fn test_bitpacker_util(len: usize, num_bits: u8) { fn test_bitpacker_util(len: usize, num_bits: u8) {
let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits); let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits);
for (i, val) in vals.iter().enumerate() { for (i, val) in vals.iter().enumerate() {
assert_eq!(bitunpacker.get(i as u64), *val); assert_eq!(bitunpacker.get(i), *val);
} }
} }
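
The change above only switches the index type from `u64` to `usize`; the lookup arithmetic is untouched. For reference, that arithmetic can be reproduced with the standard library alone (a self-contained re-derivation for illustration; the crate itself reads the word with `byteorder`):

```rust
// Values are `num_bits` wide and packed back to back, so value `idx`
// starts at bit `idx * num_bits`.
fn get_bits(data: &[u8], idx: usize, num_bits: usize) -> u64 {
    if num_bits == 0 {
        return 0;
    }
    let mask = if num_bits == 64 {
        u64::MAX
    } else {
        (1u64 << num_bits) - 1
    };
    let addr_in_bits = idx * num_bits;
    let addr = addr_in_bits >> 3; // byte holding the first bit of the value
    let bit_shift = addr_in_bits & 7; // offset of that bit inside the byte
    // As the debug_assert in the diff notes, the buffer must be padded with
    // 7 extra bytes so that a full u64 can always be read here.
    let mut word_bytes = [0u8; 8];
    word_bytes.copy_from_slice(&data[addr..addr + 8]);
    let word = u64::from_le_bytes(word_bytes);
    (word >> bit_shift) & mask
}

fn main() {
    // Two 4-bit values (0x3, then 0xA) packed little-endian into one byte: 0xA3.
    let data = [0xA3u8, 0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(get_bits(&data, 0, 4), 0x3);
    assert_eq!(get_bits(&data, 1, 4), 0xA);
}
```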

View File

@@ -39,7 +39,7 @@ impl BinarySerializable for FileAddr {
/// A `CompositeWrite` is used to write a `CompositeFile`. /// A `CompositeWrite` is used to write a `CompositeFile`.
pub struct CompositeWrite<W = WritePtr> { pub struct CompositeWrite<W = WritePtr> {
write: CountingWriter<W>, write: CountingWriter<W>,
offsets: HashMap<FileAddr, u64>, offsets: HashMap<FileAddr, usize>,
} }
impl<W: Write> CompositeWrite<W> { impl<W: Write> CompositeWrite<W> {

View File

@@ -3,7 +3,7 @@ use std::io::Write;
pub struct CountingWriter<W> { pub struct CountingWriter<W> {
underlying: W, underlying: W,
written_bytes: u64, written_bytes: usize,
} }
impl<W: Write> CountingWriter<W> { impl<W: Write> CountingWriter<W> {
@@ -14,11 +14,11 @@ impl<W: Write> CountingWriter<W> {
} }
} }
pub fn written_bytes(&self) -> u64 { pub fn written_bytes(&self) -> usize {
self.written_bytes self.written_bytes
} }
pub fn finish(mut self) -> io::Result<(W, u64)> { pub fn finish(mut self) -> io::Result<(W, usize)> {
self.flush()?; self.flush()?;
Ok((self.underlying, self.written_bytes)) Ok((self.underlying, self.written_bytes))
} }
@@ -27,16 +27,10 @@ impl<W: Write> CountingWriter<W> {
impl<W: Write> Write for CountingWriter<W> { impl<W: Write> Write for CountingWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let written_size = self.underlying.write(buf)?; let written_size = self.underlying.write(buf)?;
self.written_bytes += written_size as u64; self.written_bytes += written_size;
Ok(written_size) Ok(written_size)
} }
fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
self.underlying.write_all(buf)?;
self.written_bytes += buf.len() as u64;
Ok(())
}
fn flush(&mut self) -> io::Result<()> { fn flush(&mut self) -> io::Result<()> {
self.underlying.flush() self.underlying.flush()
} }
@@ -54,8 +48,8 @@ mod test {
let mut counting_writer = CountingWriter::wrap(buffer); let mut counting_writer = CountingWriter::wrap(buffer);
let bytes = (0u8..10u8).collect::<Vec<u8>>(); let bytes = (0u8..10u8).collect::<Vec<u8>>();
counting_writer.write_all(&bytes).unwrap(); counting_writer.write_all(&bytes).unwrap();
let (w, len): (Vec<u8>, u64) = counting_writer.finish().unwrap(); let (w, len): (Vec<u8>, usize) = counting_writer.finish().unwrap();
assert_eq!(len, 10u64); assert_eq!(len, 10);
assert_eq!(w.len(), 10); assert_eq!(w.len(), 10);
} }
} }

View File

@@ -123,14 +123,15 @@ mod tests {
} }
} }
#[test] }
fn test_map_multithread() {
let result: Vec<usize> = Executor::multi_thread(3, "search-test") #[test]
.map(|i| Ok(i * 2), 0..10) fn test_map_multithread() {
.unwrap(); let result: Vec<usize> = Executor::multi_thread(3, "search-test")
assert_eq!(result.len(), 10); .map(|i| Ok(i * 2), 0..10)
for i in 0..10 { .unwrap();
assert_eq!(result[i], i * 2); assert_eq!(result.len(), 10);
} for i in 0..10 {
assert_eq!(result[i], i * 2);
} }
} }

View File

@@ -1,14 +1,19 @@
use super::pool::LeasedItem;
use super::pool::Pool;
use super::segment::create_segment; use super::segment::create_segment;
use super::segment::Segment; use super::segment::Segment;
use core::searcher::Searcher;
use core::Executor; use core::Executor;
use core::IndexMeta; use core::IndexMeta;
use core::SegmentId; use core::SegmentId;
use core::SegmentMeta; use core::SegmentMeta;
use core::SegmentReader;
use core::META_FILEPATH; use core::META_FILEPATH;
use directory::ManagedDirectory; use directory::ManagedDirectory;
#[cfg(feature = "mmap")] #[cfg(feature = "mmap")]
use directory::MmapDirectory; use directory::MmapDirectory;
use directory::INDEX_WRITER_LOCK; use directory::INDEX_WRITER_LOCK;
use directory::META_LOCK;
use directory::{Directory, RAMDirectory}; use directory::{Directory, RAMDirectory};
use error::DataCorruption; use error::DataCorruption;
use error::TantivyError; use error::TantivyError;
@@ -16,8 +21,6 @@ use indexer::index_writer::open_index_writer;
use indexer::index_writer::HEAP_SIZE_MIN; use indexer::index_writer::HEAP_SIZE_MIN;
use indexer::segment_updater::save_new_metas; use indexer::segment_updater::save_new_metas;
use num_cpus; use num_cpus;
use reader::IndexReader;
use reader::IndexReaderBuilder;
use schema::Field; use schema::Field;
use schema::FieldType; use schema::FieldType;
use schema::Schema; use schema::Schema;
@@ -25,6 +28,7 @@ use serde_json;
use std::borrow::BorrowMut; use std::borrow::BorrowMut;
use std::fmt; use std::fmt;
use std::path::Path; use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::Arc;
use tokenizer::BoxedTokenizer; use tokenizer::BoxedTokenizer;
use tokenizer::TokenizerManager; use tokenizer::TokenizerManager;
@@ -45,10 +49,11 @@ fn load_metas(directory: &Directory) -> Result<IndexMeta> {
} }
/// Search Index /// Search Index
#[derive(Clone)]
pub struct Index { pub struct Index {
directory: ManagedDirectory, directory: ManagedDirectory,
schema: Schema, schema: Schema,
num_searchers: Arc<AtomicUsize>,
searcher_pool: Arc<Pool<Searcher>>,
executor: Arc<Executor>, executor: Arc<Executor>,
tokenizers: TokenizerManager, tokenizers: TokenizerManager,
} }
@@ -106,6 +111,7 @@ impl Index {
} }
/// Opens or creates a new index in the provided directory /// Opens or creates a new index in the provided directory
#[cfg(feature = "mmap")]
pub fn open_or_create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> { pub fn open_or_create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> {
if Index::exists(&dir) { if Index::exists(&dir) {
let index = Index::open(dir)?; let index = Index::open(dir)?;
@@ -153,12 +159,16 @@ impl Index {
/// Creates a new index given a directory and an `IndexMeta`. /// Creates a new index given a directory and an `IndexMeta`.
fn create_from_metas(directory: ManagedDirectory, metas: &IndexMeta) -> Result<Index> { fn create_from_metas(directory: ManagedDirectory, metas: &IndexMeta) -> Result<Index> {
let schema = metas.schema.clone(); let schema = metas.schema.clone();
let n_cpus = num_cpus::get();
let index = Index { let index = Index {
directory, directory,
schema, schema,
num_searchers: Arc::new(AtomicUsize::new(n_cpus)),
searcher_pool: Arc::new(Pool::new()),
tokenizers: TokenizerManager::default(), tokenizers: TokenizerManager::default(),
executor: Arc::new(Executor::single_thread()), executor: Arc::new(Executor::single_thread()),
}; };
index.load_searchers()?;
Ok(index) Ok(index)
} }
@@ -188,22 +198,6 @@ impl Index {
} }
} }
/// Create a default `IndexReader` for the given index.
///
/// See [`Index.reader_builder()`](#method.reader_builder).
pub fn reader(&self) -> Result<IndexReader> {
self.reader_builder().try_into()
}
/// Create an `IndexReader` for the given index.
///
/// Most projects should create at most one reader for a given index.
/// This method is typically called only once per `Index` instance,
/// over the lifetime of the program.
pub fn reader_builder(&self) -> IndexReaderBuilder {
IndexReaderBuilder::new(self.clone())
}
/// Opens a new directory from an index path. /// Opens a new directory from an index path.
#[cfg(feature = "mmap")] #[cfg(feature = "mmap")]
pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> Result<Index> { pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
@@ -342,6 +336,53 @@ impl Index {
.map(|segment_meta| segment_meta.id()) .map(|segment_meta| segment_meta.id())
.collect()) .collect())
} }
/// Sets the number of searchers to use
///
/// Only works after the next call to `load_searchers`
pub fn set_num_searchers(&mut self, num_searchers: usize) {
self.num_searchers.store(num_searchers, Ordering::Release);
}
/// Update searchers so that they reflect the state of the last
/// `.commit()`.
///
/// If indexing happens in the same process as searching,
/// you most likely want to call `.load_searchers()` right after each
/// successful call to `.commit()`.
///
/// If indexing and searching happen in different processes, the way to
/// get the freshest `index` at all times is to watch `meta.json` and
/// call `load_searchers` whenever a change happens.
pub fn load_searchers(&self) -> Result<()> {
let _meta_lock = self.directory().acquire_lock(&META_LOCK)?;
let searchable_segments = self.searchable_segments()?;
let segment_readers: Vec<SegmentReader> = searchable_segments
.iter()
.map(SegmentReader::open)
.collect::<Result<_>>()?;
let schema = self.schema();
let num_searchers: usize = self.num_searchers.load(Ordering::Acquire);
let searchers = (0..num_searchers)
.map(|_| Searcher::new(schema.clone(), self.clone(), segment_readers.clone()))
.collect();
self.searcher_pool.publish_new_generation(searchers);
Ok(())
}
/// Returns a searcher
///
/// This method should be called every single time a search
/// query is performed.
/// The searchers are taken from a pool of `num_searchers` searchers.
/// If no searcher is available
/// this may block.
///
/// The same searcher must be used for a given query, as it ensures
/// the use of a consistent segment set.
pub fn searcher(&self) -> LeasedItem<Searcher> {
self.searcher_pool.acquire()
}
} }
impl fmt::Debug for Index { impl fmt::Debug for Index {
@@ -350,24 +391,29 @@ impl fmt::Debug for Index {
} }
} }
impl Clone for Index {
fn clone(&self) -> Index {
Index {
directory: self.directory.clone(),
schema: self.schema.clone(),
num_searchers: Arc::clone(&self.num_searchers),
searcher_pool: Arc::clone(&self.searcher_pool),
tokenizers: self.tokenizers.clone(),
executor: self.executor.clone(),
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use directory::RAMDirectory; use directory::RAMDirectory;
use schema::Field; use schema::{Schema, INT_INDEXED, TEXT};
use schema::{Schema, INDEXED, TEXT};
use std::path::PathBuf;
use std::thread;
use std::time::Duration;
use tempdir::TempDir;
use Index; use Index;
use IndexReader;
use IndexWriter;
use ReloadPolicy;
#[test] #[test]
fn test_indexer_for_field() { fn test_indexer_for_field() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let num_likes_field = schema_builder.add_u64_field("num_likes", INDEXED); let num_likes_field = schema_builder.add_u64_field("num_likes", INT_INDEXED);
let body_field = schema_builder.add_text_field("body", TEXT); let body_field = schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
@@ -425,109 +471,7 @@ mod tests {
fn throw_away_schema() -> Schema { fn throw_away_schema() -> Schema {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let _ = schema_builder.add_u64_field("num_likes", INDEXED); let _ = schema_builder.add_u64_field("num_likes", INT_INDEXED);
schema_builder.build() schema_builder.build()
} }
#[test]
fn test_index_on_commit_reload_policy() {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create_in_ram(schema);
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
test_index_on_commit_reload_policy_aux(field, &mut writer, &reader);
}
#[test]
fn test_index_on_commit_reload_policy_mmap() {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
writer.commit().unwrap();
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &mut writer, &reader);
}
#[test]
fn test_index_manual_policy_mmap() {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create_from_tempdir(schema).unwrap();
let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
writer.commit().unwrap();
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.commit().unwrap();
thread::sleep(Duration::from_millis(500));
assert_eq!(reader.searcher().num_docs(), 0);
reader.reload().unwrap();
assert_eq!(reader.searcher().num_docs(), 1);
}
#[test]
fn test_index_on_commit_reload_policy_different_directories() {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new("index").unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
let write_index = Index::create_in_dir(&tempdir_path, schema).unwrap();
let read_index = Index::open_in_dir(&tempdir_path).unwrap();
let reader = read_index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
let mut writer = write_index.writer_with_num_threads(1, 3_000_000).unwrap();
test_index_on_commit_reload_policy_aux(field, &mut writer, &reader);
}
fn test_index_on_commit_reload_policy_aux(
field: Field,
writer: &mut IndexWriter,
reader: &IndexReader,
) {
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.commit().unwrap();
let mut count = 0;
for _ in 0..100 {
count = reader.searcher().num_docs();
if count > 0 {
break;
}
thread::sleep(Duration::from_millis(100));
}
assert_eq!(count, 1);
writer.add_document(doc!(field=>2u64));
writer.commit().unwrap();
let mut count = 0;
for _ in 0..10 {
count = reader.searcher().num_docs();
if count > 1 {
break;
}
thread::sleep(Duration::from_millis(100));
}
assert_eq!(count, 2);
}
} }
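
The right-hand column restores the pre-`IndexReader` workflow added back in this hunk: `load_searchers()` rebuilds a pool of `Searcher`s after each commit, and `searcher()` leases one from that pool (it is returned on drop). A sketch of that round trip, using only calls added here (note `INT_INDEXED` is the right-column flag name; the left column uses `INDEXED`):

```rust
#[macro_use]
extern crate tantivy;

use tantivy::schema::{Schema, INT_INDEXED};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let num_likes = schema_builder.add_u64_field("num_likes", INT_INDEXED);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer_with_num_threads(1, 3_000_000)?;
    writer.add_document(doc!(num_likes => 1u64));
    writer.commit()?;

    // Refresh the searcher pool so the commit above becomes visible...
    index.load_searchers()?;
    // ...then lease a Searcher; it goes back into the pool when dropped.
    let searcher = index.searcher();
    assert_eq!(searcher.num_docs(), 1);
    Ok(())
}
```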

View File

@@ -2,6 +2,7 @@ mod executor;
pub mod index; pub mod index;
mod index_meta; mod index_meta;
mod inverted_index_reader; mod inverted_index_reader;
mod pool;
pub mod searcher; pub mod searcher;
mod segment; mod segment;
mod segment_component; mod segment_component;
@@ -24,7 +25,6 @@ pub use self::segment_reader::SegmentReader;
use std::path::PathBuf; use std::path::PathBuf;
lazy_static! { lazy_static! {
/// The meta file contains all the information about the list of segments and the schema /// The meta file contains all the information about the list of segments and the schema
/// of the index. /// of the index.
pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json"); pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");

View File

@@ -1,5 +1,5 @@
use crossbeam::crossbeam_channel::unbounded; use crossbeam::queue::MsQueue;
use crossbeam::{Receiver, RecvError, Sender}; use std::mem;
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
use std::sync::atomic::AtomicUsize; use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
@@ -10,52 +10,15 @@ pub struct GenerationItem<T> {
item: T, item: T,
} }
/// Queue implementation for the Object Pool below
/// Uses the unbounded Linked-List type queue from crossbeam-channel
/// Splits the Queue into sender and receiver
struct Queue<T> {
sender: Sender<T>,
receiver: Receiver<T>,
}
impl<T> Queue<T> {
fn new() -> Self {
let (s, r) = unbounded();
Queue {
sender: s,
receiver: r,
}
}
/// Sender trait returns a Result type, which is ignored.
/// The Result is not handled at the moment
fn push(&self, elem: T) {
self.sender
.send(elem)
.expect("Sending an item to crossbeam-queue shouldn't fail");
}
/// Relies on the underlying crossbeam-channel Receiver
/// to block on empty queue
fn pop(&self) -> Result<T, RecvError> {
self.receiver.recv()
}
}
/// An object pool
///
/// This is used in tantivy to create a pool of `Searcher`.
/// Objects are wrapped in a `LeasedItem` wrapper and are
/// released automatically back into the pool on `Drop`.
pub struct Pool<T> { pub struct Pool<T> {
queue: Arc<Queue<GenerationItem<T>>>, queue: Arc<MsQueue<GenerationItem<T>>>,
freshest_generation: AtomicUsize, freshest_generation: AtomicUsize,
next_generation: AtomicUsize, next_generation: AtomicUsize,
} }
impl<T> Pool<T> { impl<T> Pool<T> {
pub fn new() -> Pool<T> { pub fn new() -> Pool<T> {
let queue = Arc::new(Queue::new()); let queue = Arc::new(MsQueue::new());
Pool { Pool {
queue, queue,
freshest_generation: AtomicUsize::default(), freshest_generation: AtomicUsize::default(),
@@ -63,10 +26,6 @@ impl<T> Pool<T> {
} }
} }
/// Publishes a new generation of `Searcher`.
///
/// After publish, all new `Searcher` acquired will be
/// of the new generation.
pub fn publish_new_generation(&self, items: Vec<T>) { pub fn publish_new_generation(&self, items: Vec<T>) {
let next_generation = self.next_generation.fetch_add(1, Ordering::SeqCst) + 1; let next_generation = self.next_generation.fetch_add(1, Ordering::SeqCst) + 1;
for item in items { for item in items {
@@ -102,14 +61,10 @@ impl<T> Pool<T> {
self.freshest_generation.load(Ordering::Acquire) self.freshest_generation.load(Ordering::Acquire)
} }
/// Acquires a new searcher.
///
/// If no searcher is available, this method blocks until
/// a searcher is released.
pub fn acquire(&self) -> LeasedItem<T> { pub fn acquire(&self) -> LeasedItem<T> {
let generation = self.generation(); let generation = self.generation();
loop { loop {
let gen_item = self.queue.pop().unwrap(); let gen_item = self.queue.pop();
if gen_item.generation >= generation { if gen_item.generation >= generation {
return LeasedItem { return LeasedItem {
gen_item: Some(gen_item), gen_item: Some(gen_item),
@@ -125,7 +80,7 @@ impl<T> Pool<T> {
pub struct LeasedItem<T> { pub struct LeasedItem<T> {
gen_item: Option<GenerationItem<T>>, gen_item: Option<GenerationItem<T>>,
recycle_queue: Arc<Queue<GenerationItem<T>>>, recycle_queue: Arc<MsQueue<GenerationItem<T>>>,
} }
impl<T> Deref for LeasedItem<T> { impl<T> Deref for LeasedItem<T> {
@@ -152,9 +107,9 @@ impl<T> DerefMut for LeasedItem<T> {
impl<T> Drop for LeasedItem<T> { impl<T> Drop for LeasedItem<T> {
fn drop(&mut self) { fn drop(&mut self) {
if let Some(gen_item) = self.gen_item.take() { let gen_item: GenerationItem<T> = mem::replace(&mut self.gen_item, None)
self.recycle_queue.push(gen_item); .expect("Unwrapping a leased item should never fail");
} self.recycle_queue.push(gen_item);
} }
} }
@@ -162,7 +117,6 @@ impl<T> Drop for LeasedItem<T> {
mod tests { mod tests {
use super::Pool; use super::Pool;
use super::Queue;
use std::iter; use std::iter;
#[test] #[test]
@@ -179,47 +133,4 @@ mod tests {
assert_eq!(*pool.acquire(), 11); assert_eq!(*pool.acquire(), 11);
} }
} }
#[test]
fn test_queue() {
let q = Queue::new();
let elem = 5;
q.push(elem);
let res = q.pop();
assert_eq!(res.unwrap(), elem);
}
#[test]
fn test_pool_dont_panic_on_empty_pop() {
// When the object pool is exhausted, it shouldn't panic on pop()
use std::sync::Arc;
use std::{thread, time};
// Wrap the pool in an Arc, same way as its used in `core/index.rs`
let pool = Arc::new(Pool::new());
// clone pools outside the move scope of each new thread
let pool1 = Arc::clone(&pool);
let pool2 = Arc::clone(&pool);
let elements_for_pool = vec![1, 2];
pool.publish_new_generation(elements_for_pool);
let mut threads = vec![];
let sleep_dur = time::Duration::from_millis(10);
// spawn one more thread than there are elements in the pool
threads.push(thread::spawn(move || {
// leasing to make sure it's not dropped before sleep is called
let _leased_searcher = &pool.acquire();
thread::sleep(sleep_dur);
}));
threads.push(thread::spawn(move || {
// leasing to make sure it's not dropped before sleep is called
let _leased_searcher = &pool1.acquire();
thread::sleep(sleep_dur);
}));
threads.push(thread::spawn(move || {
// leasing to make sure it's not dropped before sleep is called
let _leased_searcher = &pool2.acquire();
thread::sleep(sleep_dur);
}));
}
} }
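The doc comments above describe the contract of the searcher pool: items are published per generation, `acquire` blocks until an item of the freshest generation is available, and a leased item goes back into the pool when it is dropped. The following standalone sketch reproduces that contract with only `std` types (the real pool uses a lock-free queue and atomics); `SimplePool` and `Lease` are illustrative names, not tantivy APIs.

use std::collections::VecDeque;
use std::ops::Deref;
use std::sync::{Arc, Condvar, Mutex};

struct State<T> {
    queue: VecDeque<(usize, T)>, // (generation, item)
    freshest_generation: usize,
}

struct Inner<T> {
    state: Mutex<State<T>>,
    available: Condvar,
}

struct SimplePool<T> {
    inner: Arc<Inner<T>>,
}

impl<T> Clone for SimplePool<T> {
    fn clone(&self) -> Self {
        SimplePool { inner: Arc::clone(&self.inner) }
    }
}

struct Lease<T> {
    slot: Option<(usize, T)>,
    pool: SimplePool<T>,
}

impl<T> SimplePool<T> {
    fn new() -> Self {
        SimplePool {
            inner: Arc::new(Inner {
                state: Mutex::new(State { queue: VecDeque::new(), freshest_generation: 0 }),
                available: Condvar::new(),
            }),
        }
    }

    // After publishing, every new lease comes from the latest generation.
    fn publish_new_generation(&self, items: Vec<T>) {
        let mut state = self.inner.state.lock().unwrap();
        state.freshest_generation += 1;
        let generation = state.freshest_generation;
        state.queue.extend(items.into_iter().map(|item| (generation, item)));
        self.inner.available.notify_all();
    }

    // Blocks until an item of the freshest generation is available.
    fn acquire(&self) -> Lease<T> {
        let mut state = self.inner.state.lock().unwrap();
        loop {
            let freshest = state.freshest_generation;
            // Items from stale generations are simply dropped here,
            // just like in the loop of `Pool::acquire` above.
            while let Some((generation, item)) = state.queue.pop_front() {
                if generation >= freshest {
                    return Lease { slot: Some((generation, item)), pool: self.clone() };
                }
            }
            state = self.inner.available.wait(state).unwrap();
        }
    }
}

impl<T> Deref for Lease<T> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.slot.as_ref().unwrap().1
    }
}

impl<T> Drop for Lease<T> {
    fn drop(&mut self) {
        // The leased item goes back into the pool automatically.
        if let Some(slot) = self.slot.take() {
            let mut state = self.pool.inner.state.lock().unwrap();
            state.queue.push_back(slot);
            self.pool.inner.available.notify_all();
        }
    }
}

fn main() {
    let pool = SimplePool::new();
    pool.publish_new_generation(vec![10, 11]);
    assert_eq!(*pool.acquire(), 10);
    pool.publish_new_generation(vec![20]);
    // The stale generation is skipped once a newer one has been published.
    assert_eq!(*pool.acquire(), 20);
}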

View File

@@ -19,7 +19,7 @@ pub struct SegmentId(Uuid);
#[cfg(test)] #[cfg(test)]
lazy_static! { lazy_static! {
static ref AUTO_INC_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::default(); static ref AUTO_INC_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::default();
static ref ZERO_ARRAY: [u8; 8] = [0u8; 8]; static ref EMPTY_ARR: [u8; 8] = [0u8; 8];
} }
// During tests, we generate the segment id in an autoincrement manner // During tests, we generate the segment id in an autoincrement manner
@@ -30,7 +30,7 @@ lazy_static! {
#[cfg(test)] #[cfg(test)]
fn create_uuid() -> Uuid { fn create_uuid() -> Uuid {
let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst); let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*ZERO_ARRAY).unwrap() Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*EMPTY_ARR).unwrap()
} }
#[cfg(not(test))] #[cfg(not(test))]

View File

@@ -477,7 +477,9 @@ mod test {
// ok, now we should have a deleted doc // ok, now we should have a deleted doc
index_writer2.commit().unwrap(); index_writer2.commit().unwrap();
} }
let searcher = index.reader().unwrap().searcher();
index.load_searchers().unwrap();
let searcher = index.searcher();
let docs: Vec<DocId> = searcher.segment_reader(0).doc_ids_alive().collect(); let docs: Vec<DocId> = searcher.segment_reader(0).doc_ids_alive().collect();
assert_eq!(vec![0u32, 2u32], docs); assert_eq!(vec![0u32, 2u32], docs);
} }

View File

@@ -1,8 +1,6 @@
use directory::directory_lock::Lock; use directory::directory_lock::Lock;
use directory::error::LockError; use directory::error::LockError;
use directory::error::{DeleteError, OpenReadError, OpenWriteError}; use directory::error::{DeleteError, OpenReadError, OpenWriteError};
use directory::WatchCallback;
use directory::WatchHandle;
use directory::{ReadOnlySource, WritePtr}; use directory::{ReadOnlySource, WritePtr};
use std::fmt; use std::fmt;
use std::io; use std::io;
@@ -189,22 +187,6 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
} }
} }
} }
/// Registers a callback that will be called whenever a change to `meta.json`
/// made via the `atomic_write` API is detected.
///
/// The behavior when using `.watch()` on a file using `.open_write(...)` is, on the other
/// hand, undefined.
///
/// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
/// required to keep it alive.
/// It does not override previous callbacks. When the file is modified, all callbacks that are
/// registered (and whose `WatchHandle` is still alive) are triggered.
///
/// Internally, tantivy only uses this API to detect new commits to implement the
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
/// `OnCommit` `ReloadPolicy` from working properly.
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle;
} }
/// DirectoryClone /// DirectoryClone
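As a usage illustration of the `watch` contract removed above, here is a small sketch mirroring the `test_watch` helper that appears later in this diff. It assumes a tantivy build that still exposes `Directory::watch` and the `WatchCallback`/`WatchHandle` types shown on the left-hand side of this hunk (the 0.9 series); the callback stays registered only while the handle is alive, and it fires on atomic writes to `meta.json`.

use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

use tantivy::directory::{Directory, RAMDirectory};

fn main() -> std::io::Result<()> {
    let mut directory = RAMDirectory::create();
    let counter: Arc<AtomicUsize> = Default::default();
    let counter_clone = counter.clone();

    // The callback stays registered only while `_handle` is alive.
    let _handle = directory.watch(Box::new(move || {
        counter_clone.fetch_add(1, Ordering::SeqCst);
    }));

    // An atomic write to meta.json triggers every registered callback.
    directory.atomic_write(Path::new("meta.json"), b"{}")?;

    // Callbacks are dispatched from a separate thread, so poll briefly.
    for _ in 0..100 {
        if counter.load(Ordering::SeqCst) == 1 {
            break;
        }
        thread::sleep(Duration::from_millis(10));
    }
    assert_eq!(counter.load(Ordering::SeqCst), 1);
    Ok(())
}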

View File

@@ -43,7 +43,7 @@ lazy_static! {
is_blocking: false is_blocking: false
}; };
/// The meta lock file is here to protect the segment files being opened by /// The meta lock file is here to protect the segment files being opened by
/// `IndexReader::reload()` from being garbage collected. /// `.load_searchers()` from being garbage collected.
/// It makes it possible for another process to safely consume /// It makes it possible for another process to safely consume
/// our index while it is being written. Ideally, we would have preferred `RWLock` semantics /// our index while it is being written. Ideally, we would have preferred `RWLock` semantics
/// here, but it is difficult to achieve on Windows. /// here, but it is difficult to achieve on Windows.
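For illustration, the sketch below takes the meta lock before opening segment files, as the comment above describes. It assumes that `META_LOCK` and the `Directory::acquire_lock` method shown elsewhere in this diff are re-exported from `tantivy::directory` (the exact path may differ between versions); the guard returned by `acquire_lock` is what keeps a garbage-collection pass in another process from deleting the files in the meantime.

use tantivy::directory::{Directory, RAMDirectory, META_LOCK};

fn main() {
    let directory = RAMDirectory::create();
    // While `_meta_guard` is alive, a concurrent garbage collection in
    // another process will not delete the files we are about to open.
    let _meta_guard = directory
        .acquire_lock(&META_LOCK)
        .expect("failed to acquire the meta lock");
    // ... open segment files here; the lock file is removed when the guard drops ...
}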

View File

@@ -73,14 +73,6 @@ pub enum OpenDirectoryError {
DoesNotExist(PathBuf), DoesNotExist(PathBuf),
/// The path exists but is not a directory. /// The path exists but is not a directory.
NotADirectory(PathBuf), NotADirectory(PathBuf),
/// IoError
IoError(io::Error),
}
impl From<io::Error> for OpenDirectoryError {
fn from(io_err: io::Error) -> Self {
OpenDirectoryError::IoError(io_err)
}
} }
impl fmt::Display for OpenDirectoryError { impl fmt::Display for OpenDirectoryError {
@@ -92,11 +84,6 @@ impl fmt::Display for OpenDirectoryError {
OpenDirectoryError::NotADirectory(ref path) => { OpenDirectoryError::NotADirectory(ref path) => {
write!(f, "the path '{:?}' exists but is not a directory", path) write!(f, "the path '{:?}' exists but is not a directory", path)
} }
OpenDirectoryError::IoError(ref err) => write!(
f,
"IOError while trying to open/create the directory. {:?}",
err
),
} }
} }
} }

View File

@@ -4,7 +4,6 @@ use directory::DirectoryLock;
use directory::Lock; use directory::Lock;
use directory::META_LOCK; use directory::META_LOCK;
use directory::{ReadOnlySource, WritePtr}; use directory::{ReadOnlySource, WritePtr};
use directory::{WatchCallback, WatchHandle};
use error::DataCorruption; use error::DataCorruption;
use serde_json; use serde_json;
use std::collections::HashSet; use std::collections::HashSet;
@@ -242,10 +241,6 @@ impl Directory for ManagedDirectory {
fn acquire_lock(&self, lock: &Lock) -> result::Result<DirectoryLock, LockError> { fn acquire_lock(&self, lock: &Lock) -> result::Result<DirectoryLock, LockError> {
self.directory.acquire_lock(lock) self.directory.acquire_lock(lock)
} }
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
self.directory.watch(watch_callback)
}
} }
impl Clone for ManagedDirectory { impl Clone for ManagedDirectory {

View File

@@ -1,25 +1,18 @@
extern crate fs2; extern crate fs2;
extern crate notify;
use self::fs2::FileExt; use self::fs2::FileExt;
use self::notify::RawEvent;
use self::notify::RecursiveMode;
use self::notify::Watcher;
use atomicwrites; use atomicwrites;
use common::make_io_err; use common::make_io_err;
use core::META_FILEPATH;
use directory::error::LockError; use directory::error::LockError;
use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError}; use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use directory::read_only_source::BoxedData; use directory::shared_vec_slice::SharedVecSlice;
use directory::Directory; use directory::Directory;
use directory::DirectoryLock; use directory::DirectoryLock;
use directory::Lock; use directory::Lock;
use directory::ReadOnlySource; use directory::ReadOnlySource;
use directory::WatchCallback;
use directory::WatchCallbackList;
use directory::WatchHandle;
use directory::WritePtr; use directory::WritePtr;
use memmap::Mmap; use fst::raw::MmapReadOnly;
use std::collections::hash_map::Entry as HashMapEntry;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::From; use std::convert::From;
use std::fmt; use std::fmt;
@@ -29,17 +22,14 @@ use std::io::{self, Seek, SeekFrom};
use std::io::{BufWriter, Read, Write}; use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::result; use std::result;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::sync::Arc; use std::sync::Arc;
use std::sync::Mutex;
use std::sync::RwLock; use std::sync::RwLock;
use std::sync::Weak;
use std::thread;
use tempdir::TempDir; use tempdir::TempDir;
/// Returns None iff the file exists, can be read, but is empty (and hence /// Returns None iff the file exists, can be read, but is empty (and hence
/// cannot be mmapped) /// cannot be mmapped).
fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> { ///
fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
let file = File::open(full_path).map_err(|e| { let file = File::open(full_path).map_err(|e| {
if e.kind() == io::ErrorKind::NotFound { if e.kind() == io::ErrorKind::NotFound {
OpenReadError::FileDoesNotExist(full_path.to_owned()) OpenReadError::FileDoesNotExist(full_path.to_owned())
@@ -58,7 +48,7 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
return Ok(None); return Ok(None);
} }
unsafe { unsafe {
memmap::Mmap::map(&file) MmapReadOnly::open(&file)
.map(Some) .map(Some)
.map_err(|e| From::from(IOError::with_path(full_path.to_owned(), e))) .map_err(|e| From::from(IOError::with_path(full_path.to_owned(), e)))
} }
@@ -81,7 +71,7 @@ pub struct CacheInfo {
struct MmapCache { struct MmapCache {
counters: CacheCounters, counters: CacheCounters,
cache: HashMap<PathBuf, Weak<BoxedData>>, cache: HashMap<PathBuf, MmapReadOnly>,
} }
impl Default for MmapCache { impl Default for MmapCache {
@@ -94,7 +84,12 @@ impl Default for MmapCache {
} }
impl MmapCache { impl MmapCache {
fn get_info(&self) -> CacheInfo { /// Removes a `MmapReadOnly` entry from the mmap cache.
fn discard_from_cache(&mut self, full_path: &Path) -> bool {
self.cache.remove(full_path).is_some()
}
fn get_info(&mut self) -> CacheInfo {
let paths: Vec<PathBuf> = self.cache.keys().cloned().collect(); let paths: Vec<PathBuf> = self.cache.keys().cloned().collect();
CacheInfo { CacheInfo {
counters: self.counters.clone(), counters: self.counters.clone(),
@@ -102,105 +97,23 @@ impl MmapCache {
} }
} }
fn remove_weak_ref(&mut self) { fn get_mmap(&mut self, full_path: &Path) -> Result<Option<MmapReadOnly>, OpenReadError> {
let keys_to_remove: Vec<PathBuf> = self Ok(match self.cache.entry(full_path.to_owned()) {
.cache HashMapEntry::Occupied(occupied_entry) => {
.iter() let mmap = occupied_entry.get();
.filter(|(_, mmap_weakref)| mmap_weakref.upgrade().is_none())
.map(|(key, _)| key.clone())
.collect();
for key in keys_to_remove {
self.cache.remove(&key);
}
}
// Returns None if the file exists but has a len of 0 (and hence is not mmappable).
fn get_mmap(&mut self, full_path: &Path) -> Result<Option<Arc<BoxedData>>, OpenReadError> {
if let Some(mmap_weak) = self.cache.get(full_path) {
if let Some(mmap_arc) = mmap_weak.upgrade() {
self.counters.hit += 1; self.counters.hit += 1;
return Ok(Some(mmap_arc)); Some(mmap.clone())
} }
} HashMapEntry::Vacant(vacant_entry) => {
self.cache.remove(full_path); self.counters.miss += 1;
self.counters.miss += 1; if let Some(mmap) = open_mmap(full_path)? {
Ok(if let Some(mmap) = open_mmap(full_path)? { vacant_entry.insert(mmap.clone());
let mmap_arc: Arc<BoxedData> = Arc::new(Box::new(mmap)); Some(mmap)
let mmap_weak = Arc::downgrade(&mmap_arc); } else {
self.cache.insert(full_path.to_owned(), mmap_weak); None
Some(mmap_arc)
} else {
None
})
}
}
struct InnerWatcherWrapper {
_watcher: Mutex<notify::RecommendedWatcher>,
watcher_router: WatchCallbackList,
}
impl InnerWatcherWrapper {
pub fn new(path: &Path) -> Result<(Self, Receiver<notify::RawEvent>), notify::Error> {
let (tx, watcher_recv): (Sender<RawEvent>, Receiver<RawEvent>) = channel();
// We need to initialize the
let mut watcher = notify::raw_watcher(tx)?;
watcher.watch(path, RecursiveMode::Recursive)?;
let inner = InnerWatcherWrapper {
_watcher: Mutex::new(watcher),
watcher_router: Default::default(),
};
Ok((inner, watcher_recv))
}
}
#[derive(Clone)]
pub(crate) struct WatcherWrapper {
inner: Arc<InnerWatcherWrapper>,
}
impl WatcherWrapper {
pub fn new(path: &Path) -> Result<Self, OpenDirectoryError> {
let (inner, watcher_recv) = InnerWatcherWrapper::new(path).map_err(|err| match err {
notify::Error::PathNotFound => OpenDirectoryError::DoesNotExist(path.to_owned()),
_ => {
panic!("Unknown error while starting watching directory {:?}", path);
}
})?;
let watcher_wrapper = WatcherWrapper {
inner: Arc::new(inner),
};
let watcher_wrapper_clone = watcher_wrapper.clone();
thread::Builder::new()
.name("meta-file-watch-thread".to_string())
.spawn(move || {
loop {
match watcher_recv.recv().map(|evt| evt.path) {
Ok(Some(changed_path)) => {
// ... Actually subject to false positive.
// We might want to be more accurate than this at one point.
if let Some(filename) = changed_path.file_name() {
if filename == *META_FILEPATH {
watcher_wrapper_clone.inner.watcher_router.broadcast();
}
}
}
Ok(None) => {
// not an event we are interested in.
}
Err(_e) => {
// the watch send channel was dropped
break;
}
}
} }
}) }
.expect("Failed to spawn thread to watch meta.json"); })
Ok(watcher_wrapper)
}
pub fn watch(&mut self, watch_callback: WatchCallback) -> WatchHandle {
self.inner.watcher_router.subscribe(watch_callback)
} }
} }
@@ -218,62 +131,31 @@ impl WatcherWrapper {
/// On Windows the semantics are again different. /// On Windows the semantics are again different.
#[derive(Clone)] #[derive(Clone)]
pub struct MmapDirectory { pub struct MmapDirectory {
inner: Arc<MmapDirectoryInner>,
}
struct MmapDirectoryInner {
root_path: PathBuf, root_path: PathBuf,
mmap_cache: RwLock<MmapCache>, mmap_cache: Arc<RwLock<MmapCache>>,
_temp_directory: Option<TempDir>, _temp_directory: Arc<Option<TempDir>>,
watcher: RwLock<WatcherWrapper>,
}
impl MmapDirectoryInner {
fn new(
root_path: PathBuf,
temp_directory: Option<TempDir>,
) -> Result<MmapDirectoryInner, OpenDirectoryError> {
let watch_wrapper = WatcherWrapper::new(&root_path)?;
let mmap_directory_inner = MmapDirectoryInner {
root_path,
mmap_cache: Default::default(),
_temp_directory: temp_directory,
watcher: RwLock::new(watch_wrapper),
};
Ok(mmap_directory_inner)
}
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
let mut wlock = self.watcher.write().unwrap();
wlock.watch(watch_callback)
}
} }
impl fmt::Debug for MmapDirectory { impl fmt::Debug for MmapDirectory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "MmapDirectory({:?})", self.inner.root_path) write!(f, "MmapDirectory({:?})", self.root_path)
} }
} }
impl MmapDirectory { impl MmapDirectory {
fn new(
root_path: PathBuf,
temp_directory: Option<TempDir>,
) -> Result<MmapDirectory, OpenDirectoryError> {
let inner = MmapDirectoryInner::new(root_path, temp_directory)?;
Ok(MmapDirectory {
inner: Arc::new(inner),
})
}
/// Creates a new MmapDirectory in a temporary directory. /// Creates a new MmapDirectory in a temporary directory.
/// ///
/// This is mostly useful to test the MmapDirectory itself. /// This is mostly useful to test the MmapDirectory itself.
/// For your unit tests, prefer the RAMDirectory. /// For your unit tests, prefer the RAMDirectory.
pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> { pub fn create_from_tempdir() -> io::Result<MmapDirectory> {
let tempdir = TempDir::new("index").map_err(OpenDirectoryError::IoError)?; let tempdir = TempDir::new("index")?;
let tempdir_path = PathBuf::from(tempdir.path()); let tempdir_path = PathBuf::from(tempdir.path());
MmapDirectory::new(tempdir_path, Some(tempdir)) let directory = MmapDirectory {
root_path: tempdir_path,
mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
_temp_directory: Arc::new(Some(tempdir)),
};
Ok(directory)
} }
/// Opens a MmapDirectory in a directory. /// Opens a MmapDirectory in a directory.
@@ -291,14 +173,18 @@ impl MmapDirectory {
directory_path, directory_path,
))) )))
} else { } else {
Ok(MmapDirectory::new(PathBuf::from(directory_path), None)?) Ok(MmapDirectory {
root_path: PathBuf::from(directory_path),
mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
_temp_directory: Arc::new(None),
})
} }
} }
/// Joins a relative_path to the directory `root_path` /// Joins a relative_path to the directory `root_path`
/// to create a proper complete `filepath`. /// to create a proper complete `filepath`.
fn resolve_path(&self, relative_path: &Path) -> PathBuf { fn resolve_path(&self, relative_path: &Path) -> PathBuf {
self.inner.root_path.join(relative_path) self.root_path.join(relative_path)
} }
/// Sync the root directory. /// Sync the root directory.
@@ -323,7 +209,7 @@ impl MmapDirectory {
.custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS); .custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
} }
let fd = open_opts.open(&self.inner.root_path)?; let fd = open_opts.open(&self.root_path)?;
fd.sync_all()?; fd.sync_all()?;
Ok(()) Ok(())
} }
@@ -333,15 +219,9 @@ impl MmapDirectory {
/// ///
/// The `MmapDirectory` embeds an `MmapCache` /// The `MmapDirectory` embeds an `MmapCache`
/// to avoid multiplying the `mmap` system calls. /// to avoid multiplying the `mmap` system calls.
pub fn get_cache_info(&self) -> CacheInfo { pub fn get_cache_info(&mut self) -> CacheInfo {
self.inner self.mmap_cache
.mmap_cache
.write() .write()
.expect("mmap cache lock is poisoned")
.remove_weak_ref();
self.inner
.mmap_cache
.read()
.expect("Mmap cache lock is poisoned.") .expect("Mmap cache lock is poisoned.")
.get_info() .get_info()
} }
@@ -394,7 +274,7 @@ impl Directory for MmapDirectory {
debug!("Open Read {:?}", path); debug!("Open Read {:?}", path);
let full_path = self.resolve_path(path); let full_path = self.resolve_path(path);
let mut mmap_cache = self.inner.mmap_cache.write().map_err(|_| { let mut mmap_cache = self.mmap_cache.write().map_err(|_| {
let msg = format!( let msg = format!(
"Failed to acquired write lock \ "Failed to acquired write lock \
on mmap cache while reading {:?}", on mmap cache while reading {:?}",
@@ -402,34 +282,11 @@ impl Directory for MmapDirectory {
); );
IOError::with_path(path.to_owned(), make_io_err(msg)) IOError::with_path(path.to_owned(), make_io_err(msg))
})?; })?;
Ok(mmap_cache Ok(mmap_cache
.get_mmap(&full_path)? .get_mmap(&full_path)?
.map(ReadOnlySource::from) .map(ReadOnlySource::Mmap)
.unwrap_or_else(ReadOnlySource::empty)) .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())))
}
/// Any entry associated to the path in the mmap will be
/// removed before the file is deleted.
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
debug!("Deleting file {:?}", path);
let full_path = self.resolve_path(path);
match fs::remove_file(&full_path) {
Ok(_) => self
.sync_directory()
.map_err(|e| IOError::with_path(path.to_owned(), e).into()),
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
Err(DeleteError::FileDoesNotExist(path.to_owned()))
} else {
Err(IOError::with_path(path.to_owned(), e).into())
}
}
}
}
fn exists(&self, path: &Path) -> bool {
let full_path = self.resolve_path(path);
full_path.exists()
} }
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> { fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
@@ -462,6 +319,44 @@ impl Directory for MmapDirectory {
Ok(BufWriter::new(Box::new(writer))) Ok(BufWriter::new(Box::new(writer)))
} }
/// Any entry associated to the path in the mmap will be
/// removed before the file is deleted.
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
debug!("Deleting file {:?}", path);
let full_path = self.resolve_path(path);
let mut mmap_cache = self.mmap_cache.write().map_err(|_| {
let msg = format!(
"Failed to acquired write lock \
on mmap cache while deleting {:?}",
path
);
IOError::with_path(path.to_owned(), make_io_err(msg))
})?;
mmap_cache.discard_from_cache(path);
// Removing the entry in the MMap cache.
// The munmap will appear on Drop,
// when the last reference is gone.
mmap_cache.cache.remove(&full_path);
match fs::remove_file(&full_path) {
Ok(_) => self
.sync_directory()
.map_err(|e| IOError::with_path(path.to_owned(), e).into()),
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
Err(DeleteError::FileDoesNotExist(path.to_owned()))
} else {
Err(IOError::with_path(path.to_owned(), e).into())
}
}
}
}
fn exists(&self, path: &Path) -> bool {
let full_path = self.resolve_path(path);
full_path.exists()
}
fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> { fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
let full_path = self.resolve_path(path); let full_path = self.resolve_path(path);
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@@ -508,10 +403,6 @@ impl Directory for MmapDirectory {
_file: file, _file: file,
}))) })))
} }
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
self.inner.watch(watch_callback)
}
} }
#[cfg(test)] #[cfg(test)]
@@ -521,13 +412,6 @@ mod tests {
// The following tests are specific to the MmapDirectory // The following tests are specific to the MmapDirectory
use super::*; use super::*;
use schema::{Schema, SchemaBuilder, TEXT};
use std::fs;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;
use Index;
use ReloadPolicy;
#[test] #[test]
fn test_open_non_existant_path() { fn test_open_non_existant_path() {
@@ -552,7 +436,7 @@ mod tests {
#[test] #[test]
fn test_cache() { fn test_cache() {
let content = b"abc"; let content = "abc".as_bytes();
// here we test if the cache releases // here we test if the cache releases
// mmaps correctly. // mmaps correctly.
@@ -568,104 +452,26 @@ mod tests {
w.flush().unwrap(); w.flush().unwrap();
} }
} }
{
let mut keep = vec![]; for (i, path) in paths.iter().enumerate() {
for (i, path) in paths.iter().enumerate() { let _r = mmap_directory.open_read(path).unwrap();
keep.push(mmap_directory.open_read(path).unwrap()); assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1); }
} for path in paths.iter() {
assert_eq!(mmap_directory.get_cache_info().counters.hit, 0); let _r = mmap_directory.open_read(path).unwrap();
assert_eq!(mmap_directory.get_cache_info().counters.miss, 10); assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10); }
for path in paths.iter() { for (i, path) in paths.iter().enumerate() {
let _r = mmap_directory.open_read(path).unwrap(); mmap_directory.delete(path).unwrap();
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths); assert_eq!(
mmap_directory.get_cache_info().mmapped.len(),
num_paths - i - 1
);
}
} }
assert_eq!(mmap_directory.get_cache_info().counters.hit, 10); assert_eq!(mmap_directory.get_cache_info().counters.hit, 10);
assert_eq!(mmap_directory.get_cache_info().counters.miss, 10); assert_eq!(mmap_directory.get_cache_info().counters.miss, 10);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
for path in paths.iter() {
let _r = mmap_directory.open_read(path).unwrap();
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
}
assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
assert_eq!(mmap_directory.get_cache_info().counters.miss, 10);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
drop(keep);
for path in paths.iter() {
let _r = mmap_directory.open_read(path).unwrap();
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 1);
}
assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
assert_eq!(mmap_directory.get_cache_info().counters.miss, 20);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
for path in &paths {
mmap_directory.delete(path).unwrap();
}
assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
assert_eq!(mmap_directory.get_cache_info().counters.miss, 20);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
for path in paths.iter() {
assert!(mmap_directory.open_read(path).is_err());
}
assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
assert_eq!(mmap_directory.get_cache_info().counters.miss, 30);
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0); assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
} }
#[test]
fn test_watch_wrapper() {
let counter: Arc<AtomicUsize> = Default::default();
let counter_clone = counter.clone();
let tmp_dir: TempDir = tempdir::TempDir::new("test_watch_wrapper").unwrap();
let tmp_dirpath = tmp_dir.path().to_owned();
let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
let tmp_file = tmp_dirpath.join("coucou");
let _handle = watch_wrapper.watch(Box::new(move || {
counter_clone.fetch_add(1, Ordering::SeqCst);
}));
assert_eq!(counter.load(Ordering::SeqCst), 0);
fs::write(&tmp_file, b"whateverwilldo").unwrap();
thread::sleep(Duration::new(0, 1_000u32));
}
#[test]
fn test_mmap_released() {
let mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
let mut schema_builder: SchemaBuilder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
{
let index = Index::create(mmap_directory.clone(), schema).unwrap();
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for _num_commits in 0..16 {
for _ in 0..10 {
index_writer.add_document(doc!(text_field=>"abc"));
}
index_writer.commit().unwrap();
}
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
for _ in 0..30 {
index_writer.add_document(doc!(text_field=>"abc"));
index_writer.commit().unwrap();
reader.reload().unwrap();
}
index_writer.wait_merging_threads().unwrap();
reader.reload().unwrap();
let num_segments = reader.searcher().segment_readers().len();
assert_eq!(num_segments, 4);
assert_eq!(
num_segments * 7,
mmap_directory.get_cache_info().mmapped.len()
);
}
assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
}
} }
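The `MmapCache` changes above switch the cache to weak references: `get_mmap` upgrades a `Weak` on a hit, re-opens the mmap on a miss, and `remove_weak_ref` prunes entries whose data has been dropped, which is also what the new `test_mmap_released` test asserts. The self-contained sketch below demonstrates the same pattern with plain `std` types, using a `String` as a stand-in for the mmapped data; `WeakCache` is an illustrative name, not a tantivy type.

use std::collections::HashMap;
use std::sync::{Arc, Weak};

#[derive(Default)]
struct WeakCache {
    entries: HashMap<String, Weak<String>>,
}

impl WeakCache {
    // Returns a shared handle, reusing the live entry if one still exists.
    fn get_or_load(&mut self, key: &str, load: impl FnOnce() -> String) -> Arc<String> {
        if let Some(weak) = self.entries.get(key) {
            if let Some(alive) = weak.upgrade() {
                return alive; // cache hit: somebody still holds the data
            }
        }
        // Cache miss: (re)load and keep only a weak reference.
        let fresh = Arc::new(load());
        self.entries.insert(key.to_string(), Arc::downgrade(&fresh));
        fresh
    }

    // Drops entries whose data is no longer referenced anywhere,
    // analogous to `remove_weak_ref` above.
    fn remove_dead_entries(&mut self) {
        self.entries.retain(|_, weak| weak.upgrade().is_some());
    }
}

fn main() {
    let mut cache = WeakCache::default();
    let first = cache.get_or_load("seg.idx", || "mmapped bytes".to_string());
    let second = cache.get_or_load("seg.idx", || unreachable!("expected a cache hit"));
    assert!(Arc::ptr_eq(&first, &second));
    drop(first);
    drop(second);
    // Once every strong reference is gone, the entry can be pruned
    // (for a real mmap, this is the point where munmap happens).
    cache.remove_dead_entries();
    assert!(cache.entries.is_empty());
}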

View File

@@ -12,7 +12,7 @@ mod directory_lock;
mod managed_directory; mod managed_directory;
mod ram_directory; mod ram_directory;
mod read_only_source; mod read_only_source;
mod watch_event_router; mod shared_vec_slice;
/// Errors specific to the directory module. /// Errors specific to the directory module.
pub mod error; pub mod error;
@@ -22,8 +22,6 @@ pub use self::directory::{Directory, DirectoryClone};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK}; pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub use self::ram_directory::RAMDirectory; pub use self::ram_directory::RAMDirectory;
pub use self::read_only_source::ReadOnlySource; pub use self::read_only_source::ReadOnlySource;
pub(crate) use self::watch_event_router::WatchCallbackList;
pub use self::watch_event_router::{WatchCallback, WatchHandle};
use std::io::{BufWriter, Seek, Write}; use std::io::{BufWriter, Seek, Write};
#[cfg(feature = "mmap")] #[cfg(feature = "mmap")]

View File

@@ -1,8 +1,8 @@
use core::META_FILEPATH; use super::shared_vec_slice::SharedVecSlice;
use directory::error::{DeleteError, OpenReadError, OpenWriteError}; use common::make_io_err;
use directory::WatchCallbackList; use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::WritePtr; use directory::WritePtr;
use directory::{Directory, ReadOnlySource, WatchCallback, WatchHandle}; use directory::{Directory, ReadOnlySource};
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write}; use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
@@ -22,13 +22,13 @@ use std::sync::{Arc, RwLock};
/// ///
struct VecWriter { struct VecWriter {
path: PathBuf, path: PathBuf,
shared_directory: RAMDirectory, shared_directory: InnerDirectory,
data: Cursor<Vec<u8>>, data: Cursor<Vec<u8>>,
is_flushed: bool, is_flushed: bool,
} }
impl VecWriter { impl VecWriter {
fn new(path_buf: PathBuf, shared_directory: RAMDirectory) -> VecWriter { fn new(path_buf: PathBuf, shared_directory: InnerDirectory) -> VecWriter {
VecWriter { VecWriter {
path: path_buf, path: path_buf,
data: Cursor::new(Vec::new()), data: Cursor::new(Vec::new()),
@@ -64,44 +64,75 @@ impl Write for VecWriter {
fn flush(&mut self) -> io::Result<()> { fn flush(&mut self) -> io::Result<()> {
self.is_flushed = true; self.is_flushed = true;
let mut fs = self.shared_directory.fs.write().unwrap(); self.shared_directory
fs.write(self.path.clone(), self.data.get_ref()); .write(self.path.clone(), self.data.get_ref())?;
Ok(()) Ok(())
} }
} }
#[derive(Default)] #[derive(Clone)]
struct InnerDirectory { struct InnerDirectory(Arc<RwLock<HashMap<PathBuf, Arc<Vec<u8>>>>>);
fs: HashMap<PathBuf, ReadOnlySource>,
watch_router: WatchCallbackList,
}
impl InnerDirectory { impl InnerDirectory {
fn write(&mut self, path: PathBuf, data: &[u8]) -> bool { fn new() -> InnerDirectory {
let data = ReadOnlySource::new(Vec::from(data)); InnerDirectory(Arc::new(RwLock::new(HashMap::new())))
self.fs.insert(path, data).is_some() }
fn write(&self, path: PathBuf, data: &[u8]) -> io::Result<bool> {
let mut map = self.0.write().map_err(|_| {
make_io_err(format!(
"Failed to lock the directory, when trying to write {:?}",
path
))
})?;
let prev_value = map.insert(path, Arc::new(Vec::from(data)));
Ok(prev_value.is_some())
} }
fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> { fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> {
self.fs self.0
.get(path) .read()
.ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path))) .map_err(|_| {
.map(|el| el.clone()) let msg = format!(
"Failed to acquire read lock for the \
directory when trying to read {:?}",
path
);
let io_err = make_io_err(msg);
OpenReadError::IOError(IOError::with_path(path.to_owned(), io_err))
})
.and_then(|readable_map| {
readable_map
.get(path)
.ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path)))
.map(Arc::clone)
.map(|data| ReadOnlySource::Anonymous(SharedVecSlice::new(data)))
})
} }
fn delete(&mut self, path: &Path) -> result::Result<(), DeleteError> { fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
match self.fs.remove(path) { self.0
Some(_) => Ok(()), .write()
None => Err(DeleteError::FileDoesNotExist(PathBuf::from(path))), .map_err(|_| {
} let msg = format!(
"Failed to acquire write lock for the \
directory when trying to delete {:?}",
path
);
let io_err = make_io_err(msg);
DeleteError::IOError(IOError::with_path(path.to_owned(), io_err))
})
.and_then(|mut writable_map| match writable_map.remove(path) {
Some(_) => Ok(()),
None => Err(DeleteError::FileDoesNotExist(PathBuf::from(path))),
})
} }
fn exists(&self, path: &Path) -> bool { fn exists(&self, path: &Path) -> bool {
self.fs.contains_key(path) self.0
} .read()
.expect("Failed to get read lock directory.")
fn watch(&mut self, watch_handle: WatchCallback) -> WatchHandle { .contains_key(path)
self.watch_router.subscribe(watch_handle)
} }
} }
@@ -116,36 +147,33 @@ impl fmt::Debug for RAMDirectory {
/// It is mainly meant for unit testing. /// It is mainly meant for unit testing.
/// Writes are only made visible upon flushing. /// Writes are only made visible upon flushing.
/// ///
#[derive(Clone, Default)] #[derive(Clone)]
pub struct RAMDirectory { pub struct RAMDirectory {
fs: Arc<RwLock<InnerDirectory>>, fs: InnerDirectory,
} }
impl RAMDirectory { impl RAMDirectory {
/// Constructor /// Constructor
pub fn create() -> RAMDirectory { pub fn create() -> RAMDirectory {
Self::default() RAMDirectory {
fs: InnerDirectory::new(),
}
} }
} }
impl Directory for RAMDirectory { impl Directory for RAMDirectory {
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> { fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
self.fs.read().unwrap().open_read(path) self.fs.open_read(path)
}
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
self.fs.write().unwrap().delete(path)
}
fn exists(&self, path: &Path) -> bool {
self.fs.read().unwrap().exists(path)
} }
fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> { fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
let mut fs = self.fs.write().unwrap();
let path_buf = PathBuf::from(path); let path_buf = PathBuf::from(path);
let vec_writer = VecWriter::new(path_buf.clone(), self.clone()); let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
let exists = fs.write(path_buf.clone(), &[]);
let exists = self
.fs
.write(path_buf.clone(), &Vec::new())
.map_err(|err| IOError::with_path(path.to_owned(), err))?;
// force the creation of the file to mimic the MMap directory. // force the creation of the file to mimic the MMap directory.
if exists { if exists {
Err(OpenWriteError::FileAlreadyExists(path_buf)) Err(OpenWriteError::FileAlreadyExists(path_buf))
@@ -154,8 +182,17 @@ impl Directory for RAMDirectory {
} }
} }
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
self.fs.delete(path)
}
fn exists(&self, path: &Path) -> bool {
self.fs.exists(path)
}
fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> { fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
Ok(self.open_read(path)?.as_slice().to_owned()) let read = self.open_read(path)?;
Ok(read.as_slice().to_owned())
} }
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> { fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
@@ -164,20 +201,10 @@ impl Directory for RAMDirectory {
msg.unwrap_or("Undefined".to_string()) msg.unwrap_or("Undefined".to_string())
))); )));
let path_buf = PathBuf::from(path); let path_buf = PathBuf::from(path);
let mut vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
// Reserve the path to prevent calls to .write() to succeed. self.fs.write(path_buf, &Vec::new())?;
self.fs.write().unwrap().write(path_buf.clone(), &[]);
let mut vec_writer = VecWriter::new(path_buf.clone(), self.clone());
vec_writer.write_all(data)?; vec_writer.write_all(data)?;
vec_writer.flush()?; vec_writer.flush()?;
if path == Path::new(&*META_FILEPATH) {
self.fs.write().unwrap().watch_router.broadcast();
}
Ok(()) Ok(())
} }
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
self.fs.write().unwrap().watch(watch_callback)
}
} }
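The `RAMDirectory` doc above notes that writes only become visible upon flushing. The sketch below shows that behaviour end to end, mirroring the `test_simple` helper further down in this diff; it assumes a tantivy version exposing `tantivy::directory::{Directory, RAMDirectory}` as shown here.

use std::io::Write;
use std::path::Path;

use tantivy::directory::{Directory, RAMDirectory};

fn main() {
    let mut directory = RAMDirectory::create();
    let path = Path::new("some_file");
    {
        let mut write_file = directory.open_write(path).unwrap();
        write_file.write_all(&[4, 3, 7, 3, 5]).unwrap();
        // Without this flush, open_read below would still see an empty file.
        write_file.flush().unwrap();
    }
    let read_file = directory.open_read(path).unwrap();
    let data: &[u8] = &*read_file;
    assert_eq!(data, &[4u8, 3, 7, 3, 5]);
}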

View File

@@ -1,9 +1,9 @@
use super::shared_vec_slice::SharedVecSlice;
use common::HasLen; use common::HasLen;
#[cfg(feature = "mmap")]
use fst::raw::MmapReadOnly;
use stable_deref_trait::{CloneStableDeref, StableDeref}; use stable_deref_trait::{CloneStableDeref, StableDeref};
use std::ops::Deref; use std::ops::Deref;
use std::sync::Arc;
pub type BoxedData = Box<Deref<Target = [u8]> + Send + Sync + 'static>;
/// Read object that represents files in tantivy. /// Read object that represents files in tantivy.
/// ///
@@ -11,10 +11,12 @@ pub type BoxedData = Box<Deref<Target = [u8]> + Send + Sync + 'static>;
/// the data in the form of a constant read-only `&[u8]`. /// the data in the form of a constant read-only `&[u8]`.
/// Whatever happens to the directory file, the data /// Whatever happens to the directory file, the data
/// held by this object should never be altered or destroyed. /// held by this object should never be altered or destroyed.
pub struct ReadOnlySource { pub enum ReadOnlySource {
data: Arc<BoxedData>, /// Mmap source of data
start: usize, #[cfg(feature = "mmap")]
stop: usize, Mmap(MmapReadOnly),
/// Wrapping a `Vec<u8>`
Anonymous(SharedVecSlice),
} }
unsafe impl StableDeref for ReadOnlySource {} unsafe impl StableDeref for ReadOnlySource {}
@@ -28,38 +30,19 @@ impl Deref for ReadOnlySource {
} }
} }
impl From<Arc<BoxedData>> for ReadOnlySource {
fn from(data: Arc<BoxedData>) -> Self {
let len = data.len();
ReadOnlySource {
data,
start: 0,
stop: len,
}
}
}
impl ReadOnlySource { impl ReadOnlySource {
pub(crate) fn new<D>(data: D) -> ReadOnlySource
where
D: Deref<Target = [u8]> + Send + Sync + 'static,
{
let len = data.len();
ReadOnlySource {
data: Arc::new(Box::new(data)),
start: 0,
stop: len,
}
}
/// Creates an empty ReadOnlySource /// Creates an empty ReadOnlySource
pub fn empty() -> ReadOnlySource { pub fn empty() -> ReadOnlySource {
ReadOnlySource::new(&[][..]) ReadOnlySource::Anonymous(SharedVecSlice::empty())
} }
/// Returns the data underlying the ReadOnlySource object. /// Returns the data underlying the ReadOnlySource object.
pub fn as_slice(&self) -> &[u8] { pub fn as_slice(&self) -> &[u8] {
&self.data[self.start..self.stop] match *self {
#[cfg(feature = "mmap")]
ReadOnlySource::Mmap(ref mmap_read_only) => mmap_read_only.as_slice(),
ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
}
} }
/// Splits into 2 `ReadOnlySource`, at the offset given /// Splits into 2 `ReadOnlySource`, at the offset given
@@ -80,18 +63,22 @@ impl ReadOnlySource {
/// worth of data in anonymous memory, and only a /// worth of data in anonymous memory, and only a
/// 1KB slice is remaining, the whole `500MBs` /// 1KB slice is remaining, the whole `500MBs`
/// are retained in memory. /// are retained in memory.
pub fn slice(&self, start: usize, stop: usize) -> ReadOnlySource { pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
assert!( assert!(
start <= stop, from_offset <= to_offset,
"Requested negative slice [{}..{}]", "Requested negative slice [{}..{}]",
start, from_offset,
stop to_offset
); );
assert!(stop <= self.len()); match *self {
ReadOnlySource { #[cfg(feature = "mmap")]
data: self.data.clone(), ReadOnlySource::Mmap(ref mmap_read_only) => {
start: self.start + start, let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
stop: self.start + stop, ReadOnlySource::Mmap(sliced_mmap)
}
ReadOnlySource::Anonymous(ref shared_vec) => {
ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
}
} }
} }
@@ -100,7 +87,8 @@ impl ReadOnlySource {
/// ///
/// Equivalent to `.slice(from_offset, self.len())` /// Equivalent to `.slice(from_offset, self.len())`
pub fn slice_from(&self, from_offset: usize) -> ReadOnlySource { pub fn slice_from(&self, from_offset: usize) -> ReadOnlySource {
self.slice(from_offset, self.len()) let len = self.len();
self.slice(from_offset, len)
} }
/// Like `.slice(...)` but enforcing only the `to` /// Like `.slice(...)` but enforcing only the `to`
@@ -114,18 +102,19 @@ impl ReadOnlySource {
impl HasLen for ReadOnlySource { impl HasLen for ReadOnlySource {
fn len(&self) -> usize { fn len(&self) -> usize {
self.stop - self.start self.as_slice().len()
} }
} }
impl Clone for ReadOnlySource { impl Clone for ReadOnlySource {
fn clone(&self) -> Self { fn clone(&self) -> Self {
self.slice_from(0) self.slice(0, self.len())
} }
} }
impl From<Vec<u8>> for ReadOnlySource { impl From<Vec<u8>> for ReadOnlySource {
fn from(data: Vec<u8>) -> ReadOnlySource { fn from(data: Vec<u8>) -> ReadOnlySource {
ReadOnlySource::new(data) let shared_data = SharedVecSlice::from(data);
ReadOnlySource::Anonymous(shared_data)
} }
} }
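Both representations of `ReadOnlySource` above (the `data`/`start`/`stop` struct and the `Mmap`/`Anonymous` enum) share the same slicing semantics: a slice is a cheap view that keeps the whole underlying buffer alive, which is exactly the caveat in the `slice` doc comment. A small sketch of that API, assuming `ReadOnlySource` is reachable as `tantivy::directory::ReadOnlySource` as the module re-exports in this diff suggest:

use tantivy::directory::ReadOnlySource;

fn main() {
    let source = ReadOnlySource::from(vec![0u8, 1, 2, 3, 4, 5, 6, 7]);
    // A slice is just a (shared data, start, stop) view: cheap to create and
    // clone, but it keeps the whole original buffer alive.
    let middle = source.slice(2, 6);
    assert_eq!(middle.as_slice(), &[2u8, 3, 4, 5]);
    let tail = source.slice_from(6);
    assert_eq!(tail.as_slice(), &[6u8, 7]);
}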

View File

@@ -0,0 +1,41 @@
use std::sync::Arc;
#[derive(Clone)]
pub struct SharedVecSlice {
pub data: Arc<Vec<u8>>,
pub start: usize,
pub len: usize,
}
impl SharedVecSlice {
pub fn empty() -> SharedVecSlice {
SharedVecSlice::new(Arc::new(Vec::new()))
}
pub fn new(data: Arc<Vec<u8>>) -> SharedVecSlice {
let data_len = data.len();
SharedVecSlice {
data,
start: 0,
len: data_len,
}
}
pub fn as_slice(&self) -> &[u8] {
&self.data[self.start..self.start + self.len]
}
pub fn slice(&self, from_offset: usize, to_offset: usize) -> SharedVecSlice {
SharedVecSlice {
data: Arc::clone(&self.data),
start: self.start + from_offset,
len: to_offset - from_offset,
}
}
}
impl From<Vec<u8>> for SharedVecSlice {
fn from(data: Vec<u8>) -> SharedVecSlice {
SharedVecSlice::new(Arc::new(data))
}
}
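Since `shared_vec_slice` is a private module, the behaviour of the new type is easiest to demonstrate by inlining its definition. The sketch below copies the relevant parts of the file above and shows that a sub-slice aliases the same `Arc<Vec<u8>>` rather than copying bytes.

use std::sync::Arc;

// Copied from the `shared_vec_slice.rs` file introduced above.
#[derive(Clone)]
pub struct SharedVecSlice {
    pub data: Arc<Vec<u8>>,
    pub start: usize,
    pub len: usize,
}

impl SharedVecSlice {
    pub fn new(data: Arc<Vec<u8>>) -> SharedVecSlice {
        let data_len = data.len();
        SharedVecSlice { data, start: 0, len: data_len }
    }
    pub fn as_slice(&self) -> &[u8] {
        &self.data[self.start..self.start + self.len]
    }
    pub fn slice(&self, from_offset: usize, to_offset: usize) -> SharedVecSlice {
        SharedVecSlice {
            data: Arc::clone(&self.data),
            start: self.start + from_offset,
            len: to_offset - from_offset,
        }
    }
}

fn main() {
    let full = SharedVecSlice::new(Arc::new(vec![10u8, 20, 30, 40]));
    let sub = full.slice(1, 3);
    assert_eq!(sub.as_slice(), &[20u8, 30]);
    // Both views point at the very same allocation: no bytes were copied.
    assert_eq!(Arc::strong_count(&full.data), 2);
}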

View File

@@ -1,13 +1,7 @@
use super::*; use super::*;
use std::io::{Seek, SeekFrom, Write}; use std::io::{Seek, SeekFrom, Write};
use std::mem;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread;
use std::time; use std::time;
use std::time::Duration;
lazy_static! { lazy_static! {
static ref TEST_PATH: &'static Path = Path::new("some_path_for_test"); static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");
@@ -36,18 +30,19 @@ fn ram_directory_panics_if_flush_forgotten() {
fn test_simple(directory: &mut Directory) { fn test_simple(directory: &mut Directory) {
{ {
let mut write_file = directory.open_write(*TEST_PATH).unwrap(); {
assert!(directory.exists(*TEST_PATH)); let mut write_file = directory.open_write(*TEST_PATH).unwrap();
write_file.write_all(&[4]).unwrap(); assert!(directory.exists(*TEST_PATH));
write_file.write_all(&[3]).unwrap(); write_file.write_all(&[4]).unwrap();
write_file.write_all(&[7, 3, 5]).unwrap(); write_file.write_all(&[3]).unwrap();
write_file.flush().unwrap(); write_file.write_all(&[7, 3, 5]).unwrap();
} write_file.flush().unwrap();
{ }
let read_file = directory.open_read(*TEST_PATH).unwrap(); let read_file = directory.open_read(*TEST_PATH).unwrap();
let data: &[u8] = &*read_file; let data: &[u8] = &*read_file;
assert_eq!(data, &[4u8, 3u8, 7u8, 3u8, 5u8]); assert_eq!(data, &[4u8, 3u8, 7u8, 3u8, 5u8]);
} }
assert!(directory.delete(*TEST_PATH).is_ok()); assert!(directory.delete(*TEST_PATH).is_ok());
assert!(!directory.exists(*TEST_PATH)); assert!(!directory.exists(*TEST_PATH));
} }
@@ -126,41 +121,6 @@ fn test_directory(directory: &mut Directory) {
test_directory_delete(directory); test_directory_delete(directory);
test_lock_non_blocking(directory); test_lock_non_blocking(directory);
test_lock_blocking(directory); test_lock_blocking(directory);
test_watch(directory);
}
fn test_watch(directory: &mut Directory) {
let counter: Arc<AtomicUsize> = Default::default();
let counter_clone = counter.clone();
let watch_callback = Box::new(move || {
counter_clone.fetch_add(1, Ordering::SeqCst);
});
assert!(directory
.atomic_write(Path::new("meta.json"), b"random_test_data")
.is_ok());
thread::sleep(Duration::new(0, 10_000));
assert_eq!(0, counter.load(Ordering::SeqCst));
let watch_handle = directory.watch(watch_callback);
for i in 0..10 {
assert_eq!(i, counter.load(Ordering::SeqCst));
assert!(directory
.atomic_write(Path::new("meta.json"), b"random_test_data_2")
.is_ok());
for _ in 0..100 {
if counter.load(Ordering::SeqCst) > i {
break;
}
thread::sleep(Duration::from_millis(10));
}
assert_eq!(i + 1, counter.load(Ordering::SeqCst));
}
mem::drop(watch_handle);
assert!(directory
.atomic_write(Path::new("meta.json"), b"random_test_data")
.is_ok());
thread::sleep(Duration::from_millis(200));
assert_eq!(10, counter.load(Ordering::SeqCst));
} }
fn test_lock_non_blocking(directory: &mut Directory) { fn test_lock_non_blocking(directory: &mut Directory) {

View File

@@ -1,156 +0,0 @@
use std::sync::Arc;
use std::sync::RwLock;
use std::sync::Weak;
/// Type alias for callbacks registered when watching files of a `Directory`.
pub type WatchCallback = Box<Fn() -> () + Sync + Send>;
/// Helper struct to implement the watch method in `Directory` implementations.
///
/// It registers callbacks (see `.subscribe(...)`) and
/// calls them upon calls to `.broadcast(...)`.
#[derive(Default)]
pub struct WatchCallbackList {
router: RwLock<Vec<Weak<WatchCallback>>>,
}
/// Controls how long a directory should watch for a file change.
///
/// After all the clones of `WatchHandle` are dropped, the associated callback will not be called when a
/// file change is detected.
#[must_use = "This `WatchHandle` controls the lifetime of the watch and should therefore be used."]
#[derive(Clone)]
pub struct WatchHandle(Arc<WatchCallback>);
impl WatchCallbackList {
/// Subscribes a new callback and returns a handle that controls the lifetime of the callback.
pub fn subscribe(&self, watch_callback: WatchCallback) -> WatchHandle {
let watch_callback_arc = Arc::new(watch_callback);
let watch_callback_weak = Arc::downgrade(&watch_callback_arc);
self.router.write().unwrap().push(watch_callback_weak);
WatchHandle(watch_callback_arc)
}
fn list_callback(&self) -> Vec<Arc<WatchCallback>> {
let mut callbacks = vec![];
let mut router_wlock = self.router.write().unwrap();
let mut i = 0;
while i < router_wlock.len() {
if let Some(watch) = router_wlock[i].upgrade() {
callbacks.push(watch);
i += 1;
} else {
router_wlock.swap_remove(i);
}
}
callbacks
}
/// Triggers all callbacks
pub fn broadcast(&self) {
let callbacks = self.list_callback();
let spawn_res = std::thread::Builder::new()
.name("watch-callbacks".to_string())
.spawn(move || {
for callback in callbacks {
callback();
}
});
if let Err(err) = spawn_res {
error!(
"Failed to spawn thread to call watch callbacks. Cause: {:?}",
err
);
}
}
}
#[cfg(test)]
mod tests {
use directory::WatchCallbackList;
use std::mem;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
const WAIT_TIME: u64 = 20;
#[test]
fn test_watch_event_router_simple() {
let watch_event_router = WatchCallbackList::default();
let counter: Arc<AtomicUsize> = Default::default();
let counter_clone = counter.clone();
let inc_callback = Box::new(move || {
counter_clone.fetch_add(1, Ordering::SeqCst);
});
watch_event_router.broadcast();
assert_eq!(0, counter.load(Ordering::SeqCst));
let handle_a = watch_event_router.subscribe(inc_callback);
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(0, counter.load(Ordering::SeqCst));
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(1, counter.load(Ordering::SeqCst));
watch_event_router.broadcast();
watch_event_router.broadcast();
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(4, counter.load(Ordering::SeqCst));
mem::drop(handle_a);
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(4, counter.load(Ordering::SeqCst));
}
#[test]
fn test_watch_event_router_multiple_callback_same_key() {
let watch_event_router = WatchCallbackList::default();
let counter: Arc<AtomicUsize> = Default::default();
let inc_callback = |inc: usize| {
let counter_clone = counter.clone();
Box::new(move || {
counter_clone.fetch_add(inc, Ordering::SeqCst);
})
};
let handle_a = watch_event_router.subscribe(inc_callback(1));
let handle_a2 = watch_event_router.subscribe(inc_callback(10));
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(0, counter.load(Ordering::SeqCst));
watch_event_router.broadcast();
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(22, counter.load(Ordering::SeqCst));
mem::drop(handle_a);
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(32, counter.load(Ordering::SeqCst));
mem::drop(handle_a2);
watch_event_router.broadcast();
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(32, counter.load(Ordering::SeqCst));
}
#[test]
fn test_watch_event_router_multiple_callback_different_key() {
let watch_event_router = WatchCallbackList::default();
let counter: Arc<AtomicUsize> = Default::default();
let counter_clone = counter.clone();
let inc_callback = Box::new(move || {
counter_clone.fetch_add(1, Ordering::SeqCst);
});
let handle_a = watch_event_router.subscribe(inc_callback);
assert_eq!(0, counter.load(Ordering::SeqCst));
watch_event_router.broadcast();
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(2, counter.load(Ordering::SeqCst));
thread::sleep(Duration::from_millis(WAIT_TIME));
mem::drop(handle_a);
watch_event_router.broadcast();
thread::sleep(Duration::from_millis(WAIT_TIME));
assert_eq!(2, counter.load(Ordering::SeqCst));
}
}

View File

@@ -162,7 +162,6 @@ impl From<OpenDirectoryError> for TantivyError {
OpenDirectoryError::NotADirectory(directory_path) => { OpenDirectoryError::NotADirectory(directory_path) => {
TantivyError::InvalidArgument(format!("{:?} is not a directory", directory_path)) TantivyError::InvalidArgument(format!("{:?} is not a directory", directory_path))
} }
OpenDirectoryError::IoError(err) => TantivyError::IOError(IOError::from(err)),
} }
} }
} }

View File

@@ -22,7 +22,9 @@ mod tests {
index_writer.add_document(doc!(field=>vec![1u8, 3, 5, 7, 9])); index_writer.add_document(doc!(field=>vec![1u8, 3, 5, 7, 9]));
index_writer.add_document(doc!(field=>vec![0u8; 1000])); index_writer.add_document(doc!(field=>vec![0u8; 1000]));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
let searcher = index.reader().unwrap().searcher();
index.load_searchers().unwrap();
let searcher = index.searcher();
let reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let bytes_reader = reader.bytes_fast_field_reader(field).unwrap(); let bytes_reader = reader.bytes_fast_field_reader(field).unwrap();

View File

@@ -7,13 +7,7 @@ pub use self::writer::MultiValueIntFastFieldWriter;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
extern crate time;
use self::time::Duration;
use collector::TopDocs;
use query::QueryParser;
use schema::Cardinality; use schema::Cardinality;
use schema::Facet;
use schema::IntOptions; use schema::IntOptions;
use schema::Schema; use schema::Schema;
use Index; use Index;
@@ -34,12 +28,11 @@ mod tests {
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64)); index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let segment_reader = searcher.segment_reader(0); let searcher = index.searcher();
let reader = searcher.segment_reader(0);
let mut vals = Vec::new(); let mut vals = Vec::new();
let multi_value_reader = segment_reader let multi_value_reader = reader.multi_fast_field_reader::<u64>(field).unwrap();
.multi_fast_field_reader::<u64>(field)
.unwrap();
{ {
multi_value_reader.get_vals(2, &mut vals); multi_value_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4u64]); assert_eq!(&vals, &[4u64]);
@@ -54,133 +47,6 @@ mod tests {
} }
} }
#[test]
fn test_multivalued_date() {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"multi_date_field",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed()
.set_stored(),
);
let time_i =
schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let first_time_stamp = chrono::Utc::now();
index_writer.add_document(
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
);
index_writer.add_document(doc!(time_i=>0i64));
// add one second
index_writer
.add_document(doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64));
// add another second
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64));
assert!(index_writer.commit().is_ok());
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let reader = searcher.segment_reader(0);
assert_eq!(reader.num_docs(), 4);
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()).to_string())
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
assert_eq!(results.len(), 1);
for (_score, doc_address) in results {
let retrieved_doc = searcher.doc(doc_address).expect("cannot fetch doc");
assert_eq!(
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.timestamp(),
first_time_stamp.timestamp()
);
assert_eq!(
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value(),
1i64
);
}
}
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()).to_string())
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
assert_eq!(results.len(), 1);
for (_score, doc_address) in results {
let retrieved_doc = searcher.doc(doc_address).expect("cannot fetch doc");
assert_eq!(
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.timestamp(),
two_secs_ahead.timestamp()
);
assert_eq!(
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value(),
3i64
);
}
}
// TODO: support Date range queries
// {
// let parser = QueryParser::for_index(&index, vec![date_field]);
// let range_q = format!("\"{}\"..\"{}\"",
// (first_time_stamp + Duration::seconds(1)).to_rfc3339(),
// (first_time_stamp + Duration::seconds(3)).to_rfc3339()
// );
// let query = parser.parse_query(&range_q)
// .expect("could not parse query");
// let results = searcher.search(&query, &TopDocs::with_limit(5))
// .expect("could not query index");
//
//
// assert_eq!(results.len(), 2);
// for (i, doc_pair) in results.iter().enumerate() {
// let retrieved_doc = searcher.doc(doc_pair.1).expect("cannot fetch doc");
// let offset_sec = match i {
// 0 => 1,
// 1 => 3,
// _ => panic!("should not have more than 2 docs")
// };
// let time_i_val = match i {
// 0 => 2,
// 1 => 3,
// _ => panic!("should not have more than 2 docs")
// };
// assert_eq!(retrieved_doc.get_first(date_field).expect("cannot find value").date_value().timestamp(),
// (first_time_stamp + Duration::seconds(offset_sec)).timestamp());
// assert_eq!(retrieved_doc.get_first(time_i).expect("cannot find value").i64_value(), time_i_val);
// }
// }
}
#[test] #[test]
fn test_multivalued_i64() { fn test_multivalued_i64() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
@@ -197,7 +63,8 @@ mod tests {
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64)); index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let mut vals = Vec::new(); let mut vals = Vec::new();
let multi_value_reader = reader.multi_fast_field_reader::<i64>(field).unwrap(); let multi_value_reader = reader.multi_fast_field_reader::<i64>(field).unwrap();
@@ -218,17 +85,4 @@ mod tests {
assert_eq!(&vals, &[-5i64, -20i64, 1i64]); assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
} }
} }
#[test]
#[ignore]
fn test_many_facets() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield");
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for i in 0..100_000 {
index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())));
}
assert!(index_writer.commit().is_ok());
}
} }

View File

@@ -39,7 +39,7 @@ impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
let (start, stop) = self.range(doc); let (start, stop) = self.range(doc);
let len = (stop - start) as usize; let len = (stop - start) as usize;
vals.resize(len, Item::default()); vals.resize(len, Item::default());
self.vals_reader.get_range_u64(start, &mut vals[..]); self.vals_reader.get_range(start as u32, &mut vals[..]);
} }
} }
@@ -75,7 +75,8 @@ mod tests {
index_writer.add_document(doc); index_writer.add_document(doc);
} }
index_writer.commit().expect("Commit failed"); index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher(); index.load_searchers().expect("Reloading searchers");
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap(); let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();

View File

@@ -32,7 +32,7 @@ use DocId;
/// term ids when the segment is getting serialized. /// term ids when the segment is getting serialized.
pub struct MultiValueIntFastFieldWriter { pub struct MultiValueIntFastFieldWriter {
field: Field, field: Field,
vals: Vec<UnorderedTermId>, vals: Vec<u64>,
doc_index: Vec<u64>, doc_index: Vec<u64>,
is_facet: bool, is_facet: bool,
} }

View File

@@ -59,29 +59,7 @@ impl<Item: FastValue> FastFieldReader<Item> {
/// May panic if `doc` is greater than the segment /// May panic if `doc` is greater than the segment
// `maxdoc`. // `maxdoc`.
pub fn get(&self, doc: DocId) -> Item { pub fn get(&self, doc: DocId) -> Item {
self.get_u64(u64::from(doc)) Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc as usize))
}
pub(crate) fn get_u64(&self, doc: u64) -> Item {
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
}
/// Internally, `multivalued` fast fields also use single-value fast fields.
/// It works as follows: a first column contains the start index
/// for each document, and a second column contains the actual values.
///
/// The values associated with a given doc are then
/// `second_column[first_column.get(doc)..first_column.get(doc+1)]`.
///
/// This means the single-value fast field reader can be indexed internally with
/// something other than a `DocId`. For this use case, we want to use `u64`
/// values.
///
/// See `get_range` for an actual documentation about this method.
pub(crate) fn get_range_u64(&self, start: u64, output: &mut [Item]) {
for (i, out) in output.iter_mut().enumerate() {
*out = self.get_u64(start + (i as u64));
}
} }
/// Fills an output buffer with the fast field values /// Fills an output buffer with the fast field values
@@ -97,8 +75,13 @@ impl<Item: FastValue> FastFieldReader<Item> {
/// ///
/// May panic if `start + output.len()` is greater than /// May panic if `start + output.len()` is greater than
/// the segment's `maxdoc`. /// the segment's `maxdoc`.
pub fn get_range(&self, start: DocId, output: &mut [Item]) { ///
self.get_range_u64(u64::from(start), output); // TODO change start to `u64`.
// For multifastfield, start is an index in a second fastfield, not a `DocId`
pub fn get_range(&self, start: u32, output: &mut [Item]) {
for (i, out) in output.iter_mut().enumerate() {
*out = self.get(start + i as u32);
}
} }
/// Returns the minimum value for this fast field. /// Returns the minimum value for this fast field.
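The `get_range_u64` comment above describes the two-column layout behind multivalued fast fields: one column of per-document start offsets and one column holding the values themselves. A minimal, self-contained sketch of that lookup follows; it is not tantivy code, and the `starts`/`vals` arrays and the helper name are purely illustrative.

fn values_for_doc<'a>(starts: &[u64], vals: &'a [i64], doc: usize) -> &'a [i64] {
    // starts[doc]..starts[doc + 1] delimits the slice of vals belonging to doc.
    let start = starts[doc] as usize;
    let stop = starts[doc + 1] as usize;
    &vals[start..stop]
}

fn main() {
    // doc 0 -> [-5, -20, 1], doc 1 -> [] (no values), doc 2 -> [7]
    let starts = vec![0u64, 3, 3, 4];
    let vals = vec![-5i64, -20, 1, 7];
    assert_eq!(values_for_doc(&starts, &vals, 0), &[-5, -20, 1][..]);
    assert!(values_for_doc(&starts, &vals, 1).is_empty());
    assert_eq!(values_for_doc(&starts, &vals, 2), &[7][..]);
}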

View File

@@ -17,12 +17,11 @@ fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {
fn test_indexing() { fn test_indexing() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let id_field = schema_builder.add_u64_field("id", INDEXED); let id_field = schema_builder.add_u64_field("id", INT_INDEXED);
let multiples_field = schema_builder.add_u64_field("multiples", INDEXED); let multiples_field = schema_builder.add_u64_field("multiples", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema).unwrap(); let index = Index::create_from_tempdir(schema).unwrap();
let reader = index.reader().unwrap();
let mut rng = thread_rng(); let mut rng = thread_rng();
@@ -37,8 +36,8 @@ fn test_indexing() {
index_writer.commit().expect("Commit failed"); index_writer.commit().expect("Commit failed");
committed_docs.extend(&uncommitted_docs); committed_docs.extend(&uncommitted_docs);
uncommitted_docs.clear(); uncommitted_docs.clear();
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
// check that everything is correct. // check that everything is correct.
check_index_content(&searcher, &committed_docs); check_index_content(&searcher, &committed_docs);
} else { } else {

View File

@@ -1,4 +1,4 @@
use super::operation::{AddOperation, UserOperation}; use super::operation::AddOperation;
use super::segment_updater::SegmentUpdater; use super::segment_updater::SegmentUpdater;
use super::PreparedCommit; use super::PreparedCommit;
use bit_set::BitSet; use bit_set::BitSet;
@@ -26,7 +26,6 @@ use schema::Document;
use schema::IndexRecordOption; use schema::IndexRecordOption;
use schema::Term; use schema::Term;
use std::mem; use std::mem;
use std::ops::Range;
use std::sync::Arc; use std::sync::Arc;
use std::thread; use std::thread;
use std::thread::JoinHandle; use std::thread::JoinHandle;
@@ -44,8 +43,8 @@ pub const HEAP_SIZE_MAX: usize = u32::max_value() as usize - MARGIN_IN_BYTES;
// reaches `PIPELINE_MAX_SIZE_IN_DOCS` // reaches `PIPELINE_MAX_SIZE_IN_DOCS`
const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000; const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
type OperationSender = channel::Sender<Vec<AddOperation>>; type DocumentSender = channel::Sender<AddOperation>;
type OperationReceiver = channel::Receiver<Vec<AddOperation>>; type DocumentReceiver = channel::Receiver<AddOperation>;
/// Split the thread memory budget into /// Split the thread memory budget into
/// - the heap size /// - the heap size
@@ -85,8 +84,8 @@ pub struct IndexWriter {
workers_join_handle: Vec<JoinHandle<Result<()>>>, workers_join_handle: Vec<JoinHandle<Result<()>>>,
operation_receiver: OperationReceiver, document_receiver: DocumentReceiver,
operation_sender: OperationSender, document_sender: DocumentSender,
segment_updater: SegmentUpdater, segment_updater: SegmentUpdater,
@@ -133,7 +132,7 @@ pub fn open_index_writer(
let err_msg = format!("The heap size per thread cannot exceed {}", HEAP_SIZE_MAX); let err_msg = format!("The heap size per thread cannot exceed {}", HEAP_SIZE_MAX);
return Err(TantivyError::InvalidArgument(err_msg)); return Err(TantivyError::InvalidArgument(err_msg));
} }
let (document_sender, document_receiver): (OperationSender, OperationReceiver) = let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS); channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
let delete_queue = DeleteQueue::new(); let delete_queue = DeleteQueue::new();
@@ -151,8 +150,8 @@ pub fn open_index_writer(
heap_size_in_bytes_per_thread, heap_size_in_bytes_per_thread,
index: index.clone(), index: index.clone(),
operation_receiver: document_receiver, document_receiver,
operation_sender: document_sender, document_sender,
segment_updater, segment_updater,
@@ -259,7 +258,7 @@ pub fn advance_deletes(
write_delete_bitset(&delete_bitset, &mut delete_file)?; write_delete_bitset(&delete_bitset, &mut delete_file)?;
} }
} }
segment_entry.set_meta(segment.meta().clone()); segment_entry.set_meta((*segment.meta()).clone());
Ok(()) Ok(())
} }
@@ -267,7 +266,7 @@ fn index_documents(
memory_budget: usize, memory_budget: usize,
segment: &Segment, segment: &Segment,
generation: usize, generation: usize,
document_iterator: &mut Iterator<Item = Vec<AddOperation>>, document_iterator: &mut Iterator<Item = AddOperation>,
segment_updater: &mut SegmentUpdater, segment_updater: &mut SegmentUpdater,
mut delete_cursor: DeleteCursor, mut delete_cursor: DeleteCursor,
) -> Result<bool> { ) -> Result<bool> {
@@ -275,11 +274,11 @@ fn index_documents(
let segment_id = segment.id(); let segment_id = segment.id();
let table_size = initial_table_size(memory_budget); let table_size = initial_table_size(memory_budget);
let mut segment_writer = SegmentWriter::for_segment(table_size, segment.clone(), &schema)?; let mut segment_writer = SegmentWriter::for_segment(table_size, segment.clone(), &schema)?;
for documents in document_iterator { for doc in document_iterator {
for doc in documents { segment_writer.add_document(doc, &schema)?;
segment_writer.add_document(doc, &schema)?;
}
let mem_usage = segment_writer.mem_usage(); let mem_usage = segment_writer.mem_usage();
if mem_usage >= memory_budget - MARGIN_IN_BYTES { if mem_usage >= memory_budget - MARGIN_IN_BYTES {
info!( info!(
"Buffer limit reached, flushing segment with maxdoc={}.", "Buffer limit reached, flushing segment with maxdoc={}.",
@@ -335,7 +334,7 @@ impl IndexWriter {
pub fn wait_merging_threads(mut self) -> Result<()> { pub fn wait_merging_threads(mut self) -> Result<()> {
// this will stop the indexing thread, // this will stop the indexing thread,
// dropping the last reference to the segment_updater. // dropping the last reference to the segment_updater.
drop(self.operation_sender); drop(self.document_sender);
let former_workers_handles = mem::replace(&mut self.workers_join_handle, vec![]); let former_workers_handles = mem::replace(&mut self.workers_join_handle, vec![]);
for join_handle in former_workers_handles { for join_handle in former_workers_handles {
@@ -384,7 +383,7 @@ impl IndexWriter {
/// The thread consumes documents from the pipeline. /// The thread consumes documents from the pipeline.
/// ///
fn add_indexing_worker(&mut self) -> Result<()> { fn add_indexing_worker(&mut self) -> Result<()> {
let document_receiver_clone = self.operation_receiver.clone(); let document_receiver_clone = self.document_receiver.clone();
let mut segment_updater = self.segment_updater.clone(); let mut segment_updater = self.segment_updater.clone();
let generation = self.generation; let generation = self.generation;
@@ -410,12 +409,8 @@ impl IndexWriter {
// this is a valid guarantee as the // this is a valid guarantee as the
// peeked document now belongs to // peeked document now belongs to
// our local iterator. // our local iterator.
if let Some(operations) = document_iterator.peek() { if let Some(operation) = document_iterator.peek() {
if let Some(first) = operations.first() { delete_cursor.skip_to(operation.opstamp);
delete_cursor.skip_to(first.opstamp);
} else {
return Ok(());
}
} else { } else {
// No more documents. // No more documents.
// Happens when there is a commit, or if the `IndexWriter` // Happens when there is a commit, or if the `IndexWriter`
@@ -479,11 +474,11 @@ impl IndexWriter {
/// when no documents are remaining. /// when no documents are remaining.
/// ///
/// Returns the former segment_ready channel. /// Returns the former segment_ready channel.
fn recreate_document_channel(&mut self) -> OperationReceiver { fn recreate_document_channel(&mut self) -> DocumentReceiver {
let (document_sender, document_receiver): (OperationSender, OperationReceiver) = let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS); channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
mem::replace(&mut self.operation_sender, document_sender); mem::replace(&mut self.document_sender, document_sender);
mem::replace(&mut self.operation_receiver, document_receiver) mem::replace(&mut self.document_receiver, document_receiver)
} }
/// Rollback to the last commit /// Rollback to the last commit
@@ -501,7 +496,7 @@ impl IndexWriter {
// segment updates will be ignored. // segment updates will be ignored.
self.segment_updater.kill(); self.segment_updater.kill();
let document_receiver = self.operation_receiver.clone(); let document_receiver = self.document_receiver.clone();
// take the directory lock to create a new index_writer. // take the directory lock to create a new index_writer.
let directory_lock = self let directory_lock = self
@@ -648,168 +643,25 @@ impl IndexWriter {
pub fn add_document(&mut self, document: Document) -> u64 { pub fn add_document(&mut self, document: Document) -> u64 {
let opstamp = self.stamper.stamp(); let opstamp = self.stamper.stamp();
let add_operation = AddOperation { opstamp, document }; let add_operation = AddOperation { opstamp, document };
let send_result = self.operation_sender.send(vec![add_operation]); let send_result = self.document_sender.send(add_operation);
if let Err(e) = send_result { if let Err(e) = send_result {
panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e); panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e);
} }
opstamp opstamp
} }
/// Gets a range of stamps from the stamper and "pops" the last stamp
/// from the range, returning a tuple of the last opstamp and the popped
/// range.
///
/// The total number of stamps generated by this method is `count + 1`;
/// each operation gets a stamp from the `stamps` iterator and `last_opstamp`
/// is for the batch itself.
fn get_batch_opstamps(&mut self, count: u64) -> (u64, Range<u64>) {
let Range { start, end } = self.stamper.stamps(count + 1u64);
let last_opstamp = end - 1;
let stamps = Range {
start,
end: last_opstamp,
};
(last_opstamp, stamps)
}
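A worked example of the arithmetic in `get_batch_opstamps`, assuming the stamper currently holds 0 (the numbers are illustrative only):

fn main() {
    let count = 2u64;                        // two user operations in the batch
    let drawn = 0..(count + 1);              // the stamper hands out stamps 0, 1, 2
    let last_opstamp = drawn.end - 1;        // 2: the stamp reserved for the batch itself
    let stamps = drawn.start..last_opstamp;  // 0..2: one stamp per operation
    assert_eq!(last_opstamp, 2);
    assert_eq!(stamps, 0..2);
}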
/// Runs a group of document operations ensuring that the operations are
/// assigned contiguous u64 opstamps and that add operations of the same
/// group are flushed into the same segment.
///
/// If the indexing pipeline is full, this call may block.
///
/// Each operation of the given `user_operations` will receive an in-order,
/// contiguous u64 opstamp. The entire batch itself is also given an
/// opstamp that is 1 greater than the last given operation. This
/// `batch_opstamp` is the return value of `run`. An empty group of
/// `user_operations`, an empty `Vec<UserOperation>`, still receives
/// a valid opstamp even though no changes were _actually_ made to the index.
///
/// Like adds and deletes (see `IndexWriter.add_document` and
/// `IndexWriter.delete_term`), the changes made by calling `run` will be
/// visible to readers only after calling `commit()`.
pub fn run(&mut self, user_operations: Vec<UserOperation>) -> u64 {
let count = user_operations.len() as u64;
if count == 0 {
return self.stamper.stamp();
}
let (batch_opstamp, stamps) = self.get_batch_opstamps(count);
let mut adds: Vec<AddOperation> = Vec::new();
for (user_op, opstamp) in user_operations.into_iter().zip(stamps) {
match user_op {
UserOperation::Delete(term) => {
let delete_operation = DeleteOperation { opstamp, term };
self.delete_queue.push(delete_operation);
}
UserOperation::Add(document) => {
let add_operation = AddOperation { opstamp, document };
adds.push(add_operation);
}
}
}
let send_result = self.operation_sender.send(adds);
if let Err(e) = send_result {
panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e);
};
batch_opstamp
}
} }
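A hedged usage sketch of the grouped-operation API above, mirroring the tests that follow; `IndexWriter`, `Field`, `Term`, `UserOperation`, and the `doc!` macro are the crate items used elsewhere in this diff, while the function name and string literals are illustrative only.

fn run_batch(index_writer: &mut IndexWriter, text_field: Field) -> Result<u64> {
    let ops = vec![
        UserOperation::Delete(Term::from_field_text(text_field, "stale")),
        UserOperation::Add(doc!(text_field => "fresh")),
    ];
    // Every operation receives a contiguous opstamp; the batch opstamp is returned.
    let batch_opstamp = index_writer.run(ops);
    // Like plain adds and deletes, the batch only becomes visible after a commit.
    index_writer.commit()?;
    Ok(batch_opstamp)
}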
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::super::operation::UserOperation;
use super::initial_table_size; use super::initial_table_size;
use collector::TopDocs;
use directory::error::LockError; use directory::error::LockError;
use error::*; use error::*;
use indexer::NoMergePolicy; use indexer::NoMergePolicy;
use query::TermQuery; use schema::{self, Document};
use schema::{self, IndexRecordOption};
use Index; use Index;
use ReloadPolicy;
use Term; use Term;
#[test]
fn test_operations_group() {
// an operations group with 2 items should cause 3 opstamps 0, 1, and 2.
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let operations = vec![
UserOperation::Add(doc!(text_field=>"a")),
UserOperation::Add(doc!(text_field=>"b")),
];
let batch_opstamp1 = index_writer.run(operations);
assert_eq!(batch_opstamp1, 2u64);
}
#[test]
fn test_ordered_batched_operations() {
// * one delete for `doc!(field=>"a")`
// * one add for `doc!(field=>"a")`
// * one add for `doc!(field=>"b")`
// * one delete for `doc!(field=>"b")`
// after commit there is one doc with "a" and 0 doc with "b"
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let a_term = Term::from_field_text(text_field, "a");
let b_term = Term::from_field_text(text_field, "b");
let operations = vec![
UserOperation::Delete(a_term),
UserOperation::Add(doc!(text_field=>"a")),
UserOperation::Add(doc!(text_field=>"b")),
UserOperation::Delete(b_term),
];
index_writer.run(operations);
index_writer.commit().expect("failed to commit");
reader.reload().expect("failed to load searchers");
let a_term = Term::from_field_text(text_field, "a");
let b_term = Term::from_field_text(text_field, "b");
let a_query = TermQuery::new(a_term, IndexRecordOption::Basic);
let b_query = TermQuery::new(b_term, IndexRecordOption::Basic);
let searcher = reader.searcher();
let a_docs = searcher
.search(&a_query, &TopDocs::with_limit(1))
.expect("search for a failed");
let b_docs = searcher
.search(&b_query, &TopDocs::with_limit(1))
.expect("search for b failed");
assert_eq!(a_docs.len(), 1);
assert_eq!(b_docs.len(), 0);
}
#[test]
fn test_empty_operations_group() {
let schema_builder = schema::Schema::builder();
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer(3_000_000).unwrap();
let operations1 = vec![];
let batch_opstamp1 = index_writer.run(operations1);
assert_eq!(batch_opstamp1, 0u64);
let operations2 = vec![];
let batch_opstamp2 = index_writer.run(operations2);
assert_eq!(batch_opstamp2, 1u64);
}
#[test] #[test]
fn test_lockfile_stops_duplicates() { fn test_lockfile_stops_duplicates() {
let schema_builder = schema::Schema::builder(); let schema_builder = schema::Schema::builder();
@@ -870,13 +722,9 @@ mod tests {
let mut schema_builder = schema::Schema::builder(); let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT); let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
let num_docs_containing = |s: &str| { let num_docs_containing = |s: &str| {
let searcher = reader.searcher(); let searcher = index.searcher();
let term = Term::from_field_text(text_field, s); let term = Term::from_field_text(text_field, s);
searcher.doc_freq(&term) searcher.doc_freq(&term)
}; };
@@ -886,6 +734,7 @@ mod tests {
let mut index_writer = index.writer(3_000_000).unwrap(); let mut index_writer = index.writer(3_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"a")); index_writer.add_document(doc!(text_field=>"a"));
index_writer.rollback().unwrap(); index_writer.rollback().unwrap();
assert_eq!(index_writer.commit_opstamp(), 0u64); assert_eq!(index_writer.commit_opstamp(), 0u64);
assert_eq!(num_docs_containing("a"), 0); assert_eq!(num_docs_containing("a"), 0);
{ {
@@ -893,13 +742,13 @@ mod tests {
index_writer.add_document(doc!(text_field=>"c")); index_writer.add_document(doc!(text_field=>"c"));
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
reader.reload().unwrap(); index.load_searchers().unwrap();
assert_eq!(num_docs_containing("a"), 0); assert_eq!(num_docs_containing("a"), 0);
assert_eq!(num_docs_containing("b"), 1); assert_eq!(num_docs_containing("b"), 1);
assert_eq!(num_docs_containing("c"), 1); assert_eq!(num_docs_containing("c"), 1);
} }
reader.reload().unwrap(); index.load_searchers().unwrap();
reader.searcher(); index.searcher();
} }
#[test] #[test]
@@ -907,33 +756,32 @@ mod tests {
let mut schema_builder = schema::Schema::builder(); let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT); let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
let num_docs_containing = |s: &str| { let num_docs_containing = |s: &str| {
let searcher = index.searcher();
let term_a = Term::from_field_text(text_field, s); let term_a = Term::from_field_text(text_field, s);
reader.searcher().doc_freq(&term_a) searcher.doc_freq(&term_a)
}; };
{ {
// writing the segment // writing the segment
let mut index_writer = index.writer(12_000_000).unwrap(); let mut index_writer = index.writer(12_000_000).unwrap();
// create 8 segments with 100 tiny docs // create 8 segments with 100 tiny docs
for _doc in 0..100 { for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a")); let mut doc = Document::default();
doc.add_text(text_field, "a");
index_writer.add_document(doc);
} }
index_writer.commit().expect("commit failed"); index_writer.commit().expect("commit failed");
for _doc in 0..100 { for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a")); let mut doc = Document::default();
doc.add_text(text_field, "a");
index_writer.add_document(doc);
} }
// this should create 8 segments and trigger a merge. // this should create 8 segments and trigger a merge.
index_writer.commit().expect("commit failed"); index_writer.commit().expect("commit failed");
index_writer index_writer
.wait_merging_threads() .wait_merging_threads()
.expect("waiting merging thread failed"); .expect("waiting merging thread failed");
index.load_searchers().unwrap();
reader.reload().unwrap();
assert_eq!(num_docs_containing("a"), 200); assert_eq!(num_docs_containing("a"), 200);
assert!(index.searchable_segments().unwrap().len() < 8); assert!(index.searchable_segments().unwrap().len() < 8);
@@ -1000,15 +848,11 @@ mod tests {
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
index.load_searchers().unwrap();
let num_docs_containing = |s: &str| { let num_docs_containing = |s: &str| {
let searcher = index.searcher();
let term_a = Term::from_field_text(text_field, s); let term_a = Term::from_field_text(text_field, s);
index searcher.doc_freq(&term_a)
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap()
.searcher()
.doc_freq(&term_a)
}; };
assert_eq!(num_docs_containing("a"), 0); assert_eq!(num_docs_containing("a"), 0);
assert_eq!(num_docs_containing("b"), 100); assert_eq!(num_docs_containing("b"), 100);
@@ -1016,9 +860,9 @@ mod tests {
#[test] #[test]
fn test_hashmap_size() { fn test_hashmap_size() {
assert_eq!(initial_table_size(100_000), 11); assert_eq!(initial_table_size(100_000), 12);
assert_eq!(initial_table_size(1_000_000), 14); assert_eq!(initial_table_size(1_000_000), 15);
assert_eq!(initial_table_size(10_000_000), 17); assert_eq!(initial_table_size(10_000_000), 18);
assert_eq!(initial_table_size(1_000_000_000), 19); assert_eq!(initial_table_size(1_000_000_000), 19);
} }
@@ -1040,9 +884,11 @@ mod tests {
index_writer.add_document(doc!(text_field => "b")); index_writer.add_document(doc!(text_field => "b"));
} }
assert!(index_writer.commit().is_err()); assert!(index_writer.commit().is_err());
index.load_searchers().unwrap();
let num_docs_containing = |s: &str| { let num_docs_containing = |s: &str| {
let searcher = index.searcher();
let term_a = Term::from_field_text(text_field, s); let term_a = Term::from_field_text(text_field, s);
index.reader().unwrap().searcher().doc_freq(&term_a) searcher.doc_freq(&term_a)
}; };
assert_eq!(num_docs_containing("a"), 100); assert_eq!(num_docs_containing("a"), 100);
assert_eq!(num_docs_containing("b"), 0); assert_eq!(num_docs_containing("b"), 0);

View File

@@ -194,17 +194,17 @@ impl IndexMerger {
fast_field_serializer, fast_field_serializer,
)?; )?;
} }
FieldType::U64(ref options) FieldType::U64(ref options) | FieldType::I64(ref options) => {
| FieldType::I64(ref options) match options.get_fastfield_cardinality() {
| FieldType::Date(ref options) => match options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => {
Some(Cardinality::SingleValue) => { self.write_single_fast_field(field, fast_field_serializer)?;
self.write_single_fast_field(field, fast_field_serializer)?; }
Some(Cardinality::MultiValues) => {
self.write_multi_fast_field(field, fast_field_serializer)?;
}
None => {}
} }
Some(Cardinality::MultiValues) => { }
self.write_multi_fast_field(field, fast_field_serializer)?;
}
None => {}
},
FieldType::Str(_) => { FieldType::Str(_) => {
// We don't handle str fast field for the moment // We don't handle str fast field for the moment
// They can be implemented using what is done // They can be implemented using what is done
@@ -654,7 +654,7 @@ mod tests {
use schema::IntOptions; use schema::IntOptions;
use schema::Term; use schema::Term;
use schema::TextFieldIndexing; use schema::TextFieldIndexing;
use schema::INDEXED; use schema::INT_INDEXED;
use std::io::Cursor; use std::io::Cursor;
use DocAddress; use DocAddress;
use IndexWriter; use IndexWriter;
@@ -671,13 +671,11 @@ mod tests {
) )
.set_stored(); .set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype); let text_field = schema_builder.add_text_field("text", text_fieldtype);
let date_field = schema_builder.add_date_field("date", INDEXED);
let score_fieldtype = schema::IntOptions::default().set_fast(Cardinality::SingleValue); let score_fieldtype = schema::IntOptions::default().set_fast(Cardinality::SingleValue);
let score_field = schema_builder.add_u64_field("score", score_fieldtype); let score_field = schema_builder.add_u64_field("score", score_fieldtype);
let bytes_score_field = schema_builder.add_bytes_field("score_bytes"); let bytes_score_field = schema_builder.add_bytes_field("score_bytes");
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
let curr_time = chrono::Utc::now();
let add_score_bytes = |doc: &mut Document, score: u32| { let add_score_bytes = |doc: &mut Document, score: u32| {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
bytes bytes
@@ -694,7 +692,6 @@ mod tests {
let mut doc = Document::default(); let mut doc = Document::default();
doc.add_text(text_field, "af b"); doc.add_text(text_field, "af b");
doc.add_u64(score_field, 3); doc.add_u64(score_field, 3);
doc.add_date(date_field, &curr_time);
add_score_bytes(&mut doc, 3); add_score_bytes(&mut doc, 3);
index_writer.add_document(doc); index_writer.add_document(doc);
} }
@@ -720,7 +717,6 @@ mod tests {
{ {
let mut doc = Document::default(); let mut doc = Document::default();
doc.add_text(text_field, "af b"); doc.add_text(text_field, "af b");
doc.add_date(date_field, &curr_time);
doc.add_u64(score_field, 11); doc.add_u64(score_field, 11);
add_score_bytes(&mut doc, 11); add_score_bytes(&mut doc, 11);
index_writer.add_document(doc); index_writer.add_document(doc);
@@ -748,8 +744,8 @@ mod tests {
index_writer.wait_merging_threads().unwrap(); index_writer.wait_merging_threads().unwrap();
} }
{ {
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| { let get_doc_ids = |terms: Vec<Term>| {
let query = BooleanQuery::new_multiterms_query(terms); let query = BooleanQuery::new_multiterms_query(terms);
let top_docs = searcher.search(&query, &TestCollector).unwrap(); let top_docs = searcher.search(&query, &TestCollector).unwrap();
@@ -778,10 +774,6 @@ mod tests {
DocAddress(0, 4) DocAddress(0, 4)
] ]
); );
assert_eq!(
get_doc_ids(vec![Term::from_field_date(date_field, &curr_time)]),
vec![DocAddress(0, 0), DocAddress(0, 3)]
);
} }
{ {
let doc = searcher.doc(DocAddress(0, 0)).unwrap(); let doc = searcher.doc(DocAddress(0, 0)).unwrap();
@@ -845,7 +837,7 @@ mod tests {
let bytes_score_field = schema_builder.add_bytes_field("score_bytes"); let bytes_score_field = schema_builder.add_bytes_field("score_bytes");
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let reader = index.reader().unwrap();
let search_term = |searcher: &Searcher, term: Term| { let search_term = |searcher: &Searcher, term: Term| {
let collector = FastFieldTestCollector::for_field(score_field); let collector = FastFieldTestCollector::for_field(score_field);
let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field); let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field);
@@ -882,8 +874,8 @@ mod tests {
bytes_score_field => vec![0u8, 0, 0, 3], bytes_score_field => vec![0u8, 0, 0, 3],
)); ));
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let ref searcher = *index.searcher();
assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].max_doc(), 3); assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
@@ -929,8 +921,8 @@ mod tests {
bytes_score_field => vec![0u8, 0, 27, 88], bytes_score_field => vec![0u8, 0, 27, 88],
)); ));
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.segment_readers().len(), 2); assert_eq!(searcher.segment_readers().len(), 2);
assert_eq!(searcher.num_docs(), 3); assert_eq!(searcher.num_docs(), 3);
@@ -991,8 +983,8 @@ mod tests {
.expect("Failed to initiate merge") .expect("Failed to initiate merge")
.wait() .wait()
.expect("Merging failed"); .expect("Merging failed");
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.segment_readers().len(), 1); assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 3); assert_eq!(searcher.num_docs(), 3);
assert_eq!(searcher.segment_readers()[0].num_docs(), 3); assert_eq!(searcher.segment_readers()[0].num_docs(), 3);
@@ -1037,8 +1029,8 @@ mod tests {
index_writer.delete_term(Term::from_field_text(text_field, "c")); index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.segment_readers().len(), 1); assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
@@ -1088,9 +1080,9 @@ mod tests {
.expect("Failed to initiate merge") .expect("Failed to initiate merge")
.wait() .wait()
.expect("Merging failed"); .expect("Merging failed");
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1); assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.num_docs(), 2);
assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
@@ -1138,9 +1130,9 @@ mod tests {
let segment_ids = index let segment_ids = index
.searchable_segment_ids() .searchable_segment_ids()
.expect("Searchable segments failed."); .expect("Searchable segments failed.");
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let ref searcher = *index.searcher();
assert!(segment_ids.is_empty()); assert!(segment_ids.is_empty());
assert!(searcher.segment_readers().is_empty()); assert!(searcher.segment_readers().is_empty());
assert_eq!(searcher.num_docs(), 0); assert_eq!(searcher.num_docs(), 0);
@@ -1152,7 +1144,6 @@ mod tests {
let mut schema_builder = schema::Schema::builder(); let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet"); let facet_field = schema_builder.add_facet_field("facet");
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
{ {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| { let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| {
@@ -1182,9 +1173,9 @@ mod tests {
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
} }
reader.reload().unwrap(); index.load_searchers().unwrap();
let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| { let test_searcher = |expected_num_docs: usize, expected: &[(&str, u64)]| {
let searcher = reader.searcher(); let searcher = index.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field); let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet(Facet::from("/top")); facet_collector.add_facet(Facet::from("/top"));
let (count, facet_counts) = searcher let (count, facet_counts) = searcher
@@ -1226,7 +1217,7 @@ mod tests {
.wait() .wait()
.expect("Merging failed"); .expect("Merging failed");
index_writer.wait_merging_threads().unwrap(); index_writer.wait_merging_threads().unwrap();
reader.reload().unwrap(); index.load_searchers().unwrap();
test_searcher( test_searcher(
11, 11,
&[ &[
@@ -1247,7 +1238,7 @@ mod tests {
let facet_term = Term::from_facet(facet_field, &facet); let facet_term = Term::from_facet(facet_field, &facet);
index_writer.delete_term(facet_term); index_writer.delete_term(facet_term);
index_writer.commit().unwrap(); index_writer.commit().unwrap();
reader.reload().unwrap(); index.load_searchers().unwrap();
test_searcher( test_searcher(
9, 9,
&[ &[
@@ -1265,15 +1256,15 @@ mod tests {
#[test] #[test]
fn test_bug_merge() { fn test_bug_merge() {
let mut schema_builder = schema::Schema::builder(); let mut schema_builder = schema::Schema::builder();
let int_field = schema_builder.add_u64_field("intvals", INDEXED); let int_field = schema_builder.add_u64_field("intvals", INT_INDEXED);
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(int_field => 1u64)); index_writer.add_document(doc!(int_field => 1u64));
index_writer.commit().expect("commit failed"); index_writer.commit().expect("commit failed");
index_writer.add_document(doc!(int_field => 1u64)); index_writer.add_document(doc!(int_field => 1u64));
index_writer.commit().expect("commit failed"); index_writer.commit().expect("commit failed");
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.num_docs(), 2);
index_writer.delete_term(Term::from_field_u64(int_field, 1)); index_writer.delete_term(Term::from_field_u64(int_field, 1));
let segment_ids = index let segment_ids = index
@@ -1284,10 +1275,10 @@ mod tests {
.expect("Failed to initiate merge") .expect("Failed to initiate merge")
.wait() .wait()
.expect("Merging failed"); .expect("Merging failed");
reader.reload().unwrap(); index.load_searchers().unwrap();
// commit has not been called yet. The document should still be // commit has not been called yet. The document should still be
// there. // there.
assert_eq!(reader.searcher().num_docs(), 2); assert_eq!(index.searcher().num_docs(), 2);
} }
#[test] #[test]
@@ -1298,7 +1289,7 @@ mod tests {
.set_indexed(); .set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options); let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build()); let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
{ {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let mut doc = Document::default(); let mut doc = Document::default();
@@ -1319,8 +1310,8 @@ mod tests {
.expect("Merging failed"); .expect("Merging failed");
// assert delete has not been committed // assert delete has not been committed
reader.reload().expect("failed to load searcher 1"); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.num_docs(), 2);
index_writer.commit().unwrap(); index_writer.commit().unwrap();
@@ -1328,13 +1319,13 @@ mod tests {
index_writer.wait_merging_threads().unwrap(); index_writer.wait_merging_threads().unwrap();
} }
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 0); assert_eq!(searcher.num_docs(), 0);
} }
#[test] #[test]
fn test_merge_multivalued_int_fields_simple() { fn test_merge_multivalued_int_fields() {
let mut schema_builder = schema::Schema::builder(); let mut schema_builder = schema::Schema::builder();
let int_options = IntOptions::default() let int_options = IntOptions::default()
.set_fast(Cardinality::MultiValues) .set_fast(Cardinality::MultiValues)
@@ -1351,6 +1342,7 @@ mod tests {
} }
index_writer.add_document(doc); index_writer.add_document(doc);
}; };
index_doc(&mut index_writer, &[1, 2]); index_doc(&mut index_writer, &[1, 2]);
index_doc(&mut index_writer, &[1, 2, 3]); index_doc(&mut index_writer, &[1, 2, 3]);
index_doc(&mut index_writer, &[4, 5]); index_doc(&mut index_writer, &[4, 5]);
@@ -1359,14 +1351,19 @@ mod tests {
index_doc(&mut index_writer, &[3]); index_doc(&mut index_writer, &[3]);
index_doc(&mut index_writer, &[17]); index_doc(&mut index_writer, &[17]);
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
index_doc(&mut index_writer, &[20]); index_doc(&mut index_writer, &[20]);
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
index_doc(&mut index_writer, &[28, 27]); index_doc(&mut index_writer, &[28, 27]);
index_doc(&mut index_writer, &[1_000]); index_doc(&mut index_writer, &[1_000]);
index_writer.commit().expect("committed"); index_writer.commit().expect("committed");
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher();
let searcher = index.searcher();
let mut vals: Vec<u64> = Vec::new(); let mut vals: Vec<u64> = Vec::new();
{ {
@@ -1432,14 +1429,13 @@ mod tests {
.expect("Failed to initiate merge") .expect("Failed to initiate merge")
.wait() .wait()
.expect("Merging failed"); .expect("Merging failed");
index_writer index_writer.wait_merging_threads().unwrap();
.wait_merging_threads()
.expect("Wait for merging threads");
} }
reader.reload().expect("Load searcher");
index.load_searchers().unwrap();
{ {
let searcher = reader.searcher(); let searcher = index.searcher();
println!( println!(
"{:?}", "{:?}",
searcher searcher

View File

@@ -14,10 +14,3 @@ pub struct AddOperation {
pub opstamp: u64, pub opstamp: u64,
pub document: Document, pub document: Document,
} }
/// UserOperation is an enum type that encapsulates other operation types.
#[derive(Eq, PartialEq, Debug)]
pub enum UserOperation {
Add(Document),
Delete(Term),
}

View File

@@ -62,7 +62,7 @@ pub fn save_new_metas(schema: Schema, directory: &mut Directory) -> Result<()> {
/// Save the index meta file. /// Save the index meta file.
/// This operation is atomic: /// This operation is atomic:
/// Either /// Either
/// - it fails, in which case an error is returned, // - it fails, in which case an error is returned,
/// and the `meta.json` remains untouched, /// and the `meta.json` remains untouched,
/// - it succeeds, and `meta.json` is written /// - it succeeds, and `meta.json` is written
/// and flushed. /// and flushed.
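The comment above promises all-or-nothing semantics for `meta.json`. As background, here is a generic sketch of the usual write-to-a-temp-file-then-rename pattern that yields such a guarantee; it is illustrative only and not the code behind `save_metas`.

use std::fs;
use std::io::{self, Write};
use std::path::Path;

fn atomic_save(path: &Path, bytes: &[u8]) -> io::Result<()> {
    let tmp = path.with_extension("tmp");
    {
        let mut file = fs::File::create(&tmp)?;
        file.write_all(bytes)?;
        file.sync_all()?; // flush the data before making it visible
    }
    // On POSIX filesystems, renaming over an existing file is atomic,
    // so readers see either the old meta.json or the new one, never a torn write.
    fs::rename(&tmp, path)
}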
@@ -565,8 +565,9 @@ mod tests {
index_writer.delete_term(term); index_writer.delete_term(term);
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let reader = index.reader().unwrap();
assert_eq!(reader.searcher().num_docs(), 302); index.load_searchers().unwrap();
assert_eq!(index.searcher().num_docs(), 302);
{ {
index_writer index_writer
@@ -574,9 +575,9 @@ mod tests {
.expect("waiting for merging threads"); .expect("waiting for merging threads");
} }
reader.reload().unwrap(); index.load_searchers().unwrap();
assert_eq!(reader.searcher().segment_readers().len(), 1); assert_eq!(index.searcher().segment_readers().len(), 1);
assert_eq!(reader.searcher().num_docs(), 302); assert_eq!(index.searcher().num_docs(), 302);
} }
#[test] #[test]
@@ -635,18 +636,18 @@ mod tests {
.expect("waiting for merging threads"); .expect("waiting for merging threads");
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
assert_eq!(reader.searcher().num_docs(), 0); assert_eq!(index.searcher().num_docs(), 0);
let seg_ids = index let seg_ids = index
.searchable_segment_ids() .searchable_segment_ids()
.expect("Searchable segments failed."); .expect("Searchable segments failed.");
assert!(seg_ids.is_empty()); assert!(seg_ids.is_empty());
reader.reload().unwrap(); index.load_searchers().unwrap();
assert_eq!(reader.searcher().num_docs(), 0); assert_eq!(index.searcher().num_docs(), 0);
// empty segments should be erased // empty segments should be erased
assert!(index.searchable_segment_metas().unwrap().is_empty()); assert!(index.searchable_segment_metas().unwrap().is_empty());
assert!(reader.searcher().segment_readers().is_empty()); assert!(index.searcher().segment_readers().is_empty());
} }
} }

View File

@@ -171,17 +171,6 @@ impl SegmentWriter {
} }
} }
} }
FieldType::Date(ref int_option) => {
if int_option.is_indexed() {
for field_value in field_values {
let term = Term::from_field_i64(
field_value.field(),
field_value.value().date_value().timestamp(),
);
self.multifield_postings.subscribe(doc_id, &term);
}
}
}
FieldType::I64(ref int_option) => { FieldType::I64(ref int_option) => {
if int_option.is_indexed() { if int_option.is_indexed() {
for field_value in field_values { for field_value in field_values {

View File

@@ -1,4 +1,3 @@
use std::ops::Range;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::sync::Arc; use std::sync::Arc;
@@ -61,16 +60,6 @@ impl Stamper {
pub fn stamp(&self) -> u64 { pub fn stamp(&self) -> u64 {
self.0.fetch_add(1u64, Ordering::SeqCst) as u64 self.0.fetch_add(1u64, Ordering::SeqCst) as u64
} }
/// Given a desired count `n`, `stamps` returns an iterator that
/// will supply `n` number of u64 stamps.
pub fn stamps(&self, n: u64) -> Range<u64> {
let start = self.0.fetch_add(n, Ordering::SeqCst);
Range {
start,
end: start + n,
}
}
} }
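The `stamps` method shown above reserves a block of opstamps with a single atomic increment. A minimal sketch of that pattern, assuming an `AtomicU64` counter (illustrative only, not the `Stamper` type itself):

use std::ops::Range;
use std::sync::atomic::{AtomicU64, Ordering};

fn reserve_stamps(counter: &AtomicU64, n: u64) -> Range<u64> {
    // fetch_add returns the previous value, so the reserved range stays
    // contiguous even when several threads reserve stamps concurrently.
    let start = counter.fetch_add(n, Ordering::SeqCst);
    start..start + n
}

fn main() {
    let counter = AtomicU64::new(10);
    assert_eq!(reserve_stamps(&counter, 3), 10..13);
    assert_eq!(counter.load(Ordering::SeqCst), 13);
}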
#[cfg(test)] #[cfg(test)]
@@ -89,7 +78,5 @@ mod test {
assert_eq!(stamper.stamp(), 10u64); assert_eq!(stamper.stamp(), 10u64);
assert_eq!(stamper_clone.stamp(), 11u64); assert_eq!(stamper_clone.stamp(), 11u64);
assert_eq!(stamper.stamps(3u64), (12..15));
assert_eq!(stamper.stamp(), 15u64);
} }
} }

View File

@@ -75,9 +75,9 @@
//! //!
//! // # Searching //! // # Searching
//! //!
//! let reader = index.reader()?; //! index.load_searchers()?;
//! //!
//! let searcher = reader.searcher(); //! let searcher = index.searcher();
//! //!
//! let query_parser = QueryParser::for_index(&index, vec![title, body]); //! let query_parser = QueryParser::for_index(&index, vec![title, body]);
//! //!
@@ -132,13 +132,13 @@ extern crate byteorder;
extern crate combine; extern crate combine;
extern crate crossbeam; extern crate crossbeam;
extern crate fnv; extern crate fnv;
extern crate fst;
extern crate fst_regex;
extern crate futures; extern crate futures;
extern crate futures_cpupool; extern crate futures_cpupool;
extern crate htmlescape; extern crate htmlescape;
extern crate itertools; extern crate itertools;
extern crate levenshtein_automata; extern crate levenshtein_automata;
#[cfg(feature = "mmap")]
extern crate memmap;
extern crate num_cpus; extern crate num_cpus;
extern crate owning_ref; extern crate owning_ref;
extern crate regex; extern crate regex;
@@ -146,7 +146,6 @@ extern crate rust_stemmers;
extern crate scoped_pool; extern crate scoped_pool;
extern crate serde; extern crate serde;
extern crate stable_deref_trait; extern crate stable_deref_trait;
extern crate tantivy_fst;
extern crate tempdir; extern crate tempdir;
extern crate tempfile; extern crate tempfile;
extern crate uuid; extern crate uuid;
@@ -186,15 +185,11 @@ pub use error::TantivyError;
pub use error::TantivyError as Error; pub use error::TantivyError as Error;
extern crate census; extern crate census;
pub extern crate chrono;
extern crate owned_read; extern crate owned_read;
/// Tantivy result. /// Tantivy result.
pub type Result<T> = std::result::Result<T, error::TantivyError>; pub type Result<T> = std::result::Result<T, error::TantivyError>;
/// Tantivy DateTime
pub type DateTime = chrono::DateTime<chrono::Utc>;
mod common; mod common;
mod core; mod core;
mod indexer; mod indexer;
@@ -215,9 +210,6 @@ pub mod space_usage;
pub mod store; pub mod store;
pub mod termdict; pub mod termdict;
mod reader;
pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy};
mod snippet; mod snippet;
pub use self::snippet::{Snippet, SnippetGenerator}; pub use self::snippet::{Snippet, SnippetGenerator};
@@ -306,7 +298,6 @@ mod tests {
use Index; use Index;
use IndexWriter; use IndexWriter;
use Postings; use Postings;
use ReloadPolicy;
pub fn assert_nearly_equals(expected: f32, val: f32) { pub fn assert_nearly_equals(expected: f32, val: f32) {
assert!( assert!(
@@ -395,8 +386,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let term_a = Term::from_field_text(text_field, "a"); let term_a = Term::from_field_text(text_field, "a");
assert_eq!(searcher.doc_freq(&term_a), 3); assert_eq!(searcher.doc_freq(&term_a), 3);
let term_b = Term::from_field_text(text_field, "b"); let term_b = Term::from_field_text(text_field, "b");
@@ -423,8 +414,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
let index_reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = index_reader.searcher(); let searcher = index.searcher();
let reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
{ {
let fieldnorm_reader = reader.get_fieldnorms_reader(text_field); let fieldnorm_reader = reader.get_fieldnorms_reader(text_field);
@@ -459,8 +450,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let segment_reader: &SegmentReader = searcher.segment_reader(0); let segment_reader: &SegmentReader = searcher.segment_reader(0);
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field); let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field);
assert_eq!(fieldnorms_reader.fieldnorm(0), 3); assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
@@ -488,11 +479,6 @@ mod tests {
let term_c = Term::from_field_text(text_field, "c"); let term_c = Term::from_field_text(text_field, "c");
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
{ {
// writing the segment // writing the segment
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -514,10 +500,10 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let inverted_index = segment_reader.inverted_index(text_field); let inverted_index = reader.inverted_index(text_field);
assert!(inverted_index assert!(inverted_index
.read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)
.is_none()); .is_none());
@@ -525,19 +511,19 @@ mod tests {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 5); assert_eq!(postings.doc(), 5);
assert!(!advance_undeleted(&mut postings, segment_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
{ {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3); assert_eq!(postings.doc(), 3);
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4); assert_eq!(postings.doc(), 4);
assert!(!advance_undeleted(&mut postings, segment_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
} }
{ {
@@ -550,10 +536,10 @@ mod tests {
index_writer.rollback().unwrap(); index_writer.rollback().unwrap();
} }
{ {
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let seg_reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let inverted_index = seg_reader.inverted_index(term_abcd.field()); let inverted_index = reader.inverted_index(term_abcd.field());
assert!(inverted_index assert!(inverted_index
.read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)
@@ -562,19 +548,19 @@ mod tests {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, seg_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 5); assert_eq!(postings.doc(), 5);
assert!(!advance_undeleted(&mut postings, seg_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
{ {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, seg_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3); assert_eq!(postings.doc(), 3);
assert!(advance_undeleted(&mut postings, seg_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4); assert_eq!(postings.doc(), 4);
assert!(!advance_undeleted(&mut postings, seg_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
} }
{ {
@@ -587,10 +573,10 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let inverted_index = segment_reader.inverted_index(term_abcd.field()); let inverted_index = reader.inverted_index(term_abcd.field());
assert!(inverted_index assert!(inverted_index
.read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)
.is_none()); .is_none());
@@ -598,25 +584,25 @@ mod tests {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(!advance_undeleted(&mut postings, segment_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
{ {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3); assert_eq!(postings.doc(), 3);
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4); assert_eq!(postings.doc(), 4);
assert!(!advance_undeleted(&mut postings, segment_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
{ {
let mut postings = inverted_index let mut postings = inverted_index
.read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions) .read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions)
.unwrap(); .unwrap();
assert!(advance_undeleted(&mut postings, segment_reader)); assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4); assert_eq!(postings.doc(), 4);
assert!(!advance_undeleted(&mut postings, segment_reader)); assert!(!advance_undeleted(&mut postings, reader));
} }
} }
} }
@@ -624,15 +610,15 @@ mod tests {
#[test] #[test]
fn test_indexed_u64() { fn test_indexed_u64() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field("value", INDEXED); let field = schema_builder.add_u64_field("value", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(field=>1u64)); index_writer.add_document(doc!(field=>1u64));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let term = Term::from_field_u64(field, 1u64); let term = Term::from_field_u64(field, 1u64);
let mut postings = searcher let mut postings = searcher
.segment_reader(0) .segment_reader(0)
@@ -647,7 +633,7 @@ mod tests {
#[test] #[test]
fn test_indexed_i64() { fn test_indexed_i64() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let value_field = schema_builder.add_i64_field("value", INDEXED); let value_field = schema_builder.add_i64_field("value", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
@@ -655,8 +641,8 @@ mod tests {
let negative_val = -1i64; let negative_val = -1i64;
index_writer.add_document(doc!(value_field => negative_val)); index_writer.add_document(doc!(value_field => negative_val));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let term = Term::from_field_i64(value_field, negative_val); let term = Term::from_field_i64(value_field, negative_val);
let mut postings = searcher let mut postings = searcher
.segment_reader(0) .segment_reader(0)
@@ -678,8 +664,8 @@ mod tests {
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"a")); index_writer.add_document(doc!(text_field=>"a"));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
let reader = index.reader().unwrap(); assert!(index.load_searchers().is_ok());
let searcher = reader.searcher(); let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
segment_reader.inverted_index(absent_field); //< should not panic segment_reader.inverted_index(absent_field); //< should not panic
} }
@@ -690,11 +676,6 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT); let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
// writing the segment // writing the segment
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
@@ -720,8 +701,8 @@ mod tests {
remove_document(&mut index_writer, "38"); remove_document(&mut index_writer, "38");
remove_document(&mut index_writer, "34"); remove_document(&mut index_writer, "34");
index_writer.commit().unwrap(); index_writer.commit().unwrap();
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 6); assert_eq!(searcher.num_docs(), 6);
} }
@@ -741,8 +722,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
let index_reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = index_reader.searcher(); let searcher = index.searcher();
let reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);
let inverted_index = reader.inverted_index(text_field); let inverted_index = reader.inverted_index(text_field);
let term_abcd = Term::from_field_text(text_field, "abcd"); let term_abcd = Term::from_field_text(text_field, "abcd");
@@ -766,7 +747,7 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT); let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let reader = index.reader().unwrap();
{ {
// writing the segment // writing the segment
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -776,8 +757,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
{ {
reader.reload().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| { let get_doc_ids = |terms: Vec<Term>| {
let query = BooleanQuery::new_multiterms_query(terms); let query = BooleanQuery::new_multiterms_query(terms);
let topdocs = searcher.search(&query, &TestCollector).unwrap(); let topdocs = searcher.search(&query, &TestCollector).unwrap();
@@ -819,22 +800,25 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT); let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0u64);
{ {
// writing the segment // writing the segment
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"af b")); {
index_writer.add_document(doc!(text_field=>"a b c")); let doc = doc!(text_field=>"af b");
index_writer.add_document(doc!(text_field=>"a b c d")); index_writer.add_document(doc);
}
{
let doc = doc!(text_field=>"a b c");
index_writer.add_document(doc);
}
{
let doc = doc!(text_field=>"a b c d");
index_writer.add_document(doc);
}
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
reader.reload().unwrap(); index.searcher();
assert_eq!(reader.searcher().num_docs(), 3u64);
} }
#[test] #[test]
@@ -861,7 +845,7 @@ mod tests {
let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST); let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
let fast_field_signed = schema_builder.add_i64_field("signed", FAST); let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
let text_field = schema_builder.add_text_field("text", TEXT); let text_field = schema_builder.add_text_field("text", TEXT);
let stored_int_field = schema_builder.add_u64_field("text", STORED); let stored_int_field = schema_builder.add_u64_field("text", INT_STORED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
@@ -871,8 +855,9 @@ mod tests {
index_writer.add_document(document); index_writer.add_document(document);
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap();
let searcher = reader.searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader: &SegmentReader = searcher.segment_reader(0); let segment_reader: &SegmentReader = searcher.segment_reader(0);
{ {
let fast_field_reader_res = segment_reader.fast_field_reader::<u64>(text_field); let fast_field_reader_res = segment_reader.fast_field_reader::<u64>(text_field);

View File

@@ -61,7 +61,7 @@ macro_rules! doc(
}; };
// if there is a trailing comma retry with the trailing comma stripped. // if there is a trailing comma retry with the trailing comma stripped.
($($field:expr => $value:expr),+ ,) => { ($($field:expr => $value:expr),+ ,) => {
doc!( $( $field => $value ), *) doc!( $( $field => $value ), *);
}; };
); );
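The only difference in this hunk is a trailing semicolon in the trailing-comma arm of `doc!`. A minimal sketch of the two call forms that arm is meant to support; the schema and field name are illustrative assumptions, not taken from the diff:

#[macro_use]
extern crate tantivy;
use tantivy::schema::{Schema, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let _schema = schema_builder.build();
    // Both invocations should produce equivalent documents;
    // the second one exercises the trailing-comma arm patched above.
    let _without_comma = doc!(title => "Of Mice and Men");
    let _with_comma = doc!(title => "Of Mice and Men",);
}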

View File

@@ -34,6 +34,10 @@ const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN;
const LONG_SKIP_IN_BLOCKS: usize = 1_024; const LONG_SKIP_IN_BLOCKS: usize = 1_024;
const LONG_SKIP_INTERVAL: u64 = (LONG_SKIP_IN_BLOCKS * COMPRESSION_BLOCK_SIZE) as u64; const LONG_SKIP_INTERVAL: u64 = (LONG_SKIP_IN_BLOCKS * COMPRESSION_BLOCK_SIZE) as u64;
lazy_static! {
static ref BIT_PACKER: BitPacker4x = BitPacker4x::new();
}
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {

View File

@@ -1,23 +1,4 @@
/// Positions works as a long sequence of compressed blocks. use super::BIT_PACKER;
/// All terms are chained one after the other.
///
/// When accessing the position of a term, we get a positions_idx from the `Terminfo`.
/// This means we need to skip to the `nth` positions efficiently.
///
/// This is done thanks to two levels of skipping that we refer to in the code
/// as `long_skip` and `short_skip`.
///
/// The `long_skip` makes it possible to skip every 1_024 compression blocks (= 131_072 positions).
/// Skip offsets are simply stored one after another, each as an 8-byte offset.
///
/// We find the number of long skips as `n / long_skip`.
///
/// Blocks are compressed using bitpacking, so `skip_read` contains, for every block, the number
/// of bits per value (from 0 to 32 bits) required to decompress it.
///
/// A given block obviously takes `(128 x num_bit_for_the_block / num_bits_in_a_byte)`,
/// so skipping a block without decompressing it is just a matter of advancing that many
/// bytes.
use bitpacking::{BitPacker, BitPacker4x}; use bitpacking::{BitPacker, BitPacker4x};
use common::{BinarySerializable, FixedSize}; use common::{BinarySerializable, FixedSize};
use directory::ReadOnlySource; use directory::ReadOnlySource;
@@ -27,65 +8,9 @@ use positions::LONG_SKIP_INTERVAL;
use positions::LONG_SKIP_IN_BLOCKS; use positions::LONG_SKIP_IN_BLOCKS;
use postings::compression::compressed_block_size; use postings::compression::compressed_block_size;
struct Positions {
bit_packer: BitPacker4x,
skip_source: ReadOnlySource,
position_source: ReadOnlySource,
long_skip_source: ReadOnlySource,
}
impl Positions {
pub fn new(position_source: ReadOnlySource, skip_source: ReadOnlySource) -> Positions {
let skip_len = skip_source.len();
let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES);
let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted");
let body_split = body.len() - u64::SIZE_IN_BYTES * (num_long_skips as usize);
let (skip_source, long_skip_source) = body.split(body_split);
Positions {
bit_packer: BitPacker4x::new(),
skip_source,
long_skip_source,
position_source,
}
}
/// Returns the offset of the block associated with the given `long_skip_id`.
///
/// One `long_skip_id` means `LONG_SKIP_IN_BLOCKS` blocks.
fn long_skip(&self, long_skip_id: usize) -> u64 {
if long_skip_id == 0 {
return 0;
}
let long_skip_slice = self.long_skip_source.as_slice();
let mut long_skip_blocks: &[u8] = &long_skip_slice[(long_skip_id - 1) * 8..][..8];
u64::deserialize(&mut long_skip_blocks).expect("Index corrupted")
}
fn reader(&self, offset: u64) -> PositionReader {
let long_skip_id = (offset / LONG_SKIP_INTERVAL) as usize;
let small_skip = (offset % LONG_SKIP_INTERVAL) as usize;
let offset_num_bytes: u64 = self.long_skip(long_skip_id);
let mut position_read = OwnedRead::new(self.position_source.clone());
position_read.advance(offset_num_bytes as usize);
let mut skip_read = OwnedRead::new(self.skip_source.clone());
skip_read.advance(long_skip_id * LONG_SKIP_IN_BLOCKS);
let mut position_reader = PositionReader {
bit_packer: self.bit_packer,
skip_read,
position_read,
inner_offset: 0,
buffer: Box::new([0u32; 128]),
ahead: None,
};
position_reader.skip(small_skip);
position_reader
}
}
pub struct PositionReader { pub struct PositionReader {
skip_read: OwnedRead, skip_read: OwnedRead,
position_read: OwnedRead, position_read: OwnedRead,
bit_packer: BitPacker4x,
inner_offset: usize, inner_offset: usize,
buffer: Box<[u32; 128]>, buffer: Box<[u32; 128]>,
ahead: Option<usize>, // if None, no block is loaded. ahead: Option<usize>, // if None, no block is loaded.
@@ -102,7 +27,6 @@ pub struct PositionReader {
// If the requested number of els ends exactly at a given block, the next // If the requested number of els ends exactly at a given block, the next
// block is not decompressed. // block is not decompressed.
fn read_impl( fn read_impl(
bit_packer: BitPacker4x,
mut position: &[u8], mut position: &[u8],
buffer: &mut [u32; 128], buffer: &mut [u32; 128],
mut inner_offset: usize, mut inner_offset: usize,
@@ -113,23 +37,21 @@ fn read_impl(
let mut output_len = output.len(); let mut output_len = output.len();
let mut ahead = 0; let mut ahead = 0;
loop { loop {
let available_len = COMPRESSION_BLOCK_SIZE - inner_offset; let available_len = 128 - inner_offset;
// We have enough elements in the current block.
// Let's copy the requested elements in the output buffer,
// and return.
if output_len <= available_len { if output_len <= available_len {
output[output_start..].copy_from_slice(&buffer[inner_offset..][..output_len]); output[output_start..].copy_from_slice(&buffer[inner_offset..][..output_len]);
return ahead; return ahead;
} else {
output[output_start..][..available_len].copy_from_slice(&buffer[inner_offset..]);
output_len -= available_len;
output_start += available_len;
inner_offset = 0;
let num_bits = num_bits[ahead];
BitPacker4x::new().decompress(position, &mut buffer[..], num_bits);
let block_len = compressed_block_size(num_bits);
position = &position[block_len..];
ahead += 1;
} }
output[output_start..][..available_len].copy_from_slice(&buffer[inner_offset..]);
output_len -= available_len;
output_start += available_len;
inner_offset = 0;
let num_bits = num_bits[ahead];
bit_packer.decompress(position, &mut buffer[..], num_bits);
let block_len = compressed_block_size(num_bits);
position = &position[block_len..];
ahead += 1;
} }
} }
@@ -139,7 +61,35 @@ impl PositionReader {
skip_source: ReadOnlySource, skip_source: ReadOnlySource,
offset: u64, offset: u64,
) -> PositionReader { ) -> PositionReader {
Positions::new(position_source, skip_source).reader(offset) let skip_len = skip_source.len();
let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES);
let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted");
let body_split = body.len() - u64::SIZE_IN_BYTES * (num_long_skips as usize);
let (skip_body, long_skips) = body.split(body_split);
let long_skip_id = (offset / LONG_SKIP_INTERVAL) as usize;
let small_skip = (offset - (long_skip_id as u64) * (LONG_SKIP_INTERVAL as u64)) as usize;
let offset_num_bytes: u64 = {
if long_skip_id > 0 {
let mut long_skip_blocks: &[u8] =
&long_skips.as_slice()[(long_skip_id - 1) * 8..][..8];
u64::deserialize(&mut long_skip_blocks).expect("Index corrupted") * 16
} else {
0
}
};
let mut position_read = OwnedRead::new(position_source);
position_read.advance(offset_num_bytes as usize);
let mut skip_read = OwnedRead::new(skip_body);
skip_read.advance(long_skip_id * LONG_SKIP_IN_BLOCKS);
let mut position_reader = PositionReader {
skip_read,
position_read,
inner_offset: 0,
buffer: Box::new([0u32; 128]),
ahead: None,
};
position_reader.skip(small_skip);
position_reader
} }
/// Fills a buffer with the next `output.len()` integers. /// Fills a buffer with the next `output.len()` integers.
@@ -151,13 +101,10 @@ impl PositionReader {
if self.ahead != Some(0) { if self.ahead != Some(0) {
// the block currently available is not the block // the block currently available is not the block
// for the current position // for the current position
self.bit_packer BIT_PACKER.decompress(position_data, self.buffer.as_mut(), num_bits);
.decompress(position_data, self.buffer.as_mut(), num_bits);
self.ahead = Some(0);
} }
let block_len = compressed_block_size(num_bits); let block_len = compressed_block_size(num_bits);
self.ahead = Some(read_impl( self.ahead = Some(read_impl(
self.bit_packer,
&position_data[block_len..], &position_data[block_len..],
self.buffer.as_mut(), self.buffer.as_mut(),
self.inner_offset, self.inner_offset,
@@ -186,13 +133,14 @@ impl PositionReader {
} }
}); });
let skip_len_in_bits = self.skip_read.as_ref()[..num_blocks_to_advance] let skip_len = self.skip_read.as_ref()[..num_blocks_to_advance]
.iter() .iter()
.map(|num_bits| *num_bits as usize) .cloned()
.map(|num_bit| num_bit as usize)
.sum::<usize>() .sum::<usize>()
* COMPRESSION_BLOCK_SIZE; * (COMPRESSION_BLOCK_SIZE / 8);
let skip_len_in_bytes = skip_len_in_bits / 8;
self.skip_read.advance(num_blocks_to_advance); self.skip_read.advance(num_blocks_to_advance);
self.position_read.advance(skip_len_in_bytes); self.position_read.advance(skip_len);
} }
} }
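To make the two-level scheme from the module comment concrete, here is a small standalone sketch of the offset arithmetic both versions of `PositionReader` rely on. The constants mirror the ones defined a couple of hunks above; the helper names are illustrative, not tantivy API:

// Constants as defined above: 128-value blocks, one long skip every 1_024 blocks.
const COMPRESSION_BLOCK_SIZE: usize = 128;
const LONG_SKIP_IN_BLOCKS: usize = 1_024;
const LONG_SKIP_INTERVAL: u64 = (LONG_SKIP_IN_BLOCKS * COMPRESSION_BLOCK_SIZE) as u64;

// Split an absolute position offset into a long-skip entry id and the number of
// positions still to be skipped block by block (the "short skip").
fn locate(offset: u64) -> (usize, usize) {
    let long_skip_id = (offset / LONG_SKIP_INTERVAL) as usize;
    let small_skip = (offset % LONG_SKIP_INTERVAL) as usize;
    (long_skip_id, small_skip)
}

// A bitpacked block of 128 values with `num_bits` bits per value takes this many bytes,
// which is what the short skip advances in the positions stream without decompressing.
fn compressed_block_size(num_bits: u8) -> usize {
    COMPRESSION_BLOCK_SIZE * num_bits as usize / 8
}

fn main() {
    assert_eq!(locate(131_072 + 5), (1, 5));
    assert_eq!(compressed_block_size(7), 112);
}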

View File

@@ -1,30 +1,29 @@
use super::BIT_PACKER;
use bitpacking::BitPacker; use bitpacking::BitPacker;
use bitpacking::BitPacker4x;
use common::BinarySerializable; use common::BinarySerializable;
use common::CountingWriter;
use positions::{COMPRESSION_BLOCK_SIZE, LONG_SKIP_INTERVAL}; use positions::{COMPRESSION_BLOCK_SIZE, LONG_SKIP_INTERVAL};
use std::io::{self, Write}; use std::io;
pub struct PositionSerializer<W: io::Write> { pub struct PositionSerializer<W: io::Write> {
bit_packer: BitPacker4x, write_stream: W,
write_stream: CountingWriter<W>,
write_skiplist: W, write_skiplist: W,
block: Vec<u32>, block: Vec<u32>,
buffer: Vec<u8>, buffer: Vec<u8>,
num_ints: u64, num_ints: u64,
long_skips: Vec<u64>, long_skips: Vec<u64>,
cumulated_num_bits: u64,
} }
impl<W: io::Write> PositionSerializer<W> { impl<W: io::Write> PositionSerializer<W> {
pub fn new(write_stream: W, write_skiplist: W) -> PositionSerializer<W> { pub fn new(write_stream: W, write_skiplist: W) -> PositionSerializer<W> {
PositionSerializer { PositionSerializer {
bit_packer: BitPacker4x::new(), write_stream,
write_stream: CountingWriter::wrap(write_stream),
write_skiplist, write_skiplist,
block: Vec::with_capacity(128), block: Vec::with_capacity(128),
buffer: vec![0u8; 128 * 4], buffer: vec![0u8; 128 * 4],
num_ints: 0u64, num_ints: 0u64,
long_skips: Vec::new(), long_skips: Vec::new(),
cumulated_num_bits: 0u64,
} }
} }
@@ -51,15 +50,14 @@ impl<W: io::Write> PositionSerializer<W> {
} }
fn flush_block(&mut self) -> io::Result<()> { fn flush_block(&mut self) -> io::Result<()> {
let num_bits = self.bit_packer.num_bits(&self.block[..]); let num_bits = BIT_PACKER.num_bits(&self.block[..]);
self.cumulated_num_bits += u64::from(num_bits);
self.write_skiplist.write_all(&[num_bits])?; self.write_skiplist.write_all(&[num_bits])?;
let written_len = self let written_len = BIT_PACKER.compress(&self.block[..], &mut self.buffer, num_bits);
.bit_packer
.compress(&self.block[..], &mut self.buffer, num_bits);
self.write_stream.write_all(&self.buffer[..written_len])?; self.write_stream.write_all(&self.buffer[..written_len])?;
self.block.clear(); self.block.clear();
if (self.num_ints % LONG_SKIP_INTERVAL) == 0u64 { if (self.num_ints % LONG_SKIP_INTERVAL) == 0u64 {
self.long_skips.push(self.write_stream.written_bytes()); self.long_skips.push(self.cumulated_num_bits);
} }
Ok(()) Ok(())
} }
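Both variants of `flush_block` above reduce to one `num_bits` probe plus one `compress` call per 128-value block. A standalone sketch of that call sequence against the bitpacking crate (already a dependency of this repo; the surrounding scaffolding is illustrative):

extern crate bitpacking;
use bitpacking::{BitPacker, BitPacker4x};

fn main() {
    let bit_packer = BitPacker4x::new();
    // One positions block: BitPacker4x::BLOCK_LEN (128) values.
    let block: Vec<u32> = (0..BitPacker4x::BLOCK_LEN as u32).collect();
    // `num_bits` is the single byte flush_block writes to the skip list.
    let num_bits = bit_packer.num_bits(&block);
    let mut buffer = vec![0u8; 4 * BitPacker4x::BLOCK_LEN];
    // The compressed block goes to the positions stream; its length is
    // BLOCK_LEN * num_bits / 8 bytes, exactly what the reader skips over.
    let written_len = bit_packer.compress(&block, &mut buffer, num_bits);
    assert_eq!(written_len, BitPacker4x::BLOCK_LEN * num_bits as usize / 8);
}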

View File

@@ -31,6 +31,7 @@ pub(crate) use self::stacker::compute_table_size;
pub use common::HasLen; pub use common::HasLen;
pub(crate) const USE_SKIP_INFO_LIMIT: u32 = COMPRESSION_BLOCK_SIZE as u32; pub(crate) const USE_SKIP_INFO_LIMIT: u32 = COMPRESSION_BLOCK_SIZE as u32;
pub(crate) type UnorderedTermId = u64; pub(crate) type UnorderedTermId = u64;
#[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))] #[cfg_attr(feature = "cargo-clippy", allow(clippy::enum_variant_names))]
@@ -57,7 +58,7 @@ pub mod tests {
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use schema::Field; use schema::Field;
use schema::IndexRecordOption; use schema::IndexRecordOption;
use schema::{Document, Schema, Term, INDEXED, STRING, TEXT}; use schema::{Document, Schema, Term, INT_INDEXED, STRING, TEXT};
use std::iter; use std::iter;
use DocId; use DocId;
use Score; use Score;
@@ -100,8 +101,9 @@ pub mod tests {
} }
index_writer.add_document(doc!(title => r#"abc be be be be abc"#)); index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
index.load_searchers().unwrap();
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let inverted_index = searcher.segment_reader(0u32).inverted_index(title); let inverted_index = searcher.segment_reader(0u32).inverted_index(title);
let term = Term::from_field_text(title, "abc"); let term = Term::from_field_text(title, "abc");
@@ -291,8 +293,9 @@ pub mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
let term_a = Term::from_field_text(text_field, "a"); let term_a = Term::from_field_text(text_field, "a");
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
let mut postings = segment_reader let mut postings = segment_reader
.inverted_index(text_field) .inverted_index(text_field)
@@ -314,7 +317,7 @@ pub mod tests {
let index = { let index = {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let value_field = schema_builder.add_u64_field("value", INDEXED); let value_field = schema_builder.add_u64_field("value", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
@@ -329,9 +332,10 @@ pub mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
index index
}; };
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
// check that the basic usage works // check that the basic usage works
@@ -399,7 +403,8 @@ pub mod tests {
index_writer.delete_term(term_0); index_writer.delete_term(term_0);
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
// make sure seeking still works // make sure seeking still works
@@ -446,9 +451,12 @@ pub mod tests {
{ {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.delete_term(term_1); index_writer.delete_term(term_1);
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
// finally, check that it's empty // finally, check that it's empty
{ {
@@ -504,6 +512,7 @@ pub mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
index index
}; };
} }

View File

@@ -33,10 +33,9 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<PostingsWriter> {
} }
}) })
.unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()), .unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()),
FieldType::U64(_) FieldType::U64(_) | FieldType::I64(_) | FieldType::HierarchicalFacet => {
| FieldType::I64(_) SpecializedPostingsWriter::<NothingRecorder>::new_boxed()
| FieldType::Date(_) }
| FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(),
FieldType::Bytes => { FieldType::Bytes => {
// FieldType::Bytes cannot actually be indexed. // FieldType::Bytes cannot actually be indexed.
// TODO fix during the indexer refactoring described in #276 // TODO fix during the indexer refactoring described in #276
@@ -52,31 +51,6 @@ pub struct MultiFieldPostingsWriter {
per_field_postings_writers: Vec<Box<PostingsWriter>>, per_field_postings_writers: Vec<Box<PostingsWriter>>,
} }
fn make_field_partition(
term_offsets: &[(&[u8], Addr, UnorderedTermId)],
) -> Vec<(Field, usize, usize)> {
let term_offsets_it = term_offsets
.iter()
.map(|(key, _, _)| Term::wrap(key).field())
.enumerate();
let mut prev_field = Field(u32::max_value());
let mut fields = vec![];
let mut offsets = vec![];
for (offset, field) in term_offsets_it {
if field != prev_field {
prev_field = field;
fields.push(field);
offsets.push(offset);
}
}
offsets.push(term_offsets.len());
let mut field_offsets = vec![];
for i in 0..fields.len() {
field_offsets.push((fields[i], offsets[i], offsets[i + 1]));
}
field_offsets
}
impl MultiFieldPostingsWriter { impl MultiFieldPostingsWriter {
/// Create a new `MultiFieldPostingsWriter` given /// Create a new `MultiFieldPostingsWriter` given
/// a schema and a heap. /// a schema and a heap.
@@ -122,16 +96,36 @@ impl MultiFieldPostingsWriter {
&self, &self,
serializer: &mut InvertedIndexSerializer, serializer: &mut InvertedIndexSerializer,
) -> Result<HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>> { ) -> Result<HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>> {
let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> = let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> = self
self.term_index.iter().collect(); .term_index
.iter()
.map(|(term_bytes, addr, bucket_id)| (term_bytes, addr, bucket_id as UnorderedTermId))
.collect();
term_offsets.sort_unstable_by_key(|&(k, _, _)| k); term_offsets.sort_unstable_by_key(|&(k, _, _)| k);
let mut offsets: Vec<(Field, usize)> = vec![];
let term_offsets_it = term_offsets
.iter()
.cloned()
.map(|(key, _, _)| Term::wrap(key).field())
.enumerate();
let mut unordered_term_mappings: HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>> = let mut unordered_term_mappings: HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>> =
HashMap::new(); HashMap::new();
let field_offsets = make_field_partition(&term_offsets); let mut prev_field = Field(u32::max_value());
for (offset, field) in term_offsets_it {
if field != prev_field {
offsets.push((field, offset));
prev_field = field;
}
}
offsets.push((Field(0), term_offsets.len()));
for i in 0..(offsets.len() - 1) {
let (field, start) = offsets[i];
let (_, stop) = offsets[i + 1];
for (field, start, stop) in field_offsets {
let field_entry = self.schema.get_field_entry(field); let field_entry = self.schema.get_field_entry(field);
match *field_entry.field_type() { match *field_entry.field_type() {
@@ -149,7 +143,7 @@ impl MultiFieldPostingsWriter {
.collect(); .collect();
unordered_term_mappings.insert(field, mapping); unordered_term_mappings.insert(field, mapping);
} }
FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => {} FieldType::U64(_) | FieldType::I64(_) => {}
FieldType::Bytes => {} FieldType::Bytes => {}
} }
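The `make_field_partition` helper shown on the left (and inlined on the right) walks the term keys, already sorted by field, and emits one `(field, start, stop)` range per field. The same partitioning, sketched over plain integers so it can stand alone:

// Illustrative only: same idea as make_field_partition, with u32 standing in
// for Field and the slice already sorted by key.
fn partition_by_key(keys: &[u32]) -> Vec<(u32, usize, usize)> {
    let mut fields = Vec::new();
    let mut offsets = Vec::new();
    let mut prev_key = None;
    for (offset, &key) in keys.iter().enumerate() {
        if prev_key != Some(key) {
            prev_key = Some(key);
            fields.push(key);
            offsets.push(offset);
        }
    }
    offsets.push(keys.len());
    (0..fields.len())
        .map(|i| (fields[i], offsets[i], offsets[i + 1]))
        .collect()
}

fn main() {
    let keys = [1, 1, 1, 2, 2, 5];
    assert_eq!(
        partition_by_key(&keys),
        vec![(1, 0, 3), (2, 3, 5), (5, 5, 6)]
    );
}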

View File

@@ -4,6 +4,7 @@ use postings::FieldSerializer;
use std::io; use std::io;
use DocId; use DocId;
const EMPTY_ARRAY: [u32; 0] = [0u32; 0];
const POSITION_END: u32 = 0; const POSITION_END: u32 = 0;
#[derive(Default)] #[derive(Default)]
@@ -114,7 +115,7 @@ impl Recorder for NothingRecorder {
let buffer = buffer_lender.lend_u8(); let buffer = buffer_lender.lend_u8();
self.stack.read_to_end(heap, buffer); self.stack.read_to_end(heap, buffer);
for doc in VInt32Reader::new(&buffer[..]) { for doc in VInt32Reader::new(&buffer[..]) {
serializer.write_doc(doc as u32, 0u32, &[][..])?; serializer.write_doc(doc as u32, 0u32, &EMPTY_ARRAY)?;
} }
Ok(()) Ok(())
} }
@@ -167,7 +168,7 @@ impl Recorder for TermFrequencyRecorder {
let mut u32_it = VInt32Reader::new(&buffer[..]); let mut u32_it = VInt32Reader::new(&buffer[..]);
while let Some(doc) = u32_it.next() { while let Some(doc) = u32_it.next() {
let term_freq = u32_it.next().unwrap_or(self.current_tf); let term_freq = u32_it.next().unwrap_or(self.current_tf);
serializer.write_doc(doc as u32, term_freq, &[][..])?; serializer.write_doc(doc as u32, term_freq, &EMPTY_ARRAY)?;
} }
Ok(()) Ok(())

View File

@@ -2,6 +2,7 @@ use common::BitSet;
use common::HasLen; use common::HasLen;
use common::{BinarySerializable, VInt}; use common::{BinarySerializable, VInt};
use docset::{DocSet, SkipResult}; use docset::{DocSet, SkipResult};
use fst::Streamer;
use owned_read::OwnedRead; use owned_read::OwnedRead;
use positions::PositionReader; use positions::PositionReader;
use postings::compression::compressed_block_size; use postings::compression::compressed_block_size;
@@ -13,9 +14,10 @@ use postings::SkipReader;
use postings::USE_SKIP_INFO_LIMIT; use postings::USE_SKIP_INFO_LIMIT;
use schema::IndexRecordOption; use schema::IndexRecordOption;
use std::cmp::Ordering; use std::cmp::Ordering;
use tantivy_fst::Streamer;
use DocId; use DocId;
const EMPTY_ARR: [u8; 0] = [];
struct PositionComputer { struct PositionComputer {
// store the amount of position int // store the amount of position int
// before reading positions. // before reading positions.
@@ -121,17 +123,12 @@ impl SegmentPostings {
} }
} }
fn linear_search(arr: &[u32], target: u32) -> usize { fn exponential_search(target: u32, arr: &[u32]) -> (usize, usize) {
arr.iter().map(|&el| if el < target { 1 } else { 0 }).sum()
}
fn exponential_search(arr: &[u32], target: u32) -> (usize, usize) {
let end = arr.len(); let end = arr.len();
debug_assert!(arr.len() <= 128);
debug_assert!(target <= arr[end - 1]);
let mut begin = 0; let mut begin = 0;
for &pivot in &[1, 3, 7, 15, 31, 63] { for &pivot in [1,3,7,15,31,63].iter().take_while(|&&el| el < end) {
if pivot >= end {
break;
}
if arr[pivot] > target { if arr[pivot] > target {
return (begin, pivot); return (begin, pivot);
} }
@@ -148,8 +145,12 @@ fn exponential_search(arr: &[u32], target: u32) -> (usize, usize) {
/// The target is assumed greater or equal to the first element. /// The target is assumed greater or equal to the first element.
/// The target is assumed smaller or equal to the last element. /// The target is assumed smaller or equal to the last element.
fn search_within_block(block_docs: &[u32], target: u32) -> usize { fn search_within_block(block_docs: &[u32], target: u32) -> usize {
let (start, end) = exponential_search(block_docs, target); let (start, end) = exponential_search(target, block_docs);
start + linear_search(&block_docs[start..end], target) start.wrapping_add(
block_docs[start..end]
.binary_search(&target)
.unwrap_or_else(|e| e),
)
} }
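Both sides bracket the target with the same galloping pass over pivots 1, 3, 7, 15, 31, 63 and then resolve the exact index inside the bracket, with a linear count on the left and `binary_search` on the right. A standalone sketch of the left-hand combination, assuming a sorted block whose last element is greater than or equal to the target:

fn linear_search(arr: &[u32], target: u32) -> usize {
    // Counts the elements strictly smaller than target, i.e. the insertion point.
    arr.iter().map(|&el| if el < target { 1 } else { 0 }).sum()
}

fn exponential_search(arr: &[u32], target: u32) -> (usize, usize) {
    let end = arr.len();
    let mut begin = 0;
    for &pivot in &[1, 3, 7, 15, 31, 63] {
        if pivot >= end {
            break;
        }
        if arr[pivot] > target {
            return (begin, pivot);
        }
        begin = pivot;
    }
    (begin, end)
}

fn search_within_block(block_docs: &[u32], target: u32) -> usize {
    let (start, end) = exponential_search(block_docs, target);
    start + linear_search(&block_docs[start..end], target)
}

fn main() {
    let block: Vec<u32> = (0..128).map(|i| i * 3).collect();
    assert_eq!(search_within_block(&block, 9), 3); // exact hit
    assert_eq!(search_within_block(&block, 10), 4); // first doc >= 10
}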
impl DocSet for SegmentPostings { impl DocSet for SegmentPostings {
@@ -367,7 +368,7 @@ impl BlockSegmentPostings {
let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, data); let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, data);
let skip_reader = match skip_data_opt { let skip_reader = match skip_data_opt {
Some(skip_data) => SkipReader::new(skip_data, record_option), Some(skip_data) => SkipReader::new(skip_data, record_option),
None => SkipReader::new(OwnedRead::new(&[][..]), record_option), None => SkipReader::new(OwnedRead::new(&EMPTY_ARR[..]), record_option),
}; };
let doc_freq = doc_freq as usize; let doc_freq = doc_freq as usize;
let num_vint_docs = doc_freq % COMPRESSION_BLOCK_SIZE; let num_vint_docs = doc_freq % COMPRESSION_BLOCK_SIZE;
@@ -401,7 +402,7 @@ impl BlockSegmentPostings {
if let Some(skip_data) = skip_data_opt { if let Some(skip_data) = skip_data_opt {
self.skip_reader.reset(skip_data); self.skip_reader.reset(skip_data);
} else { } else {
self.skip_reader.reset(OwnedRead::new(&[][..])) self.skip_reader.reset(OwnedRead::new(&EMPTY_ARR[..]))
} }
self.doc_offset = 0; self.doc_offset = 0;
self.doc_freq = doc_freq as usize; self.doc_freq = doc_freq as usize;
@@ -616,7 +617,6 @@ impl<'b> Streamer<'b> for BlockSegmentPostings {
mod tests { mod tests {
use super::exponential_search; use super::exponential_search;
use super::linear_search;
use super::search_within_block; use super::search_within_block;
use super::BlockSegmentPostings; use super::BlockSegmentPostings;
use super::BlockSegmentPostingsSkipResult; use super::BlockSegmentPostingsSkipResult;
@@ -624,29 +624,14 @@ mod tests {
use common::HasLen; use common::HasLen;
use core::Index; use core::Index;
use docset::DocSet; use docset::DocSet;
use fst::Streamer;
use schema::IndexRecordOption; use schema::IndexRecordOption;
use schema::Schema; use schema::Schema;
use schema::Term; use schema::Term;
use schema::INDEXED; use schema::INT_INDEXED;
use tantivy_fst::Streamer;
use DocId; use DocId;
use SkipResult; use SkipResult;
#[test]
fn test_linear_search() {
let len: usize = 50;
let arr: Vec<u32> = (0..len).map(|el| 1u32 + (el as u32) * 2).collect();
for target in 1..*arr.last().unwrap() {
let res = linear_search(&arr[..], target);
if res > 0 {
assert!(arr[res - 1] < target);
}
if res < len {
assert!(arr[res] >= target);
}
}
}
#[test] #[test]
fn test_empty_segment_postings() { fn test_empty_segment_postings() {
let mut postings = SegmentPostings::empty(); let mut postings = SegmentPostings::empty();
@@ -675,10 +660,10 @@ mod tests {
#[test] #[test]
fn test_exponentiel_search() { fn test_exponentiel_search() {
assert_eq!(exponential_search(&[1, 2], 0), (0, 1)); assert_eq!(exponential_search(0, &[1, 2]), (0, 1));
assert_eq!(exponential_search(&[1, 2], 1), (0, 1)); assert_eq!(exponential_search(1, &[1, 2]), (0, 1));
assert_eq!( assert_eq!(
exponential_search(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 7), exponential_search(7, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
(3, 7) (3, 7)
); );
} }
@@ -760,7 +745,7 @@ mod tests {
fn build_block_postings(docs: &[DocId]) -> BlockSegmentPostings { fn build_block_postings(docs: &[DocId]) -> BlockSegmentPostings {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let int_field = schema_builder.add_u64_field("id", INDEXED); let int_field = schema_builder.add_u64_field("id", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -773,7 +758,8 @@ mod tests {
last_doc = doc + 1; last_doc = doc + 1;
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
let inverted_index = segment_reader.inverted_index(int_field); let inverted_index = segment_reader.inverted_index(int_field);
let term = Term::from_field_u64(int_field, 0u64); let term = Term::from_field_u64(int_field, 0u64);
@@ -830,7 +816,7 @@ mod tests {
#[test] #[test]
fn test_reset_block_segment_postings() { fn test_reset_block_segment_postings() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let int_field = schema_builder.add_u64_field("id", INDEXED); let int_field = schema_builder.add_u64_field("id", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
@@ -841,7 +827,8 @@ mod tests {
index_writer.add_document(doc); index_writer.add_document(doc);
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0); let segment_reader = searcher.segment_reader(0);
let mut block_segments; let mut block_segments;

View File

@@ -5,11 +5,12 @@ use self::murmurhash32::murmurhash2;
use super::{Addr, MemoryArena}; use super::{Addr, MemoryArena};
use byteorder::{ByteOrder, NativeEndian}; use byteorder::{ByteOrder, NativeEndian};
use postings::stacker::memory_arena::store; use postings::stacker::memory_arena::store;
use postings::UnorderedTermId;
use std::iter; use std::iter;
use std::mem; use std::mem;
use std::slice; use std::slice;
pub type BucketId = usize;
/// Returns the actual memory size in bytes /// Returns the actual memory size in bytes
/// required to create a table of size $2^num_bits$. /// required to create a table of size $2^num_bits$.
pub fn compute_table_size(num_bits: usize) -> usize { pub fn compute_table_size(num_bits: usize) -> usize {
@@ -27,7 +28,6 @@ pub fn compute_table_size(num_bits: usize) -> usize {
struct KeyValue { struct KeyValue {
key_value_addr: Addr, key_value_addr: Addr,
hash: u32, hash: u32,
unordered_term_id: UnorderedTermId,
} }
impl Default for KeyValue { impl Default for KeyValue {
@@ -35,7 +35,6 @@ impl Default for KeyValue {
KeyValue { KeyValue {
key_value_addr: Addr::null_pointer(), key_value_addr: Addr::null_pointer(),
hash: 0u32, hash: 0u32,
unordered_term_id: UnorderedTermId::default(),
} }
} }
} }
@@ -60,7 +59,6 @@ pub struct TermHashMap {
pub heap: MemoryArena, pub heap: MemoryArena,
mask: usize, mask: usize,
occupied: Vec<usize>, occupied: Vec<usize>,
len: usize,
} }
struct QuadraticProbing { struct QuadraticProbing {
@@ -87,13 +85,13 @@ pub struct Iter<'a> {
} }
impl<'a> Iterator for Iter<'a> { impl<'a> Iterator for Iter<'a> {
type Item = (&'a [u8], Addr, UnorderedTermId); type Item = (&'a [u8], Addr, BucketId);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.inner.next().cloned().map(move |bucket: usize| { self.inner.next().cloned().map(move |bucket: usize| {
let kv = self.hashmap.table[bucket]; let kv = self.hashmap.table[bucket];
let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr); let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
(key, offset, kv.unordered_term_id) (key, offset, bucket as BucketId)
}) })
} }
} }
@@ -108,7 +106,6 @@ impl TermHashMap {
heap, heap,
mask: table_size - 1, mask: table_size - 1,
occupied: Vec::with_capacity(table_size / 2), occupied: Vec::with_capacity(table_size / 2),
len: 0,
} }
} }
@@ -142,16 +139,12 @@ impl TermHashMap {
} }
} }
fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) -> UnorderedTermId { pub fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) {
self.occupied.push(bucket); self.occupied.push(bucket);
let unordered_term_id = self.len as UnorderedTermId;
self.len += 1;
self.table[bucket] = KeyValue { self.table[bucket] = KeyValue {
key_value_addr, key_value_addr,
hash, hash,
unordered_term_id,
}; };
unordered_term_id
} }
pub fn iter(&self) -> Iter { pub fn iter(&self) -> Iter {
@@ -191,11 +184,7 @@ impl TermHashMap {
/// will be in charge of returning a default value. /// will be in charge of returning a default value.
/// If the key already has an associated value, then it will be passed /// If the key already has an associated value, then it will be passed
/// `Some(previous_value)`. /// `Some(previous_value)`.
pub fn mutate_or_create<S, V, TMutator>( pub fn mutate_or_create<S, V, TMutator>(&mut self, key: S, mut updater: TMutator) -> BucketId
&mut self,
key: S,
mut updater: TMutator,
) -> UnorderedTermId
where where
S: AsRef<[u8]>, S: AsRef<[u8]>,
V: Copy + 'static, V: Copy + 'static,
@@ -211,7 +200,6 @@ impl TermHashMap {
let bucket = probe.next_probe(); let bucket = probe.next_probe();
let kv: KeyValue = self.table[bucket]; let kv: KeyValue = self.table[bucket];
if kv.is_empty() { if kv.is_empty() {
// The key does not exist yet.
let val = updater(None); let val = updater(None);
let num_bytes = let num_bytes =
std::mem::size_of::<u16>() + key_bytes.len() + std::mem::size_of::<V>(); std::mem::size_of::<u16>() + key_bytes.len() + std::mem::size_of::<V>();
@@ -223,7 +211,8 @@ impl TermHashMap {
data[2..stop].copy_from_slice(key_bytes); data[2..stop].copy_from_slice(key_bytes);
store(&mut data[stop..], val); store(&mut data[stop..], val);
} }
return self.set_bucket(hash, key_addr, bucket); self.set_bucket(hash, key_addr, bucket);
return bucket as BucketId;
} else if kv.hash == hash { } else if kv.hash == hash {
if let Some(val_addr) = if let Some(val_addr) =
self.get_value_addr_if_key_match(key_bytes, kv.key_value_addr) self.get_value_addr_if_key_match(key_bytes, kv.key_value_addr)
@@ -231,7 +220,7 @@ impl TermHashMap {
let v = self.heap.read(val_addr); let v = self.heap.read(val_addr);
let new_v = updater(Some(v)); let new_v = updater(Some(v));
self.heap.write_at(val_addr, new_v); self.heap.write_at(val_addr, new_v);
return kv.unordered_term_id; return bucket as BucketId;
} }
} }
} }
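The doc comment in this hunk spells out the `mutate_or_create` contract: the updater closure sees `None` the first time a key is met and `Some(previous_value)` afterwards, and the call returns an id for the key (an `UnorderedTermId` on the left, the raw `BucketId` on the right). A rough standalone analogy of that contract on top of a std `HashMap`:

use std::collections::HashMap;

// Illustrative only: same updater convention as TermHashMap::mutate_or_create,
// but backed by a std HashMap instead of the MemoryArena-based table.
fn mutate_or_create<F: FnOnce(Option<u32>) -> u32>(
    map: &mut HashMap<Vec<u8>, u32>,
    key: &[u8],
    updater: F,
) {
    let new_val = updater(map.get(key).cloned());
    map.insert(key.to_vec(), new_val);
}

fn main() {
    let mut term_frequencies = HashMap::new();
    // First call: the updater receives None and seeds the value.
    mutate_or_create(&mut term_frequencies, b"hello", |prev| prev.unwrap_or(0) + 1);
    // Second call: the updater receives Some(1) and bumps it.
    mutate_or_create(&mut term_frequencies, b"hello", |prev| prev.unwrap_or(0) + 1);
    assert_eq!(term_frequencies.get(&b"hello"[..]), Some(&2));
}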

View File

@@ -101,9 +101,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
index_writer.add_document(doc!(field=>"ccc")); index_writer.add_document(doc!(field=>"ccc"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
reader.reload().unwrap(); let searcher = index.searcher();
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false).unwrap(); let weight = AllQuery.weight(&searcher, false).unwrap();
{ {
let reader = searcher.segment_reader(0); let reader = searcher.segment_reader(0);

View File

@@ -1,10 +1,10 @@
use common::BitSet; use common::BitSet;
use core::SegmentReader; use core::SegmentReader;
use fst::Automaton;
use query::BitSetDocSet; use query::BitSetDocSet;
use query::ConstScorer; use query::ConstScorer;
use query::{Scorer, Weight}; use query::{Scorer, Weight};
use schema::{Field, IndexRecordOption}; use schema::{Field, IndexRecordOption};
use tantivy_fst::Automaton;
use termdict::{TermDictionary, TermStreamer}; use termdict::{TermDictionary, TermStreamer};
use Result; use Result;

View File

@@ -1,4 +1,5 @@
use core::SegmentReader; use core::SegmentReader;
use downcast_rs::Downcast;
use query::intersect_scorers; use query::intersect_scorers;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner}; use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::term_query::TermScorer; use query::term_query::TermScorer;
@@ -22,11 +23,13 @@ where
} }
{ {
let is_all_term_queries = scorers.iter().all(|scorer| scorer.is::<TermScorer>()); let is_all_term_queries = scorers.iter().all(|scorer| {
scorer.is::<TermScorer>()
});
if is_all_term_queries { if is_all_term_queries {
let scorers: Vec<TermScorer> = scorers let scorers: Vec<TermScorer> = scorers
.into_iter() .into_iter()
.map(|scorer| *(scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap())) .map(|scorer| *(scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap() ))
.collect(); .collect();
let scorer: Box<Scorer> = Box::new(Union::<TermScorer, TScoreCombiner>::from(scorers)); let scorer: Box<Scorer> = Box::new(Union::<TermScorer, TScoreCombiner>::from(scorers));
return scorer; return scorer;
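These hunks revolve around the downcast-rs pattern: a `Box<Scorer>` trait object is checked with `is::<TermScorer>()` and, when every clause is a plain term query, unwrapped with `downcast::<TermScorer>()` so the specialized union can be built. A minimal standalone sketch of the mechanics, with placeholder types rather than tantivy's:

#[macro_use]
extern crate downcast_rs;
use downcast_rs::Downcast;

trait Scorer: Downcast {}
impl_downcast!(Scorer);

struct TermScorer(u32);
impl Scorer for TermScorer {}

fn main() {
    let scorer: Box<dyn Scorer> = Box::new(TermScorer(42));
    // `is` checks the concrete type behind the trait object...
    assert!(scorer.is::<TermScorer>());
    // ...and `downcast` recovers it, mirroring the TermScorer specialization above.
    let term_scorer = scorer.downcast::<TermScorer>().map_err(|_| ()).unwrap();
    assert_eq!(term_scorer.0, 42);
}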

View File

@@ -8,6 +8,7 @@ mod tests {
use super::*; use super::*;
use collector::tests::TestCollector; use collector::tests::TestCollector;
use downcast_rs::Downcast;
use query::score_combiner::SumWithCoordsCombiner; use query::score_combiner::SumWithCoordsCombiner;
use query::term_query::TermScorer; use query::term_query::TermScorer;
use query::Intersection; use query::Intersection;
@@ -51,6 +52,7 @@ mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
(index, text_field) (index, text_field)
} }
@@ -59,8 +61,7 @@ mod tests {
let (index, text_field) = aux_test_helper(); let (index, text_field) = aux_test_helper();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("(+a +b) d").unwrap(); let query = query_parser.parse_query("(+a +b) d").unwrap();
let searcher = index.reader().unwrap().searcher(); assert_eq!(query.count(&*index.searcher()).unwrap(), 3);
assert_eq!(query.count(&searcher).unwrap(), 3);
} }
#[test] #[test]
@@ -68,7 +69,7 @@ mod tests {
let (index, text_field) = aux_test_helper(); let (index, text_field) = aux_test_helper();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("+a").unwrap(); let query = query_parser.parse_query("+a").unwrap();
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let weight = query.weight(&searcher, true).unwrap(); let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap(); let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(scorer.is::<TermScorer>()); assert!(scorer.is::<TermScorer>());
@@ -78,7 +79,7 @@ mod tests {
pub fn test_boolean_termonly_intersection() { pub fn test_boolean_termonly_intersection() {
let (index, text_field) = aux_test_helper(); let (index, text_field) = aux_test_helper();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
{ {
let query = query_parser.parse_query("+a +b +c").unwrap(); let query = query_parser.parse_query("+a +b +c").unwrap();
let weight = query.weight(&searcher, true).unwrap(); let weight = query.weight(&searcher, true).unwrap();
@@ -97,13 +98,12 @@ mod tests {
pub fn test_boolean_reqopt() { pub fn test_boolean_reqopt() {
let (index, text_field) = aux_test_helper(); let (index, text_field) = aux_test_helper();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
{ {
let query = query_parser.parse_query("+a b").unwrap(); let query = query_parser.parse_query("+a b").unwrap();
let weight = query.weight(&searcher, true).unwrap(); let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap(); let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(scorer assert!(scorer.is::<RequiredOptionalScorer<Box<Scorer>, Box<Scorer>, SumWithCoordsCombiner>>());
.is::<RequiredOptionalScorer<Box<Scorer>, Box<Scorer>, SumWithCoordsCombiner>>());
} }
{ {
let query = query_parser.parse_query("+a b").unwrap(); let query = query_parser.parse_query("+a b").unwrap();
@@ -111,7 +111,8 @@ mod tests {
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap(); let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(scorer.is::<TermScorer>()); assert!(scorer.is::<TermScorer>());
} }
} }
#[test] #[test]
pub fn test_boolean_query() { pub fn test_boolean_query() {
@@ -126,13 +127,10 @@ mod tests {
query query
}; };
let reader = index.reader().unwrap();
let matching_docs = |boolean_query: &Query| { let matching_docs = |boolean_query: &Query| {
reader let searcher = index.searcher();
.searcher() let test_docs = searcher.search(boolean_query, &TestCollector).unwrap();
.search(boolean_query, &TestCollector) test_docs
.unwrap()
.docs() .docs()
.iter() .iter()
.cloned() .cloned()
@@ -188,12 +186,10 @@ mod tests {
let query: Box<Query> = Box::new(term_query); let query: Box<Query> = Box::new(term_query);
query query
}; };
let reader = index.reader().unwrap();
let score_docs = |boolean_query: &Query| { let score_docs = |boolean_query: &Query| {
let fruit = reader let searcher = index.searcher();
.searcher() let fruit = searcher.search(boolean_query, &TestCollector).unwrap();
.search(boolean_query, &TestCollector)
.unwrap();
fruit.scores().to_vec() fruit.scores().to_vec()
}; };

View File

@@ -52,8 +52,9 @@ lazy_static! {
/// )); /// ));
/// index_writer.commit().unwrap(); /// index_writer.commit().unwrap();
/// } /// }
/// let reader = index.reader()?; ///
/// let searcher = reader.searcher(); /// index.load_searchers()?;
/// let searcher = index.searcher();
/// ///
/// { /// {
/// ///
@@ -140,8 +141,8 @@ mod test {
)); ));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
{ {
let term = Term::from_field_text(country_field, "japon"); let term = Term::from_field_text(country_field, "japon");

View File

@@ -1,9 +1,9 @@
use docset::{DocSet, SkipResult}; use docset::{DocSet, SkipResult};
use query::term_query::TermScorer;
use query::EmptyScorer; use query::EmptyScorer;
use query::Scorer; use query::Scorer;
use DocId; use DocId;
use Score; use Score;
use query::term_query::TermScorer;
/// Returns the intersection scorer. /// Returns the intersection scorer.
/// ///
@@ -24,9 +24,9 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
(Some(single_docset), None) => single_docset, (Some(single_docset), None) => single_docset,
(Some(left), Some(right)) => { (Some(left), Some(right)) => {
{ {
let all_term_scorers = [&left, &right] let all_term_scorers = [&left, &right].iter().all(|&scorer| {
.iter() scorer.is::<TermScorer>()
.all(|&scorer| scorer.is::<TermScorer>()); });
if all_term_scorers { if all_term_scorers {
let left = *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap()); let left = *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap());
let right = *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap()); let right = *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap());

View File

@@ -31,6 +31,7 @@ mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
index.load_searchers().unwrap();
index index
} }
@@ -45,7 +46,8 @@ mod tests {
]); ]);
let schema = index.schema(); let schema = index.schema();
let text_field = schema.get_field("text").unwrap(); let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let test_query = |texts: Vec<&str>| { let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts let terms: Vec<Term> = texts
.iter() .iter()
@@ -88,7 +90,8 @@ mod tests {
index_writer.add_document(doc!(text_field=>"a b c")); index_writer.add_document(doc!(text_field=>"a b c"));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let phrase_query = PhraseQuery::new(vec![ let phrase_query = PhraseQuery::new(vec![
Term::from_field_text(text_field, "a"), Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b"), Term::from_field_text(text_field, "b"),
@@ -112,7 +115,8 @@ mod tests {
let index = create_index(&["a b c", "a b c a b"]); let index = create_index(&["a b c", "a b c a b"]);
let schema = index.schema(); let schema = index.schema();
let text_field = schema.get_field("text").unwrap(); let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let test_query = |texts: Vec<&str>| { let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts let terms: Vec<Term> = texts
.iter() .iter()
@@ -144,7 +148,8 @@ mod tests {
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let test_query = |texts: Vec<&str>| { let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts let terms: Vec<Term> = texts
.iter() .iter()
@@ -172,7 +177,8 @@ mod tests {
index_writer.add_document(doc!(text_field=>"a b c d e f g h")); index_writer.add_document(doc!(text_field=>"a b c d e f g h"));
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let test_query = |texts: Vec<(usize, &str)>| { let test_query = |texts: Vec<(usize, &str)>| {
let terms: Vec<(usize, Term)> = texts let terms: Vec<(usize, Term)> = texts
.iter() .iter()

View File

@@ -43,7 +43,7 @@ impl<TPostings: Postings> DocSet for PostingsWithOffset<TPostings> {
pub struct PhraseScorer<TPostings: Postings> { pub struct PhraseScorer<TPostings: Postings> {
intersection_docset: Intersection<PostingsWithOffset<TPostings>, PostingsWithOffset<TPostings>>, intersection_docset: Intersection<PostingsWithOffset<TPostings>, PostingsWithOffset<TPostings>>,
num_terms: usize, num_docsets: usize,
left: Vec<u32>, left: Vec<u32>,
right: Vec<u32>, right: Vec<u32>,
phrase_count: u32, phrase_count: u32,
@@ -138,7 +138,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
.collect::<Vec<_>>(); .collect::<Vec<_>>();
PhraseScorer { PhraseScorer {
intersection_docset: Intersection::new(postings_with_offsets), intersection_docset: Intersection::new(postings_with_offsets),
num_terms: num_docsets, num_docsets,
left: Vec::with_capacity(100), left: Vec::with_capacity(100),
right: Vec::with_capacity(100), right: Vec::with_capacity(100),
phrase_count: 0u32, phrase_count: 0u32,
@@ -165,7 +165,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
.positions(&mut self.left); .positions(&mut self.left);
} }
let mut intersection_len = self.left.len(); let mut intersection_len = self.left.len();
for i in 1..self.num_terms - 1 { for i in 1..self.num_docsets - 1 {
{ {
self.intersection_docset self.intersection_docset
.docset_mut_specialized(i) .docset_mut_specialized(i)
@@ -178,7 +178,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
} }
self.intersection_docset self.intersection_docset
.docset_mut_specialized(self.num_terms - 1) .docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right); .positions(&mut self.right);
intersection_exists(&self.left[..intersection_len], &self.right[..]) intersection_exists(&self.left[..intersection_len], &self.right[..])
} }
@@ -190,7 +190,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
.positions(&mut self.left); .positions(&mut self.left);
} }
let mut intersection_len = self.left.len(); let mut intersection_len = self.left.len();
for i in 1..self.num_terms - 1 { for i in 1..self.num_docsets - 1 {
{ {
self.intersection_docset self.intersection_docset
.docset_mut_specialized(i) .docset_mut_specialized(i)
@@ -203,7 +203,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
} }
self.intersection_docset self.intersection_docset
.docset_mut_specialized(self.num_terms - 1) .docset_mut_specialized(self.num_docsets - 1)
.positions(&mut self.right); .positions(&mut self.right);
intersection_count(&self.left[..intersection_len], &self.right[..]) as u32 intersection_count(&self.left[..intersection_len], &self.right[..]) as u32
} }
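The rename above does not change the algorithm: the scorer intersects the position lists of consecutive terms (shifted by their offset in the phrase) and finally counts or tests matches between the left and right lists. A standalone sketch of that last step over two sorted position lists:

// Illustrative only: counts positions present in both sorted lists, the role
// played by intersection_count / intersection_exists in the scorer above.
fn intersection_count(left: &[u32], right: &[u32]) -> usize {
    let (mut i, mut j, mut count) = (0, 0, 0);
    while i < left.len() && j < right.len() {
        if left[i] < right[j] {
            i += 1;
        } else if left[i] > right[j] {
            j += 1;
        } else {
            count += 1;
            i += 1;
            j += 1;
        }
    }
    count
}

fn main() {
    // The two lists share positions 2 and 9.
    assert_eq!(intersection_count(&[2, 5, 9], &[2, 9, 11]), 2);
}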

View File

@@ -52,7 +52,7 @@ parser! {
field_name: None, field_name: None,
phrase, phrase,
}); });
attempt(term_query) try(term_query)
.or(term_default_field) .or(term_default_field)
.map(UserInputLeaf::from) .map(UserInputLeaf::from)
} }
@@ -83,12 +83,12 @@ parser! {
let lower_bound = { let lower_bound = {
let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w)); let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w));
let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w)); let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w));
attempt(excl).or(incl) try(excl).or(incl)
}; };
let upper_bound = { let upper_bound = {
let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w)); let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w));
let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w)); let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w));
attempt(excl).or(incl) try(excl).or(incl)
}; };
( (
optional((field(), char(':')).map(|x| x.0)), optional((field(), char(':')).map(|x| x.0)),
@@ -112,11 +112,11 @@ parser! {
.or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) )) .or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) ))
.or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr)) .or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr))
.or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) )) .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) ))
.or(attempt( .or(try(
(string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot)) (string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot))
) )
) )
.or(attempt( .or(try(
range().map(UserInputAST::from) range().map(UserInputAST::from)
) )
) )
@@ -160,7 +160,7 @@ parser! {
where [I: Stream<Item = char>] where [I: Stream<Item = char>]
{ {
( (
attempt( try(
chainl1( chainl1(
leaf().map(Element::SingleEl), leaf().map(Element::SingleEl),
binary_operand().map(|op: BinaryOperand| binary_operand().map(|op: BinaryOperand|

View File

@@ -50,8 +50,6 @@ pub enum QueryParserError {
/// The query contains a range query with a phrase as one of the bounds. /// The query contains a range query with a phrase as one of the bounds.
/// Only terms can be used as bounds. /// Only terms can be used as bounds.
RangeMustNotHavePhrase, RangeMustNotHavePhrase,
/// The format for the date field is not RFC 3339 compliant.
DateFormatError(chrono::ParseError),
} }
impl From<ParseIntError> for QueryParserError { impl From<ParseIntError> for QueryParserError {
@@ -60,12 +58,6 @@ impl From<ParseIntError> for QueryParserError {
} }
} }
impl From<chrono::ParseError> for QueryParserError {
fn from(err: chrono::ParseError) -> QueryParserError {
QueryParserError::DateFormatError(err)
}
}
/// Recursively remove empty clause from the AST /// Recursively remove empty clause from the AST
/// ///
/// Returns `None` iff the `logical_ast` ended up being empty. /// Returns `None` iff the `logical_ast` ended up being empty.
@@ -135,8 +127,6 @@ fn trim_ast(logical_ast: LogicalAST) -> Option<LogicalAST> {
/// a word lexicographically between `a` and `c` (inclusive lower bound, exclusive upper bound). /// a word lexicographically between `a` and `c` (inclusive lower bound, exclusive upper bound).
/// Inclusive bounds are `[]`, exclusive are `{}`. /// Inclusive bounds are `[]`, exclusive are `{}`.
/// ///
/// * date values: The query parser supports rfc3339 formatted dates. For example "2002-10-02T15:00:00.05Z"
///
/// * all docs query: A plain `*` will match all documents in the index. /// * all docs query: A plain `*` will match all documents in the index.
/// ///
#[derive(Clone)] #[derive(Clone)]
@@ -239,13 +229,6 @@ impl QueryParser {
let term = Term::from_field_i64(field, val); let term = Term::from_field_i64(field, val);
Ok(vec![(0, term)]) Ok(vec![(0, term)])
} }
FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) {
Ok(x) => Ok(vec![(
0,
Term::from_field_date(field, &x.with_timezone(&chrono::Utc)),
)]),
Err(e) => Err(QueryParserError::DateFormatError(e)),
},
FieldType::U64(_) => { FieldType::U64(_) => {
let val: u64 = u64::from_str(phrase)?; let val: u64 = u64::from_str(phrase)?;
let term = Term::from_field_u64(field, val); let term = Term::from_field_u64(field, val);
@@ -504,7 +487,7 @@ mod test {
use query::Query; use query::Query;
use schema::Field; use schema::Field;
use schema::{IndexRecordOption, TextFieldIndexing, TextOptions}; use schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use schema::{Schema, Term, INDEXED, STORED, STRING, TEXT}; use schema::{Schema, Term, INT_INDEXED, STORED, STRING, TEXT};
use tokenizer::{LowerCaser, SimpleTokenizer, StopWordFilter, Tokenizer, TokenizerManager}; use tokenizer::{LowerCaser, SimpleTokenizer, StopWordFilter, Tokenizer, TokenizerManager};
use Index; use Index;
@@ -518,14 +501,13 @@ mod test {
.set_stored(); .set_stored();
let title = schema_builder.add_text_field("title", TEXT); let title = schema_builder.add_text_field("title", TEXT);
let text = schema_builder.add_text_field("text", TEXT); let text = schema_builder.add_text_field("text", TEXT);
schema_builder.add_i64_field("signed", INDEXED); schema_builder.add_i64_field("signed", INT_INDEXED);
schema_builder.add_u64_field("unsigned", INDEXED); schema_builder.add_u64_field("unsigned", INT_INDEXED);
schema_builder.add_text_field("notindexed_text", STORED); schema_builder.add_text_field("notindexed_text", STORED);
schema_builder.add_text_field("notindexed_u64", STORED); schema_builder.add_text_field("notindexed_u64", STORED);
schema_builder.add_text_field("notindexed_i64", STORED); schema_builder.add_text_field("notindexed_i64", STORED);
schema_builder.add_text_field("nottokenized", STRING); schema_builder.add_text_field("nottokenized", STRING);
schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_text_field("with_stop_words", text_options);
schema_builder.add_date_field("date", INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let default_fields = vec![title, text]; let default_fields = vec![title, text];
let tokenizer_manager = TokenizerManager::default(); let tokenizer_manager = TokenizerManager::default();
@@ -785,18 +767,6 @@ mod test {
); );
} }
#[test]
pub fn test_query_parser_expected_date() {
let query_parser = make_query_parser();
assert_matches!(
query_parser.parse_query("date:18a"),
Err(QueryParserError::DateFormatError(_))
);
assert!(query_parser
.parse_query("date:\"1985-04-12T23:20:50.52Z\"")
.is_ok());
}
#[test] #[test]
pub fn test_query_parser_not_empty_but_no_tokens() { pub fn test_query_parser_not_empty_but_no_tokens() {
let query_parser = make_query_parser(); let query_parser = make_query_parser();
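
The removed branch above converts the query phrase with chrono before building a date term. A minimal standalone sketch of that conversion, assuming only the chrono crate (the surrounding field and term handling is omitted):

```
extern crate chrono;

use chrono::Utc;

fn main() {
    // Same call the removed `FieldType::Date` branch performed on the query phrase.
    let parsed = chrono::DateTime::parse_from_rfc3339("1985-04-12T23:20:50.52Z")
        .expect("not a valid RFC 3339 date");
    // The parser normalized the value to UTC before building the term.
    let utc = parsed.with_timezone(&Utc);
    println!("timestamp = {}", utc.timestamp());
}
```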

View File

@@ -40,14 +40,14 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
/// # #[macro_use] /// # #[macro_use]
/// # extern crate tantivy; /// # extern crate tantivy;
/// # use tantivy::Index; /// # use tantivy::Index;
/// # use tantivy::schema::{Schema, INDEXED}; /// # use tantivy::schema::{Schema, INT_INDEXED};
/// # use tantivy::collector::Count; /// # use tantivy::collector::Count;
/// # use tantivy::Result; /// # use tantivy::Result;
/// # use tantivy::query::RangeQuery; /// # use tantivy::query::RangeQuery;
/// # /// #
/// # fn run() -> Result<()> { /// # fn run() -> Result<()> {
/// # let mut schema_builder = Schema::builder(); /// # let mut schema_builder = Schema::builder();
/// # let year_field = schema_builder.add_u64_field("year", INDEXED); /// # let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
/// # let schema = schema_builder.build(); /// # let schema = schema_builder.build();
/// # /// #
/// # let index = Index::create_in_ram(schema); /// # let index = Index::create_in_ram(schema);
@@ -61,8 +61,8 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
/// # } /// # }
/// # index_writer.commit().unwrap(); /// # index_writer.commit().unwrap();
/// # } /// # }
/// # let reader = index.reader()?; /// # index.load_searchers()?;
/// let searcher = reader.searcher(); /// let searcher = index.searcher();
/// ///
/// let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970); /// let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
/// ///
@@ -293,7 +293,7 @@ mod tests {
use super::RangeQuery; use super::RangeQuery;
use collector::Count; use collector::Count;
use schema::{Document, Field, Schema, INDEXED}; use schema::{Document, Field, Schema, INT_INDEXED};
use std::collections::Bound; use std::collections::Bound;
use Index; use Index;
use Result; use Result;
@@ -302,7 +302,7 @@ mod tests {
fn test_range_query_simple() { fn test_range_query_simple() {
fn run() -> Result<()> { fn run() -> Result<()> {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED); let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema); let index = Index::create_in_ram(schema);
@@ -316,8 +316,8 @@ mod tests {
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64); let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
@@ -335,7 +335,7 @@ mod tests {
let int_field: Field; let int_field: Field;
let schema = { let schema = {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
int_field = schema_builder.add_i64_field("intfield", INDEXED); int_field = schema_builder.add_i64_field("intfield", INT_INDEXED);
schema_builder.build() schema_builder.build()
}; };
@@ -355,8 +355,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let count_multiples = let count_multiples =
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap(); |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
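
The doc-comment and tests above only exercise the `new_u64(field, Range)` constructor. Below is a rough sketch of a half-open range, assuming the companion `new_u64_bounds` constructor exists alongside `new_u64`, and using the `INDEXED` flag spelling and `index.reader()` from the left-hand side of this diff (swap in `INT_INDEXED` and `load_searchers()` on the other side):

```
use std::collections::Bound;
use tantivy::collector::Count;
use tantivy::query::RangeQuery;
use tantivy::schema::{Document, Schema, INDEXED};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let year_field = schema_builder.add_u64_field("year", INDEXED);
    let index = Index::create_in_ram(schema_builder.build());
    {
        let mut index_writer = index.writer(50_000_000)?;
        for year in 1950u64..2000u64 {
            let mut doc = Document::default();
            doc.add_u64(year_field, year);
            index_writer.add_document(doc);
        }
        index_writer.commit()?;
    }
    let searcher = index.reader()?.searcher();
    // Every doc from 1970 (included) onward, with no upper bound.
    let from_1970 = RangeQuery::new_u64_bounds(year_field, Bound::Included(1970), Bound::Unbounded);
    assert_eq!(searcher.search(&from_1970, &Count)?, 30);
    Ok(())
}
```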

View File

@@ -1,8 +1,8 @@
use error::TantivyError; use error::TantivyError;
use fst_regex::Regex;
use query::{AutomatonWeight, Query, Weight}; use query::{AutomatonWeight, Query, Weight};
use schema::Field; use schema::Field;
use std::clone::Clone; use std::clone::Clone;
use tantivy_fst::Regex;
use Result; use Result;
use Searcher; use Searcher;
@@ -44,8 +44,8 @@ use Searcher;
/// index_writer.commit().unwrap(); /// index_writer.commit().unwrap();
/// } /// }
/// ///
/// let reader = index.reader()?; /// index.load_searchers()?;
/// let searcher = reader.searcher(); /// let searcher = index.searcher();
/// ///
/// let term = Term::from_field_text(title, "Diary"); /// let term = Term::from_field_text(title, "Diary");
/// let query = RegexQuery::new("d[ai]{2}ry".to_string(), title); /// let query = RegexQuery::new("d[ai]{2}ry".to_string(), title);
@@ -108,8 +108,8 @@ mod test {
)); ));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
{ {
let regex_query = RegexQuery::new("jap[ao]n".to_string(), country_field); let regex_query = RegexQuery::new("jap[ao]n".to_string(), country_field);
let scored_docs = searcher let scored_docs = searcher

View File

@@ -25,6 +25,7 @@ pub trait Scorer: downcast_rs::Downcast + DocSet + 'static {
impl_downcast!(Scorer); impl_downcast!(Scorer);
impl Scorer for Box<Scorer> { impl Scorer for Box<Scorer> {
fn score(&mut self) -> Score { fn score(&mut self) -> Score {
self.deref_mut().score() self.deref_mut().score()
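
For context on the `Downcast_ref` commit: making `Scorer` extend `downcast_rs::Downcast` lets callers recover a concrete scorer type from a boxed trait object. A self-contained sketch of the pattern with an illustrative trait and scorer (not tantivy's actual types):

```
#[macro_use]
extern crate downcast_rs;

use downcast_rs::Downcast;

trait Scorer: Downcast {
    fn score(&self) -> f32;
}
impl_downcast!(Scorer);

struct ConstScorer(f32);

impl Scorer for ConstScorer {
    fn score(&self) -> f32 {
        self.0
    }
}

fn main() {
    let boxed: Box<dyn Scorer> = Box::new(ConstScorer(1.5));
    // `downcast_ref` recovers the concrete type, e.g. to take a specialized fast path.
    if let Some(const_scorer) = boxed.downcast_ref::<ConstScorer>() {
        assert_eq!(const_scorer.score(), 1.5);
    }
}
```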

View File

@@ -32,7 +32,9 @@ mod tests {
} }
assert!(index_writer.commit().is_ok()); assert!(index_writer.commit().is_ok());
} }
let searcher = index.reader().unwrap().searcher();
index.load_searchers().unwrap();
let searcher = index.searcher();
let term_query = TermQuery::new( let term_query = TermQuery::new(
Term::from_field_text(text_field, "a"), Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic, IndexRecordOption::Basic,
@@ -63,7 +65,8 @@ mod tests {
index_writer.add_document(doc!(left_field => "left4 left1")); index_writer.add_document(doc!(left_field => "left4 left1"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
{ {
let term = Term::from_field_text(left_field, "left2"); let term = Term::from_field_text(left_field, "left2");
let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs); let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs);

View File

@@ -48,8 +48,9 @@ use Term;
/// )); /// ));
/// index_writer.commit()?; /// index_writer.commit()?;
/// } /// }
/// let reader = index.reader()?; ///
/// let searcher = reader.searcher(); /// index.load_searchers()?;
/// let searcher = index.searcher();
/// ///
/// let query = TermQuery::new( /// let query = TermQuery::new(
/// Term::from_field_text(title, "diary"), /// Term::from_field_text(title, "diary"),

View File

@@ -5,6 +5,8 @@ use docset::DocSet;
use std::num::Wrapping; use std::num::Wrapping;
use DocId; use DocId;
const EMPTY_ARRAY: [u32; 0] = [];
/// Simulate a `Postings` object from a `VecPostings`. /// Simulate a `Postings` object from a `VecPostings`.
/// `VecPostings` only exist for testing purposes. /// `VecPostings` only exist for testing purposes.
/// ///

View File

@@ -1,187 +0,0 @@
mod pool;
use self::pool::{LeasedItem, Pool};
use core::Segment;
use directory::Directory;
use directory::WatchHandle;
use directory::META_LOCK;
use std::sync::Arc;
use Index;
use Result;
use Searcher;
use SegmentReader;
/// Defines when a new version of the index should be reloaded.
///
/// Regardless of whether you search and index in the same process, tantivy does not necessarily
/// reflect the changes that are committed to your index. `ReloadPolicy` helps you define precisely
/// when you want your index to be reloaded.
#[derive(Clone, Copy)]
pub enum ReloadPolicy {
/// The index is entirely reloaded manually.
/// All updates of the index should be manual.
///
/// No change is reflected automatically. You are required to call `.reload()` manually.
Manual,
/// The index is reloaded within milliseconds after a new commit is available.
/// This is made possible by watching changes in the `meta.json` file.
OnCommit, // TODO add NEAR_REAL_TIME(target_ms)
}
/// `IndexReader` builder
///
/// It makes it possible to set the following values.
///
/// - `num_searchers` (by default, the number of detected CPU threads):
///
/// When `num_searchers` searchers are already in use, requesting an additional searcher will block
/// until one of the searchers in use is released.
/// - `reload_policy` (by default `ReloadPolicy::OnCommit`):
///
/// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
#[derive(Clone)]
pub struct IndexReaderBuilder {
num_searchers: usize,
reload_policy: ReloadPolicy,
index: Index,
}
impl IndexReaderBuilder {
pub(crate) fn new(index: Index) -> IndexReaderBuilder {
IndexReaderBuilder {
num_searchers: num_cpus::get(),
reload_policy: ReloadPolicy::OnCommit,
index,
}
}
/// Builds the reader.
///
/// Building the reader is a non-trivial operation that requires
/// opening the different segment readers. It may take hundreds of milliseconds
/// and it may return an error.
/// TODO(pmasurel) Use the `TryInto` trait once it is available in stable.
pub fn try_into(self) -> Result<IndexReader> {
let inner_reader = InnerIndexReader {
index: self.index,
num_searchers: self.num_searchers,
searcher_pool: Pool::new(),
};
inner_reader.reload()?;
let inner_reader_arc = Arc::new(inner_reader);
let watch_handle_opt: Option<WatchHandle>;
match self.reload_policy {
ReloadPolicy::Manual => {
// No need to set anything...
watch_handle_opt = None;
}
ReloadPolicy::OnCommit => {
let inner_reader_arc_clone = inner_reader_arc.clone();
let callback = move || {
if let Err(err) = inner_reader_arc_clone.reload() {
error!(
"Error while loading searcher after commit was detected. {:?}",
err
);
}
};
let watch_handle = inner_reader_arc.index.directory().watch(Box::new(callback));
watch_handle_opt = Some(watch_handle);
}
}
Ok(IndexReader {
inner: inner_reader_arc,
watch_handle_opt,
})
}
/// Sets the reload_policy.
///
/// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
pub fn reload_policy(mut self, reload_policy: ReloadPolicy) -> IndexReaderBuilder {
self.reload_policy = reload_policy;
self
}
/// Sets the number of `Searcher` in the searcher pool.
pub fn num_searchers(mut self, num_searchers: usize) -> IndexReaderBuilder {
self.num_searchers = num_searchers;
self
}
}
struct InnerIndexReader {
num_searchers: usize,
searcher_pool: Pool<Searcher>,
index: Index,
}
impl InnerIndexReader {
fn reload(&self) -> Result<()> {
let segment_readers: Vec<SegmentReader> = {
let _meta_lock = self.index.directory().acquire_lock(&META_LOCK)?;
let searchable_segments = self.searchable_segments()?;
searchable_segments
.iter()
.map(SegmentReader::open)
.collect::<Result<_>>()?
};
let schema = self.index.schema();
let searchers = (0..self.num_searchers)
.map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone()))
.collect();
self.searcher_pool.publish_new_generation(searchers);
Ok(())
}
/// Returns the list of segments that are searchable
fn searchable_segments(&self) -> Result<Vec<Segment>> {
self.index.searchable_segments()
}
fn searcher(&self) -> LeasedItem<Searcher> {
self.searcher_pool.acquire()
}
}
/// `IndexReader` is your entry point to read and search the index.
///
/// It controls when a new version of the index should be loaded and lends
/// you instances of `Searcher` for the last loaded version.
///
/// `Clone` does not duplicate the underlying pool of searchers: `IndexReader`
/// just wraps an `Arc`.
#[derive(Clone)]
pub struct IndexReader {
inner: Arc<InnerIndexReader>,
watch_handle_opt: Option<WatchHandle>,
}
impl IndexReader {
/// Update searchers so that they reflect the state of the last
/// `.commit()`.
///
/// If you set up the `OnCommit` `ReloadPolicy` (which is the default),
/// every commit should be rapidly reflected on your `IndexReader` and you should
/// not need to call `reload()` at all.
///
/// However, this automatic reload can take tens of milliseconds to kick in, and in unit tests
/// it can be nice to deterministically force the reload of searchers.
pub fn reload(&self) -> Result<()> {
self.inner.reload()
}
/// Returns a searcher
///
/// This method should be called every single time a search
/// query is performed.
/// The searchers are taken from a pool of `num_searchers` searchers.
/// If no searcher is available
/// this may block.
///
/// The same searcher must be used for a given query, as it ensures
/// the use of a consistent segment set.
pub fn searcher(&self) -> LeasedItem<Searcher> {
self.inner.searcher()
}
}
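
A usage sketch for the reader removed above, assuming it is obtained through an `index.reader_builder()` helper and that `ReloadPolicy` is re-exported at the crate root (the builder constructor itself is crate-private here); field names and heap sizes are illustrative:

```
use tantivy::schema::{Document, Schema, TEXT};
use tantivy::{Index, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // With `ReloadPolicy::Manual`, commits only become visible after an explicit `reload()`.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .num_searchers(4)
        .try_into()?;

    let mut index_writer = index.writer(50_000_000)?;
    let mut doc = Document::default();
    doc.add_text(body, "hello reader");
    index_writer.add_document(doc);
    index_writer.commit()?;

    reader.reload()?;
    // Lease a fresh searcher from the pool for every query.
    let searcher = reader.searcher();
    assert_eq!(searcher.num_docs(), 1);
    Ok(())
}
```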

View File

@@ -3,7 +3,6 @@ use common::BinarySerializable;
use common::VInt; use common::VInt;
use itertools::Itertools; use itertools::Itertools;
use std::io::{self, Read, Write}; use std::io::{self, Read, Write};
use DateTime;
/// Tantivy's Document is the object that can /// Tantivy's Document is the object that can
/// be indexed and then searched for. /// be indexed and then searched for.
@@ -83,16 +82,11 @@ impl Document {
self.add(FieldValue::new(field, Value::U64(value))); self.add(FieldValue::new(field, Value::U64(value)));
} }
/// Add a i64 field /// Add a u64 field
pub fn add_i64(&mut self, field: Field, value: i64) { pub fn add_i64(&mut self, field: Field, value: i64) {
self.add(FieldValue::new(field, Value::I64(value))); self.add(FieldValue::new(field, Value::I64(value)));
} }
/// Add a date field
pub fn add_date(&mut self, field: Field, value: &DateTime) {
self.add(FieldValue::new(field, Value::Date(*value)));
}
/// Add a bytes field /// Add a bytes field
pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) { pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
self.add(FieldValue::new(field, Value::Bytes(value))) self.add(FieldValue::new(field, Value::Bytes(value)))
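
A sketch tying together the date APIs removed in this diff: `add_date_field` on the schema builder, `Document::add_date` above, and the `DateTime` alias for `chrono::DateTime<Utc>`. Field names and the heap size are illustrative, and the `INDEXED | STORED` spelling belongs to the flag-based side of the diff:

```
use chrono::{TimeZone, Utc};
use tantivy::schema::{Document, Schema, INDEXED, STORED};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let occurred_at = schema_builder.add_date_field("occurred_at", INDEXED | STORED);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    // Internally the date is stored as an i64 UTC timestamp.
    let mut doc = Document::default();
    doc.add_date(occurred_at, &Utc.timestamp(1_548_748_800, 0)); // 2019-01-29T08:00:00Z
    index_writer.add_document(doc);
    index_writer.commit()?;
    Ok(())
}
```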

View File

@@ -48,15 +48,6 @@ impl FieldEntry {
} }
} }
/// Creates a new date field entry in the schema, given
/// a name, and some options.
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
FieldEntry {
name: field_name,
field_type: FieldType::Date(field_type),
}
}
/// Creates a field entry for a facet. /// Creates a field entry for a facet.
pub fn new_facet(field_name: String) -> FieldEntry { pub fn new_facet(field_name: String) -> FieldEntry {
FieldEntry { FieldEntry {
@@ -87,9 +78,7 @@ impl FieldEntry {
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
match self.field_type { match self.field_type {
FieldType::Str(ref options) => options.get_indexing_options().is_some(), FieldType::Str(ref options) => options.get_indexing_options().is_some(),
FieldType::U64(ref options) FieldType::U64(ref options) | FieldType::I64(ref options) => options.is_indexed(),
| FieldType::I64(ref options)
| FieldType::Date(ref options) => options.is_indexed(),
FieldType::HierarchicalFacet => true, FieldType::HierarchicalFacet => true,
FieldType::Bytes => false, FieldType::Bytes => false,
} }
@@ -106,9 +95,7 @@ impl FieldEntry {
/// Returns true iff the field is stored /// Returns true iff the field is stored
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
match self.field_type { match self.field_type {
FieldType::U64(ref options) FieldType::U64(ref options) | FieldType::I64(ref options) => options.is_stored(),
| FieldType::I64(ref options)
| FieldType::Date(ref options) => options.is_stored(),
FieldType::Str(ref options) => options.is_stored(), FieldType::Str(ref options) => options.is_stored(),
// TODO make stored hierarchical facet optional // TODO make stored hierarchical facet optional
FieldType::HierarchicalFacet => true, FieldType::HierarchicalFacet => true,
@@ -138,10 +125,6 @@ impl Serialize for FieldEntry {
s.serialize_field("type", "i64")?; s.serialize_field("type", "i64")?;
s.serialize_field("options", options)?; s.serialize_field("options", options)?;
} }
FieldType::Date(ref options) => {
s.serialize_field("type", "date")?;
s.serialize_field("options", options)?;
}
FieldType::HierarchicalFacet => { FieldType::HierarchicalFacet => {
s.serialize_field("type", "hierarchical_facet")?; s.serialize_field("type", "hierarchical_facet")?;
} }
@@ -205,7 +188,7 @@ impl<'de> Deserialize<'de> for FieldEntry {
"bytes" => { "bytes" => {
field_type = Some(FieldType::Bytes); field_type = Some(FieldType::Bytes);
} }
"text" | "u64" | "i64" | "date" => { "text" | "u64" | "i64" => {
// These types require additional options to create a field_type // These types require additional options to create a field_type
} }
_ => panic!("unhandled type"), _ => panic!("unhandled type"),
@@ -222,7 +205,6 @@ impl<'de> Deserialize<'de> for FieldEntry {
"text" => field_type = Some(FieldType::Str(map.next_value()?)), "text" => field_type = Some(FieldType::Str(map.next_value()?)),
"u64" => field_type = Some(FieldType::U64(map.next_value()?)), "u64" => field_type = Some(FieldType::U64(map.next_value()?)),
"i64" => field_type = Some(FieldType::I64(map.next_value()?)), "i64" => field_type = Some(FieldType::I64(map.next_value()?)),
"date" => field_type = Some(FieldType::Date(map.next_value()?)),
_ => { _ => {
let msg = format!("Unrecognised type {}", ty); let msg = format!("Unrecognised type {}", ty);
return Err(de::Error::custom(msg)); return Err(de::Error::custom(msg));

View File

@@ -34,8 +34,6 @@ pub enum Type {
U64, U64,
/// `i64` /// `i64`
I64, I64,
/// `date(i64) timestamp`
Date,
/// `tantivy::schema::Facet`. Passed as a string in JSON. /// `tantivy::schema::Facet`. Passed as a string in JSON.
HierarchicalFacet, HierarchicalFacet,
/// `Vec<u8>` /// `Vec<u8>`
@@ -52,8 +50,6 @@ pub enum FieldType {
U64(IntOptions), U64(IntOptions),
/// Signed 64-bits integers 64 field type configuration /// Signed 64-bits integers 64 field type configuration
I64(IntOptions), I64(IntOptions),
/// Signed 64-bits Date 64 field type configuration,
Date(IntOptions),
/// Hierarchical Facet /// Hierarchical Facet
HierarchicalFacet, HierarchicalFacet,
/// Bytes (one per document) /// Bytes (one per document)
@@ -67,7 +63,6 @@ impl FieldType {
FieldType::Str(_) => Type::Str, FieldType::Str(_) => Type::Str,
FieldType::U64(_) => Type::U64, FieldType::U64(_) => Type::U64,
FieldType::I64(_) => Type::I64, FieldType::I64(_) => Type::I64,
FieldType::Date(_) => Type::Date,
FieldType::HierarchicalFacet => Type::HierarchicalFacet, FieldType::HierarchicalFacet => Type::HierarchicalFacet,
FieldType::Bytes => Type::Bytes, FieldType::Bytes => Type::Bytes,
} }
@@ -80,7 +75,6 @@ impl FieldType {
FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => {
int_options.is_indexed() int_options.is_indexed()
} }
FieldType::Date(ref date_options) => date_options.is_indexed(),
FieldType::HierarchicalFacet => true, FieldType::HierarchicalFacet => true,
FieldType::Bytes => false, FieldType::Bytes => false,
} }
@@ -95,9 +89,7 @@ impl FieldType {
FieldType::Str(ref text_options) => text_options FieldType::Str(ref text_options) => text_options
.get_indexing_options() .get_indexing_options()
.map(|indexing_options| indexing_options.index_option()), .map(|indexing_options| indexing_options.index_option()),
FieldType::U64(ref int_options) FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => {
| FieldType::I64(ref int_options)
| FieldType::Date(ref int_options) => {
if int_options.is_indexed() { if int_options.is_indexed() {
Some(IndexRecordOption::Basic) Some(IndexRecordOption::Basic)
} else { } else {
@@ -118,9 +110,9 @@ impl FieldType {
match *json { match *json {
JsonValue::String(ref field_text) => match *self { JsonValue::String(ref field_text) => match *self {
FieldType::Str(_) => Ok(Value::Str(field_text.clone())), FieldType::Str(_) => Ok(Value::Str(field_text.clone())),
FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => Err( FieldType::U64(_) | FieldType::I64(_) => Err(ValueParsingError::TypeError(
ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)), format!("Expected an integer, got {:?}", json),
), )),
FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))), FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))),
FieldType::Bytes => decode(field_text).map(Value::Bytes).map_err(|_| { FieldType::Bytes => decode(field_text).map(Value::Bytes).map_err(|_| {
ValueParsingError::InvalidBase64(format!( ValueParsingError::InvalidBase64(format!(
@@ -130,7 +122,7 @@ impl FieldType {
}), }),
}, },
JsonValue::Number(ref field_val_num) => match *self { JsonValue::Number(ref field_val_num) => match *self {
FieldType::I64(_) | FieldType::Date(_) => { FieldType::I64(_) => {
if let Some(field_val_i64) = field_val_num.as_i64() { if let Some(field_val_i64) = field_val_num.as_i64() {
Ok(Value::I64(field_val_i64)) Ok(Value::I64(field_val_i64))
} else { } else {

View File

@@ -1,81 +0,0 @@
use schema::IntOptions;
use schema::TextOptions;
use std::ops::BitOr;
#[derive(Clone)]
pub struct StoredFlag;
/// Flag to mark the field as stored.
/// This flag can apply to any kind of field.
///
/// The stored fields of a document can be retrieved given its `DocId`.
/// Stored fields are stored together and LZ4 compressed.
/// Reading the stored fields of a document is relatively slow.
/// (~ 100 microsecs)
///
/// It should not be used during scoring or collection.
pub const STORED: SchemaFlagList<StoredFlag, ()> = SchemaFlagList {
head: StoredFlag,
tail: (),
};
#[derive(Clone)]
pub struct IndexedFlag;
/// Flag to mark the field as indexed.
///
/// The `INDEXED` flag can only be used when building `IntOptions` (`u64` and `i64` fields).
/// Of course, text fields can also be indexed... but this is expressed by using either the
/// `STRING` (untokenized) or `TEXT` (tokenized with the English tokenizer) flags.
pub const INDEXED: SchemaFlagList<IndexedFlag, ()> = SchemaFlagList {
head: IndexedFlag,
tail: (),
};
#[derive(Clone)]
pub struct FastFlag;
/// Flag to mark the field as a fast field (similar to Lucene's DocValues)
///
/// Fast fields can be random-accessed rapidly. Fields useful for scoring, filtering
/// or collection should be marked as fast fields.
/// The `FAST` flag can only be used when building `IntOptions` (`u64` and `i64` fields)
pub const FAST: SchemaFlagList<FastFlag, ()> = SchemaFlagList {
head: FastFlag,
tail: (),
};
impl<Head, OldHead, OldTail> BitOr<SchemaFlagList<Head, ()>> for SchemaFlagList<OldHead, OldTail>
where
Head: Clone,
OldHead: Clone,
OldTail: Clone,
{
type Output = SchemaFlagList<Head, SchemaFlagList<OldHead, OldTail>>;
fn bitor(self, head: SchemaFlagList<Head, ()>) -> Self::Output {
SchemaFlagList {
head: head.head,
tail: self.clone(),
}
}
}
impl<T: Clone + Into<IntOptions>> BitOr<IntOptions> for SchemaFlagList<T, ()> {
type Output = IntOptions;
fn bitor(self, rhs: IntOptions) -> Self::Output {
self.head.into() | rhs
}
}
impl<T: Clone + Into<TextOptions>> BitOr<TextOptions> for SchemaFlagList<T, ()> {
type Output = TextOptions;
fn bitor(self, rhs: TextOptions) -> Self::Output {
self.head.into() | rhs
}
}
#[derive(Clone)]
pub struct SchemaFlagList<Head: Clone, Tail: Clone> {
pub head: Head,
pub tail: Tail,
}
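
The removed `SchemaFlagList` machinery above is what lets individual flags be chained with `|` and then converted into the concrete option types. A small sketch of how the flags compose when declaring a schema (field names are illustrative):

```
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // Each `|` folds one more flag into the `SchemaFlagList`; the result is
    // converted into `IntOptions` for integer fields...
    schema_builder.add_u64_field("popularity", INDEXED | FAST | STORED);
    // ...and into `TextOptions` for text fields.
    schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();
    assert!(schema.get_field("popularity").is_some());
    assert!(schema.get_field("title").is_some());
}
```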

View File

@@ -1,4 +1,3 @@
use schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
use std::ops::BitOr; use std::ops::BitOr;
/// Express whether a field is single-value or multi-valued. /// Express whether a field is single-value or multi-valued.
@@ -86,62 +85,41 @@ impl Default for IntOptions {
} }
} }
impl From<()> for IntOptions { /// Shortcut for a u64 fast field.
fn from(_: ()) -> IntOptions { ///
IntOptions::default() /// Such a shortcut can be composed as follows `STORED | FAST | INT_INDEXED`
} pub const FAST: IntOptions = IntOptions {
} indexed: false,
stored: false,
fast: Some(Cardinality::SingleValue),
};
impl From<FastFlag> for IntOptions { /// Shortcut for a u64 indexed field.
fn from(_: FastFlag) -> Self { ///
IntOptions { /// Such a shortcut can be composed as follows `STORED | FAST | INT_INDEXED`
indexed: false, pub const INT_INDEXED: IntOptions = IntOptions {
stored: false, indexed: true,
fast: Some(Cardinality::SingleValue), stored: false,
} fast: None,
} };
}
impl From<StoredFlag> for IntOptions { /// Shortcut for a u64 stored field.
fn from(_: StoredFlag) -> Self { ///
IntOptions { /// Such a shortcut can be composed as follows `STORED | FAST | INT_INDEXED`
indexed: false, pub const INT_STORED: IntOptions = IntOptions {
stored: true, indexed: false,
fast: None, stored: true,
} fast: None,
} };
}
impl From<IndexedFlag> for IntOptions { impl BitOr for IntOptions {
fn from(_: IndexedFlag) -> Self {
IntOptions {
indexed: true,
stored: false,
fast: None,
}
}
}
impl<T: Into<IntOptions>> BitOr<T> for IntOptions {
type Output = IntOptions; type Output = IntOptions;
fn bitor(self, other: T) -> IntOptions { fn bitor(self, other: IntOptions) -> IntOptions {
let mut res = IntOptions::default(); let mut res = IntOptions::default();
let other = other.into();
res.indexed = self.indexed | other.indexed; res.indexed = self.indexed | other.indexed;
res.stored = self.stored | other.stored; res.stored = self.stored | other.stored;
res.fast = self.fast.or(other.fast); res.fast = self.fast.or(other.fast);
res res
} }
} }
impl<Head, Tail> From<SchemaFlagList<Head, Tail>> for IntOptions
where
Head: Clone,
Tail: Clone,
Self: BitOr<Output = Self> + From<Head> + From<Tail>,
{
fn from(head_tail: SchemaFlagList<Head, Tail>) -> Self {
Self::from(head_tail.head) | Self::from(head_tail.tail)
}
}

View File

@@ -33,7 +33,7 @@ let title_options = TextOptions::default()
.set_indexing_options(TextFieldIndexing::default() .set_indexing_options(TextFieldIndexing::default()
.set_tokenizer("default") .set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqsAndPositions)); .set_index_option(IndexRecordOption::WithFreqsAndPositions));
schema_builder.add_text_field("title", title_options); schema_builder.add_text_field("title_options", title_options);
let schema = schema_builder.build(); let schema = schema_builder.build();
``` ```
@@ -53,8 +53,23 @@ The effect of each possible setting is described more in detail
On the other hand setting the field as stored or not determines whether the field should be returned On the other hand setting the field as stored or not determines whether the field should be returned
when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called. when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called.
### Shortcuts
## Setting a u64 or an i64 field For convenience, a few special values of `TextOptions` are provided.
They can be composed using the `|` operator.
The example can be rewritten:
```
use tantivy::schema::*;
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("title_options", TEXT | STORED);
let schema = schema_builder.build();
```
## Setting a u64 field
### Example ### Example
@@ -83,23 +98,6 @@ u64 that are indexed as fast will be stored in a special data structure that wil
make it possible to access the u64 value given the doc id rapidly. This is useful if the value of make it possible to access the u64 value given the doc id rapidly. This is useful if the value of
the field is required during scoring or collection for instance. the field is required during scoring or collection for instance.
### Shortcuts
For convenience, it is possible to define your field indexing options by combining different flags
using the `|` operator.
For instance, a schema containing the two fields defined in the example above could be rewritten:
```
use tantivy::schema::*;
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("num_stars", INDEXED | STORED);
schema_builder.add_text_field("title", TEXT | STORED);
let schema = schema_builder.build();
```
*/ */
mod document; mod document;
@@ -118,15 +116,13 @@ mod named_field_document;
mod text_options; mod text_options;
mod value; mod value;
mod flags;
pub use self::named_field_document::NamedFieldDocument; pub use self::named_field_document::NamedFieldDocument;
pub use self::schema::DocParsingError; pub use self::schema::DocParsingError;
pub use self::schema::{Schema, SchemaBuilder}; pub use self::schema::{Schema, SchemaBuilder};
pub use self::value::Value; pub use self::value::Value;
pub use self::facet::Facet; pub use self::facet::Facet;
pub(crate) use self::facet::FACET_SEP_BYTE; pub use self::facet::FACET_SEP_BYTE;
pub use self::document::Document; pub use self::document::Document;
pub use self::field::Field; pub use self::field::Field;
@@ -139,12 +135,15 @@ pub use self::field_value::FieldValue;
pub use self::index_record_option::IndexRecordOption; pub use self::index_record_option::IndexRecordOption;
pub use self::text_options::TextFieldIndexing; pub use self::text_options::TextFieldIndexing;
pub use self::text_options::TextOptions; pub use self::text_options::TextOptions;
pub use self::text_options::STORED;
pub use self::text_options::STRING; pub use self::text_options::STRING;
pub use self::text_options::TEXT; pub use self::text_options::TEXT;
pub use self::flags::{FAST, INDEXED, STORED};
pub use self::int_options::Cardinality; pub use self::int_options::Cardinality;
pub use self::int_options::IntOptions; pub use self::int_options::IntOptions;
pub use self::int_options::FAST;
pub use self::int_options::INT_INDEXED;
pub use self::int_options::INT_STORED;
use regex::Regex; use regex::Regex;
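
For code that has to build against either side of this diff, spelling the options out avoids the flag constants altogether. A sketch assuming the usual `IntOptions` builder methods (`set_indexed`, `set_stored`, `set_fast`):

```
use tantivy::schema::{Cardinality, IntOptions, Schema};

fn main() {
    let mut schema_builder = Schema::builder();
    // Equivalent to `INDEXED | STORED | FAST` with the new flags, or
    // `INT_INDEXED | INT_STORED | FAST` with the older constants.
    let options = IntOptions::default()
        .set_indexed()
        .set_stored()
        .set_fast(Cardinality::SingleValue);
    schema_builder.add_u64_field("num_stars", options);
    let schema = schema_builder.build();
    assert!(schema.get_field("num_stars").is_some());
}
```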

View File

@@ -52,13 +52,9 @@ impl SchemaBuilder {
/// by the second one. /// by the second one.
/// The first field will get a field id /// The first field will get a field id
/// but only the second one will be indexed /// but only the second one will be indexed
pub fn add_u64_field<T: Into<IntOptions>>( pub fn add_u64_field(&mut self, field_name_str: &str, field_options: IntOptions) -> Field {
&mut self,
field_name_str: &str,
field_options: T,
) -> Field {
let field_name = String::from(field_name_str); let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_u64(field_name, field_options.into()); let field_entry = FieldEntry::new_u64(field_name, field_options);
self.add_field(field_entry) self.add_field(field_entry)
} }
@@ -72,35 +68,9 @@ impl SchemaBuilder {
/// by the second one. /// by the second one.
/// The first field will get a field id /// The first field will get a field id
/// but only the second one will be indexed /// but only the second one will be indexed
pub fn add_i64_field<T: Into<IntOptions>>( pub fn add_i64_field(&mut self, field_name_str: &str, field_options: IntOptions) -> Field {
&mut self,
field_name_str: &str,
field_options: T,
) -> Field {
let field_name = String::from(field_name_str); let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_i64(field_name, field_options.into()); let field_entry = FieldEntry::new_i64(field_name, field_options);
self.add_field(field_entry)
}
/// Adds a new date field.
/// Returns the associated field handle
/// Internally, Tantivy simply stores dates as i64 UTC timestamps,
/// while the user supplies DateTime values for convenience.
///
/// # Caution
///
/// Appending two fields with the same name
/// will result in the shadowing of the first
/// by the second one.
/// The first field will get a field id
/// but only the second one will be indexed
pub fn add_date_field<T: Into<IntOptions>>(
&mut self,
field_name_str: &str,
field_options: T,
) -> Field {
let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_date(field_name, field_options.into());
self.add_field(field_entry) self.add_field(field_entry)
} }
@@ -114,13 +84,9 @@ impl SchemaBuilder {
/// by the second one. /// by the second one.
/// The first field will get a field id /// The first field will get a field id
/// but only the second one will be indexed /// but only the second one will be indexed
pub fn add_text_field<T: Into<TextOptions>>( pub fn add_text_field(&mut self, field_name_str: &str, field_options: TextOptions) -> Field {
&mut self,
field_name_str: &str,
field_options: T,
) -> Field {
let field_name = String::from(field_name_str); let field_name = String::from(field_name_str);
let field_entry = FieldEntry::new_text(field_name, field_options.into()); let field_entry = FieldEntry::new_text(field_name, field_options);
self.add_field(field_entry) self.add_field(field_entry)
} }
@@ -212,7 +178,15 @@ impl Schema {
SchemaBuilder::default() SchemaBuilder::default()
} }
/// Returns the field option associated with a given name. /// Returns the field options associated with a given name.
///
/// # Panics
/// Panics if the field name does not exist.
/// It is meant as a helper for users who created
/// and control the content of their schema.
///
/// If panicking is not an option for you,
/// you may use `get(&self, field_name: &str)`.
pub fn get_field(&self, field_name: &str) -> Option<Field> { pub fn get_field(&self, field_name: &str) -> Option<Field> {
self.0.fields_map.get(field_name).cloned() self.0.fields_map.get(field_name).cloned()
} }
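
Both sides of this hunk keep the `Option<Field>` return type, so callers can handle a missing field explicitly rather than panicking. A short usage sketch (field names are illustrative):

```
use tantivy::schema::{Schema, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT);
    let schema = schema_builder.build();

    // `get_field` returns `None` rather than panicking when the name is unknown.
    match schema.get_field("body") {
        Some(field) => println!("found field: {:?}", field),
        None => eprintln!("schema has no field named `body`"),
    }
}
```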

View File

@@ -5,7 +5,6 @@ use byteorder::{BigEndian, ByteOrder};
use common; use common;
use schema::Facet; use schema::Facet;
use std::str; use std::str;
use DateTime;
/// Size (in bytes) of the buffer of an int field. /// Size (in bytes) of the buffer of an int field.
const INT_TERM_LEN: usize = 4 + 8; const INT_TERM_LEN: usize = 4 + 8;
@@ -31,18 +30,6 @@ impl Term {
Term::from_field_u64(field, val_u64) Term::from_field_u64(field, val_u64)
} }
/// Builds a term given a field, and a DateTime value
///
/// Assuming the term has a field id of 1, and a timestamp i64 value of 3234,
/// the Term will have 12 bytes.
///
/// The first four bytes are dedicated to storing the field id.
/// The following 8 bytes encode the DateTime as an i64 timestamp value.
pub fn from_field_date(field: Field, val: &DateTime) -> Term {
let val_timestamp = val.timestamp();
Term::from_field_i64(field, val_timestamp)
}
/// Creates a `Term` given a facet. /// Creates a `Term` given a facet.
pub fn from_facet(field: Field, facet: &Facet) -> Term { pub fn from_facet(field: Field, facet: &Facet) -> Term {
let bytes = facet.encoded_str().as_bytes(); let bytes = facet.encoded_str().as_bytes();

View File

@@ -1,5 +1,3 @@
use schema::flags::SchemaFlagList;
use schema::flags::StoredFlag;
use schema::IndexRecordOption; use schema::IndexRecordOption;
use std::borrow::Cow; use std::borrow::Cow;
use std::ops::BitOr; use std::ops::BitOr;
@@ -111,11 +109,19 @@ pub const TEXT: TextOptions = TextOptions {
stored: false, stored: false,
}; };
impl<T: Into<TextOptions>> BitOr<T> for TextOptions { /// The stored fields of a document can be retrieved given its `DocId`.
/// Stored fields are stored together and LZ4 compressed.
/// Reading the stored fields of a document is relatively slow.
/// (100 microsecs)
pub const STORED: TextOptions = TextOptions {
indexing: None,
stored: true,
};
impl BitOr for TextOptions {
type Output = TextOptions; type Output = TextOptions;
fn bitor(self, other: T) -> TextOptions { fn bitor(self, other: TextOptions) -> TextOptions {
let other = other.into();
let mut res = TextOptions::default(); let mut res = TextOptions::default();
res.indexing = self.indexing.or(other.indexing); res.indexing = self.indexing.or(other.indexing);
res.stored = self.stored | other.stored; res.stored = self.stored | other.stored;
@@ -123,32 +129,6 @@ impl<T: Into<TextOptions>> BitOr<T> for TextOptions {
} }
} }
impl From<()> for TextOptions {
fn from(_: ()) -> TextOptions {
TextOptions::default()
}
}
impl From<StoredFlag> for TextOptions {
fn from(_: StoredFlag) -> TextOptions {
TextOptions {
indexing: None,
stored: true,
}
}
}
impl<Head, Tail> From<SchemaFlagList<Head, Tail>> for TextOptions
where
Head: Clone,
Tail: Clone,
Self: BitOr<Output = Self> + From<Head> + From<Tail>,
{
fn from(head_tail: SchemaFlagList<Head, Tail>) -> Self {
Self::from(head_tail.head) | Self::from(head_tail.tail)
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use schema::*; use schema::*;

View File

@@ -2,7 +2,6 @@ use schema::Facet;
use serde::de::Visitor; use serde::de::Visitor;
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt; use std::fmt;
use DateTime;
/// Value represents the value of any field. /// It is an enum over all of the possible field types.
/// It is an enum over all over all of the possible field type. /// It is an enum over all over all of the possible field type.
@@ -14,8 +13,6 @@ pub enum Value {
U64(u64), U64(u64),
/// Signed 64-bits Integer `i64` /// Signed 64-bits Integer `i64`
I64(i64), I64(i64),
/// Signed 64-bits Date time stamp `date`
Date(DateTime),
/// Hierarchical Facet /// Hierarchical Facet
Facet(Facet), Facet(Facet),
/// Arbitrarily sized byte array /// Arbitrarily sized byte array
@@ -31,7 +28,6 @@ impl Serialize for Value {
Value::Str(ref v) => serializer.serialize_str(v), Value::Str(ref v) => serializer.serialize_str(v),
Value::U64(u) => serializer.serialize_u64(u), Value::U64(u) => serializer.serialize_u64(u),
Value::I64(u) => serializer.serialize_i64(u), Value::I64(u) => serializer.serialize_i64(u),
Value::Date(ref date) => serializer.serialize_i64(date.timestamp()),
Value::Facet(ref facet) => facet.serialize(serializer), Value::Facet(ref facet) => facet.serialize(serializer),
Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes), Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
} }
@@ -106,17 +102,6 @@ impl Value {
_ => panic!("This is not a text field."), _ => panic!("This is not a text field."),
} }
} }
/// Returns the Date-value, provided the value is of the `Date` type.
///
/// # Panics
/// If the value is not of type `Date`
pub fn date_value(&self) -> &DateTime {
match *self {
Value::Date(ref value) => value,
_ => panic!("This is not a date field."),
}
}
} }
impl From<String> for Value { impl From<String> for Value {
@@ -137,12 +122,6 @@ impl From<i64> for Value {
} }
} }
impl From<DateTime> for Value {
fn from(date_time: DateTime) -> Value {
Value::Date(date_time)
}
}
impl<'a> From<&'a str> for Value { impl<'a> From<&'a str> for Value {
fn from(s: &'a str) -> Value { fn from(s: &'a str) -> Value {
Value::Str(s.to_string()) Value::Str(s.to_string())
@@ -163,7 +142,6 @@ impl From<Vec<u8>> for Value {
mod binary_serialize { mod binary_serialize {
use super::Value; use super::Value;
use chrono::{TimeZone, Utc};
use common::BinarySerializable; use common::BinarySerializable;
use schema::Facet; use schema::Facet;
use std::io::{self, Read, Write}; use std::io::{self, Read, Write};
@@ -173,7 +151,6 @@ mod binary_serialize {
const I64_CODE: u8 = 2; const I64_CODE: u8 = 2;
const HIERARCHICAL_FACET_CODE: u8 = 3; const HIERARCHICAL_FACET_CODE: u8 = 3;
const BYTES_CODE: u8 = 4; const BYTES_CODE: u8 = 4;
const DATE_CODE: u8 = 5;
impl BinarySerializable for Value { impl BinarySerializable for Value {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
@@ -190,10 +167,6 @@ mod binary_serialize {
I64_CODE.serialize(writer)?; I64_CODE.serialize(writer)?;
val.serialize(writer) val.serialize(writer)
} }
Value::Date(ref val) => {
DATE_CODE.serialize(writer)?;
val.timestamp().serialize(writer)
}
Value::Facet(ref facet) => { Value::Facet(ref facet) => {
HIERARCHICAL_FACET_CODE.serialize(writer)?; HIERARCHICAL_FACET_CODE.serialize(writer)?;
facet.serialize(writer) facet.serialize(writer)
@@ -219,10 +192,6 @@ mod binary_serialize {
let value = i64::deserialize(reader)?; let value = i64::deserialize(reader)?;
Ok(Value::I64(value)) Ok(Value::I64(value))
} }
DATE_CODE => {
let timestamp = i64::deserialize(reader)?;
Ok(Value::Date(Utc.timestamp(timestamp, 0)))
}
HIERARCHICAL_FACET_CODE => Ok(Value::Facet(Facet::deserialize(reader)?)), HIERARCHICAL_FACET_CODE => Ok(Value::Facet(Facet::deserialize(reader)?)),
BYTES_CODE => Ok(Value::Bytes(Vec::<u8>::deserialize(reader)?)), BYTES_CODE => Ok(Value::Bytes(Vec::<u8>::deserialize(reader)?)),
_ => Err(io::Error::new( _ => Err(io::Error::new(

View File

@@ -241,8 +241,8 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
/// # let query_parser = QueryParser::for_index(&index, vec![text_field]); /// # let query_parser = QueryParser::for_index(&index, vec![text_field]);
/// // ... /// // ...
/// let query = query_parser.parse_query("haleurs flamands").unwrap(); /// let query = query_parser.parse_query("haleurs flamands").unwrap();
/// # let reader = index.reader()?; /// # index.load_searchers()?;
/// # let searcher = reader.searcher(); /// # let searcher = index.searcher();
/// let mut snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?; /// let mut snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?;
/// snippet_generator.set_max_num_chars(100); /// snippet_generator.set_max_num_chars(100);
/// let snippet = snippet_generator.snippet_from_doc(&doc); /// let snippet = snippet_generator.snippet_from_doc(&doc);
@@ -528,8 +528,9 @@ Survey in 2016, 2017, and 2018."#;
index_writer.add_document(doc!(text_field => "a")); index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a b")); index_writer.add_document(doc!(text_field => "a b"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
index.load_searchers().unwrap();
} }
let searcher = index.reader().unwrap().searcher(); let searcher = index.searcher();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
{ {
let query = query_parser.parse_query("e").unwrap(); let query = query_parser.parse_query("e").unwrap();
@@ -586,7 +587,8 @@ Survey in 2016, 2017, and 2018."#;
} }
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let query_parser = QueryParser::for_index(&index, vec![text_field]); let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("rust design").unwrap(); let query = query_parser.parse_query("rust design").unwrap();
let mut snippet_generator = let mut snippet_generator =

View File

@@ -295,7 +295,8 @@ mod test {
use core::Index; use core::Index;
use schema::Field; use schema::Field;
use schema::Schema; use schema::Schema;
use schema::{FAST, INDEXED, STORED, TEXT}; use schema::STORED;
use schema::{FAST, INT_INDEXED, TEXT};
use space_usage::ByteCount; use space_usage::ByteCount;
use space_usage::PerFieldSpaceUsage; use space_usage::PerFieldSpaceUsage;
use Term; use Term;
@@ -304,8 +305,9 @@ mod test {
fn test_empty() { fn test_empty() {
let schema = Schema::builder().build(); let schema = Schema::builder().build();
let index = Index::create_in_ram(schema.clone()); let index = Index::create_in_ram(schema.clone());
let reader = index.reader().unwrap();
let searcher = reader.searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage(); let searcher_space_usage = searcher.space_usage();
assert_eq!(0, searcher_space_usage.total()); assert_eq!(0, searcher_space_usage.total());
} }
@@ -330,7 +332,7 @@ mod test {
#[test] #[test]
fn test_fast_indexed() { fn test_fast_indexed() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let name = schema_builder.add_u64_field("name", FAST | INDEXED); let name = schema_builder.add_u64_field("name", FAST | INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone()); let index = Index::create_in_ram(schema.clone());
@@ -343,8 +345,8 @@ mod test {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage(); let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0); assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len()); assert_eq!(1, searcher_space_usage.segments().len());
@@ -383,8 +385,8 @@ mod test {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher(); let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage(); let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0); assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len()); assert_eq!(1, searcher_space_usage.segments().len());
@@ -422,8 +424,9 @@ mod test {
index_writer.add_document(doc!(name => "hello hi goodbye")); index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
let reader = index.reader().unwrap();
let searcher = reader.searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage(); let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0); assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len()); assert_eq!(1, searcher_space_usage.segments().len());
@@ -447,7 +450,7 @@ mod test {
#[test] #[test]
fn test_deletes() { fn test_deletes() {
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
let name = schema_builder.add_u64_field("name", INDEXED); let name = schema_builder.add_u64_field("name", INT_INDEXED);
let schema = schema_builder.build(); let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone()); let index = Index::create_in_ram(schema.clone());
@@ -469,8 +472,9 @@ mod test {
index_writer2.commit().unwrap(); index_writer2.commit().unwrap();
} }
let reader = index.reader().unwrap(); index.load_searchers().unwrap();
let searcher = reader.searcher();
let searcher = index.searcher();
let searcher_space_usage = searcher.space_usage(); let searcher_space_usage = searcher.space_usage();
assert!(searcher_space_usage.total() > 0); assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len()); assert_eq!(1, searcher_space_usage.segments().len());

View File

@@ -159,7 +159,8 @@ mod tests {
index_writer.commit().unwrap(); index_writer.commit().unwrap();
} }
} }
let searcher = index.reader().unwrap().searcher(); index.load_searchers().unwrap();
let searcher = index.searcher();
let field_searcher = searcher.field(text_field); let field_searcher = searcher.field(text_field);
let mut term_it = field_searcher.terms(); let mut term_it = field_searcher.terms();

View File

@@ -1,9 +1,9 @@
use super::TermDictionary; use super::TermDictionary;
use fst::automaton::AlwaysMatch;
use fst::map::{Stream, StreamBuilder};
use fst::Automaton;
use fst::{IntoStreamer, Streamer};
use postings::TermInfo; use postings::TermInfo;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::map::{Stream, StreamBuilder};
use tantivy_fst::Automaton;
use tantivy_fst::{IntoStreamer, Streamer};
use termdict::TermOrdinal; use termdict::TermOrdinal;
/// `TermStreamerBuilder` is a helper object used to define /// `TermStreamerBuilder` is a helper object used to define

View File

@@ -3,15 +3,15 @@ use super::{TermStreamer, TermStreamerBuilder};
use common::BinarySerializable; use common::BinarySerializable;
use common::CountingWriter; use common::CountingWriter;
use directory::ReadOnlySource; use directory::ReadOnlySource;
use fst;
use fst::raw::Fst;
use fst::Automaton;
use postings::TermInfo; use postings::TermInfo;
use schema::FieldType; use schema::FieldType;
use std::io::{self, Write}; use std::io::{self, Write};
use tantivy_fst;
use tantivy_fst::raw::Fst;
use tantivy_fst::Automaton;
use termdict::TermOrdinal; use termdict::TermOrdinal;
fn convert_fst_error(e: tantivy_fst::Error) -> io::Error { fn convert_fst_error(e: fst::Error) -> io::Error {
io::Error::new(io::ErrorKind::Other, e) io::Error::new(io::ErrorKind::Other, e)
} }
@@ -19,7 +19,7 @@ fn convert_fst_error(e: tantivy_fst::Error) -> io::Error {
/// ///
/// Inserting must be done in the order of the `keys`. /// Inserting must be done in the order of the `keys`.
pub struct TermDictionaryBuilder<W> { pub struct TermDictionaryBuilder<W> {
fst_builder: tantivy_fst::MapBuilder<W>, fst_builder: fst::MapBuilder<W>,
term_info_store_writer: TermInfoStoreWriter, term_info_store_writer: TermInfoStoreWriter,
term_ord: u64, term_ord: u64,
} }
@@ -30,7 +30,7 @@ where
{ {
/// Creates a new `TermDictionaryBuilder` /// Creates a new `TermDictionaryBuilder`
pub fn create(w: W, _field_type: &FieldType) -> io::Result<Self> { pub fn create(w: W, _field_type: &FieldType) -> io::Result<Self> {
let fst_builder = tantivy_fst::MapBuilder::new(w).map_err(convert_fst_error)?; let fst_builder = fst::MapBuilder::new(w).map_err(convert_fst_error)?;
Ok(TermDictionaryBuilder { Ok(TermDictionaryBuilder {
fst_builder, fst_builder,
term_info_store_writer: TermInfoStoreWriter::new(), term_info_store_writer: TermInfoStoreWriter::new(),
@@ -87,9 +87,17 @@ where
} }
} }
fn open_fst_index(source: ReadOnlySource) -> tantivy_fst::Map<ReadOnlySource> { fn open_fst_index(source: ReadOnlySource) -> fst::Map {
let fst = Fst::new(source).expect("FST data is corrupted"); let fst = match source {
tantivy_fst::Map::from(fst) ReadOnlySource::Anonymous(data) => {
Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted")
}
#[cfg(feature = "mmap")]
ReadOnlySource::Mmap(mmap_readonly) => {
Fst::from_mmap(mmap_readonly).expect("FST data is corrupted")
}
};
fst::Map::from(fst)
} }
/// The term dictionary contains all of the terms in /// The term dictionary contains all of the terms in
@@ -99,7 +107,7 @@ fn open_fst_index(source: ReadOnlySource) -> tantivy_fst::Map<ReadOnlySource> {
/// respective `TermOrdinal`. The `TermInfoStore` then makes it /// respective `TermOrdinal`. The `TermInfoStore` then makes it
/// possible to fetch the associated `TermInfo`. /// possible to fetch the associated `TermInfo`.
pub struct TermDictionary { pub struct TermDictionary {
fst_index: tantivy_fst::Map<ReadOnlySource>, fst_index: fst::Map,
term_info_store: TermInfoStore, term_info_store: TermInfoStore,
} }

View File

@@ -228,27 +228,27 @@ pub mod tests {
fn test_non_en_tokenizer() { fn test_non_en_tokenizer() {
let tokenizer_manager = TokenizerManager::default(); let tokenizer_manager = TokenizerManager::default();
tokenizer_manager.register( tokenizer_manager.register(
"el_stem", "es_stem",
SimpleTokenizer SimpleTokenizer
.filter(RemoveLongFilter::limit(40)) .filter(RemoveLongFilter::limit(40))
.filter(LowerCaser) .filter(LowerCaser)
.filter(Stemmer::new(Language::Greek)), .filter(Stemmer::new(Language::Spanish)),
); );
let en_tokenizer = tokenizer_manager.get("el_stem").unwrap(); let en_tokenizer = tokenizer_manager.get("es_stem").unwrap();
let mut tokens: Vec<Token> = vec![]; let mut tokens: Vec<Token> = vec![];
{ {
let mut add_token = |token: &Token| { let mut add_token = |token: &Token| {
tokens.push(token.clone()); tokens.push(token.clone());
}; };
en_tokenizer en_tokenizer
.token_stream("Καλημέρα, χαρούμενε φορολογούμενε!") .token_stream("Hola, feliz contribuyente!")
.process(&mut add_token); .process(&mut add_token);
} }
assert_eq!(tokens.len(), 3); assert_eq!(tokens.len(), 3);
assert_token(&tokens[0], 0, "καλημερ", 0, 16); assert_token(&tokens[0], 0, "hola", 0, 4);
assert_token(&tokens[1], 1, "χαρουμεν", 18, 36); assert_token(&tokens[1], 1, "feliz", 6, 11);
assert_token(&tokens[2], 2, "φορολογουμεν", 37, 63); assert_token(&tokens[2], 2, "contribuyent", 12, 25);
} }
#[test] #[test]

View File

@@ -2,6 +2,7 @@
use super::{Token, TokenFilter, TokenStream}; use super::{Token, TokenFilter, TokenStream};
use rust_stemmers::{self, Algorithm}; use rust_stemmers::{self, Algorithm};
use std::sync::Arc;
/// Available stemmer languages. /// Available stemmer languages.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)] #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
@@ -14,7 +15,6 @@ pub enum Language {
Finnish, Finnish,
French, French,
German, German,
Greek,
Hungarian, Hungarian,
Italian, Italian,
Portuguese, Portuguese,
@@ -37,7 +37,6 @@ impl Language {
Finnish => Algorithm::Finnish, Finnish => Algorithm::Finnish,
French => Algorithm::French, French => Algorithm::French,
German => Algorithm::German, German => Algorithm::German,
Greek => Algorithm::Greek,
Hungarian => Algorithm::Hungarian, Hungarian => Algorithm::Hungarian,
Italian => Algorithm::Italian, Italian => Algorithm::Italian,
Portuguese => Algorithm::Portuguese, Portuguese => Algorithm::Portuguese,
@@ -56,14 +55,14 @@ impl Language {
/// Tokens are expected to be lowercased beforehand. /// Tokens are expected to be lowercased beforehand.
#[derive(Clone)] #[derive(Clone)]
pub struct Stemmer { pub struct Stemmer {
stemmer_algorithm: Algorithm, stemmer_algorithm: Arc<Algorithm>,
} }
impl Stemmer { impl Stemmer {
/// Creates a new Stemmer `TokenFilter` for a given language algorithm. /// Creates a new Stemmer `TokenFilter` for a given language algorithm.
pub fn new(language: Language) -> Stemmer { pub fn new(language: Language) -> Stemmer {
Stemmer { Stemmer {
stemmer_algorithm: language.algorithm(), stemmer_algorithm: Arc::new(language.algorithm()),
} }
} }
} }
@@ -82,7 +81,7 @@ where
type ResultTokenStream = StemmerTokenStream<TailTokenStream>; type ResultTokenStream = StemmerTokenStream<TailTokenStream>;
fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream { fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream {
let inner_stemmer = rust_stemmers::Stemmer::create(self.stemmer_algorithm); let inner_stemmer = rust_stemmers::Stemmer::create(Algorithm::English);
StemmerTokenStream::wrap(inner_stemmer, token_stream) StemmerTokenStream::wrap(inner_stemmer, token_stream)
} }
} }
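
To close the loop on the stemmer changes: the test above registers a stemming analyzer under a custom name, and a field then opts into it through the tokenizer name on its `TextFieldIndexing`. A sketch of that wiring, assuming `Stemmer`, `Language`, and the filter chain are exported from `tantivy::tokenizer` as in the hunks above (field and tokenizer names are illustrative):

```
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};
use tantivy::tokenizer::{Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer};
use tantivy::Index;

fn main() {
    let mut schema_builder = Schema::builder();
    let text_options = TextOptions::default().set_indexing_options(
        TextFieldIndexing::default()
            // Must match the name registered with the tokenizer manager below.
            .set_tokenizer("es_stem")
            .set_index_option(IndexRecordOption::WithFreqsAndPositions),
    );
    schema_builder.add_text_field("body", text_options);
    let index = Index::create_in_ram(schema_builder.build());

    // Same chain as the test above: split, drop very long tokens, lowercase, then stem.
    index.tokenizers().register(
        "es_stem",
        SimpleTokenizer
            .filter(RemoveLongFilter::limit(40))
            .filter(LowerCaser)
            .filter(Stemmer::new(Language::Spanish)),
    );
}
```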