Issue/1198 (#1201)

* Unit test reproducing #1198
* Fixing unit test to handle the error from add_document.
* Bump project version
This commit is contained in:
Paul Masurel
2021-11-11 16:42:19 +09:00
committed by GitHub
parent fcff91559b
commit 7234bef0eb
66 changed files with 1404 additions and 1191 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.16.1"
version = "0.17.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]

View File

@@ -96,7 +96,7 @@ fn main() -> tantivy::Result<()> {
);
// ... and add it to the `IndexWriter`.
index_writer.add_document(old_man_doc);
index_writer.add_document(old_man_doc)?;
// For convenience, tantivy also comes with a macro to
// reduce the boilerplate above.
@@ -110,7 +110,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
// Multivalued field just need to be repeated.
index_writer.add_document(doc!(
@@ -120,7 +120,7 @@ fn main() -> tantivy::Result<()> {
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
));
))?;
// This is an example, so we will only index 3 documents
// here. You can check out tantivy's tutorial to index

View File

@@ -145,23 +145,23 @@ fn main() -> tantivy::Result<()> {
product_description => "While it is ok for short distance travel, this broom \
was designed quiditch. It will up your game.",
price => 30_200u64
));
))?;
index_writer.add_document(doc!(
product_name => "Turbulobroom",
product_description => "You might have heard of this broom before : it is the sponsor of the Wales team.\
You'll enjoy its sharp turns, and rapid acceleration",
price => 29_240u64
));
))?;
index_writer.add_document(doc!(
product_name => "Broomio",
product_description => "Great value for the price. This broom is a market favorite",
price => 21_240u64
));
))?;
index_writer.add_document(doc!(
product_name => "Whack a Mole",
product_description => "Prime quality bat.",
price => 5_200u64
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -68,7 +68,7 @@ fn main() -> tantivy::Result<()> {
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish."
));
))?;
index_writer.add_document(doc!(
title => "Of Mice and Men",
body => r#"A few miles south of Soledad, the Salinas River drops in close to the hillside
@@ -79,14 +79,14 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the
debris of the winters flooding; and sycamores with mottled, white, recumbent
limbs and branches that arch over the pool"#
));
))?;
index_writer.add_document(doc!(
title => "Frankenstein",
body => r#"You will rejoice to hear that no disaster has accompanied the commencement of an
enterprise which you have regarded with such evil forebodings. I arrived here
yesterday, and my first task is to assure my dear sister of my welfare and
increasing confidence in the success of my undertaking."#
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -76,15 +76,15 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
isbn => "978-0099908401",
title => "The old Man and the see"
));
))?;
index_writer.add_document(doc!(
isbn => "978-0140177398",
title => "Of Mice and Men",
));
))?;
index_writer.add_document(doc!(
title => "Frankentein", //< Oops there is a typo here.
isbn => "978-9176370711",
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -122,7 +122,7 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
title => "Frankenstein",
isbn => "978-9176370711",
));
))?;
// You are guaranteed that your clients will only observe your index in
// the state it was in after a commit.

View File

@@ -35,35 +35,35 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
name => "Cat",
classification => Facet::from("/Felidae/Felinae/Felis")
));
))?;
index_writer.add_document(doc!(
name => "Canada lynx",
classification => Facet::from("/Felidae/Felinae/Lynx")
));
))?;
index_writer.add_document(doc!(
name => "Cheetah",
classification => Facet::from("/Felidae/Felinae/Acinonyx")
));
))?;
index_writer.add_document(doc!(
name => "Tiger",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Lion",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Jaguar",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Sunda clouded leopard",
classification => Facet::from("/Felidae/Pantherinae/Neofelis")
));
))?;
index_writer.add_document(doc!(
name => "Fossa",
classification => Facet::from("/Eupleridae/Cryptoprocta")
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -20,14 +20,14 @@ fn main() -> tantivy::Result<()> {
title => "Fried egg",
ingredient => Facet::from("/ingredient/egg"),
ingredient => Facet::from("/ingredient/oil"),
));
))?;
index_writer.add_document(doc!(
title => "Scrambled egg",
ingredient => Facet::from("/ingredient/egg"),
ingredient => Facet::from("/ingredient/butter"),
ingredient => Facet::from("/ingredient/milk"),
ingredient => Facet::from("/ingredient/salt"),
));
))?;
index_writer.add_document(doc!(
title => "Egg rolls",
ingredient => Facet::from("/ingredient/egg"),
@@ -36,7 +36,7 @@ fn main() -> tantivy::Result<()> {
ingredient => Facet::from("/ingredient/oil"),
ingredient => Facet::from("/ingredient/tortilla-wrap"),
ingredient => Facet::from("/ingredient/mushroom"),
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -7,7 +7,7 @@ use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED};
use tantivy::{doc, Index, Result};
fn run() -> Result<()> {
fn main() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field
let mut schema_builder = Schema::builder();
@@ -19,7 +19,7 @@ fn run() -> Result<()> {
{
let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
for year in 1950u64..2019u64 {
index_writer.add_document(doc!(year_field => year));
index_writer.add_document(doc!(year_field => year))?;
}
index_writer.commit()?;
// The index will be a range of years
@@ -33,7 +33,3 @@ fn run() -> Result<()> {
assert_eq!(num_60s_books, 10);
Ok(())
}
fn main() {
run().unwrap()
}

View File

@@ -25,9 +25,9 @@ fn main() -> tantivy::Result<()> {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
index_writer.add_document(doc!(title => "The Old Man and the Sea"));
index_writer.add_document(doc!(title => "Of Mice and Men"));
index_writer.add_document(doc!(title => "The modern Promotheus"));
index_writer.add_document(doc!(title => "The Old Man and the Sea"))?;
index_writer.add_document(doc!(title => "Of Mice and Men"))?;
index_writer.add_document(doc!(title => "The modern Promotheus"))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -29,7 +29,7 @@ use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, Index, IndexWriter, Opstamp};
use tantivy::{doc, Index, IndexWriter, Opstamp, TantivyError};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -59,10 +59,11 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
thread::sleep(Duration::from_millis(20));
}
Result::<(), TantivyError>::Ok(())
});
// # Second indexing thread.
@@ -78,11 +79,12 @@ fn main() -> tantivy::Result<()> {
index_writer_rlock.add_document(doc!(
title => "Manufacturing consent",
body => "Some great book description..."
))
))?
};
println!("add doc {} from thread 2 - opstamp {}", i, opstamp);
thread::sleep(Duration::from_millis(10));
}
Result::<(), TantivyError>::Ok(())
});
// # In the main thread, we commit 10 times, once every 500ms.
@@ -90,7 +92,7 @@ fn main() -> tantivy::Result<()> {
let opstamp: Opstamp = {
// Committing or rollbacking on the other hand requires write lock. This will block other threads.
let mut index_writer_wlock = index_writer.write().unwrap();
index_writer_wlock.commit().unwrap()
index_writer_wlock.commit()?
};
println!("committed with opstamp {}", opstamp);
thread::sleep(Duration::from_millis(500));

View File

@@ -68,7 +68,7 @@ fn main() -> tantivy::Result<()> {
let old_man_doc = doc!(title => title_tok, body => body_tok);
// ... now let's just add it to the IndexWriter
index_writer.add_document(old_man_doc);
index_writer.add_document(old_man_doc)?;
// Pretokenized text can also be fed as JSON
let short_man_json = r#"{
@@ -84,7 +84,7 @@ fn main() -> tantivy::Result<()> {
let short_man_doc = schema.parse_document(short_man_json)?;
index_writer.add_document(short_man_doc);
index_writer.add_document(short_man_doc)?;
// Let's commit changes
index_writer.commit()?;
@@ -106,9 +106,7 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(2), Count))
.unwrap();
let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
assert_eq!(count, 2);
@@ -129,9 +127,7 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (_top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(2), Count))
.unwrap();
let (_top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
assert_eq!(count, 0);

View File

@@ -40,7 +40,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
// ...
index_writer.commit()?;

View File

@@ -68,7 +68,7 @@ fn main() -> tantivy::Result<()> {
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish."
));
))?;
index_writer.add_document(doc!(
title => "Of Mice and Men",
@@ -80,7 +80,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
index_writer.add_document(doc!(
title => "Frankenstein",
@@ -88,7 +88,7 @@ fn main() -> tantivy::Result<()> {
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
));
))?;
index_writer.commit()?;

View File

@@ -20,10 +20,10 @@ use crate::SegmentReader;
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer(3_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// index_writer.add_document(doc!(title => "The Name of the Wind")).unwrap();
/// index_writer.add_document(doc!(title => "The Diary of Muadib")).unwrap();
/// index_writer.add_document(doc!(title => "A Dairy Cow")).unwrap();
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl")).unwrap();
/// assert!(index_writer.commit().is_ok());
///
/// let reader = index.reader().unwrap();

View File

@@ -103,23 +103,23 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// title => "The Name of the Wind",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/fiction/fantasy")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "Dune",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/fiction/sci-fi")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "La Vénus d'Ille",
/// facet => Facet::from("/lang/fr"),
/// facet => Facet::from("/category/fiction/fantasy"),
/// facet => Facet::from("/category/fiction/horror")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/biography")
/// ));
/// ))?;
/// index_writer.commit()?;
/// }
/// let reader = index.reader()?;
@@ -470,13 +470,13 @@ mod tests {
use std::iter;
#[test]
fn test_facet_collector_drilldown() {
fn test_facet_collector_drilldown() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let num_facets: usize = 3 * 4 * 5;
let facets: Vec<Facet> = (0..num_facets)
.map(|mut n| {
@@ -491,14 +491,14 @@ mod tests {
for i in 0..num_facets * 10 {
let mut doc = Document::new();
doc.add_facet(facet_field, facets[i % num_facets].clone());
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet(Facet::from("/top1"));
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
let counts = searcher.search(&AllQuery, &facet_collector)?;
{
let facets: Vec<(String, u64)> = counts
@@ -518,6 +518,7 @@ mod tests {
.collect::<Vec<_>>()
);
}
Ok(())
}
#[test]
@@ -530,27 +531,28 @@ mod tests {
}
#[test]
fn test_doc_unsorted_multifacet() {
fn test_doc_unsorted_multifacet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/subjects/A/a").unwrap(),
facet_field => Facet::from_text(&"/subjects/B/a").unwrap(),
facet_field => Facet::from_text(&"/subjects/A/b").unwrap(),
facet_field => Facet::from_text(&"/subjects/B/b").unwrap(),
));
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 1);
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/subjects");
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
let counts = searcher.search(&AllQuery, &facet_collector)?;
let facets: Vec<(&Facet, u64)> = counts.get("/subjects").collect();
assert_eq!(facets[0].1, 1);
Ok(())
}
#[test]
@@ -562,16 +564,16 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/A").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/B").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/C/A").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/D/C/A").unwrap(),
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -637,7 +639,7 @@ mod tests {
let mut index_writer = index.writer_for_tests().unwrap();
for doc in docs {
index_writer.add_document(doc);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
@@ -677,7 +679,7 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
for doc in docs {
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit()?;

View File

@@ -25,34 +25,37 @@ use crate::{Score, SegmentReader, TantivyError};
/// use tantivy::schema::{Schema, TEXT, INDEXED, FAST};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let price = schema_builder.add_u64_field("price", INDEXED | FAST);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64));
/// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64));
/// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let no_filter_collector = FilterCollector::new(price, &|value: u64| value > 20_120u64, TopDocs::with_limit(2));
/// let top_docs = searcher.search(&query, &no_filter_collector).unwrap();
/// let top_docs = searcher.search(&query, &no_filter_collector)?;
///
/// assert_eq!(top_docs.len(), 1);
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
///
/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector)?;
///
/// assert_eq!(filtered_top_docs.len(), 0);
/// # Ok(())
/// # }
/// ```
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: FastValue>
where

View File

@@ -226,10 +226,10 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(val_field=>12i64));
writer.add_document(doc!(val_field=>-30i64));
writer.add_document(doc!(val_field=>-12i64));
writer.add_document(doc!(val_field=>-10i64));
writer.add_document(doc!(val_field=>12i64))?;
writer.add_document(doc!(val_field=>-30i64))?;
writer.add_document(doc!(val_field=>-12i64))?;
writer.add_document(doc!(val_field=>-10i64))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -247,13 +247,13 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(val_field=>12i64));
writer.add_document(doc!(val_field=>12i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-30i64));
writer.add_document(doc!(val_field=>-30i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-12i64));
writer.add_document(doc!(val_field=>-12i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-10i64));
writer.add_document(doc!(val_field=>-10i64))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -271,9 +271,9 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)));
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)));
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)));
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();

View File

@@ -48,10 +48,10 @@ use tantivy::collector::{Count, TopDocs};
# let mut index_writer = index.writer(3_000_000)?;
# index_writer.add_document(doc!(
# title => "The Name of the Wind",
# ));
# ))?;
# index_writer.add_document(doc!(
# title => "The Diary of Muadib",
# ));
# ))?;
# index_writer.commit()?;
# let reader = index.reader()?;
# let searcher = reader.searcher();

View File

@@ -112,19 +112,19 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.commit()?;
///
/// let mut index_writer = index.writer(3_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// assert!(index_writer.commit().is_ok());
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let mut collectors = MultiCollector::new();
@@ -139,6 +139,8 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
///
/// assert_eq!(count, 2);
/// assert_eq!(top_docs.len(), 2);
/// # Ok(())
/// # }
/// ```
#[allow(clippy::type_complexity)]
#[derive(Default)]
@@ -252,24 +254,24 @@ mod tests {
use crate::Term;
#[test]
fn test_multi_collector() {
fn test_multi_collector() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text=>"abc"));
index_writer.add_document(doc!(text=>"abc abc abc"));
index_writer.add_document(doc!(text=>"abc abc"));
index_writer.commit().unwrap();
index_writer.add_document(doc!(text=>""));
index_writer.add_document(doc!(text=>"abc abc abc abc"));
index_writer.add_document(doc!(text=>"abc"));
index_writer.commit().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text=>"abc"))?;
index_writer.add_document(doc!(text=>"abc abc abc"))?;
index_writer.add_document(doc!(text=>"abc abc"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text=>""))?;
index_writer.add_document(doc!(text=>"abc abc abc abc"))?;
index_writer.add_document(doc!(text=>"abc"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let term = Term::from_field_text(text, "abc");
let query = TermQuery::new(term, IndexRecordOption::Basic);
@@ -280,5 +282,6 @@ mod tests {
assert_eq!(count_handler.extract(&mut multifruits), 5);
assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2);
Ok(())
}
}

View File

@@ -25,7 +25,7 @@ pub const TEST_COLLECTOR_WITHOUT_SCORE: TestCollector = TestCollector {
};
#[test]
pub fn test_filter_collector() {
pub fn test_filter_collector() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT);
let price = schema_builder.add_u64_field("price", FAST);
@@ -33,25 +33,25 @@ pub fn test_filter_collector() {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
index_writer.commit()?;
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title]);
let query = query_parser.parse_query("diary").unwrap();
let query = query_parser.parse_query("diary")?;
let filter_some_collector = FilterCollector::new(
price,
&|value: u64| value > 20_120u64,
TopDocs::with_limit(2),
);
let top_docs = searcher.search(&query, &filter_some_collector).unwrap();
let top_docs = searcher.search(&query, &filter_some_collector)?;
assert_eq!(top_docs.len(), 1);
assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
@@ -67,9 +67,10 @@ pub fn test_filter_collector() {
}
let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
let filtered_date_docs = searcher.search(&query, &filter_dates_collector).unwrap();
let filtered_date_docs = searcher.search(&query, &filter_dates_collector)?;
assert_eq!(filtered_date_docs.len(), 2);
Ok(())
}
/// Stores all of the doc ids.
@@ -274,8 +275,8 @@ fn make_test_searcher() -> crate::Result<crate::LeasedItem<Searcher>> {
let schema = Schema::builder().build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(Document::default());
index_writer.add_document(Document::default());
index_writer.add_document(Document::default())?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
Ok(index.reader()?.searcher())
}

View File

@@ -94,27 +94,30 @@ where
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?;
///
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// # Ok(())
/// # }
/// ```
pub struct TopDocs(TopCollector<Score>);
@@ -180,29 +183,32 @@ impl TopDocs {
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1))?;
///
/// assert_eq!(top_docs.len(), 2);
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 4));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// Ok(())
/// # }
/// ```
pub fn and_offset(self, offset: usize) -> TopDocs {
TopDocs(self.0.and_offset(offset))
@@ -234,11 +240,11 @@ impl TopDocs {
/// #
/// # let index = Index::create_in_ram(schema);
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
/// # assert!(index_writer.commit().is_ok());
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64))?;
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64))?;
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64))?;
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64))?;
/// # index_writer.commit()?;
/// # let reader = index.reader()?;
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
/// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
@@ -316,9 +322,9 @@ impl TopDocs {
/// #
/// # let index = Index::create_in_ram(schema);
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// # index_writer.add_document(doc!(title => "MadCow Inc.", rating => 92_000_000i64));
/// # index_writer.add_document(doc!(title => "Zozo Cow KKK", rating => 119_000_000i64));
/// # index_writer.add_document(doc!(title => "Declining Cow", rating => -63_000_000i64));
/// # index_writer.add_document(doc!(title => "MadCow Inc.", rating => 92_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Zozo Cow KKK", rating => 119_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Declining Cow", rating => -63_000_000i64))?;
/// # assert!(index_writer.commit().is_ok());
/// # let reader = index.reader()?;
/// # let top_docs = docs_sorted_by_revenue(&reader.searcher(), &AllQuery, rating)?;
@@ -417,9 +423,9 @@ impl TopDocs {
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// let product_name = index.schema().get_field("product_name").unwrap();
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
/// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64));
/// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64));
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64))?;
/// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64))?;
/// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64))?;
/// index_writer.commit()?;
/// Ok(index)
/// }
@@ -527,9 +533,9 @@ impl TopDocs {
/// #
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
/// let boosted: Field = index.schema().get_field("boosted").unwrap();
/// # index_writer.add_document(doc!(boosted=>1u64, product_name => "The Diary of Muadib", popularity => 1u64));
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "A Dairy Cow", popularity => 10u64));
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "The Diary of a Young Girl", popularity => 15u64));
/// # index_writer.add_document(doc!(boosted=>1u64, product_name => "The Diary of Muadib", popularity => 1u64))?;
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "A Dairy Cow", popularity => 10u64))?;
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "The Diary of a Young Girl", popularity => 15u64))?;
/// # index_writer.commit()?;
/// // ...
/// # let user_query = "diary";
@@ -713,20 +719,18 @@ mod tests {
use crate::Score;
use crate::{DocAddress, DocId, SegmentReader};
fn make_index() -> Index {
fn make_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."));
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"));
index_writer.add_document(doc!(text_field=>"I like Droopy"));
assert!(index_writer.commit().is_ok());
}
index
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?;
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?;
index_writer.add_document(doc!(text_field=>"I like Droopy"))?;
index_writer.commit()?;
Ok(index)
}
fn assert_results_equals(results: &[(Score, DocAddress)], expected: &[(Score, DocAddress)]) {
@@ -737,17 +741,15 @@ mod tests {
}
#[test]
fn test_top_collector_not_at_capacity_without_offset() {
let index = make_index();
fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {
let index = make_index()?;
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
let text_query = query_parser.parse_query("droopy tax")?;
let score_docs: Vec<(Score, DocAddress)> = index
.reader()
.unwrap()
.reader()?
.searcher()
.search(&text_query, &TopDocs::with_limit(4))
.unwrap();
.search(&text_query, &TopDocs::with_limit(4))?;
assert_results_equals(
&score_docs,
&[
@@ -756,11 +758,12 @@ mod tests {
(0.48527452, DocAddress::new(0, 0)),
],
);
Ok(())
}
#[test]
fn test_top_collector_not_at_capacity_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -775,7 +778,7 @@ mod tests {
#[test]
fn test_top_collector_at_capacity() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -796,7 +799,7 @@ mod tests {
#[test]
fn test_top_collector_at_capacity_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -817,7 +820,7 @@ mod tests {
#[test]
fn test_top_collector_stable_sorting() {
let index = make_index();
let index = make_index().unwrap();
// using AllQuery to get a constant score
let searcher = index.reader().unwrap().searcher();
@@ -848,29 +851,35 @@ mod tests {
const SIZE: &str = "size";
#[test]
fn test_top_field_collector_not_at_capacity() {
fn test_top_field_collector_not_at_capacity() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field(TITLE, TEXT);
let size = schema_builder.add_u64_field(SIZE, FAST);
let schema = schema_builder.build();
let (index, query) = index("beer", title, schema, |index_writer| {
index_writer.add_document(doc!(
title => "bottle of beer",
size => 12u64,
));
index_writer.add_document(doc!(
title => "growler of beer",
size => 64u64,
));
index_writer.add_document(doc!(
title => "pint of beer",
size => 16u64,
));
index_writer
.add_document(doc!(
title => "bottle of beer",
size => 12u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "growler of beer",
size => 64u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "pint of beer",
size => 16u64,
))
.unwrap();
});
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field(size);
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap();
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
assert_eq!(
&top_docs[..],
&[
@@ -879,6 +888,7 @@ mod tests {
(12, DocAddress::new(0, 0))
]
);
Ok(())
}
#[test]
@@ -894,12 +904,12 @@ mod tests {
index_writer.add_document(doc!(
name => "Paul Robeson",
birthday => pr_birthday
));
))?;
let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
index_writer.add_document(doc!(
name => "Minnie Riperton",
birthday => mr_birthday
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
@@ -926,11 +936,11 @@ mod tests {
index_writer.add_document(doc!(
city => "georgetown",
altitude => -1i64,
));
))?;
index_writer.add_document(doc!(
city => "tokyo",
altitude => 40i64,
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(altitude);
@@ -956,11 +966,11 @@ mod tests {
index_writer.add_document(doc!(
city => "georgetown",
altitude => -1.0f64,
));
))?;
index_writer.add_document(doc!(
city => "tokyo",
altitude => 40f64,
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(altitude);
@@ -983,10 +993,12 @@ mod tests {
let size = schema_builder.add_u64_field(SIZE, FAST);
let schema = schema_builder.build();
let (index, _) = index("beer", title, schema, |index_writer| {
index_writer.add_document(doc!(
title => "bottle of beer",
size => 12u64,
));
index_writer
.add_document(doc!(
title => "bottle of beer",
size => 12u64,
))
.unwrap();
});
let searcher = index.reader().unwrap().searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field(Field::from_field_id(2));
@@ -1003,7 +1015,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(size=>1u64));
index_writer.add_document(doc!(size=>1u64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
@@ -1020,7 +1032,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(size=>1u64));
index_writer.add_document(doc!(size=>1u64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
@@ -1033,30 +1045,26 @@ mod tests {
}
#[test]
fn test_tweak_score_top_collector_with_offset() {
let index = make_index();
fn test_tweak_score_top_collector_with_offset() -> crate::Result<()> {
let index = make_index()?;
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
let text_query = query_parser.parse_query("droopy tax")?;
let collector = TopDocs::with_limit(2).and_offset(1).tweak_score(
move |_segment_reader: &SegmentReader| move |doc: DocId, _original_score: Score| doc,
);
let score_docs: Vec<(u32, DocAddress)> = index
.reader()
.unwrap()
.searcher()
.search(&text_query, &collector)
.unwrap();
let score_docs: Vec<(u32, DocAddress)> =
index.reader()?.searcher().search(&text_query, &collector)?;
assert_eq!(
score_docs,
vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
);
Ok(())
}
#[test]
fn test_custom_score_top_collector_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();

View File

@@ -657,7 +657,7 @@ mod tests {
}
#[test]
fn test_index_on_commit_reload_policy() {
fn test_index_on_commit_reload_policy() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create_in_ram(schema);
@@ -667,7 +667,7 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &index, &reader);
test_index_on_commit_reload_policy_aux(field, &index, &reader)
}
#[cfg(feature = "mmap")]
@@ -679,7 +679,7 @@ mod tests {
use tempfile::TempDir;
#[test]
fn test_index_on_commit_reload_policy_mmap() {
fn test_index_on_commit_reload_policy_mmap() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new().unwrap();
@@ -691,7 +691,7 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &index, &reader);
test_index_on_commit_reload_policy_aux(field, &index, &reader)
}
#[test]
@@ -706,7 +706,7 @@ mod tests {
.reload_policy(ReloadPolicy::Manual)
.try_into()?;
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.add_document(doc!(field=>1u64))?;
let (sender, receiver) = crossbeam::channel::unbounded();
let _handle = index.directory_mut().watch(WatchCallback::new(move || {
let _ = sender.send(());
@@ -720,7 +720,7 @@ mod tests {
}
#[test]
fn test_index_on_commit_reload_policy_different_directories() {
fn test_index_on_commit_reload_policy_different_directories() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new().unwrap();
@@ -733,10 +733,14 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
test_index_on_commit_reload_policy_aux(field, &write_index, &reader)
}
}
fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
fn test_index_on_commit_reload_policy_aux(
field: Field,
index: &Index,
reader: &IndexReader,
) -> crate::Result<()> {
let mut reader_index = reader.index();
let (sender, receiver) = crossbeam::channel::unbounded();
let _watch_handle = reader_index
@@ -744,9 +748,9 @@ mod tests {
.watch(WatchCallback::new(move || {
let _ = sender.send(());
}));
let mut writer = index.writer_for_tests().unwrap();
let mut writer = index.writer_for_tests()?;
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.add_document(doc!(field=>1u64))?;
writer.commit().unwrap();
// We need a loop here because it is possible for notify to send more than
// one modify event. It was observed on CI on MacOS.
@@ -756,7 +760,7 @@ mod tests {
break;
}
}
writer.add_document(doc!(field=>2u64));
writer.add_document(doc!(field=>2u64))?;
writer.commit().unwrap();
// ... Same as above
loop {
@@ -765,37 +769,37 @@ mod tests {
break;
}
}
Ok(())
}
// This test will not pass on windows, because windows
// prevent deleting files that are MMapped.
#[cfg(not(target_os = "windows"))]
#[test]
fn garbage_collect_works_as_intended() {
fn garbage_collect_works_as_intended() -> crate::Result<()> {
let directory = RamDirectory::create();
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create(directory.clone(), schema, IndexSettings::default()).unwrap();
let index = Index::create(directory.clone(), schema, IndexSettings::default())?;
let mut writer = index.writer_with_num_threads(8, 24_000_000).unwrap();
for i in 0u64..8_000u64 {
writer.add_document(doc!(field => i));
writer.add_document(doc!(field => i))?;
}
let (sender, receiver) = crossbeam::channel::unbounded();
let _handle = directory.watch(WatchCallback::new(move || {
let _ = sender.send(());
}));
writer.commit().unwrap();
writer.commit()?;
let mem_right_after_commit = directory.total_mem_usage();
assert!(receiver.recv().is_ok());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
assert_eq!(reader.searcher().num_docs(), 8_000);
writer.wait_merging_threads().unwrap();
writer.wait_merging_threads()?;
let mem_right_after_merge_finished = directory.total_mem_usage();
reader.reload().unwrap();
@@ -807,5 +811,6 @@ mod tests {
mem_right_after_merge_finished,
mem_right_after_commit
);
Ok(())
}
}

View File

@@ -364,10 +364,10 @@ mod test {
{
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "tantivy"));
index_writer.add_document(doc!(name => "horse"));
index_writer.add_document(doc!(name => "jockey"));
index_writer.add_document(doc!(name => "cap"));
index_writer.add_document(doc!(name => "tantivy"))?;
index_writer.add_document(doc!(name => "horse"))?;
index_writer.add_document(doc!(name => "jockey"))?;
index_writer.add_document(doc!(name => "cap"))?;
// we should now have one segment with two docs
index_writer.delete_term(Term::from_field_text(name, "horse"));
index_writer.delete_term(Term::from_field_text(name, "cap"));
@@ -390,10 +390,10 @@ mod test {
{
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "tantivy"));
index_writer.add_document(doc!(name => "horse"));
index_writer.add_document(doc!(name => "jockey"));
index_writer.add_document(doc!(name => "cap"));
index_writer.add_document(doc!(name => "tantivy"))?;
index_writer.add_document(doc!(name => "horse"))?;
index_writer.add_document(doc!(name => "jockey"))?;
index_writer.add_document(doc!(name => "cap"))?;
// we should now have one segment with two docs
index_writer.commit()?;
}

View File

@@ -574,8 +574,8 @@ mod tests {
}
#[test]
fn test_mmap_released() {
let mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
fn test_mmap_released() -> crate::Result<()> {
let mmap_directory = MmapDirectory::create_from_tempdir()?;
let mut schema_builder: SchemaBuilder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
@@ -584,31 +584,30 @@ mod tests {
let index =
Index::create(mmap_directory.clone(), schema, IndexSettings::default()).unwrap();
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let mut log_merge_policy = LogMergePolicy::default();
log_merge_policy.set_min_num_segments(3);
index_writer.set_merge_policy(Box::new(log_merge_policy));
for _num_commits in 0..10 {
for _ in 0..10 {
index_writer.add_document(doc!(text_field=>"abc"));
index_writer.add_document(doc!(text_field=>"abc"))?;
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
for _ in 0..4 {
index_writer.add_document(doc!(text_field=>"abc"));
index_writer.commit().unwrap();
reader.reload().unwrap();
index_writer.add_document(doc!(text_field=>"abc"))?;
index_writer.commit()?;
reader.reload()?;
}
index_writer.wait_merging_threads().unwrap();
index_writer.wait_merging_threads()?;
reader.reload().unwrap();
reader.reload()?;
let num_segments = reader.searcher().segment_readers().len();
assert!(num_segments <= 4);
let num_components_except_deletes_and_tempstore =
@@ -619,5 +618,6 @@ mod tests {
);
}
assert!(mmap_directory.get_cache_info().mmapped.is_empty());
Ok(())
}
}

View File

@@ -214,14 +214,8 @@ impl Directory for RamDirectory {
}
fn atomic_write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
fail_point!("RamDirectory::atomic_write", |msg| Err(io::Error::new(
io::ErrorKind::Other,
msg.unwrap_or_else(|| "Undefined".to_string())
)));
let path_buf = PathBuf::from(path);
self.fs.write().unwrap().write(path_buf, data);
if path == *META_FILEPATH {
let _ = self.fs.write().unwrap().watch_router.broadcast();
}

View File

@@ -18,11 +18,11 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(bytes_field=>vec![0u8, 1, 2, 3]));
index_writer.add_document(doc!(bytes_field=>vec![]));
index_writer.add_document(doc!(bytes_field=>vec![255u8]));
index_writer.add_document(doc!(bytes_field=>vec![1u8, 3, 5, 7, 9]));
index_writer.add_document(doc!(bytes_field=>vec![0u8; 1000]));
index_writer.add_document(doc!(bytes_field=>vec![0u8, 1, 2, 3]))?;
index_writer.add_document(doc!(bytes_field=>vec![]))?;
index_writer.add_document(doc!(bytes_field=>vec![255u8]))?;
index_writer.add_document(doc!(bytes_field=>vec![1u8, 3, 5, 7, 9]))?;
index_writer.add_document(doc!(bytes_field=>vec![0u8; 1000]))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
@@ -47,7 +47,7 @@ mod tests {
index_writer.add_document(doc!(
field => b"tantivy".as_ref(),
field => b"lucene".as_ref()
));
))?;
index_writer.commit()?;
Ok(index.reader()?.searcher())
}

View File

@@ -95,7 +95,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -118,7 +118,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -141,7 +141,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -164,7 +164,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -187,8 +187,8 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(Document::default());
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -210,8 +210,8 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(Document::default());
index_writer.add_document(Document::default());
index_writer.add_document(Document::default())?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher

View File

@@ -497,18 +497,18 @@ mod tests {
}
#[test]
fn test_merge_missing_date_fast_field() {
fn test_merge_missing_date_fast_field() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()));
index_writer.commit().unwrap();
index_writer.add_document(doc!());
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
index_writer.commit()?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
let reader = index.reader()?;
let segment_ids: Vec<SegmentId> = reader
.searcher()
.segment_readers()
@@ -517,10 +517,10 @@ mod tests {
.collect();
assert_eq!(segment_ids.len(), 2);
let merge_future = index_writer.merge(&segment_ids[..]);
let merge_res = futures::executor::block_on(merge_future);
assert!(merge_res.is_ok());
assert!(reader.reload().is_ok());
futures::executor::block_on(merge_future)?;
reader.reload()?;
assert_eq!(reader.searcher().segment_readers().len(), 1);
Ok(())
}
#[test]
@@ -529,7 +529,7 @@ mod tests {
}
#[test]
fn test_datefastfield() {
fn test_datefastfield() -> crate::Result<()> {
use crate::fastfield::FastValue;
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", FAST);
@@ -539,22 +539,22 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(1i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
));
))?;
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(4i64.to_u64())
));
))?;
index_writer.add_document(doc!(
multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
));
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_reader(0);
@@ -581,6 +581,7 @@ mod tests {
assert_eq!(dates[0].timestamp(), 5i64);
assert_eq!(dates[1].timestamp(), 6i64);
}
Ok(())
}
}

View File

@@ -26,7 +26,7 @@ mod tests {
use test_env_log::test;
#[test]
fn test_multivalued_u64() {
fn test_multivalued_u64() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
@@ -34,17 +34,17 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(field=>1u64, field=>3u64));
index_writer.add_document(doc!());
index_writer.add_document(doc!(field=>4u64));
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=>1u64, field=>3u64))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(field=>4u64))?;
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64))?;
index_writer.commit()?;
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().u64s(field).unwrap();
let multi_value_reader = segment_reader.fast_fields().u64s(field)?;
{
multi_value_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4u64]);
@@ -57,10 +57,11 @@ mod tests {
multi_value_reader.get_vals(1, &mut vals);
assert!(vals.is_empty());
}
Ok(())
}
#[test]
fn test_multivalued_date() {
fn test_multivalued_date() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"multi_date_field",
@@ -73,40 +74,37 @@ mod tests {
schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let first_time_stamp = chrono::Utc::now();
index_writer.add_document(
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
);
index_writer.add_document(doc!(time_i=>0i64));
)?;
index_writer.add_document(doc!(time_i=>0i64))?;
// add one second
index_writer
.add_document(doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64));
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64),
)?;
// add another second
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64));
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64))?;
// add three seconds
index_writer
.add_document(doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64),
)?;
index_writer.commit()?;
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let reader = searcher.segment_reader(0);
assert_eq!(reader.num_docs(), 5);
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
for (_score, doc_address) in results {
let retrieved_doc = searcher.doc(doc_address).expect("cannot fetch doc");
let retrieved_doc = searcher.doc(doc_address)?;
assert_eq!(
retrieved_doc
.get_first(date_field)
@@ -128,12 +126,8 @@ mod tests {
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
@@ -165,10 +159,8 @@ mod tests {
(first_time_stamp + Duration::seconds(1)).to_rfc3339(),
(first_time_stamp + Duration::seconds(3)).to_rfc3339()
);
let query = parser.parse_query(&range_q).expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&range_q)?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 2);
for (i, doc_pair) in results.iter().enumerate() {
@@ -196,16 +188,16 @@ mod tests {
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value()
.expect("value not of i64 type"),
time_i_val
.i64_value(),
Some(time_i_val)
);
}
}
Ok(())
}
#[test]
fn test_multivalued_i64() {
fn test_multivalued_i64() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_i64_field(
"multifield",
@@ -213,14 +205,14 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
index_writer.add_document(doc!());
index_writer.add_document(doc!(field=> -4i64));
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=> 1i64, field => 3i64))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(field=> -4i64))?;
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64))?;
index_writer.commit()?;
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().i64s(field).unwrap();
@@ -232,9 +224,10 @@ mod tests {
assert!(vals.is_empty());
multi_value_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
Ok(())
}
fn test_multivalued_no_panic(ops: &[IndexingOp]) {
fn test_multivalued_no_panic(ops: &[IndexingOp]) -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
@@ -244,7 +237,7 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for &op in ops {
@@ -252,19 +245,19 @@ mod tests {
IndexingOp::AddDoc { id } => {
match id % 3 {
0 => {
index_writer.add_document(doc!());
index_writer.add_document(doc!())?;
}
1 => {
let mut doc = Document::new();
for _ in 0..5001 {
doc.add_u64(field, id as u64);
}
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
_ => {
let mut doc = Document::new();
doc.add_u64(field, id as u64);
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
};
}
@@ -275,18 +268,16 @@ mod tests {
index_writer.commit().unwrap();
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let segment_ids = index.searchable_segment_ids()?;
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
block_on(index_writer.merge(&segment_ids))?;
index_writer.segment_updater().wait_merging_thread()?;
}
}
}
}
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
// Merging the segments
{
@@ -298,6 +289,7 @@ mod tests {
assert!(index_writer.wait_merging_threads().is_ok());
}
}
Ok(())
}
#[derive(Debug, Clone, Copy)]
@@ -320,7 +312,7 @@ mod tests {
proptest! {
#[test]
fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
test_multivalued_no_panic(&ops[..]);
assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}
}
@@ -335,20 +327,22 @@ mod tests {
Merge,
];
test_multivalued_no_panic(&ops[..]);
assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}
#[test]
#[ignore]
fn test_many_facets() {
fn test_many_facets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
for i in 0..100_000 {
index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())));
index_writer
.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())))?;
}
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
Ok(())
}
}

View File

@@ -94,24 +94,22 @@ mod tests {
use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};
#[test]
fn test_multifastfield_reader() {
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index
.writer_for_tests()
.expect("Failed to create index writer.");
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from("/category/cat2"),
facet_field => Facet::from("/category/cat1"),
));
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")));
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")));
index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher();
))?;
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")))?;
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();
let mut facet_reader = segment_reader.facet_reader(facet_field)?;
let mut facet = Facet::root();
{
@@ -145,10 +143,11 @@ mod tests {
facet_reader.facet_ords(2, &mut vals);
assert_eq!(&vals[..], &[4]);
}
Ok(())
}
#[test]
fn test_multifastfield_reader_min_max() {
fn test_multifastfield_reader_min_max() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field_options = IntOptions::default()
.set_indexed()
@@ -163,15 +162,16 @@ mod tests {
item_field => 2i64,
item_field => 3i64,
item_field => -2i64,
));
index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64));
index_writer.add_document(doc!(item_field => 4i64));
index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher();
))?;
index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64))?;
index_writer.add_document(doc!(item_field => 4i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let field_reader = segment_reader.fast_fields().i64s(item_field).unwrap();
let field_reader = segment_reader.fast_fields().i64s(item_field)?;
assert_eq!(field_reader.min_value(), -2);
assert_eq!(field_reader.max_value(), 6);
Ok(())
}
}

View File

@@ -49,7 +49,7 @@ fn test_functional_store() -> crate::Result<()> {
}
for _ in 0..num_docs {
doc_set.push(doc_id);
index_writer.add_document(doc!(id_field=>doc_id));
index_writer.add_document(doc!(id_field=>doc_id))?;
doc_id += 1;
}
index_writer.commit()?;
@@ -124,7 +124,7 @@ fn test_functional_indexing_sorted() -> crate::Result<()> {
doc.add_u64(multiples_field, random_val * i);
}
doc.add_text(text_field, get_text());
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
}
Ok(())
@@ -201,7 +201,7 @@ fn test_functional_indexing_unsorted() -> crate::Result<()> {
doc.add_u64(multiples_field, random_val * i);
}
doc.add_text(text_field, get_text());
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
}
Ok(())

View File

@@ -207,8 +207,8 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"texto1"));
index_writer.add_document(doc!(text_field=>"texto2"));
index_writer.add_document(doc!(text_field=>"texto1"))?;
index_writer.add_document(doc!(text_field=>"texto2"))?;
index_writer.commit()?;
index
};
@@ -218,8 +218,8 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"texto3"));
index_writer.add_document(doc!(text_field=>"texto4"));
index_writer.add_document(doc!(text_field=>"texto3"))?;
index_writer.add_document(doc!(text_field=>"texto4"))?;
index_writer.delete_term(Term::from_field_text(text_field, "4"));
index_writer.commit()?;

View File

@@ -146,7 +146,7 @@ mod tests_indexsorting {
fn create_test_index(
index_settings: Option<IndexSettings>,
text_field_options: TextOptions,
) -> Index {
) -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let my_text_field = schema_builder.add_text_field("text_field", text_field_options);
@@ -166,19 +166,20 @@ mod tests_indexsorting {
if let Some(settings) = index_settings {
index_builder = index_builder.settings(settings);
}
let index = index_builder.create_in_ram().unwrap();
let index = index_builder.create_in_ram()?;
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(my_number=>40_u64));
index_writer
.add_document(doc!(my_number=>20_u64, multi_numbers => 5_u64, multi_numbers => 6_u64));
index_writer.add_document(doc!(my_number=>100_u64));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(my_number=>40_u64))?;
index_writer.add_document(
doc!(my_number=>20_u64, multi_numbers => 5_u64, multi_numbers => 6_u64),
)?;
index_writer.add_document(doc!(my_number=>100_u64))?;
index_writer.add_document(
doc!(my_number=>10_u64, my_string_field=> "blublub", my_text_field => "some text"),
);
index_writer.add_document(doc!(my_number=>30_u64, multi_numbers => 3_u64 ));
index_writer.commit().unwrap();
index
)?;
index_writer.add_document(doc!(my_number=>30_u64, multi_numbers => 3_u64 ))?;
index_writer.commit()?;
Ok(index)
}
fn get_text_options() -> TextOptions {
TextOptions::default().set_indexing_options(
@@ -203,7 +204,7 @@ mod tests_indexsorting {
for option in options {
//let options = get_text_options();
// no index_sort
let index = create_test_index(None, option.clone());
let index = create_test_index(None, option.clone())?;
let my_text_field = index.schema().get_field("text_field").unwrap();
let searcher = index.reader()?.searcher();
@@ -225,7 +226,7 @@ mod tests_indexsorting {
..Default::default()
}),
option.clone(),
);
)?;
let my_text_field = index.schema().get_field("text_field").unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -257,7 +258,7 @@ mod tests_indexsorting {
..Default::default()
}),
option.clone(),
);
)?;
let my_string_field = index.schema().get_field("text_field").unwrap();
let searcher = index.reader()?.searcher();
@@ -287,7 +288,7 @@ mod tests_indexsorting {
#[test]
fn test_sort_index_get_documents() -> crate::Result<()> {
// default baseline
let index = create_test_index(None, get_text_options());
let index = create_test_index(None, get_text_options())?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let searcher = index.reader()?.searcher();
{
@@ -316,7 +317,7 @@ mod tests_indexsorting {
..Default::default()
}),
get_text_options(),
);
)?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let searcher = index.reader()?.searcher();
{
@@ -341,7 +342,7 @@ mod tests_indexsorting {
..Default::default()
}),
get_text_options(),
);
)?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let searcher = index.reader()?.searcher();
{
@@ -356,7 +357,7 @@ mod tests_indexsorting {
#[test]
fn test_sort_index_test_string_field() -> crate::Result<()> {
let index = create_test_index(None, get_text_options());
let index = create_test_index(None, get_text_options())?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let searcher = index.reader()?.searcher();
@@ -376,7 +377,7 @@ mod tests_indexsorting {
..Default::default()
}),
get_text_options(),
);
)?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -407,7 +408,7 @@ mod tests_indexsorting {
..Default::default()
}),
get_text_options(),
);
)?;
let my_string_field = index.schema().get_field("string_field").unwrap();
let searcher = index.reader()?.searcher();
@@ -443,7 +444,7 @@ mod tests_indexsorting {
..Default::default()
}),
get_text_options(),
);
)?;
assert_eq!(
index.settings().sort_by_field.as_ref().unwrap().field,
"my_number".to_string()

View File

@@ -14,6 +14,7 @@ use crate::error::TantivyError;
use crate::fastfield::write_alive_bitset;
use crate::indexer::delete_queue::{DeleteCursor, DeleteQueue};
use crate::indexer::doc_opstamp_mapping::DocToOpstampMapping;
use crate::indexer::index_writer_status::IndexWriterStatus;
use crate::indexer::operation::DeleteOperation;
use crate::indexer::stamper::Stamper;
use crate::indexer::MergePolicy;
@@ -28,13 +29,13 @@ use crossbeam::channel;
use futures::executor::block_on;
use futures::future::Future;
use smallvec::smallvec;
use smallvec::SmallVec;
use std::mem;
use std::ops::Range;
use std::sync::Arc;
use std::thread;
use std::thread::JoinHandle;
use super::{AddBatch, AddBatchReceiver, AddBatchSender};
// Size of the margin for the heap. A segment is closed when the remaining memory
// in the heap goes below MARGIN_IN_BYTES.
pub const MARGIN_IN_BYTES: usize = 1_000_000;
@@ -50,15 +51,12 @@ pub const MAX_NUM_THREAD: usize = 8;
// reaches `PIPELINE_MAX_SIZE_IN_DOCS`
const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
// Group of operations.
// Most of the time, users will send operation one-by-one, but it can be useful to
// send them as a small block to ensure that
// - all docs in the operation will happen on the same segment and continuous doc_ids.
// - all operations in the group are committed at the same time, making the group
// atomic.
type OperationGroup = SmallVec<[AddOperation; 4]>;
type OperationSender = channel::Sender<OperationGroup>;
type OperationReceiver = channel::Receiver<OperationGroup>;
fn error_in_index_worker_thread(context: &str) -> TantivyError {
TantivyError::ErrorInThread(format!(
"{}. A worker thread encounterred an error (io::Error most likely) or panicked.",
context
))
}
/// `IndexWriter` is the user entry-point to add document to an index.
///
@@ -77,8 +75,8 @@ pub struct IndexWriter {
workers_join_handle: Vec<JoinHandle<crate::Result<()>>>,
operation_receiver: OperationReceiver,
operation_sender: OperationSender,
index_writer_status: IndexWriterStatus,
operation_sender: AddBatchSender,
segment_updater: SegmentUpdater,
@@ -185,10 +183,10 @@ pub(crate) fn advance_deletes(
fn index_documents(
memory_budget: usize,
segment: Segment,
grouped_document_iterator: &mut dyn Iterator<Item = OperationGroup>,
grouped_document_iterator: &mut dyn Iterator<Item = AddBatch>,
segment_updater: &mut SegmentUpdater,
mut delete_cursor: DeleteCursor,
) -> crate::Result<bool> {
) -> crate::Result<()> {
let schema = segment.schema();
let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone(), &schema)?;
@@ -207,7 +205,7 @@ fn index_documents(
}
if !segment_updater.is_alive() {
return Ok(false);
return Ok(());
}
let max_doc = segment_writer.max_doc();
@@ -227,7 +225,7 @@ fn index_documents(
// update segment_updater inventory to remove tempstore
let segment_entry = SegmentEntry::new(meta, delete_cursor, alive_bitset_opt);
block_on(segment_updater.schedule_add_segment(segment_entry))?;
Ok(true)
Ok(())
}
/// `doc_opstamps` is required to be non-empty.
@@ -298,7 +296,7 @@ impl IndexWriter {
let err_msg = format!("The heap size per thread cannot exceed {}", HEAP_SIZE_MAX);
return Err(TantivyError::InvalidArgument(err_msg));
}
let (document_sender, document_receiver): (OperationSender, OperationReceiver) =
let (document_sender, document_receiver): (AddBatchSender, AddBatchReceiver) =
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
let delete_queue = DeleteQueue::new();
@@ -316,7 +314,7 @@ impl IndexWriter {
heap_size_in_bytes_per_thread,
index: index.clone(),
operation_receiver: document_receiver,
index_writer_status: IndexWriterStatus::from(document_receiver),
operation_sender: document_sender,
segment_updater,
@@ -356,16 +354,14 @@ impl IndexWriter {
for join_handle in former_workers_handles {
join_handle
.join()
.expect("Indexing Worker thread panicked")
.map_err(|_| {
TantivyError::ErrorInThread("Error in indexing worker thread.".into())
})?;
.map_err(|_| error_in_index_worker_thread("Worker thread panicked."))?
.map_err(|_| error_in_index_worker_thread("Worker thread failed."))?;
}
let result = self
.segment_updater
.wait_merging_thread()
.map_err(|_| TantivyError::ErrorInThread("Failed to join merging thread.".into()));
.map_err(|_| error_in_index_worker_thread("Failed to join merging thread."));
if let Err(ref e) = result {
error!("Some merging thread failed {:?}", e);
@@ -393,10 +389,18 @@ impl IndexWriter {
self.index.new_segment()
}
fn operation_receiver(&self) -> crate::Result<AddBatchReceiver> {
self.index_writer_status
.operation_receiver()
.ok_or_else(|| crate::TantivyError::ErrorInThread("The index writer was killed. It can happen if an indexing worker encounterred an Io error for instance.".to_string()))
}
/// Spawns a new worker thread for indexing.
/// The thread consumes documents from the pipeline.
fn add_indexing_worker(&mut self) -> crate::Result<()> {
let document_receiver_clone = self.operation_receiver.clone();
let document_receiver_clone = self.operation_receiver()?;
let index_writer_bomb = self.index_writer_status.create_bomb();
let mut segment_updater = self.segment_updater.clone();
let mut delete_cursor = self.delete_queue.cursor();
@@ -407,32 +411,31 @@ impl IndexWriter {
.name(format!("thrd-tantivy-index{}", self.worker_id))
.spawn(move || {
loop {
let mut document_iterator =
document_receiver_clone.clone().into_iter().peekable();
let mut document_iterator = document_receiver_clone
.clone()
.into_iter()
.filter(|batch| !batch.is_empty())
.peekable();
// the peeking here is to avoid
// creating a new segment's files
// The peeking here is to avoid creating a new segment's files
// if no document are available.
//
// this is a valid guarantee as the
// peeked document now belongs to
// This is a valid guarantee as the peeked document now belongs to
// our local iterator.
if let Some(operations) = document_iterator.peek() {
if let Some(first) = operations.first() {
delete_cursor.skip_to(first.opstamp);
} else {
return Ok(());
}
if let Some(batch) = document_iterator.peek() {
assert!(!batch.is_empty());
delete_cursor.skip_to(batch[0].opstamp);
} else {
// No more documents.
// Happens when there is a commit, or if the `IndexWriter`
// It happens when there is a commit, or if the `IndexWriter`
// was dropped.
index_writer_bomb.defuse();
return Ok(());
}
let segment = index.new_segment();
index_documents(
mem_budget,
segment,
index.new_segment(),
&mut document_iterator,
&mut segment_updater,
delete_cursor.clone(),
@@ -488,7 +491,7 @@ impl IndexWriter {
/// let index = Index::create_in_ram(schema.clone());
///
/// let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
/// index_writer.add_document(doc!(title => "The modern Promotheus"));
/// index_writer.add_document(doc!(title => "The modern Promotheus"))?;
/// index_writer.commit()?;
///
/// let clear_res = index_writer.delete_all_documents().unwrap();
@@ -532,12 +535,11 @@ impl IndexWriter {
/// when no documents are remaining.
///
/// Returns the former segment_ready channel.
#[allow(unused_must_use)]
fn recreate_document_channel(&mut self) -> OperationReceiver {
let (document_sender, document_receiver): (OperationSender, OperationReceiver) =
fn recreate_document_channel(&mut self) {
let (document_sender, document_receiver): (AddBatchSender, AddBatchReceiver) =
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
mem::replace(&mut self.operation_sender, document_sender);
mem::replace(&mut self.operation_receiver, document_receiver)
self.operation_sender = document_sender;
self.index_writer_status = IndexWriterStatus::from(document_receiver);
}
/// Rollback to the last commit
@@ -553,7 +555,7 @@ impl IndexWriter {
// marks the segment updater as killed. From now on, all
// segment updates will be ignored.
self.segment_updater.kill();
let document_receiver = self.operation_receiver.clone();
let document_receiver = self.operation_receiver();
// take the directory lock to create a new index_writer.
let directory_lock = self
@@ -693,14 +695,10 @@ impl IndexWriter {
/// The opstamp is an increasing `u64` that can
/// be used by the client to align commits with its own
/// document queue.
pub fn add_document(&self, document: Document) -> Opstamp {
pub fn add_document(&self, document: Document) -> crate::Result<Opstamp> {
let opstamp = self.stamper.stamp();
let add_operation = AddOperation { opstamp, document };
let send_result = self.operation_sender.send(smallvec![add_operation]);
if let Err(e) = send_result {
panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e);
}
opstamp
self.send_add_documents_batch(smallvec![AddOperation { opstamp, document }])?;
Ok(opstamp)
}
/// Gets a range of stamps from the stamper and "pops" the last stamp
@@ -713,11 +711,7 @@ impl IndexWriter {
fn get_batch_opstamps(&self, count: Opstamp) -> (Opstamp, Range<Opstamp>) {
let Range { start, end } = self.stamper.stamps(count + 1u64);
let last_opstamp = end - 1;
let stamps = Range {
start,
end: last_opstamp,
};
(last_opstamp, stamps)
(last_opstamp, start..last_opstamp)
}
/// Runs a group of document operations ensuring that the operations are
@@ -736,16 +730,20 @@ impl IndexWriter {
/// Like adds and deletes (see `IndexWriter.add_document` and
/// `IndexWriter.delete_term`), the changes made by calling `run` will be
/// visible to readers only after calling `commit()`.
pub fn run(&self, user_operations: Vec<UserOperation>) -> Opstamp {
let count = user_operations.len() as u64;
pub fn run<I>(&self, user_operations: I) -> crate::Result<Opstamp>
where
I: IntoIterator<Item = UserOperation>,
I::IntoIter: ExactSizeIterator,
{
let user_operations_it = user_operations.into_iter();
let count = user_operations_it.len() as u64;
if count == 0 {
return self.stamper.stamp();
return Ok(self.stamper.stamp());
}
let (batch_opstamp, stamps) = self.get_batch_opstamps(count);
let mut adds = OperationGroup::default();
for (user_op, opstamp) in user_operations.into_iter().zip(stamps) {
let mut adds = AddBatch::default();
for (user_op, opstamp) in user_operations_it.zip(stamps) {
match user_op {
UserOperation::Delete(term) => {
let delete_operation = DeleteOperation { opstamp, term };
@@ -757,12 +755,16 @@ impl IndexWriter {
}
}
}
let send_result = self.operation_sender.send(adds);
if let Err(e) = send_result {
panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e);
};
self.send_add_documents_batch(adds)?;
Ok(batch_opstamp)
}
batch_opstamp
fn send_add_documents_batch(&self, add_ops: AddBatch) -> crate::Result<()> {
if self.index_writer_status.is_alive() && self.operation_sender.send(add_ops).is_ok() {
Ok(())
} else {
Err(error_in_index_worker_thread("An index writer was killed."))
}
}
}
@@ -828,7 +830,7 @@ mod tests {
UserOperation::Add(doc!(text_field=>"a")),
UserOperation::Add(doc!(text_field=>"b")),
];
let batch_opstamp1 = index_writer.run(operations);
let batch_opstamp1 = index_writer.run(operations).unwrap();
assert_eq!(batch_opstamp1, 2u64);
}
@@ -839,8 +841,12 @@ mod tests {
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field => "hello1"));
index_writer.add_document(doc!(text_field => "hello2"));
index_writer
.add_document(doc!(text_field => "hello1"))
.unwrap();
index_writer
.add_document(doc!(text_field => "hello2"))
.unwrap();
assert!(index_writer.commit().is_ok());
let reader = index.reader().unwrap();
@@ -897,7 +903,7 @@ mod tests {
UserOperation::Delete(b_term),
];
index_writer.run(operations);
index_writer.run(operations).unwrap();
index_writer.commit().expect("failed to commit");
reader.reload().expect("failed to load searchers");
@@ -927,10 +933,10 @@ mod tests {
let index = Index::create_in_ram(schema_builder.build());
let index_writer = index.writer(3_000_000).unwrap();
let operations1 = vec![];
let batch_opstamp1 = index_writer.run(operations1);
let batch_opstamp1 = index_writer.run(operations1).unwrap();
assert_eq!(batch_opstamp1, 0u64);
let operations2 = vec![];
let batch_opstamp2 = index_writer.run(operations2);
let batch_opstamp2 = index_writer.run(operations2).unwrap();
assert_eq!(batch_opstamp2, 1u64);
}
@@ -990,15 +996,14 @@ mod tests {
}
#[test]
fn test_commit_and_rollback() {
fn test_commit_and_rollback() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
let num_docs_containing = |s: &str| {
let searcher = reader.searcher();
let term = Term::from_field_text(text_field, s);
@@ -1007,136 +1012,127 @@ mod tests {
{
// writing the segment
let mut index_writer = index.writer(3_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"a"));
index_writer.rollback().unwrap();
let mut index_writer = index.writer(3_000_000)?;
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.rollback()?;
assert_eq!(index_writer.commit_opstamp(), 0u64);
assert_eq!(num_docs_containing("a"), 0);
{
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"c"));
}
assert!(index_writer.commit().is_ok());
reader.reload().unwrap();
index_writer.add_document(doc!(text_field=>"b"))?;
index_writer.add_document(doc!(text_field=>"c"))?;
index_writer.commit()?;
reader.reload()?;
assert_eq!(num_docs_containing("a"), 0);
assert_eq!(num_docs_containing("b"), 1);
assert_eq!(num_docs_containing("c"), 1);
}
reader.reload().unwrap();
reader.reload()?;
reader.searcher();
Ok(())
}
#[test]
fn test_with_merges() {
fn test_with_merges() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
let num_docs_containing = |s: &str| {
let term_a = Term::from_field_text(text_field, s);
reader.searcher().doc_freq(&term_a).unwrap()
};
{
// writing the segment
let mut index_writer = index.writer(12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
}
index_writer.commit().expect("commit failed");
for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
}
// this should create 8 segments and trigger a merge.
index_writer.commit().expect("commit failed");
index_writer
.wait_merging_threads()
.expect("waiting merging thread failed");
reader.reload().unwrap();
assert_eq!(num_docs_containing("a"), 200);
assert!(index.searchable_segments().unwrap().len() < 8);
// writing the segment
let mut index_writer = index.writer(12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a"))?;
}
index_writer.commit()?;
for _doc in 0..100 {
index_writer.add_document(doc!(text_field=>"a"))?;
}
// this should create 8 segments and trigger a merge.
index_writer.commit()?;
index_writer.wait_merging_threads()?;
reader.reload()?;
assert_eq!(num_docs_containing("a"), 200);
assert!(index.searchable_segments()?.len() < 8);
Ok(())
}
#[test]
fn test_prepare_with_commit_message() {
fn test_prepare_with_commit_message() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
// writing the segment
let mut index_writer = index.writer(12_000_000)?;
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"))?;
}
{
let mut prepared_commit = index_writer.prepare_commit()?;
prepared_commit.set_payload("first commit");
prepared_commit.commit()?;
}
{
let metas = index.load_metas()?;
assert_eq!(metas.payload.unwrap(), "first commit");
}
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"))?;
}
index_writer.commit()?;
{
let metas = index.load_metas()?;
assert!(metas.payload.is_none());
}
Ok(())
}
#[test]
fn test_prepare_but_rollback() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
{
// writing the segment
let mut index_writer = index.writer(12_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(4, 12_000_000)?;
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a"))?;
}
{
let mut prepared_commit = index_writer.prepare_commit().expect("commit failed");
let mut prepared_commit = index_writer.prepare_commit()?;
prepared_commit.set_payload("first commit");
prepared_commit.commit().expect("commit failed");
prepared_commit.abort()?;
}
{
let metas = index.load_metas().unwrap();
assert_eq!(metas.payload.unwrap(), "first commit");
}
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
}
index_writer.commit().unwrap();
{
let metas = index.load_metas().unwrap();
assert!(metas.payload.is_none());
}
}
}
#[test]
fn test_prepare_but_rollback() {
let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
{
// writing the segment
let mut index_writer = index.writer_with_num_threads(4, 12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
}
{
let mut prepared_commit = index_writer.prepare_commit().expect("commit failed");
prepared_commit.set_payload("first commit");
prepared_commit.abort().expect("commit failed");
}
{
let metas = index.load_metas().unwrap();
let metas = index.load_metas()?;
assert!(metas.payload.is_none());
}
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "b"));
index_writer.add_document(doc!(text_field => "b"))?;
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let num_docs_containing = |s: &str| {
let term_a = Term::from_field_text(text_field, s);
index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap()
.try_into()?
.searcher()
.doc_freq(&term_a)
.unwrap()
};
assert_eq!(num_docs_containing("a"), 0);
assert_eq!(num_docs_containing("b"), 100);
assert_eq!(num_docs_containing("a")?, 0);
assert_eq!(num_docs_containing("b")?, 100);
Ok(())
}
#[test]
@@ -1157,7 +1153,7 @@ mod tests {
};
let mut index_writer = index.writer_with_num_threads(4, 12_000_000).unwrap();
let add_tstamp = index_writer.add_document(doc!(text_field => "a"));
let add_tstamp = index_writer.add_document(doc!(text_field => "a")).unwrap();
let commit_tstamp = index_writer.commit().unwrap();
assert!(commit_tstamp > add_tstamp);
index_writer.delete_all_documents().unwrap();
@@ -1174,7 +1170,7 @@ mod tests {
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(4, 12_000_000).unwrap();
let add_tstamp = index_writer.add_document(doc!(text_field => "a"));
let add_tstamp = index_writer.add_document(doc!(text_field => "a")).unwrap();
// commit documents - they are now available
let first_commit = index_writer.commit();
@@ -1193,7 +1189,7 @@ mod tests {
// add new documents again
for _ in 0..100 {
index_writer.add_document(doc!(text_field => "b"));
index_writer.add_document(doc!(text_field => "b")).unwrap();
}
// rollback to last commit, when index was empty
@@ -1227,7 +1223,7 @@ mod tests {
assert!(index_writer.commit().is_ok());
// add one simple doc
index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a")).unwrap();
assert!(index_writer.commit().is_ok());
let term_a = Term::from_field_text(text_field, "a");
@@ -1251,7 +1247,7 @@ mod tests {
let mut index_writer = index.writer_with_num_threads(4, 12_000_000).unwrap();
// add one simple doc
index_writer.add_document(doc!(text_field => "a"));
assert!(index_writer.add_document(doc!(text_field => "a")).is_ok());
let comm = index_writer.commit();
assert!(comm.is_ok());
let commit_tstamp = comm.unwrap();
@@ -1327,13 +1323,13 @@ mod tests {
// create and delete docs in same commit
for id in 0u64..5u64 {
index_writer.add_document(doc!(id_field => id));
index_writer.add_document(doc!(id_field => id))?;
}
for id in 2u64..4u64 {
index_writer.delete_term(Term::from_field_u64(id_field, id));
}
for id in 5u64..10u64 {
index_writer.add_document(doc!(id_field => id));
index_writer.add_document(doc!(id_field => id))?;
}
index_writer.commit()?;
index_reader.reload()?;
@@ -1452,7 +1448,7 @@ mod tests {
IndexingOp::AddDoc { id } => {
let facet = Facet::from(&("/cola/".to_string() + &id.to_string()));
index_writer
.add_document(doc!(id_field=>id, multi_numbers=> id, multi_numbers => id, text_field => id.to_string(), facet_field => facet, large_text_field=> LOREM));
.add_document(doc!(id_field=>id, multi_numbers=> id, multi_numbers => id, text_field => id.to_string(), facet_field => facet, large_text_field=> LOREM))?;
}
IndexingOp::DeleteDoc { id } => {
index_writer.delete_term(Term::from_field_u64(id_field, id));
@@ -1679,11 +1675,11 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
// We add a doc...
index_writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64));
index_writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64))?;
// And remove it.
index_writer.delete_term(Term::from_field_u64(id_field, 0u64));
// We add another doc.
index_writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64));
index_writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64))?;
// The expected result is a segment with
// maxdoc = 2
@@ -1700,14 +1696,14 @@ mod tests {
}
#[test]
fn test_index_doc_missing_field() {
fn test_index_doc_missing_field() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let idfield = schema_builder.add_text_field("id", STRING);
schema_builder.add_text_field("optfield", STRING);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(idfield=>"myid"));
let commit = index_writer.commit();
assert!(commit.is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(idfield=>"myid"))?;
index_writer.commit()?;
Ok(())
}
}

View File

@@ -0,0 +1,118 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, RwLock};
use super::AddBatchReceiver;
#[derive(Clone)]
pub(crate) struct IndexWriterStatus {
inner: Arc<Inner>,
}
impl IndexWriterStatus {
/// Returns true iff the index writer is alive.
pub fn is_alive(&self) -> bool {
self.inner.as_ref().is_alive()
}
/// Returns a copy of the operation receiver.
/// If the index writer was killed, returns None.
pub fn operation_receiver(&self) -> Option<AddBatchReceiver> {
let rlock = self
.inner
.receive_channel
.read()
.expect("This lock should never be poisoned");
rlock.as_ref().map(|receiver| receiver.clone())
}
/// Create an index writer bomb.
/// If dropped, the index writer status will be killed.
pub(crate) fn create_bomb(&self) -> IndexWriterBomb {
IndexWriterBomb {
inner: Some(self.inner.clone()),
}
}
}
struct Inner {
is_alive: AtomicBool,
receive_channel: RwLock<Option<AddBatchReceiver>>,
}
impl Inner {
fn is_alive(&self) -> bool {
self.is_alive.load(Ordering::Relaxed)
}
fn kill(&self) {
self.is_alive.store(false, Ordering::Relaxed);
self.receive_channel
.write()
.expect("This lock should never be poisoned")
.take();
}
}
impl From<AddBatchReceiver> for IndexWriterStatus {
fn from(receiver: AddBatchReceiver) -> Self {
IndexWriterStatus {
inner: Arc::new(Inner {
is_alive: AtomicBool::new(true),
receive_channel: RwLock::new(Some(receiver)),
}),
}
}
}
/// If dropped, the index writer will be killed.
/// To prevent this, clients can call `.defuse()`.
pub(crate) struct IndexWriterBomb {
inner: Option<Arc<Inner>>,
}
impl IndexWriterBomb {
/// Defuses the bomb.
///
/// This is the only way to drop the bomb without killing
/// the index writer.
pub fn defuse(mut self) {
self.inner = None;
}
}
impl Drop for IndexWriterBomb {
fn drop(&mut self) {
if let Some(inner) = self.inner.take() {
inner.kill();
}
}
}
#[cfg(test)]
mod tests {
use super::IndexWriterStatus;
use crossbeam::channel;
use std::mem;
#[test]
fn test_bomb_goes_boom() {
let (_tx, rx) = channel::bounded(10);
let index_writer_status: IndexWriterStatus = IndexWriterStatus::from(rx);
assert!(index_writer_status.operation_receiver().is_some());
let bomb = index_writer_status.create_bomb();
assert!(index_writer_status.operation_receiver().is_some());
mem::drop(bomb);
// boom!
assert!(index_writer_status.operation_receiver().is_none());
}
#[test]
fn test_bomb_defused() {
let (_tx, rx) = channel::bounded(10);
let index_writer_status: IndexWriterStatus = IndexWriterStatus::from(rx);
assert!(index_writer_status.operation_receiver().is_some());
let bomb = index_writer_status.create_bomb();
bomb.defuse();
assert!(index_writer_status.operation_receiver().is_some());
}
}

View File

@@ -114,7 +114,7 @@ mod tests {
use crate::Index;
#[test]
fn create_index_test_max_merge_issue_1035() {
fn create_index_test_max_merge_issue_1035() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_field = schema_builder.add_u64_field("intval", INDEXED);
let schema = schema_builder.build();
@@ -127,34 +127,34 @@ mod tests {
log_merge_policy.set_max_docs_before_merge(1);
log_merge_policy.set_min_layer_size(0);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(log_merge_policy));
// after every commit the merge checker is started, it will merge only segments with 1
// element in it because of the max_merge_size.
index_writer.add_document(doc!(int_field=>1_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>1_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>2_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>2_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>3_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>3_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>4_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>4_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>5_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>5_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>6_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>6_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>7_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>7_u64))?;
index_writer.commit()?;
index_writer.add_document(doc!(int_field=>8_u64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>8_u64))?;
index_writer.commit()?;
}
let _segment_ids = index
@@ -169,6 +169,7 @@ mod tests {
panic!("segment can't have more than two segments");
} // don't know how to wait for the merge, then it could be a simple eq
}
Ok(())
}
fn test_merge_policy() -> LogMergePolicy {

View File

@@ -1162,18 +1162,17 @@ mod tests {
score_field => 3u64,
date_field => curr_time,
bytes_score_field => 3u32.to_be_bytes().as_ref()
));
))?;
index_writer.add_document(doc!(
text_field => "a b c",
score_field => 5u64,
bytes_score_field => 5u32.to_be_bytes().as_ref()
));
))?;
index_writer.add_document(doc!(
text_field => "a b c d",
score_field => 7u64,
bytes_score_field => 7u32.to_be_bytes().as_ref()
));
))?;
index_writer.commit()?;
// writing the segment
index_writer.add_document(doc!(
@@ -1181,12 +1180,12 @@ mod tests {
date_field => curr_time,
score_field => 11u64,
bytes_score_field => 11u32.to_be_bytes().as_ref()
));
))?;
index_writer.add_document(doc!(
text_field => "a b c g",
score_field => 13u64,
bytes_score_field => 13u32.to_be_bytes().as_ref()
));
))?;
index_writer.commit()?;
}
{
@@ -1320,18 +1319,18 @@ mod tests {
text_field => "a b d",
score_field => 1u64,
bytes_score_field => vec![0u8, 0, 0, 1],
));
))?;
index_writer.add_document(doc!(
text_field => "b c",
score_field => 2u64,
bytes_score_field => vec![0u8, 0, 0, 2],
));
))?;
index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.add_document(doc!(
text_field => "c d",
score_field => 3u64,
bytes_score_field => vec![0u8, 0, 0, 3],
));
))?;
index_writer.commit()?;
reader.reload()?;
let searcher = reader.searcher();
@@ -1361,24 +1360,24 @@ mod tests {
text_field => "a d e",
score_field => 4_000u64,
bytes_score_field => vec![0u8, 0, 0, 4],
));
))?;
index_writer.add_document(doc!(
text_field => "e f",
score_field => 5_000u64,
bytes_score_field => vec![0u8, 0, 0, 5],
));
))?;
index_writer.delete_term(Term::from_field_text(text_field, "a"));
index_writer.delete_term(Term::from_field_text(text_field, "f"));
index_writer.add_document(doc!(
text_field => "f g",
score_field => 6_000u64,
bytes_score_field => vec![0u8, 0, 23, 112],
));
))?;
index_writer.add_document(doc!(
text_field => "g h",
score_field => 7_000u64,
bytes_score_field => vec![0u8, 0, 27, 88],
));
))?;
index_writer.commit()?;
reader.reload()?;
let searcher = reader.searcher();
@@ -1674,7 +1673,7 @@ mod tests {
}
doc.add_u64(int_field, *int_val);
*int_val += 1;
index_writer.add_document(doc);
index_writer.add_document(doc).unwrap();
};
index_doc(
@@ -1787,70 +1786,69 @@ mod tests {
}
#[test]
fn test_bug_merge() {
fn test_bug_merge() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_field = schema_builder.add_u64_field("intvals", INDEXED);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(int_field => 1u64));
index_writer.add_document(doc!(int_field => 1u64))?;
index_writer.commit().expect("commit failed");
index_writer.add_document(doc!(int_field => 1u64));
index_writer.add_document(doc!(int_field => 1u64))?;
index_writer.commit().expect("commit failed");
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 2);
index_writer.delete_term(Term::from_field_u64(int_field, 1));
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
block_on(index_writer.merge(&segment_ids)).expect("Merging failed");
reader.reload().unwrap();
block_on(index_writer.merge(&segment_ids))?;
reader.reload()?;
// commit has not been called yet. The document should still be
// there.
assert_eq!(reader.searcher().num_docs(), 2);
Ok(())
}
#[test]
fn test_merge_multivalued_int_fields_all_deleted() {
fn test_merge_multivalued_int_fields_all_deleted() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed();
let int_field = schema_builder.add_u64_field("intvals", int_options);
let index = Index::create_in_ram(schema_builder.build());
let reader = index.reader().unwrap();
let reader = index.reader()?;
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let mut doc = Document::default();
doc.add_u64(int_field, 1);
index_writer.add_document(doc.clone());
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc);
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc.clone())?;
index_writer.commit()?;
index_writer.add_document(doc)?;
index_writer.commit()?;
index_writer.delete_term(Term::from_field_u64(int_field, 1));
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
let segment_ids = index.searchable_segment_ids()?;
block_on(index_writer.merge(&segment_ids))?;
// assert delete has not been committed
assert!(reader.reload().is_ok());
reader.reload()?;
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 2);
index_writer.commit().unwrap();
index_writer.commit()?;
index_writer.wait_merging_threads().unwrap();
index_writer.wait_merging_threads()?;
}
reader.reload().unwrap();
reader.reload()?;
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 0);
Ok(())
}
#[test]
fn test_merge_multivalued_int_fields_simple() {
fn test_merge_multivalued_int_fields_simple() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let int_options = IntOptions::default()
.set_fast(Cardinality::MultiValues)
@@ -1859,13 +1857,13 @@ mod tests {
let index = Index::create_in_ram(schema_builder.build());
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let index_doc = |index_writer: &mut IndexWriter, int_vals: &[u64]| {
let mut doc = Document::default();
for &val in int_vals {
doc.add_u64(int_field, val);
}
index_writer.add_document(doc);
index_writer.add_document(doc).unwrap();
};
index_doc(&mut index_writer, &[1, 2]);
index_doc(&mut index_writer, &[1, 2, 3]);
@@ -1881,7 +1879,7 @@ mod tests {
index_doc(&mut index_writer, &[1_000]);
assert!(index_writer.commit().is_ok());
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let mut vals: Vec<u64> = Vec::new();
@@ -1930,14 +1928,12 @@ mod tests {
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let mut index_writer = index.writer_for_tests().unwrap();
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
assert!(index_writer.wait_merging_threads().is_ok());
let segment_ids = index.searchable_segment_ids()?;
let mut index_writer = index.writer_for_tests()?;
block_on(index_writer.merge(&segment_ids))?;
index_writer.wait_merging_threads()?;
}
assert!(reader.reload().is_ok());
reader.reload()?;
{
let searcher = reader.searcher();
@@ -1974,6 +1970,7 @@ mod tests {
ff_reader.get_vals(9, &mut vals);
assert_eq!(&vals, &[20]);
}
Ok(())
}
#[test]
@@ -1999,7 +1996,7 @@ mod tests {
doc.add_f64(field, 42.0);
doc.add_f64(multi_field, 0.24);
doc.add_f64(multi_field, 0.27);
writer.add_document(doc);
writer.add_document(doc)?;
if i % 5 == 0 {
writer.commit()?;
}
@@ -2023,7 +2020,7 @@ mod tests {
let happy_term = Term::from_field_text(text, "happy");
let term_query = TermQuery::new(happy_term, IndexRecordOption::WithFreqs);
for _ in 0..62 {
writer.add_document(doc!(text=>"hello happy tax payer"));
writer.add_document(doc!(text=>"hello happy tax payer"))?;
}
writer.commit()?;
let reader = index.reader()?;
@@ -2035,7 +2032,7 @@ mod tests {
assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
assert_nearly_equals!(term_scorer.score(), 0.0079681855);
for _ in 0..81 {
writer.add_document(doc!(text=>"hello happy tax payer"));
writer.add_document(doc!(text=>"hello happy tax payer"))?;
}
writer.commit()?;
reader.reload()?;

View File

@@ -39,14 +39,17 @@ mod tests {
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(int_field=>3_u64, facet_field=> Facet::from("/crime")));
index_writer.add_document(doc!(int_field=>6_u64, facet_field=> Facet::from("/crime")));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>5_u64, facet_field=> Facet::from("/fanta")));
assert!(index_writer.commit().is_ok());
index_writer
.add_document(doc!(int_field=>3_u64, facet_field=> Facet::from("/crime")))
.unwrap();
index_writer
.add_document(doc!(int_field=>6_u64, facet_field=> Facet::from("/crime")))
.unwrap();
index_writer.commit().unwrap();
index_writer
.add_document(doc!(int_field=>5_u64, facet_field=> Facet::from("/fanta")))
.unwrap();
index_writer.commit().unwrap();
}
// Merging the segments
@@ -66,7 +69,7 @@ mod tests {
fn create_test_index(
index_settings: Option<IndexSettings>,
force_disjunct_segment_sort_values: bool,
) -> Index {
) -> crate::Result<Index> {
let mut schema_builder = schema::Schema::builder();
let int_options = IntOptions::default()
.set_fast(Cardinality::SingleValue)
@@ -95,34 +98,34 @@ mod tests {
if let Some(settings) = index_settings {
index_builder = index_builder.settings(settings);
}
let index = index_builder.create_in_ram().unwrap();
let index = index_builder.create_in_ram()?;
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
// segment 1 - range 1-3
index_writer.add_document(doc!(int_field=>1_u64));
index_writer.add_document(doc!(int_field=>1_u64))?;
index_writer.add_document(
doc!(int_field=>3_u64, multi_numbers => 3_u64, multi_numbers => 4_u64, bytes_field => vec![1, 2, 3], text_field => "some text", facet_field=> Facet::from("/book/crime")),
);
)?;
index_writer.add_document(
doc!(int_field=>1_u64, text_field=> "deleteme", text_field => "ok text more text"),
);
)?;
index_writer.add_document(
doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64, text_field => "ok text more text"),
);
)?;
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
// segment 2 - range 1-20 , with force_disjunct_segment_sort_values 10-20
index_writer.add_document(doc!(int_field=>20_u64, multi_numbers => 20_u64));
index_writer.add_document(doc!(int_field=>20_u64, multi_numbers => 20_u64))?;
let in_val = if force_disjunct_segment_sort_values {
10_u64
} else {
1
};
index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme" , text_field => "ok text more text", facet_field=> Facet::from("/book/crime")));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme" , text_field => "ok text more text", facet_field=> Facet::from("/book/crime")))?;
index_writer.commit()?;
// segment 3 - range 5-1000, with force_disjunct_segment_sort_values 50-1000
let int_vals = if force_disjunct_segment_sort_values {
[100_u64, 50]
@@ -131,26 +134,24 @@ mod tests {
};
index_writer.add_document( // position of this doc after delete in desc sorting = [2], in disjunct case [1]
doc!(int_field=>int_vals[0], multi_numbers => 10_u64, multi_numbers => 11_u64, text_field=> "blubber", facet_field=> Facet::from("/book/fantasy")),
);
index_writer.add_document(doc!(int_field=>int_vals[1], text_field=> "deleteme"));
)?;
index_writer.add_document(doc!(int_field=>int_vals[1], text_field=> "deleteme"))?;
index_writer.add_document(
doc!(int_field=>1_000u64, multi_numbers => 1001_u64, multi_numbers => 1002_u64, bytes_field => vec![5, 5],text_field => "the biggest num")
);
)?;
index_writer.delete_term(Term::from_field_text(text_field, "deleteme"));
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
}
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let mut index_writer = index.writer_for_tests().unwrap();
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
assert!(index_writer.wait_merging_threads().is_ok());
let segment_ids = index.searchable_segment_ids()?;
let mut index_writer = index.writer_for_tests()?;
block_on(index_writer.merge(&segment_ids))?;
index_writer.wait_merging_threads()?;
}
index
Ok(index)
}
#[test]
@@ -183,7 +184,8 @@ mod tests {
..Default::default()
}),
force_disjunct_segment_sort_values,
);
)
.unwrap();
let int_field = index.schema().get_field("intval").unwrap();
let reader = index.reader().unwrap();
@@ -300,7 +302,8 @@ mod tests {
..Default::default()
}),
false,
);
)
.unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
@@ -367,7 +370,8 @@ mod tests {
..Default::default()
}),
false,
);
)
.unwrap();
let int_field = index.schema().get_field("intval").unwrap();
let multi_numbers = index.schema().get_field("multi_numbers").unwrap();

View File

@@ -4,6 +4,7 @@ pub mod demuxer;
pub mod doc_id_mapping;
mod doc_opstamp_mapping;
pub mod index_writer;
mod index_writer_status;
mod log_merge_policy;
mod merge_operation;
pub mod merge_policy;
@@ -19,6 +20,11 @@ pub mod segment_updater;
mod segment_writer;
mod stamper;
use crossbeam::channel;
use smallvec::SmallVec;
use crate::indexer::operation::AddOperation;
pub use self::index_writer::IndexWriter;
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_operation::MergeOperation;
@@ -34,6 +40,16 @@ pub use self::segment_writer::SegmentWriter;
/// Alias for the default merge policy, which is the `LogMergePolicy`.
pub type DefaultMergePolicy = LogMergePolicy;
// Batch of documents.
// Most of the time, users will send operation one-by-one, but it can be useful to
// send them as a small block to ensure that
// - all docs in the operation will happen on the same segment and continuous doc_ids.
// - all operations in the group are committed at the same time, making the group
// atomic.
type AddBatch = SmallVec<[AddOperation; 4]>;
type AddBatchSender = channel::Sender<AddBatch>;
type AddBatchReceiver = channel::Receiver<AddBatch>;
#[cfg(feature = "mmap")]
#[cfg(test)]
mod tests_mmap {
@@ -41,19 +57,20 @@ mod tests_mmap {
use crate::{Index, Term};
#[test]
fn test_advance_delete_bug() {
fn test_advance_delete_bug() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_from_tempdir(schema_builder.build()).unwrap();
let mut index_writer = index.writer_for_tests().unwrap();
let index = Index::create_from_tempdir(schema_builder.build())?;
let mut index_writer = index.writer_for_tests()?;
// there must be one deleted document in the segment
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"b"))?;
index_writer.delete_term(Term::from_field_text(text_field, "b"));
// we need enough data to trigger the bug (at least 32 documents)
for _ in 0..32 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"c"))?;
}
index_writer.commit().unwrap();
index_writer.commit().unwrap();
index_writer.commit()?;
index_writer.commit()?;
Ok(())
}
}

View File

@@ -32,11 +32,11 @@ impl<'a> PreparedCommit<'a> {
pub(crate) fn commit(self) -> crate::Result<Opstamp> {
info!("committing {}", self.opstamp);
let _ = block_on(
block_on(
self.index_writer
.segment_updater()
.schedule_commit(self.opstamp, self.payload),
);
)?;
Ok(self.opstamp)
}
}

View File

@@ -20,12 +20,15 @@ use crate::indexer::{DefaultMergePolicy, MergePolicy};
use crate::indexer::{MergeCandidate, MergeOperation};
use crate::schema::Schema;
use crate::Opstamp;
use crate::TantivyError;
use fail::fail_point;
use futures::channel::oneshot;
use futures::executor::{ThreadPool, ThreadPoolBuilder};
use futures::future::Future;
use futures::future::TryFutureExt;
use std::borrow::BorrowMut;
use std::collections::HashSet;
use std::io;
use std::io::Write;
use std::ops::Deref;
use std::path::PathBuf;
@@ -75,6 +78,10 @@ fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()>
let mut buffer = serde_json::to_vec_pretty(metas)?;
// Just adding a new line at the end of the buffer.
writeln!(&mut buffer)?;
fail_point!("save_metas", |msg| Err(TantivyError::from(io::Error::new(
io::ErrorKind::Other,
msg.unwrap_or_else(|| "Undefined".to_string())
))));
directory.atomic_write(&META_FILEPATH, &buffer[..])?;
debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));
Ok(())
@@ -706,19 +713,19 @@ mod tests {
index_writer.set_merge_policy(Box::new(MergeWheneverPossible));
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"b"))?;
}
index_writer.commit()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
index_writer.add_document(doc!(text_field=>"c"))?;
index_writer.add_document(doc!(text_field=>"d"))?;
}
index_writer.commit()?;
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
index_writer.add_document(doc!(text_field=>"e"))?;
index_writer.add_document(doc!(text_field=>"f"))?;
index_writer.commit()?;
let term = Term::from_field_text(text_field, "a");
@@ -746,8 +753,8 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
for _ in 0..10 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"b"))?;
}
index_writer.commit()?;
@@ -790,19 +797,19 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"b"))?;
}
index_writer.commit()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
index_writer.add_document(doc!(text_field=>"c"))?;
index_writer.add_document(doc!(text_field=>"d"))?;
}
index_writer.commit()?;
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
index_writer.add_document(doc!(text_field=>"e"))?;
index_writer.add_document(doc!(text_field=>"f"))?;
index_writer.commit()?;
let seg_ids = index.searchable_segment_ids()?;
@@ -842,8 +849,8 @@ mod tests {
// writing the segment
let mut index_writer = index.writer_for_tests()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"b"))?;
}
index_writer.commit()?;
@@ -869,14 +876,14 @@ mod tests {
// writing two segments
let mut index_writer = index.writer_for_tests()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"fizz"));
index_writer.add_document(doc!(text_field=>"buzz"));
index_writer.add_document(doc!(text_field=>"fizz"))?;
index_writer.add_document(doc!(text_field=>"buzz"))?;
}
index_writer.commit()?;
for _ in 0..1000 {
index_writer.add_document(doc!(text_field=>"foo"));
index_writer.add_document(doc!(text_field=>"bar"));
index_writer.add_document(doc!(text_field=>"foo"))?;
index_writer.add_document(doc!(text_field=>"bar"))?;
}
index_writer.commit()?;
indices.push(index);
@@ -909,7 +916,7 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"some text"));
index_writer.add_document(doc!(text_field=>"some text"))?;
index_writer.commit()?;
index
};
@@ -919,7 +926,7 @@ mod tests {
let body_field = schema_builder.add_text_field("body", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(body_field=>"some body"));
index_writer.add_document(doc!(body_field=>"some body"))?;
index_writer.commit()?;
index
};
@@ -938,8 +945,8 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"some text 1"));
index_writer.add_document(doc!(text_field=>"some text 2"));
index_writer.add_document(doc!(text_field=>"some text 1"))?;
index_writer.add_document(doc!(text_field=>"some text 2"))?;
index_writer.commit()?;
index
};
@@ -949,8 +956,8 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"some text 3"));
index_writer.add_document(doc!(text_field=>"some text 4"));
index_writer.add_document(doc!(text_field=>"some text 3"))?;
index_writer.add_document(doc!(text_field=>"some text 4"))?;
index_writer.delete_term(Term::from_field_text(text_field, "4"));
index_writer.commit()?;
@@ -992,11 +999,11 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"test text"));
index_writer.add_document(doc!(text_field=>"some text 2"));
index_writer.add_document(doc!(text_field=>"test text"))?;
index_writer.add_document(doc!(text_field=>"some text 2"))?;
index_writer.add_document(doc!(text_field=>"some text 3"));
index_writer.add_document(doc!(text_field=>"some text 4"));
index_writer.add_document(doc!(text_field=>"some text 3"))?;
index_writer.add_document(doc!(text_field=>"some text 4"))?;
index_writer.delete_term(Term::from_field_text(text_field, "4"));
@@ -1027,7 +1034,7 @@ mod tests {
assert_eq!(segment_metas.num_deleted_docs(), 0);
assert_eq!(segment_metas.num_docs(), 2);
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
{
let text_field = index.schema().get_field("text").unwrap();
@@ -1055,11 +1062,11 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"some text 1"));
index_writer.add_document(doc!(text_field=>"some text 2"));
index_writer.add_document(doc!(text_field=>"some text 1"))?;
index_writer.add_document(doc!(text_field=>"some text 2"))?;
index_writer.add_document(doc!(text_field=>"some text 3"));
index_writer.add_document(doc!(text_field=>"some text 4"));
index_writer.add_document(doc!(text_field=>"some text 3"))?;
index_writer.add_document(doc!(text_field=>"some text 4"))?;
index_writer.delete_term(Term::from_field_text(text_field, "4"));

View File

@@ -63,7 +63,7 @@
//! body => "He was an old man who fished alone in a skiff in \
//! the Gulf Stream and he had gone eighty-four days \
//! now without taking a fish."
//! ));
//! ))?;
//!
//! // We need to call .commit() explicitly to force the
//! // index_writer to finish processing the documents in the queue,
@@ -379,24 +379,22 @@ pub mod tests {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema).unwrap();
let index = Index::create_from_tempdir(schema)?;
// writing the segment
let mut index_writer = index.writer_for_tests()?;
{
// writing the segment
let mut index_writer = index.writer_for_tests()?;
{
let doc = doc!(text_field=>"af b");
index_writer.add_document(doc);
}
{
let doc = doc!(text_field=>"a b c");
index_writer.add_document(doc);
}
{
let doc = doc!(text_field=>"a b c d");
index_writer.add_document(doc);
}
assert!(index_writer.commit().is_ok());
let doc = doc!(text_field=>"af b");
index_writer.add_document(doc)?;
}
{
let doc = doc!(text_field=>"a b c");
index_writer.add_document(doc)?;
}
{
let doc = doc!(text_field=>"a b c d");
index_writer.add_document(doc)?;
}
index_writer.commit()?;
Ok(())
}
@@ -406,12 +404,12 @@ pub mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b c"));
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"a a"));
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"a a"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"c"))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -433,7 +431,7 @@ pub mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b c"));
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.commit()?;
let index_reader = index.reader()?;
let searcher = index_reader.searcher();
@@ -455,9 +453,9 @@ pub mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b c"));
index_writer.add_document(doc!());
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(text_field=>"a b"))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -499,20 +497,20 @@ pub mod tests {
// writing the segment
let mut index_writer = index.writer_for_tests()?;
// 0
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"a b"))?;
// 1
index_writer.add_document(doc!(text_field=>" a c"));
index_writer.add_document(doc!(text_field=>" a c"))?;
// 2
index_writer.add_document(doc!(text_field=>" b c"));
index_writer.add_document(doc!(text_field=>" b c"))?;
// 3
index_writer.add_document(doc!(text_field=>" b d"));
index_writer.add_document(doc!(text_field=>" b d"))?;
index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.delete_term(Term::from_field_text(text_field, "a"));
// 4
index_writer.add_document(doc!(text_field=>" b c"));
index_writer.add_document(doc!(text_field=>" b c"))?;
// 5
index_writer.add_document(doc!(text_field=>" a"));
index_writer.add_document(doc!(text_field=>" a"))?;
index_writer.commit()?;
}
{
@@ -546,7 +544,7 @@ pub mod tests {
// writing the segment
let mut index_writer = index.writer_for_tests()?;
// 0
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"a b"))?;
// 1
index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.rollback()?;
@@ -582,7 +580,7 @@ pub mod tests {
{
// writing the segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"a b"))?;
index_writer.delete_term(Term::from_field_text(text_field, "c"));
index_writer.rollback()?;
index_writer.delete_term(Term::from_field_text(text_field, "a"));
@@ -632,7 +630,7 @@ pub mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=>1u64));
index_writer.add_document(doc!(field=>1u64))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -656,7 +654,7 @@ pub mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let negative_val = -1i64;
index_writer.add_document(doc!(value_field => negative_val));
index_writer.add_document(doc!(value_field => negative_val))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -680,7 +678,7 @@ pub mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let val = std::f64::consts::PI;
index_writer.add_document(doc!(value_field => val));
index_writer.add_document(doc!(value_field => val))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -703,7 +701,7 @@ pub mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"a"))?;
assert!(index_writer.commit().is_ok());
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -726,14 +724,14 @@ pub mod tests {
// writing the segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"63"));
index_writer.add_document(doc!(text_field=>"70"));
index_writer.add_document(doc!(text_field=>"34"));
index_writer.add_document(doc!(text_field=>"1"));
index_writer.add_document(doc!(text_field=>"38"));
index_writer.add_document(doc!(text_field=>"33"));
index_writer.add_document(doc!(text_field=>"40"));
index_writer.add_document(doc!(text_field=>"17"));
index_writer.add_document(doc!(text_field=>"63"))?;
index_writer.add_document(doc!(text_field=>"70"))?;
index_writer.add_document(doc!(text_field=>"34"))?;
index_writer.add_document(doc!(text_field=>"1"))?;
index_writer.add_document(doc!(text_field=>"38"))?;
index_writer.add_document(doc!(text_field=>"33"))?;
index_writer.add_document(doc!(text_field=>"40"))?;
index_writer.add_document(doc!(text_field=>"17"))?;
index_writer.delete_term(Term::from_field_text(text_field, "38"));
index_writer.delete_term(Term::from_field_text(text_field, "34"));
index_writer.commit()?;
@@ -751,7 +749,7 @@ pub mod tests {
{
// writing the segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"af af af bc bc"));
index_writer.add_document(doc!(text_field=>"af af af bc bc"))?;
index_writer.commit()?;
}
{
@@ -783,9 +781,9 @@ pub mod tests {
let reader = index.reader()?;
// writing the segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"af af af b"));
index_writer.add_document(doc!(text_field=>"a b c"));
index_writer.add_document(doc!(text_field=>"a b c d"));
index_writer.add_document(doc!(text_field=>"af af af b"))?;
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.add_document(doc!(text_field=>"a b c d"))?;
index_writer.commit()?;
reader.reload()?;
@@ -847,9 +845,9 @@ pub mod tests {
assert_eq!(reader.searcher().num_docs(), 0u64);
// writing the segment
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"af b"));
index_writer.add_document(doc!(text_field=>"a b c"));
index_writer.add_document(doc!(text_field=>"a b c d"));
index_writer.add_document(doc!(text_field=>"af b"))?;
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.add_document(doc!(text_field=>"a b c d"))?;
index_writer.commit()?;
reader.reload()?;
assert_eq!(reader.searcher().num_docs(), 3u64);
@@ -889,7 +887,7 @@ pub mod tests {
{
let document =
doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
index_writer.add_document(document);
index_writer.add_document(document)?;
index_writer.commit()?;
}
let reader = index.reader()?;
@@ -956,7 +954,7 @@ pub mod tests {
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for doc_id in 0u64..DOC_COUNT {
index_writer.add_document(doc!(id => doc_id));
index_writer.add_document(doc!(id => doc_id))?;
}
index_writer.commit()?;
@@ -973,7 +971,7 @@ pub mod tests {
index_writer.delete_term(Term::from_field_u64(id, doc_id));
index_writer.commit()?;
index_reader.reload()?;
index_writer.add_document(doc!(id => doc_id));
index_writer.add_document(doc!(id => doc_id))?;
index_writer.commit()?;
index_reader.reload()?;
let searcher = index_reader.searcher();
@@ -1008,8 +1006,8 @@ pub mod tests {
let index = Index::create_in_dir(&index_path, schema)?;
let mut writer = index.writer(50_000_000)?;
for _ in 0..5000 {
writer.add_document(doc!(body => "foo"));
writer.add_document(doc!(body => "boo"));
writer.add_document(doc!(body => "foo"))?;
writer.add_document(doc!(body => "boo"))?;
}
writer.commit()?;
assert!(index.validate_checksum()?.is_empty());

View File

@@ -393,8 +393,8 @@ mod tests {
}
#[test]
fn test_block_segment_postings() {
let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>());
fn test_block_segment_postings() -> crate::Result<()> {
let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>())?;
let mut offset: u32 = 0u32;
// checking that the `doc_freq` is correct
assert_eq!(block_segments.doc_freq(), 100_000);
@@ -409,16 +409,17 @@ mod tests {
offset += block.len() as u32;
block_segments.advance();
}
Ok(())
}
#[test]
fn test_skip_right_at_new_block() {
fn test_skip_right_at_new_block() -> crate::Result<()> {
let mut doc_ids = (0..128).collect::<Vec<u32>>();
// 128 is missing
doc_ids.push(129);
doc_ids.push(130);
{
let block_segments = build_block_postings(&doc_ids);
let block_segments = build_block_postings(&doc_ids)?;
let mut docset = SegmentPostings::from_block_postings(block_segments, None);
assert_eq!(docset.seek(128), 129);
assert_eq!(docset.doc(), 129);
@@ -427,7 +428,7 @@ mod tests {
assert_eq!(docset.advance(), TERMINATED);
}
{
let block_segments = build_block_postings(&doc_ids);
let block_segments = build_block_postings(&doc_ids).unwrap();
let mut docset = SegmentPostings::from_block_postings(block_segments, None);
assert_eq!(docset.seek(129), 129);
assert_eq!(docset.doc(), 129);
@@ -436,46 +437,47 @@ mod tests {
assert_eq!(docset.advance(), TERMINATED);
}
{
let block_segments = build_block_postings(&doc_ids);
let block_segments = build_block_postings(&doc_ids)?;
let mut docset = SegmentPostings::from_block_postings(block_segments, None);
assert_eq!(docset.doc(), 0);
assert_eq!(docset.seek(131), TERMINATED);
assert_eq!(docset.doc(), TERMINATED);
}
Ok(())
}
fn build_block_postings(docs: &[DocId]) -> BlockSegmentPostings {
fn build_block_postings(docs: &[DocId]) -> crate::Result<BlockSegmentPostings> {
let mut schema_builder = Schema::builder();
let int_field = schema_builder.add_u64_field("id", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let mut last_doc = 0u32;
for &doc in docs {
for _ in last_doc..doc {
index_writer.add_document(doc!(int_field=>1u64));
index_writer.add_document(doc!(int_field=>1u64))?;
}
index_writer.add_document(doc!(int_field=>0u64));
index_writer.add_document(doc!(int_field=>0u64))?;
last_doc = doc + 1;
}
index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let inverted_index = segment_reader.inverted_index(int_field).unwrap();
let term = Term::from_field_u64(int_field, 0u64);
let term_info = inverted_index.get_term_info(&term).unwrap().unwrap();
inverted_index
.read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)
.unwrap()
let term_info = inverted_index.get_term_info(&term)?.unwrap();
let block_postings = inverted_index
.read_block_postings_from_terminfo(&term_info, IndexRecordOption::Basic)?;
Ok(block_postings)
}
#[test]
fn test_block_segment_postings_seek() {
fn test_block_segment_postings_seek() -> crate::Result<()> {
let mut docs = vec![0];
for i in 0..1300 {
docs.push((i * i / 100) + i);
}
let mut block_postings = build_block_postings(&docs[..]);
let mut block_postings = build_block_postings(&docs[..])?;
for i in &[0, 424, 10000] {
block_postings.seek(*i);
let docs = block_postings.docs();
@@ -484,6 +486,7 @@ mod tests {
}
block_postings.seek(100_000);
assert_eq!(block_postings.doc(COMPRESSION_BLOCK_SIZE - 1), TERMINATED);
Ok(())
}
#[test]
@@ -497,7 +500,7 @@ mod tests {
// the other containing odd numbers.
for i in 0..6 {
let doc = doc!(int_field=> (i % 2) as u64);
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit()?;
let searcher = index.reader()?.searcher();

View File

@@ -87,12 +87,12 @@ pub mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(title => r#"abc abc abc"#));
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
index_writer.add_document(doc!(title => r#"abc abc abc"#))?;
index_writer.add_document(doc!(title => r#"abc be be be be abc"#))?;
for _ in 0..1_000 {
index_writer.add_document(doc!(title => r#"abc abc abc"#));
index_writer.add_document(doc!(title => r#"abc abc abc"#))?;
}
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
index_writer.add_document(doc!(title => r#"abc be be be be abc"#))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
@@ -174,9 +174,9 @@ pub mod tests {
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
{
index_writer.add_document(doc!(text_field=>exceeding_token_text));
index_writer.commit().unwrap();
reader.reload().unwrap();
index_writer.add_document(doc!(text_field=>exceeding_token_text))?;
index_writer.commit()?;
reader.reload()?;
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let inverted_index = segment_reader.inverted_index(text_field)?;
@@ -186,9 +186,9 @@ pub mod tests {
assert_eq!(&bytes, b"hello");
}
{
index_writer.add_document(doc!(text_field=>ok_token_text.clone()));
index_writer.commit().unwrap();
reader.reload().unwrap();
index_writer.add_document(doc!(text_field=>ok_token_text.clone()))?;
index_writer.commit()?;
reader.reload()?;
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(1u32);
let inverted_index = segment_reader.inverted_index(text_field)?;
@@ -315,13 +315,13 @@ pub mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field => "g b b d c g c"));
index_writer.add_document(doc!(text_field => "g a b b a d c g c"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "g b b d c g c"))?;
index_writer.add_document(doc!(text_field => "g a b b a d c g c"))?;
index_writer.commit()?;
}
let term_a = Term::from_field_text(text_field, "a");
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut postings = segment_reader
.inverted_index(text_field)?
@@ -350,7 +350,7 @@ pub mod tests {
let mut index_writer = index.writer_for_tests()?;
for i in 0u64..num_docs as u64 {
let doc = doc!(value_field => 2u64, value_field => i % 2u64);
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
assert!(index_writer.commit().is_ok());
}

View File

@@ -13,6 +13,7 @@ use crate::termdict::{TermDictionaryBuilder, TermOrdinal};
use crate::{DocId, Score};
use common::CountingWriter;
use common::{BinarySerializable, VInt};
use fail::fail_point;
use std::cmp::Ordering;
use std::io::{self, Write};
@@ -212,6 +213,9 @@ impl<'a> FieldSerializer<'a> {
/// If the current block is incomplete, it need to be encoded
/// using `VInt` encoding.
pub fn close_term(&mut self) -> io::Result<()> {
fail_point!("FieldSerializer::close_term", |msg: Option<String>| {
Err(io::Error::new(io::ErrorKind::Other, format!("{:?}", msg)))
});
if self.term_open {
self.postings_serializer
.close_term(self.current_term_info.doc_freq)?;

View File

@@ -78,29 +78,29 @@ mod tests {
use crate::schema::{Schema, TEXT};
use crate::Index;
fn create_test_index() -> Index {
fn create_test_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(field=>"aaa"));
index_writer.add_document(doc!(field=>"bbb"));
index_writer.commit().unwrap();
index_writer.add_document(doc!(field=>"ccc"));
index_writer.commit().unwrap();
index
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=>"aaa"))?;
index_writer.add_document(doc!(field=>"bbb"))?;
index_writer.commit()?;
index_writer.add_document(doc!(field=>"ccc"))?;
index_writer.commit()?;
Ok(index)
}
#[test]
fn test_all_query() {
let index = create_test_index();
let reader = index.reader().unwrap();
fn test_all_query() -> crate::Result<()> {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false).unwrap();
let weight = AllQuery.weight(&searcher, false)?;
{
let reader = searcher.segment_reader(0);
let mut scorer = weight.scorer(reader, 1.0).unwrap();
let mut scorer = weight.scorer(reader, 1.0)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.advance(), 1u32);
assert_eq!(scorer.doc(), 1u32);
@@ -108,28 +108,30 @@ mod tests {
}
{
let reader = searcher.segment_reader(1);
let mut scorer = weight.scorer(reader, 1.0).unwrap();
let mut scorer = weight.scorer(reader, 1.0)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.advance(), TERMINATED);
}
Ok(())
}
#[test]
fn test_all_query_with_boost() {
let index = create_test_index();
let reader = index.reader().unwrap();
fn test_all_query_with_boost() -> crate::Result<()> {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false).unwrap();
let weight = AllQuery.weight(&searcher, false)?;
let reader = searcher.segment_reader(0);
{
let mut scorer = weight.scorer(reader, 2.0).unwrap();
let mut scorer = weight.scorer(reader, 2.0)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.score(), 2.0);
}
{
let mut scorer = weight.scorer(reader, 1.5).unwrap();
let mut scorer = weight.scorer(reader, 1.5)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.score(), 1.5);
}
Ok(())
}
}

View File

@@ -92,16 +92,16 @@ mod tests {
use crate::Index;
use tantivy_fst::Automaton;
fn create_index() -> Index {
fn create_index() -> crate::Result<Index> {
let mut schema = Schema::builder();
let title = schema.add_text_field("title", STRING);
let index = Index::create_in_ram(schema.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(title=>"abc"));
index_writer.add_document(doc!(title=>"bcd"));
index_writer.add_document(doc!(title=>"abcd"));
assert!(index_writer.commit().is_ok());
index
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(title=>"abc"))?;
index_writer.add_document(doc!(title=>"bcd"))?;
index_writer.add_document(doc!(title=>"abcd"))?;
index_writer.commit()?;
Ok(index)
}
#[derive(Clone, Copy)]
@@ -140,34 +140,32 @@ mod tests {
}
#[test]
fn test_automaton_weight() {
let index = create_index();
fn test_automaton_weight() -> crate::Result<()> {
let index = create_index()?;
let field = index.schema().get_field("title").unwrap();
let automaton_weight = AutomatonWeight::new(field, PrefixedByA);
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let mut scorer = automaton_weight
.scorer(searcher.segment_reader(0u32), 1.0)
.unwrap();
let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.score(), 1.0);
assert_eq!(scorer.advance(), 2u32);
assert_eq!(scorer.doc(), 2u32);
assert_eq!(scorer.score(), 1.0);
assert_eq!(scorer.advance(), TERMINATED);
Ok(())
}
#[test]
fn test_automaton_weight_boost() {
let index = create_index();
fn test_automaton_weight_boost() -> crate::Result<()> {
let index = create_index()?;
let field = index.schema().get_field("title").unwrap();
let automaton_weight = AutomatonWeight::new(field, PrefixedByA);
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let mut scorer = automaton_weight
.scorer(searcher.segment_reader(0u32), 1.32)
.unwrap();
let mut scorer = automaton_weight.scorer(searcher.segment_reader(0u32), 1.32)?;
assert_eq!(scorer.doc(), 0u32);
assert_eq!(scorer.score(), 1.32);
Ok(())
}
}

View File

@@ -41,22 +41,22 @@ use std::collections::BTreeMap;
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(
/// title => "The Name of the Wind",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Muadib",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "A Dairy Cow",
/// body => "hidden",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "A Dairy Cow",
/// body => "found",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// ));
/// index_writer.commit().unwrap();
/// ))?;
/// index_writer.commit()?;
/// }
///
/// let reader = index.reader()?;
@@ -217,11 +217,11 @@ mod tests {
let text = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_for_tests().unwrap();
writer.add_document(doc!(text=>"b c"));
writer.add_document(doc!(text=>"a c"));
writer.add_document(doc!(text=>"a b"));
writer.add_document(doc!(text=>"a d"));
let mut writer = index.writer_for_tests()?;
writer.add_document(doc!(text=>"b c"))?;
writer.add_document(doc!(text=>"a c"))?;
writer.add_document(doc!(text=>"a b"))?;
writer.add_document(doc!(text=>"a d"))?;
writer.commit()?;
Ok(index)
}

View File

@@ -26,72 +26,75 @@ mod tests {
use crate::Index;
use crate::{DocAddress, DocId, Score};
fn aux_test_helper() -> (Index, Field) {
fn aux_test_helper() -> crate::Result<(Index, Field)> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field => "a b c"));
index_writer.add_document(doc!(text_field => "a c"));
index_writer.add_document(doc!(text_field => "b c"));
index_writer.add_document(doc!(text_field => "a b c d"));
index_writer.add_document(doc!(text_field => "d"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "a b c"))?;
index_writer.add_document(doc!(text_field => "a c"))?;
index_writer.add_document(doc!(text_field => "b c"))?;
index_writer.add_document(doc!(text_field => "a b c d"))?;
index_writer.add_document(doc!(text_field => "d"))?;
index_writer.commit()?;
}
(index, text_field)
Ok((index, text_field))
}
#[test]
pub fn test_boolean_non_all_term_disjunction() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_non_all_term_disjunction() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("(+a +b) d").unwrap();
let searcher = index.reader().unwrap().searcher();
assert_eq!(query.count(&searcher).unwrap(), 3);
let query = query_parser.parse_query("(+a +b) d")?;
let searcher = index.reader()?.searcher();
assert_eq!(query.count(&searcher)?, 3);
Ok(())
}
#[test]
pub fn test_boolean_single_must_clause() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_single_must_clause() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("+a").unwrap();
let searcher = index.reader().unwrap().searcher();
let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0).unwrap();
let query = query_parser.parse_query("+a")?;
let searcher = index.reader()?.searcher();
let weight = query.weight(&searcher, true)?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
Ok(())
}
#[test]
pub fn test_boolean_termonly_intersection() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_termonly_intersection() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a +b +c").unwrap();
let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0).unwrap();
let query = query_parser.parse_query("+a +b +c")?;
let weight = query.weight(&searcher, true)?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<TermScorer>>());
}
{
let query = query_parser.parse_query("+a +(b c)").unwrap();
let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0).unwrap();
let query = query_parser.parse_query("+a +(b c)")?;
let weight = query.weight(&searcher, true)?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<Box<dyn Scorer>>>());
}
Ok(())
}
#[test]
pub fn test_boolean_reqopt() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_reqopt() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a b").unwrap();
let weight = query.weight(&searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0).unwrap();
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, true)?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<RequiredOptionalScorer<
Box<dyn Scorer>,
Box<dyn Scorer>,
@@ -99,16 +102,17 @@ mod tests {
>>());
}
{
let query = query_parser.parse_query("+a b").unwrap();
let weight = query.weight(&searcher, false).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0).unwrap();
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, false)?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
}
Ok(())
}
#[test]
pub fn test_boolean_query() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_query() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let make_term_query = |text: &str| {
let term_query = TermQuery::new(
@@ -119,7 +123,7 @@ mod tests {
query
};
let reader = index.reader().unwrap();
let reader = index.reader()?;
let matching_docs = |boolean_query: &dyn Query| {
reader
@@ -166,11 +170,12 @@ mod tests {
let boolean_query = BooleanQuery::new(vec![(Occur::MustNot, make_term_query("d"))]);
assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new());
}
Ok(())
}
#[test]
pub fn test_boolean_query_two_excluded() {
let (index, text_field) = aux_test_helper();
pub fn test_boolean_query_two_excluded() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let make_term_query = |text: &str| {
let term_query = TermQuery::new(
@@ -181,7 +186,7 @@ mod tests {
query
};
let reader = index.reader().unwrap();
let reader = index.reader()?;
let matching_topdocs = |query: &dyn Query| {
reader
@@ -214,20 +219,21 @@ mod tests {
assert_eq!(top_doc, DocAddress::new(0, 4));
assert_eq!(top_score, score_doc_4);
}
Ok(())
}
#[test]
pub fn test_boolean_query_with_weight() {
pub fn test_boolean_query_with_weight() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field => "a b c"));
index_writer.add_document(doc!(text_field => "a c"));
index_writer.add_document(doc!(text_field => "b c"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "a b c"))?;
index_writer.add_document(doc!(text_field => "a c"))?;
index_writer.add_document(doc!(text_field => "b c"))?;
index_writer.commit()?;
}
let term_a: Box<dyn Query> = Box::new(TermQuery::new(
Term::from_field_text(text_field, "a"),
@@ -243,24 +249,21 @@ mod tests {
BooleanQuery::new(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
let boolean_weight = boolean_query.weight(&searcher, true).unwrap();
{
let mut boolean_scorer = boolean_weight
.scorer(searcher.segment_reader(0u32), 1.0)
.unwrap();
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(boolean_scorer.doc(), 0u32);
assert_nearly_equals!(boolean_scorer.score(), 0.84163445);
}
{
let mut boolean_scorer = boolean_weight
.scorer(searcher.segment_reader(0u32), 2.0)
.unwrap();
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 2.0)?;
assert_eq!(boolean_scorer.doc(), 0u32);
assert_nearly_equals!(boolean_scorer.score(), 1.6832689);
}
Ok(())
}
#[test]
pub fn test_intersection_score() {
let (index, text_field) = aux_test_helper();
pub fn test_intersection_score() -> crate::Result<()> {
let (index, text_field) = aux_test_helper()?;
let make_term_query = |text: &str| {
let term_query = TermQuery::new(
@@ -270,7 +273,7 @@ mod tests {
let query: Box<dyn Query> = Box::new(term_query);
query
};
let reader = index.reader().unwrap();
let reader = index.reader()?;
let score_docs = |boolean_query: &dyn Query| {
let fruit = reader
.searcher()
@@ -288,6 +291,7 @@ mod tests {
assert_nearly_equals!(scores[0], 0.977973);
assert_nearly_equals!(scores[1], 0.84699446);
}
Ok(())
}
#[test]
@@ -297,8 +301,8 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 5_000_000)?;
index_writer.add_document(doc!(text=>"a"));
index_writer.add_document(doc!(text=>"b"));
index_writer.add_document(doc!(text=>"a"))?;
index_writer.add_document(doc!(text=>"b"))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let term_a: Box<dyn Query> = Box::new(TermQuery::new(

View File

@@ -141,19 +141,20 @@ mod tests {
use crate::{DocAddress, Document, Index};
#[test]
fn test_boost_query_explain() {
fn test_boost_query_explain() -> crate::Result<()> {
let schema = Schema::builder().build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(Document::new());
assert!(index_writer.commit().is_ok());
let reader = index.reader().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(Document::new())?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let query = BoostQuery::new(Box::new(AllQuery), 0.2);
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
assert_eq!(
explanation.to_pretty_json(),
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}"
)
);
Ok(())
}
}

View File

@@ -67,17 +67,17 @@ static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Laz
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(
/// title => "The Name of the Wind",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Muadib",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "A Dairy Cow",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// ));
/// index_writer.commit().unwrap();
/// ))?;
/// index_writer.commit()?;
/// }
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
@@ -172,32 +172,29 @@ mod test {
use crate::Term;
#[test]
pub fn test_fuzzy_term() {
pub fn test_fuzzy_term() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let country_field = schema_builder.add_text_field("country", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
country_field => "japan",
));
))?;
index_writer.add_document(doc!(
country_field => "korea",
));
index_writer.commit().unwrap();
))?;
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
// passes because Levenshtein distance is 1 (substitute 'o' with 'a')
{
let term = Term::from_field_text(country_field, "japon");
let fuzzy_query = FuzzyTermQuery::new(term, 1, true);
let top_docs = searcher
.search(&fuzzy_query, &TopDocs::with_limit(2))
.unwrap();
let top_docs = searcher.search(&fuzzy_query, &TopDocs::with_limit(2))?;
assert_eq!(top_docs.len(), 1, "Expected only 1 document");
let (score, _) = top_docs[0];
assert_nearly_equals!(1.0, score);
@@ -208,24 +205,20 @@ mod test {
let term = Term::from_field_text(country_field, "jap");
let fuzzy_query = FuzzyTermQuery::new(term, 1, true);
let top_docs = searcher
.search(&fuzzy_query, &TopDocs::with_limit(2))
.unwrap();
let top_docs = searcher.search(&fuzzy_query, &TopDocs::with_limit(2))?;
assert_eq!(top_docs.len(), 0, "Expected no document");
}
// passes because prefix Levenshtein distance is 0
{
let term = Term::from_field_text(country_field, "jap");
let fuzzy_query = FuzzyTermQuery::new_prefix(term, 1, true);
let top_docs = searcher
.search(&fuzzy_query, &TopDocs::with_limit(2))
.unwrap();
let top_docs = searcher.search(&fuzzy_query, &TopDocs::with_limit(2))?;
assert_eq!(top_docs.len(), 1, "Expected only 1 document");
let (score, _) = top_docs[0];
assert_nearly_equals!(1.0, score);
}
Ok(())
}
#[test]
@@ -235,7 +228,7 @@ mod test {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(country_field => "japan"));
index_writer.add_document(doc!(country_field => "japan"))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();

View File

@@ -176,20 +176,20 @@ mod tests {
use crate::DocAddress;
use crate::Index;
fn create_test_index() -> Index {
fn create_test_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT);
let body = schema_builder.add_text_field("body", TEXT | STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(title => "aaa", body => "the old man and the sea"));
index_writer.add_document(doc!(title => "bbb", body => "an old man sailing on the sea"));
index_writer.add_document(doc!(title => "ccc", body=> "send this message to alice"));
index_writer.add_document(doc!(title => "ddd", body=> "a lady was riding and old bike"));
index_writer.add_document(doc!(title => "eee", body=> "Yes, my lady."));
index_writer.commit().unwrap();
index
index_writer.add_document(doc!(title => "aaa", body => "the old man and the sea"))?;
index_writer.add_document(doc!(title => "bbb", body => "an old man sailing on the sea"))?;
index_writer.add_document(doc!(title => "ccc", body=> "send this message to alice"))?;
index_writer.add_document(doc!(title => "ddd", body=> "a lady was riding and old bike"))?;
index_writer.add_document(doc!(title => "eee", body=> "Yes, my lady."))?;
index_writer.commit()?;
Ok(index)
}
#[test]
@@ -235,9 +235,9 @@ mod tests {
}
#[test]
fn test_more_like_this_query() {
let index = create_test_index();
let reader = index.reader().unwrap();
fn test_more_like_this_query() -> crate::Result<()> {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
// search base 1st doc with words [sea, and] skipping [old]
@@ -250,7 +250,7 @@ mod tests {
.with_boost_factor(1.0)
.with_stop_words(vec!["old".to_string()])
.with_document(DocAddress::new(0, 0));
let top_docs = searcher.search(&query, &TopDocs::with_limit(5)).unwrap();
let top_docs = searcher.search(&query, &TopDocs::with_limit(5))?;
let mut doc_ids: Vec<_> = top_docs.iter().map(|item| item.1.doc_id).collect();
doc_ids.sort_unstable();
@@ -266,11 +266,12 @@ mod tests {
.with_max_word_length(5)
.with_boost_factor(1.0)
.with_document(DocAddress::new(0, 4));
let top_docs = searcher.search(&query, &TopDocs::with_limit(5)).unwrap();
let top_docs = searcher.search(&query, &TopDocs::with_limit(5))?;
let mut doc_ids: Vec<_> = top_docs.iter().map(|item| item.1.doc_id).collect();
doc_ids.sort_unstable();
assert_eq!(doc_ids.len(), 2);
assert_eq!(doc_ids, vec![3, 4]);
Ok(())
}
}

View File

@@ -18,34 +18,34 @@ pub mod tests {
use crate::DocId;
use crate::{DocAddress, TERMINATED};
pub fn create_index(texts: &[&'static str]) -> Index {
pub fn create_index(texts: &[&'static str]) -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
for &text in texts {
let doc = doc!(text_field=>text);
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
}
index
Ok(index)
}
#[test]
pub fn test_phrase_query() {
pub fn test_phrase_query() -> crate::Result<()> {
let index = create_index(&[
"b b b d c g c",
"a b b d c g c",
"a b a b c",
"c a b a d ga a",
"a b c",
]);
])?;
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts
.iter()
@@ -54,7 +54,7 @@ pub mod tests {
let phrase_query = PhraseQuery::new(terms);
let test_fruits = searcher
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
.expect("search should succeed");
.unwrap();
test_fruits
.docs()
.iter()
@@ -66,11 +66,12 @@ pub mod tests {
assert_eq!(test_query(vec!["b", "b"]), vec![0, 1]);
assert!(test_query(vec!["g", "ewrwer"]).is_empty());
assert!(test_query(vec!["g", "a"]).is_empty());
Ok(())
}
#[test]
pub fn test_phrase_query_simple() -> crate::Result<()> {
let index = create_index(&["a b b d c g c", "a b a b c"]);
let index = create_index(&["a b b d c g c", "a b a b c"])?;
let text_field = index.schema().get_field("text").unwrap();
let searcher = index.reader()?.searcher();
let terms: Vec<Term> = vec!["a", "b", "c"]
@@ -86,17 +87,17 @@ pub mod tests {
}
#[test]
pub fn test_phrase_query_no_score() {
pub fn test_phrase_query_no_score() -> crate::Result<()> {
let index = create_index(&[
"b b b d c g c",
"a b b d c g c",
"a b a b c",
"c a b a d ga a",
"a b c",
]);
])?;
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts
.iter()
@@ -105,7 +106,7 @@ pub mod tests {
let phrase_query = PhraseQuery::new(terms);
let test_fruits = searcher
.search(&phrase_query, &TEST_COLLECTOR_WITHOUT_SCORE)
.expect("search should succeed");
.unwrap();
test_fruits
.docs()
.iter()
@@ -117,10 +118,11 @@ pub mod tests {
assert_eq!(test_query(vec!["b", "b"]), vec![0, 1]);
assert!(test_query(vec!["g", "ewrwer"]).is_empty());
assert!(test_query(vec!["g", "a"]).is_empty());
Ok(())
}
#[test]
pub fn test_phrase_query_no_positions() {
pub fn test_phrase_query_no_positions() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
use crate::schema::IndexRecordOption;
use crate::schema::TextFieldIndexing;
@@ -135,33 +137,34 @@ pub mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"a b c"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b c"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let phrase_query = PhraseQuery::new(vec![
Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b"),
]);
let search_result = searcher
let search_error = searcher
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
.map(|_| ());
.err();
assert!(matches!(
search_result,
Err(crate::TantivyError::SchemaError(msg))
search_error,
Some(crate::TantivyError::SchemaError(msg))
if msg == "Applied phrase query on field \"text\", which does not have positions \
indexed"
));
Ok(())
}
#[test]
pub fn test_phrase_score() {
let index = create_index(&["a b c", "a b c a b"]);
pub fn test_phrase_score() -> crate::Result<()> {
let index = create_index(&["a b c", "a b c a b"])?;
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts
.iter()
@@ -177,23 +180,24 @@ pub mod tests {
let scores = test_query(vec!["a", "b"]);
assert_nearly_equals!(scores[0], 0.40618482);
assert_nearly_equals!(scores[1], 0.46844664);
Ok(())
}
#[test] // motivated by #234
pub fn test_phrase_query_docfreq_order() {
pub fn test_phrase_query_docfreq_order() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"b a"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"b"))?;
index_writer.add_document(doc!(text_field=>"a b"))?;
index_writer.add_document(doc!(text_field=>"b a"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let test_query = |texts: Vec<&str>| {
let terms: Vec<Term> = texts
.iter()
@@ -208,18 +212,19 @@ pub mod tests {
};
assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress::new(0, 1)]);
assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress::new(0, 2)]);
Ok(())
}
#[test] // motivated by #234
pub fn test_phrase_query_non_trivial_offsets() {
pub fn test_phrase_query_non_trivial_offsets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"a b c d e f g h"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b c d e f g h"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let test_query = |texts: Vec<(usize, &str)>| {
@@ -245,5 +250,6 @@ pub mod tests {
assert_eq!(test_query(vec![(4, "e"), (0, "a"), (2, "c")]), vec![0]);
assert!(test_query(vec![(0, "a"), (2, "d")]).is_empty());
assert_eq!(test_query(vec![(1, "a"), (3, "c")]), vec![0]);
Ok(())
}
}

View File

@@ -116,19 +116,18 @@ mod tests {
use crate::{DocSet, Term};
#[test]
pub fn test_phrase_count() {
let index = create_index(&["a c", "a a b d a b c", " a b"]);
pub fn test_phrase_count() -> crate::Result<()> {
let index = create_index(&["a c", "a a b d a b c", " a b"])?;
let schema = index.schema();
let text_field = schema.get_field("text").unwrap();
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let phrase_query = PhraseQuery::new(vec![
Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b"),
]);
let phrase_weight = phrase_query.phrase_weight(&searcher, true).unwrap();
let mut phrase_scorer = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)
.unwrap()
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.unwrap();
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.phrase_count(), 2);
@@ -136,5 +135,6 @@ mod tests {
assert_eq!(phrase_scorer.doc(), 2);
assert_eq!(phrase_scorer.phrase_count(), 1);
assert_eq!(phrase_scorer.advance(), TERMINATED);
Ok(())
}
}

View File

@@ -51,7 +51,7 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
/// for year in 1950u64..2017u64 {
/// let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
/// for _ in 0..num_docs_within_year {
/// index_writer.add_document(doc!(year_field => year));
/// index_writer.add_document(doc!(year_field => year))?;
/// }
/// }
/// index_writer.commit()?;
@@ -338,34 +338,35 @@ mod tests {
use std::ops::Bound;
#[test]
fn test_range_query_simple() {
fn test_range_query_simple() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let year_field = schema_builder.add_u64_field("year", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
for year in 1950u64..2017u64 {
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
for _ in 0..num_docs_within_year {
index_writer.add_document(doc!(year_field => year));
index_writer.add_document(doc!(year_field => year))?;
}
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
// ... or `1960..=1969` if inclusive range is enabled.
let count = searcher.search(&docs_in_the_sixties, &Count).unwrap();
let count = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(count, 2285);
Ok(())
}
#[test]
fn test_range_query() {
fn test_range_query() -> crate::Result<()> {
let int_field: Field;
let schema = {
let mut schema_builder = Schema::builder();
@@ -375,7 +376,7 @@ mod tests {
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(2, 6_000_000)?;
for i in 1..100 {
let mut doc = Document::new();
@@ -384,10 +385,10 @@ mod tests {
doc.add_i64(int_field, j as i64);
}
}
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let searcher = reader.searcher();
@@ -419,10 +420,11 @@ mod tests {
)),
91
);
Ok(())
}
#[test]
fn test_range_float() {
fn test_range_float() -> crate::Result<()> {
let float_field: Field;
let schema = {
let mut schema_builder = Schema::builder();
@@ -441,12 +443,12 @@ mod tests {
doc.add_f64(float_field, j as f64);
}
}
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let count_multiples =
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
@@ -479,6 +481,7 @@ mod tests {
)),
91
);
Ok(())
}
#[test]
@@ -494,7 +497,7 @@ mod tests {
index_writer.add_document(doc!(
title => "hemoglobin blood",
year => 1990_i64
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();

View File

@@ -28,17 +28,17 @@ use tantivy_fst::Regex;
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(
/// title => "The Name of the Wind",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Muadib",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "A Dairy Cow",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// ));
/// index_writer.commit().unwrap();
/// ))?;
/// index_writer.commit()?;
/// }
///
/// let reader = index.reader()?;
@@ -100,7 +100,7 @@ mod test {
use std::sync::Arc;
use tantivy_fst::Regex;
fn build_test_index() -> (IndexReader, Field) {
fn build_test_index() -> crate::Result<(IndexReader, Field)> {
let mut schema_builder = Schema::builder();
let country_field = schema_builder.add_text_field("country", TEXT);
let schema = schema_builder.build();
@@ -109,15 +109,15 @@ mod test {
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(
country_field => "japan",
));
))?;
index_writer.add_document(doc!(
country_field => "korea",
));
index_writer.commit().unwrap();
))?;
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
(reader, country_field)
Ok((reader, country_field))
}
fn verify_regex_query(
@@ -141,31 +141,32 @@ mod test {
}
#[test]
pub fn test_regex_query() {
let (reader, field) = build_test_index();
let matching_one = RegexQuery::from_pattern("jap[ao]n", field).unwrap();
let matching_zero = RegexQuery::from_pattern("jap[A-Z]n", field).unwrap();
pub fn test_regex_query() -> crate::Result<()> {
let (reader, field) = build_test_index()?;
let matching_one = RegexQuery::from_pattern("jap[ao]n", field)?;
let matching_zero = RegexQuery::from_pattern("jap[A-Z]n", field)?;
verify_regex_query(matching_one, matching_zero, reader);
Ok(())
}
#[test]
pub fn test_construct_from_regex() {
let (reader, field) = build_test_index();
pub fn test_construct_from_regex() -> crate::Result<()> {
let (reader, field) = build_test_index()?;
let matching_one = RegexQuery::from_regex(Regex::new("jap[ao]n").unwrap(), field);
let matching_zero = RegexQuery::from_regex(Regex::new("jap[A-Z]n").unwrap(), field);
verify_regex_query(matching_one, matching_zero, reader);
Ok(())
}
#[test]
pub fn test_construct_from_reused_regex() {
pub fn test_construct_from_reused_regex() -> crate::Result<()> {
let r1 = Arc::new(Regex::new("jap[ao]n").unwrap());
let r2 = Arc::new(Regex::new("jap[A-Z]n").unwrap());
let (reader, field) = build_test_index();
let (reader, field) = build_test_index()?;
let matching_one = RegexQuery::from_regex(r1.clone(), field);
let matching_zero = RegexQuery::from_regex(r2.clone(), field);
@@ -176,5 +177,6 @@ mod test {
let matching_zero = RegexQuery::from_regex(r2, field);
verify_regex_query(matching_one, matching_zero, reader);
Ok(())
}
}

View File

@@ -18,28 +18,29 @@ mod tests {
use crate::{Index, Term, TERMINATED};
#[test]
pub fn test_term_query_no_freq() {
pub fn test_term_query_no_freq() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", STRING);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let doc = doc!(text_field => "a");
index_writer.add_document(doc);
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc)?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let term_query = TermQuery::new(
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
);
let term_weight = term_query.weight(&searcher, true).unwrap();
let term_weight = term_query.weight(&searcher, true)?;
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader, 1.0).unwrap();
let mut term_scorer = term_weight.scorer(segment_reader, 1.0)?;
assert_eq!(term_scorer.doc(), 0);
assert_nearly_equals!(term_scorer.score(), 0.28768212);
Ok(())
}
#[test]
@@ -53,7 +54,7 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
for _ in 0..COMPRESSION_BLOCK_SIZE {
let doc = doc!(text_field => "a");
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit()?;
}
@@ -78,7 +79,7 @@ mod tests {
}
#[test]
pub fn test_term_weight() {
pub fn test_term_weight() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let left_field = schema_builder.add_text_field("left", TEXT);
let right_field = schema_builder.add_text_field("right", TEXT);
@@ -86,22 +87,20 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
left_field => "left1 left2 left2 left2f2 left2f2 left3 abcde abcde abcde abcde abcde abcde abcde abcde abcde abcewde abcde abcde",
right_field => "right1 right2",
large_field => "large0 large1 large2 large3 large4 large5 large6 large7 large8 large9 large10 large11 large12 large13 large14 large15 large16 large17 large18 large19 large20 large21 large22 large23 large24 large25 large26 large27 large28 large29 large30 large31 large32 large33 large34 large35 large36 large37 large38 large39 large40 large41 large42 large43 large44 large45 large46 large47 large48 large49 large50 large51 large52 large53 large54 large55 large56 large57 large58 large59 large60 large61 large62 large63 large64 large65 large66 large67 large68 large69 large70 large71 large72 large73 large74 large75 large76 large77 large78 large79 large80 large81 large82 large83 large84 large85 large86 large87 large88 large89 large90 large91 large92 large93 large94 large95 large96 large97 large98 large99 large100 large101 large102 large103 large104 large105 large106 large107 large108 large109 large110 large111 large112 large113 large114 large115 large116 large117 large118 large119 large120 large121 large122 large123 large124 large125 large126 large127 large128 large129 large130 large131 large132 large133 large134 large135 large136 large137 large138 large139 large140 large141 large142 large143 large144 large145 large146 large147 large148 large149 large150 large151 large152 large153 large154 large155 large156 large157 large158 large159 large160 large161 large162 large163 large164 large165 large166 large167 large168 large169 large170 large171 large172 large173 large174 large175 large176 large177 large178 large179 large180 large181 large182 large183 large184 large185 large186 large187 large188 large189 large190 large191 large192 large193 large194 large195 large196 large197 large198 large199 large200 large201 large202 large203 large204 large205 large206 large207 large208 large209 large210 large211 large212 large213 large214 large215 large216 large217 large218 large219 large220 large221 large222 large223 large224 large225 large226 large227 large228 large229 large230 large231 large232 large233 large234 large235 large236 large237 large238 large239 large240 large241 large242 large243 large244 large245 large246 large247 large248 large249 large250 large251 large252 large253 large254 large255 large256 large257 large258 large259 large260 large261 large262 large263 large264 large265 large266 large267 large268 large269 large270 large271 large272 large273 large274 large275 large276 large277 large278 large279 large280 large281 large282 large283 large284 large285 large286"
));
index_writer.add_document(doc!(left_field => "left4 left1"));
index_writer.commit().unwrap();
))?;
index_writer.add_document(doc!(left_field => "left4 left1"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
{
let term = Term::from_field_text(left_field, "left2");
let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs);
let topdocs = searcher
.search(&term_query, &TopDocs::with_limit(2))
.unwrap();
let topdocs = searcher.search(&term_query, &TopDocs::with_limit(2))?;
assert_eq!(topdocs.len(), 1);
let (score, _) = topdocs[0];
assert_nearly_equals!(0.77802235, score);
@@ -109,9 +108,7 @@ mod tests {
{
let term = Term::from_field_text(left_field, "left1");
let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs);
let top_docs = searcher
.search(&term_query, &TopDocs::with_limit(2))
.unwrap();
let top_docs = searcher.search(&term_query, &TopDocs::with_limit(2))?;
assert_eq!(top_docs.len(), 2);
let (score1, _) = top_docs[0];
assert_nearly_equals!(0.27101856, score1);
@@ -119,32 +116,34 @@ mod tests {
assert_nearly_equals!(0.13736556, score2);
}
{
let query_parser = QueryParser::for_index(&index, vec![]);
let query = query_parser.parse_query("left:left2 left:left1").unwrap();
let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
let query_parser = QueryParser::for_index(&index, Vec::new());
let query = query_parser.parse_query("left:left2 left:left1")?;
let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?;
assert_eq!(top_docs.len(), 2);
let (score1, _) = top_docs[0];
assert_nearly_equals!(0.9153879, score1);
let (score2, _) = top_docs[1];
assert_nearly_equals!(0.27101856, score2);
}
Ok(())
}
#[test]
fn test_term_query_count_when_there_are_deletes() {
fn test_term_query_count_when_there_are_deletes() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"a b"));
index_writer.add_document(doc!(text_field=>"a c"));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a b"))?;
index_writer.add_document(doc!(text_field=>"a c"))?;
index_writer.delete_term(Term::from_field_text(text_field, "b"));
index_writer.commit().unwrap();
index_writer.commit()?;
let term_a = Term::from_field_text(text_field, "a");
let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);
let reader = index.reader().unwrap();
assert_eq!(term_query.count(&*reader.searcher()).unwrap(), 1);
let reader = index.reader()?;
assert_eq!(term_query.count(&*reader.searcher())?, 1);
Ok(())
}
#[test]
@@ -153,9 +152,9 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"a"));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.commit()?;
let term_a = Term::from_field_text(text_field, "a");
let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);
@@ -186,11 +185,11 @@ mod tests {
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=>"b"));
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"b"))?;
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"a"))?;
index_writer.add_document(doc!(text_field=>"b"))?;
index_writer.commit()?;
let term_a = Term::from_field_text(text_field, "a");
let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);

View File

@@ -32,16 +32,16 @@ use std::fmt;
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(
/// title => "The Name of the Wind",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Muadib",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "A Dairy Cow",
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// ));
/// ))?;
/// index_writer.commit()?;
/// }
/// let reader = index.reader()?;

View File

@@ -305,7 +305,7 @@ mod tests {
let term_freq = rng.gen_range(1..10000);
let words: Vec<&str> = std::iter::repeat("bbbb").take(term_freq).collect();
let text = words.join(" ");
writer.add_document(doc!(text_field=>text));
writer.add_document(doc!(text_field=>text))?;
}
writer.commit()?;
let term_query = TermQuery::new(

View File

@@ -196,7 +196,7 @@ mod tests {
let field_entry: FieldEntry = serde_json::from_str(json_str).unwrap();
match field_entry.field_type {
FieldType::Str(_) => {}
_ => panic!("expected FieldType::Str")
_ => panic!("expected FieldType::Str"),
}
}
}

View File

@@ -7,7 +7,7 @@ use crate::schema::Value;
use crate::schema::{IntOptions, TextOptions};
use crate::tokenizer::PreTokenizedString;
use chrono::{FixedOffset, Utc};
use serde::{Serialize, Deserialize};
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
/// Possible error that may occur while parsing a field value

View File

@@ -207,7 +207,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
/// # Quand avec mes haleurs ont fini ces tapages,
/// # Les Fleuves m'ont laissé descendre où je voulais.
/// # "#);
/// # index_writer.add_document(doc.clone());
/// # index_writer.add_document(doc.clone())?;
/// # index_writer.commit()?;
/// # let query_parser = QueryParser::for_index(&index, vec![text_field]);
/// // ...
@@ -470,58 +470,55 @@ Survey in 2016, 2017, and 2018."#;
}
#[test]
fn test_snippet_generator_term_score() {
fn test_snippet_generator_term_score() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a b"));
index_writer.commit().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.add_document(doc!(text_field => "a b"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let query_parser = QueryParser::for_index(&index, vec![text_field]);
{
let query = query_parser.parse_query("e").unwrap();
let snippet_generator =
SnippetGenerator::create(&searcher, &*query, text_field).unwrap();
let query = query_parser.parse_query("e")?;
let snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?;
assert!(snippet_generator.terms_text().is_empty());
}
{
let query = query_parser.parse_query("a").unwrap();
let snippet_generator =
SnippetGenerator::create(&searcher, &*query, text_field).unwrap();
let query = query_parser.parse_query("a")?;
let snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?;
assert_eq!(
&btreemap!("a".to_string() => 0.25),
snippet_generator.terms_text()
);
}
{
let query = query_parser.parse_query("a b").unwrap();
let snippet_generator =
SnippetGenerator::create(&searcher, &*query, text_field).unwrap();
let query = query_parser.parse_query("a b")?;
let snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?;
assert_eq!(
&btreemap!("a".to_string() => 0.25, "b".to_string() => 0.5),
snippet_generator.terms_text()
);
}
{
let query = query_parser.parse_query("a b c").unwrap();
let snippet_generator =
SnippetGenerator::create(&searcher, &*query, text_field).unwrap();
let query = query_parser.parse_query("a b c")?;
let snippet_generator = SnippetGenerator::create(&searcher, &*query, text_field)?;
assert_eq!(
&btreemap!("a".to_string() => 0.25, "b".to_string() => 0.5),
snippet_generator.terms_text()
);
}
Ok(())
}
#[test]
fn test_snippet_generator() {
fn test_snippet_generator() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_options = TextOptions::default().set_indexing_options(
TextFieldIndexing::default()
@@ -533,12 +530,10 @@ Survey in 2016, 2017, and 2018."#;
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
{
let doc = doc ! (text_field => TEST_TEXT);
index_writer.add_document(doc);
}
index_writer.commit().unwrap();
let mut index_writer = index.writer_for_tests()?;
let doc = doc!(text_field => TEST_TEXT);
index_writer.add_document(doc)?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let query_parser = QueryParser::for_index(&index, vec![text_field]);
@@ -554,5 +549,6 @@ Survey in 2016, 2017, and 2018."#;
let snippet = snippet_generator.snippet(TEST_TEXT);
assert_eq!(snippet.to_html(), "<b>Rust</b> is syntactically similar to C++[according to whom?],\nbut its <b>designers</b> intend it to");
}
Ok(())
}
}

View File

@@ -321,24 +321,24 @@ mod test {
}
#[test]
fn test_fast_indexed() {
fn test_fast_indexed() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let name = schema_builder.add_u64_field("name", FAST | INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(name => 1u64));
index_writer.add_document(doc!(name => 2u64));
index_writer.add_document(doc!(name => 10u64));
index_writer.add_document(doc!(name => 20u64));
index_writer.commit().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => 1u64))?;
index_writer.add_document(doc!(name => 2u64))?;
index_writer.add_document(doc!(name => 10u64))?;
index_writer.add_document(doc!(name => 20u64))?;
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let searcher_space_usage = searcher.space_usage().unwrap();
let searcher_space_usage = searcher.space_usage()?;
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
@@ -355,29 +355,30 @@ mod test {
// TODO: understand why the following fails
// assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
Ok(())
}
#[test]
fn test_text() {
fn test_text() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let name = schema_builder.add_text_field("name", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "hi"))?;
index_writer.add_document(doc!(name => "this is a test"))?;
index_writer.add_document(
doc!(name => "some more documents with some word overlap with the other test"),
);
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
)?;
index_writer.add_document(doc!(name => "hello hi goodbye"))?;
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let searcher_space_usage = searcher.space_usage().unwrap();
let searcher_space_usage = searcher.space_usage()?;
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
@@ -394,28 +395,29 @@ mod test {
// TODO: understand why the following fails
// assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
Ok(())
}
#[test]
fn test_store() {
fn test_store() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let name = schema_builder.add_text_field("name", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "hi"))?;
index_writer.add_document(doc!(name => "this is a test"))?;
index_writer.add_document(
doc!(name => "some more documents with some word overlap with the other test"),
);
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
)?;
index_writer.add_document(doc!(name => "hello hi goodbye"))?;
index_writer.commit()?;
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let searcher_space_usage = searcher.space_usage().unwrap();
let searcher_space_usage = searcher.space_usage()?;
assert!(searcher_space_usage.total() > 0);
assert_eq!(1, searcher_space_usage.segments().len());
@@ -432,6 +434,7 @@ mod test {
assert!(segment.store().total() > 0);
assert!(segment.store().total() < 512);
assert_eq!(0, segment.deletes());
Ok(())
}
#[test]
@@ -443,10 +446,10 @@ mod test {
{
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => 1u64));
index_writer.add_document(doc!(name => 2u64));
index_writer.add_document(doc!(name => 3u64));
index_writer.add_document(doc!(name => 4u64));
index_writer.add_document(doc!(name => 1u64))?;
index_writer.add_document(doc!(name => 2u64))?;
index_writer.add_document(doc!(name => 3u64))?;
index_writer.add_document(doc!(name => 4u64))?;
index_writer.commit()?;
}

View File

@@ -136,12 +136,12 @@ mod tests {
index_writer.set_merge_policy(Box::new(NoMergePolicy));
let long_text: String = "abcdefghijklmnopqrstuvwxyz".repeat(1_000);
for _ in 0..20 {
index_writer.add_document(doc!(body=>long_text.clone()));
index_writer.add_document(doc!(body=>long_text.clone()))?;
}
index_writer.commit()?;
index_writer.add_document(doc!(text=>"testb"));
index_writer.add_document(doc!(text=>"testb"))?;
for _ in 0..10 {
index_writer.add_document(doc!(text=>"testd", body=>long_text.clone()));
index_writer.add_document(doc!(text=>"testd", body=>long_text.clone()))?;
}
index_writer.commit()?;
index_writer.delete_term(Term::from_field_text(text, "testb"));

View File

@@ -213,24 +213,23 @@ pub mod tests {
let schema = schema_builder.build();
let index_builder = Index::builder().schema(schema);
let index = index_builder.create_in_ram().unwrap();
let index = index_builder.create_in_ram()?;
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=> "deleteme"));
index_writer.add_document(doc!(text_field=> "deletemenot"));
index_writer.add_document(doc!(text_field=> "deleteme"));
index_writer.add_document(doc!(text_field=> "deletemenot"));
index_writer.add_document(doc!(text_field=> "deleteme"));
index_writer.add_document(doc!(text_field=> "deleteme"))?;
index_writer.add_document(doc!(text_field=> "deletemenot"))?;
index_writer.add_document(doc!(text_field=> "deleteme"))?;
index_writer.add_document(doc!(text_field=> "deletemenot"))?;
index_writer.add_document(doc!(text_field=> "deleteme"))?;
index_writer.delete_term(Term::from_field_text(text_field, "deleteme"));
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let reader = searcher.segment_reader(0);
let store = reader.get_store_reader().unwrap();
let store = reader.get_store_reader()?;
for doc in store.iter(reader.alive_bitset()) {
assert_eq!(
*doc?.get_first(text_field).unwrap().text().unwrap(),
@@ -311,33 +310,30 @@ pub mod tests {
let index = index_builder.create_in_ram().unwrap();
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text_field=> "1"));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(text_field=> "2"));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(text_field=> "3"));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(text_field=> "4"));
assert!(index_writer.commit().is_ok());
index_writer.add_document(doc!(text_field=> "5"));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=> "1"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=> "2"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=> "3"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=> "4"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text_field=> "5"))?;
index_writer.commit()?;
}
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let mut index_writer = index.writer_for_tests().unwrap();
assert!(block_on(index_writer.merge(&segment_ids)).is_ok());
assert!(index_writer.wait_merging_threads().is_ok());
let segment_ids = index.searchable_segment_ids()?;
let mut index_writer = index.writer_for_tests()?;
block_on(index_writer.merge(&segment_ids))?;
index_writer.wait_merging_threads()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let reader = searcher.segment_readers().iter().last().unwrap();
let store = reader.get_store_reader().unwrap();
let store = reader.get_store_reader()?;
assert_eq!(store.block_checkpoints().count(), 1);
Ok(())
}

View File

@@ -47,12 +47,12 @@ fn test_write_commit_fails() -> tantivy::Result<()> {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
index_writer.add_document(doc!(text_field => "a"))?;
}
index_writer.commit()?;
fail::cfg("RamDirectory::atomic_write", "return(error_write_failed)").unwrap();
fail::cfg("save_metas", "return(error_write_failed)").unwrap();
for _ in 0..100 {
index_writer.add_document(doc!(text_field => "b"));
index_writer.add_document(doc!(text_field => "b"))?;
}
assert!(index_writer.commit().is_err());
@@ -64,3 +64,61 @@ fn test_write_commit_fails() -> tantivy::Result<()> {
assert_eq!(num_docs_containing("b")?, 0);
Ok(())
}
// Motivated by
// - https://github.com/quickwit-inc/quickwit/issues/730
// Details at
// - https://github.com/quickwit-inc/tantivy/issues/1198
#[test]
fn test_fail_on_flush_segment() -> tantivy::Result<()> {
let _fail_scenario_guard = fail::FailScenario::setup();
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let index_writer = index.writer_with_num_threads(1, 3_000_000)?;
fail::cfg("FieldSerializer::close_term", "return(simulatederror)").unwrap();
for i in 0..100_000 {
if index_writer
.add_document(doc!(text_field => format!("hellohappytaxpayerlongtokenblabla{}", i)))
.is_err()
{
return Ok(());
}
}
panic!("add_document should have returned an error");
}
#[test]
fn test_fail_on_flush_segment_but_one_worker_remains() -> tantivy::Result<()> {
let _fail_scenario_guard = fail::FailScenario::setup();
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let index_writer = index.writer_with_num_threads(2, 6_000_000)?;
fail::cfg("FieldSerializer::close_term", "1*return(simulatederror)").unwrap();
for i in 0..100_000 {
if index_writer
.add_document(doc!(text_field => format!("hellohappytaxpayerlongtokenblabla{}", i)))
.is_err()
{
return Ok(());
}
}
panic!("add_document should have returned an error");
}
#[test]
fn test_fail_on_commit_segment() -> tantivy::Result<()> {
let _fail_scenario_guard = fail::FailScenario::setup();
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
fail::cfg("FieldSerializer::close_term", "return(simulatederror)").unwrap();
for i in 0..10 {
index_writer
.add_document(doc!(text_field => format!("hellohappytaxpayerlongtokenblabla{}", i)))?;
}
assert!(index_writer.commit().is_err());
Ok(())
}