Compare commits

...

15 Commits

Author SHA1 Message Date
Paul Masurel
37e7af322d Reverting atomic_write to the atomic_writes in order to address #866 2020-09-19 10:39:43 +09:00
Paul Masurel
151498cbe7 Creating the tempfile for atomicwrites in the same directory as the MmapDirectory. (#878) 2020-09-05 23:06:29 +09:00
Paul Masurel
3a72b1cb98 Accept dash within field names. (#874)
Accept dash in field names and enforce field names constraint at the
creation of the schema.

Closes #796
2020-09-01 13:38:52 +09:00
Paul Masurel
2737822620 Fixing unit tests. (#868)
There was a unit test failing when notify was sending more
than one event on atomicwrites.

It was observed on MacOS CI.
2020-08-27 16:43:39 +09:00
b8591340
06c12ae221 Filter meta.json from validate_checksum (#872) 2020-08-27 07:54:37 +09:00
Paul Masurel
4e4400af7f Added cargo timing report to .gitignore 2020-08-23 16:15:28 +09:00
Paul Masurel
3f1ecf53ab Merge branch 'master' of github.com:tantivy-search/tantivy 2020-08-22 21:30:47 +09:00
Paul Masurel
0b583b8130 Plastic changes 2020-08-22 21:29:12 +09:00
Paul Masurel
31d18dca1c Removing dependency to atomicwrites (#866) 2020-08-21 21:37:05 +09:00
stephenlagree
5e06e7de5a Update basic_search.rs (#865)
Remove duplicated document entry.
2020-08-21 11:23:09 +09:00
Paul Masurel
8af53cbd36 Merge branch 'master' of github.com:tantivy-search/tantivy 2020-08-21 08:57:42 +09:00
Paul Masurel
4914076e8f Fixing release build 2020-08-21 08:57:27 +09:00
Paul Masurel
e04f47e922 Using block wand for term queries too. 2020-08-20 15:51:21 +09:00
Paul Masurel
f355695581 Code clean up 2020-08-20 15:42:50 +09:00
Paul Masurel
cbacdf0de8 Edited README. 2020-08-20 14:28:24 +09:00
19 changed files with 200 additions and 142 deletions

.gitignore vendored
View File

@@ -12,3 +12,4 @@ cpp/simdcomp/bitpackingbenchmark
 *.bk
 .idea
 trace.dat
+cargo-timing*

View File

@@ -1,3 +1,7 @@
+Tantivy 0.14.0
+=========================
+- Remove dependency to atomicwrites #833. (Implemented by @pmasurel upon suggestion and research from @asafigan.)
+
 Tantivy 0.13.0
 ======================
 Tantivy 0.13 introduce a change in the index format that will require

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.13.0"
+version = "0.14.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -22,8 +22,8 @@ tantivy-fst = "0.3"
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
 snap = "1"
-atomicwrites = {version="0.2.2", optional=true}
-tempfile = "3.0"
+tempfile = {version="3.0", optional=true}
+atomicwrites = "0.2"
 log = "0.4"
 serde = {version="1.0", features=["derive"]}
 serde_json = "1.0"
@@ -38,7 +38,7 @@ owning_ref = "0.4"
 stable_deref_trait = "1.0.0"
 rust-stemmers = "1.2"
 downcast-rs = { version="1.0" }
-tantivy-query-grammar = { version="0.13", path="./query-grammar" }
+tantivy-query-grammar = { version="0.14.0-dev", path="./query-grammar" }
 bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
 census = "0.4"
 fnv = "1.0.6"
@@ -75,7 +75,7 @@ overflow-checks = true
 [features]
 default = ["mmap"]
-mmap = ["atomicwrites", "fs2", "memmap", "notify"]
+mmap = ["fs2", "tempfile", "memmap", "notify"]
 lz4-compression = ["lz4"]
 failpoints = ["fail/failpoints"]
 unstable = [] # useful for benches.

View File

@@ -34,11 +34,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
 The following [benchmark](https://tantivy-search.github.io/bench/) break downs
 performance for different type of queries / collection.
-
-In general, Tantivy tends to be
-- slower than Lucene on union with a Top-K due to Block-WAND optimization.
-- faster than Lucene on intersection and phrase queries.
-
 Your mileage WILL vary depending on the nature of queries and their load.

 # Features

View File

@@ -112,18 +112,6 @@ fn main() -> tantivy::Result<()> {
         limbs and branches that arch over the pool"
     ));
-    index_writer.add_document(doc!(
-    title => "Of Mice and Men",
-    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
-    bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-    over the yellow sands in the sunlight before reaching the narrow pool. On one \
-    side of the river the golden foothill slopes curve up to the strong and rocky \
-    Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-    fresh and green with every spring, carrying in their lower leaf junctures the \
-    debris of the winters flooding; and sycamores with mottled, white, recumbent \
-    limbs and branches that arch over the pool"
-    ));
-
     // Multivalued field just need to be repeated.
     index_writer.add_document(doc!(
         title => "Frankenstein",

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.13.0"
+version = "0.14.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]

View File

@@ -52,7 +52,7 @@ mod test {
     use crate::Occur;

     #[test]
-    fn test_Occur_compose() {
+    fn test_occur_compose() {
         assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
         assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
         assert_eq!(

View File

@@ -9,8 +9,10 @@ use combine::{
 fn field<'a>() -> impl Parser<&'a str, Output = String> {
     (
-        letter(),
-        many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
+        (letter().or(char('_'))),
+        many(satisfy(|c: char| {
+            c.is_alphanumeric() || c == '_' || c == '-'
+        })),
     )
     .skip(char(':'))
     .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
@@ -279,6 +281,8 @@ pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
 #[cfg(test)]
 mod test {
+    type TestParseResult = Result<(), StringStreamError>;
+
     use super::*;
     use combine::parser::Parser;
@@ -296,9 +300,10 @@ mod test {
     }

     #[test]
-    fn test_occur_symbol() {
-        assert_eq!(super::occur_symbol().parse("-"), Ok((Occur::MustNot, "")));
-        assert_eq!(super::occur_symbol().parse("+"), Ok((Occur::Must, "")));
+    fn test_occur_symbol() -> TestParseResult {
+        assert_eq!(super::occur_symbol().parse("-")?, (Occur::MustNot, ""));
+        assert_eq!(super::occur_symbol().parse("+")?, (Occur::Must, ""));
+        Ok(())
     }

     #[test]
@@ -410,6 +415,25 @@ mod test {
         assert_eq!(format!("{:?}", ast), "\"abc\"");
     }

+    #[test]
+    fn test_field_name() -> TestParseResult {
+        assert_eq!(
+            super::field().parse("my-field-name:a")?,
+            ("my-field-name".to_string(), "a")
+        );
+        assert_eq!(
+            super::field().parse("my_field_name:a")?,
+            ("my_field_name".to_string(), "a")
+        );
+        assert!(super::field().parse(":a").is_err());
+        assert!(super::field().parse("-my_field:a").is_err());
+        assert_eq!(
+            super::field().parse("_my_field:a")?,
+            ("_my_field".to_string(), "a")
+        );
+        Ok(())
+    }
+
     #[test]
     fn test_range_parser() {
         // testing the range() parser separately
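The rule the updated field() parser encodes, restated as a self-contained sketch without the combine machinery (parse_field and its exact name handling are illustrative, not part of the change): everything before the first ':' is the field name, accepted only if it starts with a letter or '_' and continues with alphanumerics, '_' or '-'.

    // Illustrative stand-in for the combine-based field() parser above.
    fn parse_field(input: &str) -> Option<(String, &str)> {
        let colon = input.find(':')?;
        let (name, rest) = (&input[..colon], &input[colon + 1..]);
        let mut chars = name.chars();
        // First character: ASCII letter or underscore.
        match chars.next() {
            Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
            _ => return None,
        }
        // Remaining characters: alphanumeric, underscore, or dash.
        if chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') {
            Some((name.to_string(), rest))
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(
            parse_field("my-field-name:a"),
            Some(("my-field-name".to_string(), "a"))
        );
        assert!(parse_field(":a").is_none());
        assert!(parse_field("-my_field:a").is_none());
    }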

View File

@@ -539,7 +539,6 @@ mod tests {
             test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
         }
     }
-
     fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
         let mut reader_index = reader.index();
         let (sender, receiver) = crossbeam::channel::unbounded();
@@ -550,12 +549,23 @@ mod tests {
         assert_eq!(reader.searcher().num_docs(), 0);
         writer.add_document(doc!(field=>1u64));
         writer.commit().unwrap();
+        // We need a loop here because it is possible for notify to send more than
+        // one modify event. It was observed on CI on MacOS.
+        loop {
             assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 1);
+            if reader.searcher().num_docs() == 1 {
+                break;
+            }
+        }
         writer.add_document(doc!(field=>2u64));
         writer.commit().unwrap();
+        // ... Same as above
+        loop {
             assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 2);
+            if reader.searcher().num_docs() == 2 {
+                break;
+            }
+        }
     }

     // This test will not pass on windows, because windows
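The pattern behind this fix, reduced to a runnable sketch with stand-in types (the channel and counter below are not tantivy's): a file watcher may deliver several events for one logical change, so the test drains events until the expected state is visible instead of pairing exactly one event with one commit.

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::{mpsc, Arc};
    use std::thread;

    fn main() {
        let num_docs = Arc::new(AtomicUsize::new(0));
        let (sender, receiver) = mpsc::channel();

        let writer_docs = Arc::clone(&num_docs);
        thread::spawn(move || {
            writer_docs.store(1, Ordering::SeqCst);
            // A notify-style watcher may fire more than once per commit:
            sender.send(()).unwrap();
            sender.send(()).unwrap();
        });

        // Drain events until the state we expect becomes visible.
        loop {
            assert!(receiver.recv().is_ok());
            if num_docs.load(Ordering::SeqCst) == 1 {
                break;
            }
        }
    }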

View File

@@ -1,4 +1,4 @@
-use crate::core::MANAGED_FILEPATH;
+use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
 use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError};
 use crate::directory::footer::{Footer, FooterProxy};
 use crate::directory::DirectoryLock;
@@ -246,13 +246,15 @@ impl ManagedDirectory {
     /// List files for which checksum does not match content
     pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
         let mut hashset = HashSet::new();
-        let managed_paths = self
+        let mut managed_paths = self
             .meta_informations
             .read()
             .expect("Managed directory rlock poisoned in list damaged.")
             .managed_paths
             .clone();
+        managed_paths.remove(*META_FILEPATH);
+
         for path in managed_paths.into_iter() {
             if !self.validate_checksum(&path)? {
                 hashset.insert(path);
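The shape of the #872 fix in isolation (types simplified; `is_damaged` stands in for validate_checksum): meta.json is dropped from the candidate set before validation, since it is written through atomic_write rather than through the footer-wrapped writers, so it has no checksum footer to validate against.

    use std::collections::HashSet;
    use std::path::PathBuf;

    // Simplified model of list_damaged(): exclude the metadata file,
    // then keep only paths whose checksum check fails.
    fn list_damaged(
        managed_paths: &HashSet<PathBuf>,
        is_damaged: impl Fn(&PathBuf) -> bool,
    ) -> HashSet<PathBuf> {
        let mut candidates = managed_paths.clone();
        candidates.remove(&PathBuf::from("meta.json"));
        candidates.into_iter().filter(|p| is_damaged(p)).collect()
    }

    fn main() {
        let managed: HashSet<PathBuf> =
            ["meta.json", "seg0.idx"].iter().map(PathBuf::from).collect();
        // Even if meta.json would fail the check, it is no longer reported.
        let damaged = list_damaged(&managed, |p| p.ends_with("meta.json"));
        assert!(damaged.is_empty());
    }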

View File

@@ -1,4 +1,5 @@
 use crate::core::META_FILEPATH;
+use atomicwrites;
 use crate::directory::error::LockError;
 use crate::directory::error::{
     DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError,
@@ -34,6 +35,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
+use tempfile;
 use tempfile::TempDir;

 /// Create a default io error given a string.
@@ -487,11 +489,11 @@ impl Directory for MmapDirectory {
         }
     }

-    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
+    fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
         debug!("Atomic Write {:?}", path);
         let full_path = self.resolve_path(path);
         let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
-        meta_file.write(|f| f.write_all(data))?;
+        meta_file.write(|f| f.write_all(content))?;
         Ok(())
     }
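For contrast, a sketch of the tempfile-based variant this branch experimented with before commit 37e7af322d restored atomicwrites (function name and error handling here are illustrative): the temporary file is created in the same directory as the target, which is the point of commit 151498cbe7, because a rename is only atomic when it stays on one filesystem.

    use std::io::{self, Write};
    use std::path::Path;
    use tempfile::NamedTempFile;

    // Write the payload into a temp file next to the target, then rename
    // it over the target. NamedTempFile::persist() uses rename(2), which
    // is atomic only within a single filesystem -- hence `new_in(dir)`.
    fn atomic_write(path: &Path, content: &[u8]) -> io::Result<()> {
        let dir = path
            .parent()
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "no parent dir"))?;
        let mut tmp = NamedTempFile::new_in(dir)?;
        tmp.write_all(content)?;
        tmp.flush()?;
        tmp.persist(path)?;
        Ok(())
    }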

View File

@@ -211,19 +211,18 @@ fn test_watch(directory: &mut dyn Directory) {
         .unwrap();
     for i in 0..10 {
-        assert_eq!(i, counter.load(SeqCst));
+        assert!(i <= counter.load(SeqCst));
         assert!(directory
             .atomic_write(Path::new("meta.json"), b"random_test_data_2")
             .is_ok());
-        assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
-        assert_eq!(i + 1, counter.load(SeqCst));
+        assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
     }
     mem::drop(watch_handle);
     assert!(directory
         .atomic_write(Path::new("meta.json"), b"random_test_data")
         .is_ok());
     assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
-    assert_eq!(10, counter.load(SeqCst));
+    assert!(10 <= counter.load(SeqCst));
 }

 fn test_lock_non_blocking(directory: &mut dyn Directory) {

View File

@@ -29,8 +29,9 @@ pub use self::segment_writer::SegmentWriter;
 /// Alias for the default merge policy, which is the `LogMergePolicy`.
 pub type DefaultMergePolicy = LogMergePolicy;

+#[cfg(feature = "mmap")]
 #[cfg(test)]
-mod tests {
+mod tests_mmap {
     use crate::schema::{self, Schema};
     use crate::{Index, Term};
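A note on the attribute stacking used here: multiple #[cfg] attributes on one item AND together (equivalent to #[cfg(all(test, feature = "mmap"))]), so the renamed module only builds when running tests with the mmap feature enabled. A minimal sketch, assuming the crate's Cargo.toml declares an "mmap" feature:

    // Compiled only when *both* conditions hold, e.g. `cargo test --features mmap`.
    #[cfg(feature = "mmap")]
    #[cfg(test)]
    mod tests_mmap {
        #[test]
        fn builds_only_with_mmap_and_tests() {}
    }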

View File

@@ -1012,4 +1012,12 @@ mod tests {
             DOC_COUNT as usize
         );
     }
+
+    #[test]
+    fn test_validate_checksum() {
+        let index_path = tempfile::tempdir().expect("dir");
+        let schema = Schema::builder().build();
+        let index = Index::create_in_dir(&index_path, schema).expect("index");
+        assert!(index.validate_checksum().unwrap().is_empty());
+    }
 }

View File

@@ -4,19 +4,6 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 use std::ops::Deref;
 use std::ops::DerefMut;

-fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
-    if let Some(first) = it.next() {
-        let mut prev = first;
-        for doc in it {
-            if doc < prev {
-                return false;
-            }
-            prev = doc;
-        }
-    }
-    true
-}
-
 /// Takes a term_scorers sorted by their current doc() and a threshold and returns
 /// Returns (pivot_len, pivot_ord) defined as follows:
 /// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
@@ -55,37 +42,12 @@ fn find_pivot_doc(
     Some((before_pivot_len, pivot_len, pivot_doc))
 }

-struct TermScorerWithMaxScore<'a> {
-    scorer: &'a mut TermScorer,
-    max_score: Score,
-}
-
-impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
-    fn from(scorer: &'a mut TermScorer) -> Self {
-        let max_score = scorer.max_score();
-        TermScorerWithMaxScore { scorer, max_score }
-    }
-}
-
-impl<'a> Deref for TermScorerWithMaxScore<'a> {
-    type Target = TermScorer;
-    fn deref(&self) -> &Self::Target {
-        self.scorer
-    }
-}
-
-impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.scorer
-    }
-}
-
 // Before and after calling this method, scorers need to be sorted by their `.doc()`.
 fn block_max_was_too_low_advance_one_scorer(
     scorers: &mut Vec<TermScorerWithMaxScore>,
     pivot_len: usize,
 ) {
-    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
     let mut scorer_to_seek = pivot_len - 1;
     let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
     for scorer_ord in (0..pivot_len - 1).rev() {
@@ -102,6 +64,7 @@ fn block_max_was_too_low_advance_one_scorer(
     }
     scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
     restore_ordering(scorers, scorer_to_seek);
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
 }

 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
@@ -177,12 +140,12 @@ pub fn block_wand(
         .map(TermScorerWithMaxScore::from)
         .collect();
     scorers.sort_by_key(|scorer| scorer.doc());
-    loop {
     // At this point we need to ensure that the scorers are sorted!
     debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
-        if let Some((before_pivot_len, pivot_len, pivot_doc)) =
+    while let Some((before_pivot_len, pivot_len, pivot_doc)) =
         find_pivot_doc(&scorers[..], threshold)
     {
+        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
         debug_assert_ne!(pivot_doc, TERMINATED);
         debug_assert!(before_pivot_len < pivot_len);
@@ -229,12 +192,47 @@ pub fn block_wand(
         }
         // let's advance all of the scorers that are currently positioned on the pivot.
         advance_all_scorers_on_pivot(&mut scorers, pivot_len);
-        } else {
-            return;
-        }
     }
 }

+struct TermScorerWithMaxScore<'a> {
+    scorer: &'a mut TermScorer,
+    max_score: Score,
+}
+
+impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
+    fn from(scorer: &'a mut TermScorer) -> Self {
+        let max_score = scorer.max_score();
+        TermScorerWithMaxScore { scorer, max_score }
+    }
+}
+
+impl<'a> Deref for TermScorerWithMaxScore<'a> {
+    type Target = TermScorer;
+    fn deref(&self) -> &Self::Target {
+        self.scorer
+    }
+}
+
+impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.scorer
+    }
+}
+
+fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
+    if let Some(first) = it.next() {
+        let mut prev = first;
+        for doc in it {
+            if doc < prev {
+                return false;
+            }
+            prev = doc;
+        }
+    }
+    true
+}
+
 #[cfg(test)]
 mod tests {
     use crate::query::score_combiner::SumCombiner;
@@ -248,17 +246,21 @@ mod tests {
     use std::iter;

     struct Float(Score);
+
     impl Eq for Float {}
+
     impl PartialEq for Float {
         fn eq(&self, other: &Self) -> bool {
             self.cmp(&other) == Ordering::Equal
         }
     }
+
     impl PartialOrd for Float {
         fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
             Some(self.cmp(other))
         }
     }
+
     impl Ord for Float {
         fn cmp(&self, other: &Self) -> Ordering {
             other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
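The control-flow rewrite above in miniature: `while let` folds the former `loop { if let ... else { return; } }` scaffolding into one construct with identical behavior. A toy stand-in for the find_pivot_doc loop:

    fn main() {
        let mut pivots = vec![3, 2, 1];

        // Before: loop { if let Some(p) = pivots.pop() { ... } else { return; } }
        // After: `while let`, which exits as soon as the pattern stops matching.
        while let Some(pivot) = pivots.pop() {
            println!("processing pivot doc {}", pivot);
        }
    }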

View File

@@ -4,7 +4,7 @@ use crate::docset::DocSet;
 use crate::postings::SegmentPostings;
 use crate::query::bm25::BM25Weight;
 use crate::query::explanation::does_not_match;
-use crate::query::weight::{for_each_pruning_scorer, for_each_scorer};
+use crate::query::weight::for_each_scorer;
 use crate::query::Weight;
 use crate::query::{Explanation, Scorer};
 use crate::schema::IndexRecordOption;
@@ -73,8 +73,8 @@ impl Weight for TermWeight {
         reader: &SegmentReader,
         callback: &mut dyn FnMut(DocId, Score) -> Score,
     ) -> crate::Result<()> {
-        let mut scorer = self.scorer(reader, 1.0)?;
-        for_each_pruning_scorer(&mut scorer, threshold, callback);
+        let scorer = self.specialized_scorer(reader, 1.0)?;
+        crate::query::boolean_query::block_wand(vec![scorer], threshold, callback);
         Ok(())
     }
 }
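This hunk implements commit e04f47e922 ("Using block wand for term queries too."): a lone term scorer is handed to block_wand as a one-element union, so top-k pruning can skip whole postings blocks whose precomputed max score cannot beat the current threshold. A toy rendering of that skip logic, with invented data and types rather than tantivy's API:

    // Each block of postings stores the best score it can possibly produce.
    struct Block {
        max_score: f32,
        scores: Vec<f32>,
    }

    fn main() {
        let blocks = vec![
            Block { max_score: 0.4, scores: vec![0.1, 0.4] },
            Block { max_score: 2.0, scores: vec![1.5, 2.0] },
        ];
        let mut threshold = 0.5_f32;
        for block in &blocks {
            if block.max_score <= threshold {
                continue; // skip decoding: nothing in here can enter the top-k
            }
            for &score in &block.scores {
                if score > threshold {
                    threshold = score; // stand-in for pushing into the top-k heap
                }
            }
        }
        assert_eq!(threshold, 2.0);
    }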

View File

@@ -138,8 +138,10 @@ impl InnerIndexReader {
                 .collect::<crate::Result<_>>()?
         };
         let schema = self.index.schema();
-        let searchers = (0..self.num_searchers)
-            .map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone()))
+        let searchers = std::iter::repeat_with(|| {
+            Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())
+        })
+        .take(self.num_searchers)
         .collect();
         self.searcher_pool.publish_new_generation(searchers);
         Ok(())
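The iterator idiom adopted here, in isolation: std::iter::repeat_with invokes its closure lazily once per element taken, which states the intent ("n fresh searchers") more directly than mapping over a discarded range counter. A minimal sketch with a stand-in constructor:

    fn main() {
        let num_searchers = 3;
        // String::from stands in for Searcher::new(...): any per-call constructor.
        let searchers: Vec<String> = std::iter::repeat_with(|| String::from("searcher"))
            .take(num_searchers)
            .collect();
        assert_eq!(searchers.len(), 3);
    }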

View File

@@ -1,5 +1,5 @@
-use crate::schema::IntOptions;
 use crate::schema::TextOptions;
+use crate::schema::{is_valid_field_name, IntOptions};
 use crate::schema::FieldType;

 use serde::de::{self, MapAccess, Visitor};
@@ -24,6 +24,7 @@ impl FieldEntry {
     /// Creates a new u64 field entry in the schema, given
     /// a name, and some options.
     pub fn new_text(field_name: String, text_options: TextOptions) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::Str(text_options),
@@ -33,6 +34,7 @@ impl FieldEntry {
     /// Creates a new u64 field entry in the schema, given
     /// a name, and some options.
     pub fn new_u64(field_name: String, field_type: IntOptions) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::U64(field_type),
@@ -42,6 +44,7 @@ impl FieldEntry {
     /// Creates a new i64 field entry in the schema, given
     /// a name, and some options.
     pub fn new_i64(field_name: String, field_type: IntOptions) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::I64(field_type),
@@ -51,6 +54,7 @@ impl FieldEntry {
     /// Creates a new f64 field entry in the schema, given
     /// a name, and some options.
     pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::F64(field_type),
@@ -60,6 +64,7 @@ impl FieldEntry {
     /// Creates a new date field entry in the schema, given
     /// a name, and some options.
     pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::Date(field_type),
@@ -68,6 +73,7 @@ impl FieldEntry {
     /// Creates a field entry for a facet.
     pub fn new_facet(field_name: String) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::HierarchicalFacet,
@@ -76,6 +82,7 @@ impl FieldEntry {
     /// Creates a field entry for a bytes field
     pub fn new_bytes(field_name: String) -> FieldEntry {
+        assert!(is_valid_field_name(&field_name));
         FieldEntry {
             name: field_name,
             field_type: FieldType::Bytes,
@@ -268,6 +275,12 @@ mod tests {
     use crate::schema::TEXT;
     use serde_json;

+    #[test]
+    #[should_panic]
+    fn test_invalid_field_name_should_panic() {
+        FieldEntry::new_text("-hello".to_string(), TEXT);
+    }
+
     #[test]
     fn test_json_serialization() {
         let field_value = FieldEntry::new_text(String::from("title"), TEXT);

View File

@@ -149,14 +149,16 @@ pub use self::int_options::IntOptions;
 use once_cell::sync::Lazy;
 use regex::Regex;

+/// Regular expression representing the restriction on a valid field names.
+pub const FIELD_NAME_PATTERN: &'static str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#;
+
 /// Validator for a potential `field_name`.
 /// Returns true iff the name can be use for a field name.
 ///
 /// A field name must start by a letter `[a-zA-Z]`.
 /// The other characters can be any alphanumic character `[a-ZA-Z0-9]` or `_`.
 pub fn is_valid_field_name(field_name: &str) -> bool {
-    static FIELD_NAME_PTN: Lazy<Regex> =
-        Lazy::new(|| Regex::new("^[a-zA-Z][_a-zA-Z0-9]*$").unwrap());
+    static FIELD_NAME_PTN: Lazy<Regex> = Lazy::new(|| Regex::new(FIELD_NAME_PATTERN).unwrap());
     FIELD_NAME_PTN.is_match(field_name)
 }
@@ -170,6 +172,11 @@ mod tests {
     assert!(is_valid_field_name("text"));
     assert!(is_valid_field_name("text0"));
     assert!(!is_valid_field_name("0text"));
+    assert!(is_valid_field_name("field-name"));
+    assert!(is_valid_field_name("field_name"));
+    assert!(!is_valid_field_name("field!name"));
+    assert!(!is_valid_field_name("-fieldname"));
+    assert!(is_valid_field_name("_fieldname"));
     assert!(!is_valid_field_name(""));
     assert!(!is_valid_field_name("シャボン玉"));
     assert!(is_valid_field_name("my_text_field"));