mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Compare commits
3 Commits
0.10.1
...
python-bin
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a7c579f5c9 | ||
|
|
f2e546bdff | ||
|
|
efd1af1325 |
@@ -3,6 +3,14 @@ Tantivy 0.11.0
|
|||||||
|
|
||||||
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
|
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
|
||||||
|
|
||||||
|
Tantivy 0.10.1
|
||||||
|
=====================
|
||||||
|
|
||||||
|
- Closes #544. A few users experienced problems with the directory watching system.
|
||||||
|
Avoid watching the mmap directory until someone actually creates a reader that uses
|
||||||
|
this functionality.
|
||||||
|
|
||||||
|
|
||||||
Tantivy 0.10.0
|
Tantivy 0.10.0
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.10.0"
|
version = "0.10.1"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ use crate::Result;
|
|||||||
use crate::Score;
|
use crate::Score;
|
||||||
use crate::SegmentLocalId;
|
use crate::SegmentLocalId;
|
||||||
use crate::SegmentReader;
|
use crate::SegmentReader;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
/// The Top Score Collector keeps track of the K documents
|
/// The Top Score Collector keeps track of the K documents
|
||||||
/// sorted by their score.
|
/// sorted by their score.
|
||||||
@@ -68,6 +69,12 @@ use crate::SegmentReader;
|
|||||||
/// ```
|
/// ```
|
||||||
pub struct TopDocs(TopCollector<Score>);
|
pub struct TopDocs(TopCollector<Score>);
|
||||||
|
|
||||||
|
impl fmt::Debug for TopDocs {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "TopDocs({})", self.0.limit())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl TopDocs {
|
impl TopDocs {
|
||||||
/// Creates a top score collector, with a number of documents equal to "limit".
|
/// Creates a top score collector, with a number of documents equal to "limit".
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -204,7 +204,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
|
|||||||
/// Internally, tantivy only uses this API to detect new commits to implement the
|
/// Internally, tantivy only uses this API to detect new commits to implement the
|
||||||
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
|
/// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
|
||||||
/// `OnCommit` `ReloadPolicy` from working properly.
|
/// `OnCommit` `ReloadPolicy` from working properly.
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle;
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// DirectoryClone
|
/// DirectoryClone
|
||||||
|
|||||||
@@ -241,7 +241,7 @@ impl Directory for ManagedDirectory {
|
|||||||
self.directory.acquire_lock(lock)
|
self.directory.acquire_lock(lock)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
self.directory.watch(watch_callback)
|
self.directory.watch(watch_callback)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ impl InnerWatcherWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct WatcherWrapper {
|
struct WatcherWrapper {
|
||||||
inner: Arc<InnerWatcherWrapper>,
|
inner: Arc<InnerWatcherWrapper>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -231,7 +231,7 @@ struct MmapDirectoryInner {
|
|||||||
root_path: PathBuf,
|
root_path: PathBuf,
|
||||||
mmap_cache: RwLock<MmapCache>,
|
mmap_cache: RwLock<MmapCache>,
|
||||||
_temp_directory: Option<TempDir>,
|
_temp_directory: Option<TempDir>,
|
||||||
watcher: RwLock<WatcherWrapper>,
|
watcher: RwLock<Option<WatcherWrapper>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MmapDirectoryInner {
|
impl MmapDirectoryInner {
|
||||||
@@ -239,19 +239,36 @@ impl MmapDirectoryInner {
|
|||||||
root_path: PathBuf,
|
root_path: PathBuf,
|
||||||
temp_directory: Option<TempDir>,
|
temp_directory: Option<TempDir>,
|
||||||
) -> Result<MmapDirectoryInner, OpenDirectoryError> {
|
) -> Result<MmapDirectoryInner, OpenDirectoryError> {
|
||||||
let watch_wrapper = WatcherWrapper::new(&root_path)?;
|
|
||||||
let mmap_directory_inner = MmapDirectoryInner {
|
let mmap_directory_inner = MmapDirectoryInner {
|
||||||
root_path,
|
root_path,
|
||||||
mmap_cache: Default::default(),
|
mmap_cache: Default::default(),
|
||||||
_temp_directory: temp_directory,
|
_temp_directory: temp_directory,
|
||||||
watcher: RwLock::new(watch_wrapper),
|
watcher: RwLock::new(None),
|
||||||
};
|
};
|
||||||
Ok(mmap_directory_inner)
|
Ok(mmap_directory_inner)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
let mut wlock = self.watcher.write().unwrap();
|
// a lot of juggling here, to ensure we don't do anything that panics
|
||||||
wlock.watch(watch_callback)
|
// while the rwlock is held. That way we ensure that the rwlock cannot
|
||||||
|
// be poisoned.
|
||||||
|
//
|
||||||
|
// The downside is that we might create a watch wrapper that is not useful.
|
||||||
|
let need_initialization = self.watcher.read().unwrap().is_none();
|
||||||
|
if need_initialization {
|
||||||
|
let watch_wrapper = WatcherWrapper::new(&self.root_path)?;
|
||||||
|
let mut watch_wlock = self.watcher.write().unwrap();
|
||||||
|
// the watcher could have been initialized when we released the lock, and
|
||||||
|
// we do not want to lose the watched files that were set.
|
||||||
|
if watch_wlock.is_none() {
|
||||||
|
*watch_wlock = Some(watch_wrapper);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(watch_wrapper) = self.watcher.write().unwrap().as_mut() {
|
||||||
|
return Ok(watch_wrapper.watch(watch_callback));
|
||||||
|
} else {
|
||||||
|
unreachable!("At this point, watch wrapper is supposed to be initialized");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -514,7 +531,7 @@ impl Directory for MmapDirectory {
|
|||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
self.inner.watch(watch_callback)
|
self.inner.watch(watch_callback)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -193,7 +193,7 @@ impl Directory for RAMDirectory {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
|
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
|
||||||
self.fs.write().unwrap().watch(watch_callback)
|
Ok(self.fs.write().unwrap().watch(watch_callback))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ fn test_watch(directory: &mut dyn Directory) {
|
|||||||
thread::sleep(Duration::new(0, 10_000));
|
thread::sleep(Duration::new(0, 10_000));
|
||||||
assert_eq!(0, counter.load(Ordering::SeqCst));
|
assert_eq!(0, counter.load(Ordering::SeqCst));
|
||||||
|
|
||||||
let watch_handle = directory.watch(watch_callback);
|
let watch_handle = directory.watch(watch_callback).unwrap();
|
||||||
for i in 0..10 {
|
for i in 0..10 {
|
||||||
assert_eq!(i, counter.load(Ordering::SeqCst));
|
assert_eq!(i, counter.load(Ordering::SeqCst));
|
||||||
assert!(directory
|
assert!(directory
|
||||||
|
|||||||
@@ -171,16 +171,16 @@ pub use self::snippet::{Snippet, SnippetGenerator};
|
|||||||
mod docset;
|
mod docset;
|
||||||
pub use self::docset::{DocSet, SkipResult};
|
pub use self::docset::{DocSet, SkipResult};
|
||||||
|
|
||||||
|
pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
|
||||||
pub use crate::core::SegmentComponent;
|
pub use crate::core::SegmentComponent;
|
||||||
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
|
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
|
||||||
pub use crate::core::{InvertedIndexReader, SegmentReader};
|
pub use crate::core::{InvertedIndexReader, SegmentReader};
|
||||||
pub use crate::directory::Directory;
|
pub use crate::directory::Directory;
|
||||||
pub use crate::indexer::IndexWriter;
|
pub use crate::indexer::IndexWriter;
|
||||||
pub use crate::postings::Postings;
|
pub use crate::postings::Postings;
|
||||||
|
pub use crate::reader::LeasedItem;
|
||||||
pub use crate::schema::{Document, Term};
|
pub use crate::schema::{Document, Term};
|
||||||
|
|
||||||
pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
|
|
||||||
|
|
||||||
/// Expose the current version of tantivy, as well
|
/// Expose the current version of tantivy, as well
|
||||||
/// whether it was compiled with the simd compression.
|
/// whether it was compiled with the simd compression.
|
||||||
pub fn version() -> &'static str {
|
pub fn version() -> &'static str {
|
||||||
@@ -849,7 +849,8 @@ mod tests {
|
|||||||
let index = Index::create_in_ram(schema);
|
let index = Index::create_in_ram(schema);
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
|
let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
|
||||||
{
|
{
|
||||||
let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
|
let document =
|
||||||
|
doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
|
||||||
index_writer.add_document(document);
|
index_writer.add_document(document);
|
||||||
index_writer.commit().unwrap();
|
index_writer.commit().unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,42 +18,56 @@ use crate::schema::{FieldType, Term};
|
|||||||
use crate::tokenizer::TokenizerManager;
|
use crate::tokenizer::TokenizerManager;
|
||||||
use combine::Parser;
|
use combine::Parser;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::num::{ParseIntError, ParseFloatError};
|
use std::num::{ParseFloatError, ParseIntError};
|
||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
/// Possible error that may happen when parsing a query.
|
/// Possible error that may happen when parsing a query.
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq, Fail)]
|
||||||
pub enum QueryParserError {
|
pub enum QueryParserError {
|
||||||
/// Error in the query syntax
|
/// Error in the query syntax
|
||||||
|
#[fail(display = "Syntax Error")]
|
||||||
SyntaxError,
|
SyntaxError,
|
||||||
/// `FieldDoesNotExist(field_name: String)`
|
/// `FieldDoesNotExist(field_name: String)`
|
||||||
/// The query references a field that is not in the schema
|
/// The query references a field that is not in the schema
|
||||||
|
#[fail(display = "Field does not exist: '{:?}'", _0)]
|
||||||
FieldDoesNotExist(String),
|
FieldDoesNotExist(String),
|
||||||
/// The query contains a term for a `u64` or `i64`-field, but the value
|
/// The query contains a term for a `u64` or `i64`-field, but the value
|
||||||
/// is neither.
|
/// is neither.
|
||||||
|
#[fail(display = "Expected a valid integer: '{:?}'", _0)]
|
||||||
ExpectedInt(ParseIntError),
|
ExpectedInt(ParseIntError),
|
||||||
/// The query contains a term for a `f64`-field, but the value
|
/// The query contains a term for a `f64`-field, but the value
|
||||||
/// is not a f64.
|
/// is not a f64.
|
||||||
|
#[fail(display = "Expected a valid float: '{:?}'", _0)]
|
||||||
ExpectedFloat(ParseFloatError),
|
ExpectedFloat(ParseFloatError),
|
||||||
/// Queries that are only "excluding" (e.g. -title:pop) are forbidden.
|
/// Queries that are only "excluding" (e.g. -title:pop) are forbidden.
|
||||||
|
#[fail(display = "Invalid query: Only excluding terms given")]
|
||||||
AllButQueryForbidden,
|
AllButQueryForbidden,
|
||||||
/// If no default field is declared, running a query without any
|
/// If no default field is declared, running a query without any
|
||||||
/// field specified is forbidden.
|
/// field specified is forbidden.
|
||||||
|
#[fail(display = "No default field declared and no field specified in query")]
|
||||||
NoDefaultFieldDeclared,
|
NoDefaultFieldDeclared,
|
||||||
/// The field searched for is not declared
|
/// The field searched for is not declared
|
||||||
/// as indexed in the schema.
|
/// as indexed in the schema.
|
||||||
|
#[fail(display = "The field '{:?}' is not declared as indexed", _0)]
|
||||||
FieldNotIndexed(String),
|
FieldNotIndexed(String),
|
||||||
/// A phrase query was requested for a field that does not
|
/// A phrase query was requested for a field that does not
|
||||||
/// have any positions indexed.
|
/// have any positions indexed.
|
||||||
|
#[fail(display = "The field '{:?}' does not have positions indexed", _0)]
|
||||||
FieldDoesNotHavePositionsIndexed(String),
|
FieldDoesNotHavePositionsIndexed(String),
|
||||||
/// The tokenizer for the given field is unknown
|
/// The tokenizer for the given field is unknown
|
||||||
/// The two argument strings are the name of the field, the name of the tokenizer
|
/// The two argument strings are the name of the field, the name of the tokenizer
|
||||||
|
#[fail(
|
||||||
|
display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
|
||||||
|
_1, _0 // `_0` is the field name and `_1` the tokenizer name
|
||||||
|
)]
|
||||||
UnknownTokenizer(String, String),
|
UnknownTokenizer(String, String),
|
||||||
/// The query contains a range query with a phrase as one of the bounds.
|
/// The query contains a range query with a phrase as one of the bounds.
|
||||||
/// Only terms can be used as bounds.
|
/// Only terms can be used as bounds.
|
||||||
|
#[fail(display = "A range query cannot have a phrase as one of the bounds")]
|
||||||
RangeMustNotHavePhrase,
|
RangeMustNotHavePhrase,
|
||||||
/// The format for the date field is not RFC 3339 compliant.
|
/// The format for the date field is not RFC 3339 compliant.
|
||||||
|
#[fail(display = "The date field has an invalid format")]
|
||||||
DateFormatError(chrono::ParseError),
|
DateFormatError(chrono::ParseError),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
mod pool;
|
mod pool;
|
||||||
|
|
||||||
use self::pool::{LeasedItem, Pool};
|
pub use self::pool::LeasedItem;
|
||||||
|
use self::pool::Pool;
|
||||||
use crate::core::Segment;
|
use crate::core::Segment;
|
||||||
use crate::directory::Directory;
|
use crate::directory::Directory;
|
||||||
use crate::directory::WatchHandle;
|
use crate::directory::WatchHandle;
|
||||||
@@ -85,7 +86,10 @@ impl IndexReaderBuilder {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let watch_handle = inner_reader_arc.index.directory().watch(Box::new(callback));
|
let watch_handle = inner_reader_arc
|
||||||
|
.index
|
||||||
|
.directory()
|
||||||
|
.watch(Box::new(callback))?;
|
||||||
watch_handle_opt = Some(watch_handle);
|
watch_handle_opt = Some(watch_handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -123,6 +123,10 @@ impl<T> Pool<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A LeasedItem holds an object borrowed from a Pool.
|
||||||
|
///
|
||||||
|
/// Upon drop, the object is automatically returned
|
||||||
|
/// into the pool.
|
||||||
pub struct LeasedItem<T> {
|
pub struct LeasedItem<T> {
|
||||||
gen_item: Option<GenerationItem<T>>,
|
gen_item: Option<GenerationItem<T>>,
|
||||||
recycle_queue: Arc<Queue<GenerationItem<T>>>,
|
recycle_queue: Arc<Queue<GenerationItem<T>>>,
|
||||||
|
|||||||
@@ -261,6 +261,24 @@ impl Schema {
|
|||||||
NamedFieldDocument(field_map)
|
NamedFieldDocument(field_map)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a named doc into a document.
|
||||||
|
pub fn from_named_doc(
|
||||||
|
&self,
|
||||||
|
named_doc: NamedFieldDocument,
|
||||||
|
) -> Result<Document, DocParsingError> {
|
||||||
|
let mut doc = Document::default();
|
||||||
|
for (field_name, field_values) in named_doc.0 {
|
||||||
|
if let Some(field) = self.get_field(&field_name) {
|
||||||
|
for field_value in field_values {
|
||||||
|
doc.add(FieldValue::new(field, field_value));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(DocParsingError::NoSuchFieldInSchema(field_name.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(doc)
|
||||||
|
}
|
||||||
|
|
||||||
/// Encode the schema in JSON.
|
/// Encode the schema in JSON.
|
||||||
///
|
///
|
||||||
/// Encoding a document cannot fail.
|
/// Encoding a document cannot fail.
|
||||||
@@ -279,7 +297,6 @@ impl Schema {
|
|||||||
};
|
};
|
||||||
DocParsingError::NotJSON(doc_json_sample)
|
DocParsingError::NotJSON(doc_json_sample)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let mut doc = Document::default();
|
let mut doc = Document::default();
|
||||||
for (field_name, json_value) in json_obj.iter() {
|
for (field_name, json_value) in json_obj.iter() {
|
||||||
match self.get_field(field_name) {
|
match self.get_field(field_name) {
|
||||||
@@ -360,13 +377,16 @@ impl<'de> Deserialize<'de> for Schema {
|
|||||||
|
|
||||||
/// Error that may happen when deserializing
|
/// Error that may happen when deserializing
|
||||||
/// a document from JSON.
|
/// a document from JSON.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Fail)]
|
||||||
pub enum DocParsingError {
|
pub enum DocParsingError {
|
||||||
/// The payload given is not valid JSON.
|
/// The payload given is not valid JSON.
|
||||||
|
#[fail(display = "The provided string is not valid JSON")]
|
||||||
NotJSON(String),
|
NotJSON(String),
|
||||||
/// One of the value nodes could not be parsed.
|
/// One of the value nodes could not be parsed.
|
||||||
|
#[fail(display = "The field '{:?}' could not be parsed: {:?}", _0, _1)]
|
||||||
ValueError(String, ValueParsingError),
|
ValueError(String, ValueParsingError),
|
||||||
/// The json-document contains a field that is not declared in the schema.
|
/// The json-document contains a field that is not declared in the schema.
|
||||||
|
#[fail(display = "The json-document contains an unknown field: {:?}", _0)]
|
||||||
NoSuchFieldInSchema(String),
|
NoSuchFieldInSchema(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user