mirror of https://github.com/quickwit-oss/tantivy.git (synced 2025-12-28 04:52:55 +00:00)

Compare commits: 0.10.1...python-bin (3 commits)
| Author | SHA1 | Date |
|---|---|---|
| | a7c579f5c9 | |
| | f2e546bdff | |
| | efd1af1325 | |
@@ -3,6 +3,14 @@ Tantivy 0.11.0
+- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
+
+Tantivy 0.10.1
+=====================
+
+- Closes #544. A few users experienced problems with the directory watching system.
+  Avoid watching the mmap directory until someone effectively creates a reader that uses
+  this functionality.

 Tantivy 0.10.0
 =====================
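A minimal sketch of declaring the new field type, assuming `add_f64_field` mirrors the existing `add_u64_field`/`add_i64_field` builders (the `price` field name is illustrative):

```rust
use tantivy::schema::{Schema, FAST};

fn main() {
    let mut schema_builder = Schema::builder();
    // Assumed to mirror `add_u64_field`/`add_i64_field`; "price" is a made-up name.
    let _price = schema_builder.add_f64_field("price", FAST);
    let _schema = schema_builder.build();
}
```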
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.10.0"
+version = "0.10.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -13,6 +13,7 @@ use crate::Result;
 use crate::Score;
 use crate::SegmentLocalId;
 use crate::SegmentReader;
+use std::fmt;

 /// The Top Score Collector keeps track of the K documents
 /// sorted by their score.
@@ -68,6 +69,12 @@ use crate::SegmentReader;
 /// ```
 pub struct TopDocs(TopCollector<Score>);

+impl fmt::Debug for TopDocs {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "TopDocs({})", self.0.limit())
+    }
+}
+
 impl TopDocs {
     /// Creates a top score collector, with a number of documents equal to "limit".
     ///
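The hand-written `Debug` impl above surfaces the collector's limit. A small usage sketch:

```rust
use tantivy::collector::TopDocs;

fn main() {
    let collector = TopDocs::with_limit(10);
    // The impl above renders the limit: this prints "TopDocs(10)".
    println!("{:?}", collector);
}
```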
@@ -204,7 +204,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Internally, tantivy only uses this API to detect new commits to implement the
     /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
     /// `OnCommit` `ReloadPolicy` to work properly.
-    fn watch(&self, watch_callback: WatchCallback) -> WatchHandle;
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
 }

 /// DirectoryClone
@@ -241,7 +241,7 @@ impl Directory for ManagedDirectory {
         self.directory.acquire_lock(lock)
     }

-    fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
         self.directory.watch(watch_callback)
     }
 }
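With the trait change above, `Directory::watch` is fallible everywhere. A caller-side sketch, using `RAMDirectory` for illustration and the boxed-closure `WatchCallback` seen in the reader diff further down:

```rust
use tantivy::directory::{Directory, RAMDirectory};

fn main() -> tantivy::Result<()> {
    let directory = RAMDirectory::create();
    // `watch` now returns a Result, so the handle is obtained with `?`.
    let _handle = directory.watch(Box::new(|| println!("index changed")))?;
    Ok(())
}
```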
@@ -161,7 +161,7 @@ impl InnerWatcherWrapper {
 }

 #[derive(Clone)]
-pub(crate) struct WatcherWrapper {
+struct WatcherWrapper {
     inner: Arc<InnerWatcherWrapper>,
 }
@@ -231,7 +231,7 @@ struct MmapDirectoryInner {
     root_path: PathBuf,
     mmap_cache: RwLock<MmapCache>,
     _temp_directory: Option<TempDir>,
-    watcher: RwLock<WatcherWrapper>,
+    watcher: RwLock<Option<WatcherWrapper>>,
 }

 impl MmapDirectoryInner {
@@ -239,19 +239,36 @@ impl MmapDirectoryInner {
         root_path: PathBuf,
         temp_directory: Option<TempDir>,
     ) -> Result<MmapDirectoryInner, OpenDirectoryError> {
-        let watch_wrapper = WatcherWrapper::new(&root_path)?;
         let mmap_directory_inner = MmapDirectoryInner {
             root_path,
             mmap_cache: Default::default(),
             _temp_directory: temp_directory,
-            watcher: RwLock::new(watch_wrapper),
+            watcher: RwLock::new(None),
         };
         Ok(mmap_directory_inner)
     }

-    fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
-        let mut wlock = self.watcher.write().unwrap();
-        wlock.watch(watch_callback)
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
+        // a lot of juggling here, to ensure we don't do anything that panics
+        // while the rwlock is held. That way we ensure that the rwlock cannot
+        // be poisoned.
+        //
+        // The downside is that we might create a watch wrapper that is not useful.
+        let need_initialization = self.watcher.read().unwrap().is_none();
+        if need_initialization {
+            let watch_wrapper = WatcherWrapper::new(&self.root_path)?;
+            let mut watch_wlock = self.watcher.write().unwrap();
+            // the watcher could have been initialized when we released the lock, and
+            // we do not want to lose the watched files that were set.
+            if watch_wlock.is_none() {
+                *watch_wlock = Some(watch_wrapper);
+            }
+        }
+        if let Some(watch_wrapper) = self.watcher.write().unwrap().as_mut() {
+            return Ok(watch_wrapper.watch(watch_callback));
+        } else {
+            unreachable!("At this point, watch wrapper is supposed to be initialized");
+        }
     }
 }
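The rewritten `watch` initializes the watcher lazily and re-checks the slot under the write lock so a concurrent initialization is not overwritten, while keeping the fallible `WatcherWrapper::new` call outside any lock so a panic cannot poison it. The same pattern in isolation, as a generic sketch (all names are illustrative):

```rust
use std::sync::RwLock;

struct Lazy<T> {
    slot: RwLock<Option<T>>,
}

impl<T> Lazy<T> {
    fn get_or_try_init<E>(&self, init: impl FnOnce() -> Result<T, E>) -> Result<(), E> {
        // Fast path: cheap read-lock check.
        if self.slot.read().unwrap().is_none() {
            // Fallible (and potentially panicking) work happens with no lock
            // held, so the RwLock cannot be poisoned by it.
            let value = init()?;
            let mut wlock = self.slot.write().unwrap();
            // Another thread may have filled the slot in the meantime;
            // keep its value rather than overwrite it.
            if wlock.is_none() {
                *wlock = Some(value);
            }
        }
        Ok(())
    }
}

fn main() {
    let lazy: Lazy<String> = Lazy { slot: RwLock::new(None) };
    lazy.get_or_try_init(|| Ok::<_, ()>("initialized".to_string())).unwrap();
    assert!(lazy.slot.read().unwrap().is_some());
}
```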
@@ -514,7 +531,7 @@ impl Directory for MmapDirectory {
         })))
     }

-    fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
         self.inner.watch(watch_callback)
     }
 }
@@ -193,7 +193,7 @@ impl Directory for RAMDirectory {
         Ok(())
     }

-    fn watch(&self, watch_callback: WatchCallback) -> WatchHandle {
-        self.fs.write().unwrap().watch(watch_callback)
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
+        Ok(self.fs.write().unwrap().watch(watch_callback))
     }
 }
@@ -121,7 +121,7 @@ fn test_watch(directory: &mut dyn Directory) {
     thread::sleep(Duration::new(0, 10_000));
     assert_eq!(0, counter.load(Ordering::SeqCst));

-    let watch_handle = directory.watch(watch_callback);
+    let watch_handle = directory.watch(watch_callback).unwrap();
     for i in 0..10 {
         assert_eq!(i, counter.load(Ordering::SeqCst));
         assert!(directory
@@ -171,16 +171,16 @@ pub use self::snippet::{Snippet, SnippetGenerator};
 mod docset;
 pub use self::docset::{DocSet, SkipResult};

+pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
 pub use crate::core::SegmentComponent;
 pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
 pub use crate::core::{InvertedIndexReader, SegmentReader};
 pub use crate::directory::Directory;
 pub use crate::indexer::IndexWriter;
 pub use crate::postings::Postings;
+pub use crate::reader::LeasedItem;
 pub use crate::schema::{Document, Term};

-pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
-
 /// Expose the current version of tantivy, as well
 /// whether it was compiled with the simd compression.
 pub fn version() -> &'static str {
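The re-exported conversions are what let fast fields store `f64` values as `u64` internally. A quick sketch of the round-trip, assuming (as "the same way i64 does" in the changelog suggests) that the mapping preserves ordering:

```rust
use tantivy::{f64_to_u64, u64_to_f64};

fn main() {
    // Round-trips exactly...
    assert_eq!(u64_to_f64(f64_to_u64(3.25)), 3.25);
    // ...and is assumed order-preserving, like the i64 mapping.
    assert!(f64_to_u64(0.5) < f64_to_u64(2.5));
}
```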
@@ -849,7 +849,8 @@ mod tests {
         let index = Index::create_in_ram(schema);
         let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
         {
-            let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
+            let document =
+                doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
             index_writer.add_document(document);
             index_writer.commit().unwrap();
         }
@@ -18,42 +18,56 @@ use crate::schema::{FieldType, Term};
 use crate::tokenizer::TokenizerManager;
 use combine::Parser;
 use std::borrow::Cow;
-use std::num::{ParseIntError, ParseFloatError};
+use std::num::{ParseFloatError, ParseIntError};
 use std::ops::Bound;
 use std::str::FromStr;

 /// Possible error that may happen when parsing a query.
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Fail)]
 pub enum QueryParserError {
     /// Error in the query syntax
+    #[fail(display = "Syntax Error")]
     SyntaxError,
     /// `FieldDoesNotExist(field_name: String)`
     /// The query references a field that is not in the schema
+    #[fail(display = "File does not exists: '{:?}'", _0)]
     FieldDoesNotExist(String),
     /// The query contains a term for a `u64` or `i64`-field, but the value
     /// is neither.
+    #[fail(display = "Expected a valid integer: '{:?}'", _0)]
     ExpectedInt(ParseIntError),
     /// The query contains a term for a `f64`-field, but the value
     /// is not a f64.
+    #[fail(display = "Invalid query: Only excluding terms given")]
     ExpectedFloat(ParseFloatError),
     /// It is forbidden queries that are only "excluding". (e.g. -title:pop)
+    #[fail(display = "Invalid query: Only excluding terms given")]
     AllButQueryForbidden,
     /// If no default field is declared, running a query without any
     /// field specified is forbbidden.
+    #[fail(display = "No default field declared and no field specified in query")]
     NoDefaultFieldDeclared,
     /// The field searched for is not declared
     /// as indexed in the schema.
+    #[fail(display = "The field '{:?}' is not declared as indexed", _0)]
     FieldNotIndexed(String),
     /// A phrase query was requested for a field that does not
     /// have any positions indexed.
+    #[fail(display = "The field '{:?}' does not have positions indexed", _0)]
     FieldDoesNotHavePositionsIndexed(String),
     /// The tokenizer for the given field is unknown
     /// The two argument strings are the name of the field, the name of the tokenizer
+    #[fail(
+        display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
+        _0, _1
+    )]
     UnknownTokenizer(String, String),
     /// The query contains a range query with a phrase as one of the bounds.
     /// Only terms can be used as bounds.
+    #[fail(display = "A range query cannot have a phrase as one of the bounds")]
     RangeMustNotHavePhrase,
     /// The format for the date field is not RFC 3339 compliant.
+    #[fail(display = "The date field has an invalid format")]
     DateFormatError(chrono::ParseError),
 }
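Deriving `Fail` (from the `failure` crate) generates a `Display` impl from the `#[fail(display = ...)]` attributes. A self-contained sketch of the mechanism on a toy enum, assuming a `failure` dependency with its default `derive` feature:

```rust
use failure::Fail;

#[derive(Debug, Fail)]
enum ToyParserError {
    #[fail(display = "Syntax Error")]
    SyntaxError,
    #[fail(display = "Expected a valid integer: '{:?}'", _0)]
    ExpectedInt(String),
}

fn main() {
    // The derive produces the Display impl used by `{}` here.
    println!("{}", ToyParserError::SyntaxError);
    println!("{}", ToyParserError::ExpectedInt("abc".to_owned()));
}
```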
@@ -1,6 +1,7 @@
 mod pool;

-use self::pool::{LeasedItem, Pool};
+pub use self::pool::LeasedItem;
+use self::pool::Pool;
 use crate::core::Segment;
 use crate::directory::Directory;
 use crate::directory::WatchHandle;
@@ -85,7 +86,10 @@ impl IndexReaderBuilder {
             );
         }
     };
-        let watch_handle = inner_reader_arc.index.directory().watch(Box::new(callback));
+        let watch_handle = inner_reader_arc
+            .index
+            .directory()
+            .watch(Box::new(callback))?;
         watch_handle_opt = Some(watch_handle);
     }
 }
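Since only the `OnCommit` reload policy registers a watch, the lazy initialization above means a `Manual` reader never touches the file-system watcher at all. A sketch of the now-fallible builder call (API names as of this version of the crate):

```rust
use tantivy::schema::Schema;
use tantivy::{Index, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let index = Index::create_in_ram(Schema::builder().build());
    // `OnCommit` is what ends up calling `Directory::watch`; the `?`
    // now surfaces watcher-creation failures to the caller.
    let _reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommit)
        .try_into()?;
    Ok(())
}
```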
@@ -123,6 +123,10 @@ impl<T> Pool<T> {
     }
 }

+/// A LeasedItem holds an object borrowed from a Pool.
+///
+/// Upon drop, the object is automatically returned
+/// into the pool.
 pub struct LeasedItem<T> {
     gen_item: Option<GenerationItem<T>>,
     recycle_queue: Arc<Queue<GenerationItem<T>>>,
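`LeasedItem` is now documented and re-exported: a searcher is leased from the pool and handed back on drop. A usage sketch:

```rust
use tantivy::schema::Schema;
use tantivy::{Index, LeasedItem, Searcher};

fn main() -> tantivy::Result<()> {
    let index = Index::create_in_ram(Schema::builder().build());
    let reader = index.reader()?;
    // The annotation uses the `LeasedItem` re-export added in this change.
    let searcher: LeasedItem<Searcher> = reader.searcher();
    println!("{} docs", searcher.num_docs());
    Ok(())
} // the lease is dropped here and the searcher returns to the pool
```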
@@ -261,6 +261,24 @@ impl Schema {
         NamedFieldDocument(field_map)
     }

+    /// Converts a named doc into a document.
+    pub fn from_named_doc(
+        &self,
+        named_doc: NamedFieldDocument,
+    ) -> Result<Document, DocParsingError> {
+        let mut doc = Document::default();
+        for (field_name, field_values) in named_doc.0 {
+            if let Some(field) = self.get_field(&field_name) {
+                for field_value in field_values {
+                    doc.add(FieldValue::new(field, field_value));
+                }
+            } else {
+                return Err(DocParsingError::NoSuchFieldInSchema(field_name.clone()));
+            }
+        }
+        Ok(doc)
+    }
+
     /// Encode the schema in JSON.
     ///
     /// Encoding a document cannot fail.
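`from_named_doc` is the inverse of the existing `to_named_doc`. A round-trip sketch (field name and text are illustrative):

```rust
use tantivy::doc;
use tantivy::schema::{Schema, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let schema = schema_builder.build();

    let original = doc!(title => "The Old Man and the Sea");
    // Document -> NamedFieldDocument -> Document round-trip.
    let named = schema.to_named_doc(&original);
    let restored = schema.from_named_doc(named).unwrap();
    assert_eq!(schema.to_json(&original), schema.to_json(&restored));
}
```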
@@ -279,7 +297,6 @@
             };
             DocParsingError::NotJSON(doc_json_sample)
         })?;
-
         let mut doc = Document::default();
         for (field_name, json_value) in json_obj.iter() {
             match self.get_field(field_name) {
@@ -360,13 +377,16 @@ impl<'de> Deserialize<'de> for Schema {

 /// Error that may happen when deserializing
 /// a document from JSON.
-#[derive(Debug)]
+#[derive(Debug, Fail)]
 pub enum DocParsingError {
     /// The payload given is not valid JSON.
+    #[fail(display = "The provided string is not valid JSON")]
     NotJSON(String),
     /// One of the value node could not be parsed.
+    #[fail(display = "The field '{:?}' could not be parsed: {:?}", _0, _1)]
     ValueError(String, ValueParsingError),
     /// The json-document contains a field that is not declared in the schema.
+    #[fail(display = "The json-document contains an unknown field: {:?}", _0)]
     NoSuchFieldInSchema(String),
 }
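With `Fail` derived, the `DocParsingError` values returned by `parse_document` can be printed directly. A small sketch:

```rust
use tantivy::schema::Schema;

fn main() {
    let schema = Schema::builder().build();
    // `parse_document` returns Result<Document, DocParsingError>; the derived
    // Display makes the error printable with `{}`.
    if let Err(err) = schema.parse_document("not json") {
        println!("{}", err); // "The provided string is not valid JSON"
    }
}
```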