Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2025-12-29 05:22:55 +00:00)

Compare commits: 0.20...addconvers (12 commits)
Commits (SHA1; author and date columns were not captured):
- 1d72745bf5
- 8199aa7de7
- 657f0cd3bd
- 3a82ef2560
- 3546e7fc63
- 862f367f9e
- 14137d91c4
- 924fc70cb5
- 07023948aa
- 0cb53207ec
- 17c783b4db
- 7220df8a09
@@ -27,6 +27,7 @@ Tantivy 0.20 [Unreleased]
- [**breaking**] Drop JSON support on intermediate agg result (we use postcard as format in `quickwit` to send intermediate results) [#1992](https://github.com/quickwit-oss/tantivy/issues/1992) (@PSeitz)
- Set memory limit in bytes for aggregations after which they abort (Previously there was only the bucket limit) [#1942](https://github.com/quickwit-oss/tantivy/issues/1942) [#1957](https://github.com/quickwit-oss/tantivy/issues/1957) (@PSeitz)
- Add support for u64,i64,f64 fields in term aggregation [#1883](https://github.com/quickwit-oss/tantivy/issues/1883) (@PSeitz)
- Allow histogram bounds to be passed as Rfc3339 [#2076](https://github.com/quickwit-oss/tantivy/issues/2076) (@PSeitz)
- Add count, min, max, and sum aggregations [#1794](https://github.com/quickwit-oss/tantivy/issues/1794) (@guilload)
- Switch to Aggregation without serde_untagged => better deserialization errors. [#2003](https://github.com/quickwit-oss/tantivy/issues/2003) (@PSeitz)
- Switch to ms in histogram for date type (ES compatibility) [#2045](https://github.com/quickwit-oss/tantivy/issues/2045) (@PSeitz)
@@ -39,10 +40,10 @@ Tantivy 0.20 [Unreleased]
- Perf: Fetch blocks of vals in aggregation for all cardinality [#1950](https://github.com/quickwit-oss/tantivy/issues/1950) (@PSeitz)
- `Searcher` with disabled scoring via `EnableScoring::Disabled` [#1780](https://github.com/quickwit-oss/tantivy/issues/1780) (@shikhar)
- Enable tokenizer on json fields [#2053](https://github.com/quickwit-oss/tantivy/issues/2053) (@PSeitz)
- Enforcing "NOT" and "-" queries consistency in UserInputAst [#1609](https://github.com/quickwit-oss/tantivy/issues/1609) (@Denis Bazhenov)
- Enforcing "NOT" and "-" queries consistency in UserInputAst [#1609](https://github.com/quickwit-oss/tantivy/issues/1609) (@bazhenov)
- Faster indexing
  - Refactor tokenization pipeline to use GATs [#1924](https://github.com/quickwit-oss/tantivy/issues/1924) (@trinity-1686a)
  - Faster term hash map [#1940](https://github.com/quickwit-oss/tantivy/issues/1940) (@PSeitz)
  - Faster term hash map [#2058](https://github.com/quickwit-oss/tantivy/issues/2058) [#1940](https://github.com/quickwit-oss/tantivy/issues/1940) (@PSeitz)
  - Refactor vint [#2010](https://github.com/quickwit-oss/tantivy/issues/2010) (@PSeitz)
- Faster search
  - Work in batches of docs on the SegmentCollector (Only for cases without score for now) [#1937](https://github.com/quickwit-oss/tantivy/issues/1937) (@PSeitz)
@@ -51,7 +52,8 @@ Tantivy 0.20 [Unreleased]
- Make BM25 scoring more flexible [#1855](https://github.com/quickwit-oss/tantivy/issues/1855) (@alexcole)
- Switch fs2 to fs4 as it is now unmaintained and does not support illumos [#1944](https://github.com/quickwit-oss/tantivy/issues/1944) (@Toasterson)
- Made BooleanWeight and BoostWeight public [#1991](https://github.com/quickwit-oss/tantivy/issues/1991) (@fulmicoton)
- Make index compatible with virtual drives on Windows [#1843](https://github.com/quickwit-oss/tantivy/issues/1843) (@Yukun Guo)
- Make index compatible with virtual drives on Windows [#1843](https://github.com/quickwit-oss/tantivy/issues/1843) (@gyk)
- Add stop words for Hungarian language [#2069](https://github.com/quickwit-oss/tantivy/issues/2069) (@tnxbutno)
- Auto downgrade index record option, instead of vint error [#1857](https://github.com/quickwit-oss/tantivy/issues/1857) (@PSeitz)
- Enable range query on fast field for u64 compatible types [#1762](https://github.com/quickwit-oss/tantivy/issues/1762) (@PSeitz) [#1876]
- sstable
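To make a few of these aggregation entries concrete, here is a minimal, hedged sketch of a request that exercises a numeric term aggregation and one of the new metric aggregations, built from JSON the same way the tests later in this diff do. The field name `rating` is invented for illustration and assumes a numeric fast field in the schema.

```rust
use tantivy::aggregation::agg_req::Aggregations;

fn example_agg_request() -> serde_json::Result<Aggregations> {
    // `terms` now accepts u64/i64/f64 fast fields (#1883); `max` is one of the
    // count/min/max/sum metric aggregations added in #1794.
    serde_json::from_value(serde_json::json!({
        "rating_terms": {
            "terms": { "field": "rating" }
        },
        "rating_max": {
            "max": { "field": "rating" }
        }
    }))
}
```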
@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.20.0"
version = "0.20.2"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -12,6 +12,7 @@ readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2021"
rust-version = "1.62"
exclude = ["benches/*.json", "benches/*.txt"]

[dependencies]
oneshot = "0.1.5"
@@ -60,6 +60,8 @@ impl AggregationLimits {
    /// *bucket_limit*
    /// Limits the maximum number of buckets returned from an aggregation request.
    /// bucket_limit will default to `DEFAULT_BUCKET_LIMIT` (65000)
    ///
    /// Note: The returned instance contains an Arc shared counter to track memory consumption.
    pub fn new(memory_limit: Option<u64>, bucket_limit: Option<u32>) -> Self {
        Self {
            memory_consumption: Default::default(),
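A minimal usage sketch of the constructor documented above, assuming the type is reachable at `tantivy::aggregation::AggregationLimits`: cap aggregation memory at roughly 500 MB and keep the default bucket limit.

```rust
use tantivy::aggregation::AggregationLimits;

fn limits_with_memory_cap() -> AggregationLimits {
    // Memory limit in bytes (new in #1942/#1957); `None` keeps the default
    // bucket limit of 65000 buckets.
    AggregationLimits::new(Some(500_000_000), None)
}
```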
@@ -74,14 +74,14 @@ impl AggregationWithAccessor {
            ColumnType::I64,
            ColumnType::U64,
            ColumnType::F64,
            ColumnType::Bytes,
            ColumnType::Str,
            // ColumnType::Bytes Unsupported
            // ColumnType::Bool Unsupported
            // ColumnType::IpAddr Unsupported
            // ColumnType::DateTime Unsupported
        ];
        let mut columns =
            get_all_ff_reader(reader, field_name, Some(&allowed_column_types))?;
            get_all_ff_reader_or_empty(reader, field_name, Some(&allowed_column_types))?;
        let first = columns.pop().unwrap();
        accessor2 = columns.pop();
        first
@@ -177,7 +177,7 @@ fn get_ff_reader(
/// Get all fast field reader or empty as default.
///
/// Is guaranteed to return at least one column.
fn get_all_ff_reader(
fn get_all_ff_reader_or_empty(
    reader: &SegmentReader,
    field_name: &str,
    allowed_column_types: Option<&[ColumnType]>,
@@ -428,6 +428,12 @@ impl SegmentTermCollector {
        field_type: ColumnType,
        accessor_idx: usize,
    ) -> crate::Result<Self> {
        if field_type == ColumnType::Bytes || field_type == ColumnType::Bool {
            return Err(TantivyError::InvalidArgument(format!(
                "terms aggregation is not supported for column type {:?}",
                field_type
            )));
        }
        let term_buckets = TermBuckets::default();

        if let Some(custom_order) = req.order.as_ref() {
@@ -1500,4 +1506,41 @@ mod tests {

        Ok(())
    }

    #[test]
    fn terms_aggregation_bytes() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let bytes_field = schema_builder.add_bytes_field("bytes", FAST);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
            index_writer.set_merge_policy(Box::new(NoMergePolicy));
            index_writer.add_document(doc!(
                bytes_field => vec![1,2,3],
            ))?;
            index_writer.commit()?;
        }

        let agg_req: Aggregations = serde_json::from_value(json!({
            "my_texts": {
                "terms": {
                    "field": "bytes"
                },
            }
        }))
        .unwrap();

        let res = exec_request_with_query(agg_req, &index, None)?;

        // TODO: Returning an error would be better instead of an empty result, since this is not a
        // JSON field
        assert_eq!(
            res["my_texts"]["buckets"][0]["key"],
            serde_json::Value::Null
        );
        assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
        assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);

        Ok(())
    }
}
@@ -161,6 +161,21 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
///     ]);
/// }
///
/// {
///     let mut facet_collector = FacetCollector::for_field("facet");
///     facet_collector.add_facet("/");
///     let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
///
///     // This lists all of the facet counts
///     let facets: Vec<(&Facet, u64)> = facet_counts
///         .get("/")
///         .collect();
///     assert_eq!(facets, vec![
///         (&Facet::from("/category"), 4),
///         (&Facet::from("/lang"), 4)
///     ]);
/// }
///
/// Ok(())
/// }
/// # assert!(example().is_ok());
@@ -285,6 +300,9 @@ fn is_child_facet(parent_facet: &[u8], possible_child_facet: &[u8]) -> bool {
    if !possible_child_facet.starts_with(parent_facet) {
        return false;
    }
    if parent_facet.is_empty() {
        return true;
    }
    possible_child_facet.get(parent_facet.len()).copied() == Some(0u8)
}

@@ -789,6 +807,15 @@ mod tests {
        );
        Ok(())
    }

    #[test]
    fn is_child_facet() {
        assert!(super::is_child_facet(&b"foo"[..], &b"foo\0bar"[..]));
        assert!(super::is_child_facet(&b""[..], &b"foo\0bar"[..]));
        assert!(super::is_child_facet(&b""[..], &b"foo"[..]));
        assert!(!super::is_child_facet(&b"foo\0bar"[..], &b"foo"[..]));
        assert!(!super::is_child_facet(&b"foo"[..], &b"foobar\0baz"[..]));
    }
}

#[cfg(all(test, feature = "unstable"))]
@@ -212,12 +212,12 @@ pub fn convert_to_fast_value_and_get_term(
            DateTime::from_utc(dt_utc),
        ));
    }
    if let Ok(u64_val) = str::parse::<u64>(phrase) {
        return Some(set_fastvalue_and_get_term(json_term_writer, u64_val));
    }
    if let Ok(i64_val) = str::parse::<i64>(phrase) {
        return Some(set_fastvalue_and_get_term(json_term_writer, i64_val));
    }
    if let Ok(u64_val) = str::parse::<u64>(phrase) {
        return Some(set_fastvalue_and_get_term(json_term_writer, u64_val));
    }
    if let Ok(f64_val) = str::parse::<f64>(phrase) {
        return Some(set_fastvalue_and_get_term(json_term_writer, f64_val));
    }
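The reordering above (trying i64 before u64) matches the inference order stated in the lib.rs test further down in this diff: date, i64, u64, f64, bool. A standalone sketch of that order, with the date step omitted for brevity, shows why small integers now resolve to i64 while values above `i64::MAX` still fall through to u64:

```rust
fn infer_numeric_type(phrase: &str) -> String {
    // Date parsing would be attempted first in the real code; omitted here.
    if let Ok(v) = phrase.parse::<i64>() {
        return format!("i64:{v}");
    }
    if let Ok(v) = phrase.parse::<u64>() {
        // Only values larger than i64::MAX reach this branch.
        return format!("u64:{v}");
    }
    if let Ok(v) = phrase.parse::<f64>() {
        return format!("f64:{v}");
    }
    format!("str:{phrase}")
}

fn main() {
    assert_eq!(infer_numeric_type("5"), "i64:5");
    assert_eq!(infer_numeric_type("10000000000000000000"), "u64:10000000000000000000");
    assert_eq!(infer_numeric_type("-5.2"), "f64:-5.2");
}
```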
@@ -1,10 +1,10 @@
use std::collections::HashMap;
use std::fmt;
use std::fs::{self, File, OpenOptions};
use std::io::{self, BufWriter, Read, Seek, Write};
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock, Weak};
use std::{fmt, result};

use common::StableDeref;
use fs4::FileExt;
@@ -21,6 +21,7 @@ use crate::directory::{
    AntiCallToken, Directory, DirectoryLock, FileHandle, Lock, OwnedBytes, TerminatingWrite,
    WatchCallback, WatchHandle, WritePtr,
};
#[cfg(unix)]
use crate::Advice;

pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
@@ -33,10 +34,7 @@ pub(crate) fn make_io_err(msg: String) -> io::Error {

/// Returns `None` iff the file exists, can be read, but is empty (and hence
/// cannot be mmapped)
fn open_mmap(
    full_path: &Path,
    madvice_opt: Option<Advice>,
) -> result::Result<Option<Mmap>, OpenReadError> {
fn open_mmap(full_path: &Path) -> Result<Option<Mmap>, OpenReadError> {
    let file = File::open(full_path).map_err(|io_err| {
        if io_err.kind() == io::ErrorKind::NotFound {
            OpenReadError::FileDoesNotExist(full_path.to_path_buf())
@@ -59,9 +57,7 @@ fn open_mmap(
        .map(Some)
        .map_err(|io_err| OpenReadError::wrap_io_error(io_err, full_path.to_path_buf()))
    }?;
    if let (Some(mmap), Some(madvice)) = (&mmap_opt, madvice_opt) {
        let _ = mmap.advise(madvice);
    }

    Ok(mmap_opt)
}
@@ -83,18 +79,25 @@ pub struct CacheInfo {
struct MmapCache {
    counters: CacheCounters,
    cache: HashMap<PathBuf, WeakArcBytes>,
    #[cfg(unix)]
    madvice_opt: Option<Advice>,
}

impl MmapCache {
    fn new(madvice_opt: Option<Advice>) -> MmapCache {
    fn new() -> MmapCache {
        MmapCache {
            counters: CacheCounters::default(),
            cache: HashMap::default(),
            madvice_opt,
            #[cfg(unix)]
            madvice_opt: None,
        }
    }

    #[cfg(unix)]
    fn set_advice(&mut self, madvice: Advice) {
        self.madvice_opt = Some(madvice);
    }

    fn get_info(&self) -> CacheInfo {
        let paths: Vec<PathBuf> = self.cache.keys().cloned().collect();
        CacheInfo {
@@ -115,6 +118,16 @@ impl MmapCache {
        }
    }

    fn open_mmap_impl(&self, full_path: &Path) -> Result<Option<Mmap>, OpenReadError> {
        let mmap_opt = open_mmap(full_path)?;
        #[cfg(unix)]
        if let (Some(mmap), Some(madvice)) = (mmap_opt.as_ref(), self.madvice_opt) {
            // We ignore madvise errors.
            let _ = mmap.advise(madvice);
        }
        Ok(mmap_opt)
    }

    // Returns None if the file exists but as a len of 0 (and hence is not mmappable).
    fn get_mmap(&mut self, full_path: &Path) -> Result<Option<ArcBytes>, OpenReadError> {
        if let Some(mmap_weak) = self.cache.get(full_path) {
@@ -125,7 +138,7 @@ impl MmapCache {
        }
        self.cache.remove(full_path);
        self.counters.miss += 1;
        let mmap_opt = open_mmap(full_path, self.madvice_opt)?;
        let mmap_opt = self.open_mmap_impl(full_path)?;
        Ok(mmap_opt.map(|mmap| {
            let mmap_arc: ArcBytes = Arc::new(mmap);
            let mmap_weak = Arc::downgrade(&mmap_arc);
@@ -160,13 +173,9 @@ struct MmapDirectoryInner {
}

impl MmapDirectoryInner {
    fn new(
        root_path: PathBuf,
        temp_directory: Option<TempDir>,
        madvice_opt: Option<Advice>,
    ) -> MmapDirectoryInner {
    fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectoryInner {
        MmapDirectoryInner {
            mmap_cache: RwLock::new(MmapCache::new(madvice_opt)),
            mmap_cache: RwLock::new(MmapCache::new()),
            _temp_directory: temp_directory,
            watcher: FileWatcher::new(&root_path.join(*META_FILEPATH)),
            root_path,
@@ -185,12 +194,8 @@ impl fmt::Debug for MmapDirectory {
}

impl MmapDirectory {
    fn new(
        root_path: PathBuf,
        temp_directory: Option<TempDir>,
        madvice_opt: Option<Advice>,
    ) -> MmapDirectory {
        let inner = MmapDirectoryInner::new(root_path, temp_directory, madvice_opt);
    fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectory {
        let inner = MmapDirectoryInner::new(root_path, temp_directory);
        MmapDirectory {
            inner: Arc::new(inner),
        }
@@ -206,29 +211,33 @@ impl MmapDirectory {
        Ok(MmapDirectory::new(
            tempdir.path().to_path_buf(),
            Some(tempdir),
            None,
        ))
    }

    /// Opens a MmapDirectory in a directory, with a given access pattern.
    ///
    /// This is only supported on unix platforms.
    #[cfg(unix)]
    pub fn open_with_madvice(
        directory_path: impl AsRef<Path>,
        madvice: Advice,
    ) -> Result<MmapDirectory, OpenDirectoryError> {
        let dir = Self::open_impl_to_avoid_monomorphization(directory_path.as_ref())?;
        dir.inner.mmap_cache.write().unwrap().set_advice(madvice);
        Ok(dir)
    }

    /// Opens a MmapDirectory in a directory.
    ///
    /// Returns an error if the `directory_path` does not
    /// exist or if it is not a directory.
    pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<MmapDirectory, OpenDirectoryError> {
        Self::open_with_access_pattern_impl(directory_path.as_ref(), None)
    pub fn open(directory_path: impl AsRef<Path>) -> Result<MmapDirectory, OpenDirectoryError> {
        Self::open_impl_to_avoid_monomorphization(directory_path.as_ref())
    }

    /// Opens a MmapDirectory in a directory, with a given access pattern.
    pub fn open_with_madvice<P: AsRef<Path>>(
        directory_path: P,
        madvice: Advice,
    ) -> Result<MmapDirectory, OpenDirectoryError> {
        Self::open_with_access_pattern_impl(directory_path.as_ref(), Some(madvice))
    }

    fn open_with_access_pattern_impl(
    #[inline(never)]
    fn open_impl_to_avoid_monomorphization(
        directory_path: &Path,
        madvice_opt: Option<Advice>,
    ) -> Result<MmapDirectory, OpenDirectoryError> {
        if !directory_path.exists() {
            return Err(OpenDirectoryError::DoesNotExist(PathBuf::from(
@@ -256,7 +265,7 @@ impl MmapDirectory {
            directory_path,
        )));
        }
        Ok(MmapDirectory::new(canonical_path, None, madvice_opt))
        Ok(MmapDirectory::new(canonical_path, None))
    }

    /// Joins a relative_path to the directory `root_path`
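A hedged usage sketch of the `open_with_madvice` API shown above (unix only). The import paths assume `Advice` is re-exported at the crate root, as the `src/lib.rs` hunk later in this diff does, and that the error type lives at `tantivy::directory::error::OpenDirectoryError`.

```rust
#[cfg(unix)]
fn open_index_dir(
    path: &std::path::Path,
) -> Result<tantivy::directory::MmapDirectory, tantivy::directory::error::OpenDirectoryError> {
    use tantivy::directory::MmapDirectory;
    use tantivy::Advice;

    // Hint the kernel that the index files will be accessed with a random pattern.
    MmapDirectory::open_with_madvice(path, Advice::Random)
}
```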
@@ -365,7 +374,7 @@ pub(crate) fn atomic_write(path: &Path, content: &[u8]) -> io::Result<()> {
}

impl Directory for MmapDirectory {
    fn get_file_handle(&self, path: &Path) -> result::Result<Arc<dyn FileHandle>, OpenReadError> {
    fn get_file_handle(&self, path: &Path) -> Result<Arc<dyn FileHandle>, OpenReadError> {
        debug!("Open Read {:?}", path);
        let full_path = self.resolve_path(path);

@@ -388,7 +397,7 @@ impl Directory for MmapDirectory {

    /// Any entry associated with the path in the mmap will be
    /// removed before the file is deleted.
    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
    fn delete(&self, path: &Path) -> Result<(), DeleteError> {
        let full_path = self.resolve_path(path);
        fs::remove_file(full_path).map_err(|e| {
            if e.kind() == io::ErrorKind::NotFound {
91 src/lib.rs
@@ -191,6 +191,7 @@ pub use crate::schema::{DateOptions, DateTimePrecision, Document, Term};
/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 5;

#[cfg(unix)]
pub use memmap2::Advice;

/// Structure version for the index.
@@ -301,6 +302,7 @@ pub struct DocAddress {
#[cfg(test)]
pub mod tests {
    use common::{BinarySerializable, FixedSize};
    use query_grammar::{UserInputAst, UserInputLeaf, UserInputLiteral};
    use rand::distributions::{Bernoulli, Uniform};
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
@@ -856,6 +858,95 @@ pub mod tests {
        Ok(())
    }

    #[test]
    fn test_searcher_on_json_field_with_type_inference() {
        // When indexing and searching a json value, we infer its type.
        // This test aims to check that the type inference is consistent between indexing and search.
        // Inference order is date, i64, u64, f64, bool.
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", STORED | TEXT);
        let schema = schema_builder.build();
        let json_val: serde_json::Map<String, serde_json::Value> = serde_json::from_str(
            r#"{
                "signed": 2,
                "float": 2.0,
                "unsigned": 10000000000000,
                "date": "1985-04-12T23:20:50.52Z",
                "bool": true
            }"#,
        )
        .unwrap();
        let doc = doc!(json_field=>json_val.clone());
        let index = Index::create_in_ram(schema.clone());
        let mut writer = index.writer_for_tests().unwrap();
        writer.add_document(doc).unwrap();
        writer.commit().unwrap();
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        let get_doc_ids = |user_input_literal: UserInputLiteral| {
            let query_parser = crate::query::QueryParser::for_index(&index, Vec::new());
            let query = query_parser
                .build_query_from_user_input_ast(UserInputAst::from(UserInputLeaf::Literal(
                    user_input_literal,
                )))
                .unwrap();
            searcher
                .search(&query, &TEST_COLLECTOR_WITH_SCORE)
                .map(|topdocs| topdocs.docs().to_vec())
                .unwrap()
        };
        {
            let user_input_literal = UserInputLiteral {
                field_name: Some("json.signed".to_string()),
                phrase: "2".to_string(),
                delimiter: crate::query_grammar::Delimiter::None,
                slop: 0,
                prefix: false,
            };
            assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
        }
        {
            let user_input_literal = UserInputLiteral {
                field_name: Some("json.float".to_string()),
                phrase: "2.0".to_string(),
                delimiter: crate::query_grammar::Delimiter::None,
                slop: 0,
                prefix: false,
            };
            assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
        }
        {
            let user_input_literal = UserInputLiteral {
                field_name: Some("json.date".to_string()),
                phrase: "1985-04-12T23:20:50.52Z".to_string(),
                delimiter: crate::query_grammar::Delimiter::None,
                slop: 0,
                prefix: false,
            };
            assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
        }
        {
            let user_input_literal = UserInputLiteral {
                field_name: Some("json.unsigned".to_string()),
                phrase: "10000000000000".to_string(),
                delimiter: crate::query_grammar::Delimiter::None,
                slop: 0,
                prefix: false,
            };
            assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
        }
        {
            let user_input_literal = UserInputLiteral {
                field_name: Some("json.bool".to_string()),
                phrase: "true".to_string(),
                delimiter: crate::query_grammar::Delimiter::None,
                slop: 0,
                prefix: false,
            };
            assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]);
        }
    }
    #[test]
    fn test_doc_macro() {
        let mut schema_builder = Schema::builder();
@@ -1203,7 +1203,7 @@ mod test {
    fn test_json_field_possibly_a_number() {
        test_parse_query_to_logical_ast_helper(
            "json.titi:5",
            r#"(Term(field=14, type=Json, path=titi, type=U64, 5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#,
            r#"(Term(field=14, type=Json, path=titi, type=I64, 5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#,
            true,
        );
        test_parse_query_to_logical_ast_helper(
@@ -1211,6 +1211,11 @@
            r#"(Term(field=14, type=Json, path=titi, type=I64, -5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#, //< Yes this is a bit weird after going through the tokenizer we lose the "-".
            true,
        );
        test_parse_query_to_logical_ast_helper(
            "json.titi:10000000000000000000",
            r#"(Term(field=14, type=Json, path=titi, type=U64, 10000000000000000000) Term(field=14, type=Json, path=titi, type=Str, "10000000000000000000"))"#,
            true,
        );
        test_parse_query_to_logical_ast_helper(
            "json.titi:-5.2",
            r#"(Term(field=14, type=Json, path=titi, type=F64, -5.2) "[(0, Term(field=14, type=Json, path=titi, type=Str, "5")), (1, Term(field=14, type=Json, path=titi, type=Str, "2"))]")"#,
@@ -1260,7 +1265,7 @@ mod test {
    fn test_json_default() {
        test_query_to_logical_ast_with_default_json(
            "titi:4",
            "(Term(field=14, type=Json, path=titi, type=U64, 4) Term(field=14, type=Json, \
            "(Term(field=14, type=Json, path=titi, type=I64, 4) Term(field=14, type=Json, \
             path=titi, type=Str, \"4\"))",
            false,
        );
@@ -1282,7 +1287,7 @@ mod test {
        for conjunction in [false, true] {
            test_query_to_logical_ast_with_default_json(
                "json:4",
                r#"(Term(field=14, type=Json, path=, type=U64, 4) Term(field=14, type=Json, path=, type=Str, "4"))"#,
                r#"(Term(field=14, type=Json, path=, type=I64, 4) Term(field=14, type=Json, path=, type=Str, "4"))"#,
                conjunction,
            );
        }
@@ -63,8 +63,15 @@ pub trait Tokenizer: 'static + Clone + Send + Sync {
/// Simple wrapper of `Box<dyn TokenStream + 'a>`.
pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>);

impl<'a> From<BoxTokenStream<'a>> for Box<dyn TokenStream + 'a> {
    fn from(token_stream: BoxTokenStream<'a>) -> Self {
        token_stream.0
    }
}

impl<'a, T> From<T> for BoxTokenStream<'a>
where T: TokenStream + 'a
where
    T: TokenStream + 'a,
{
    fn from(token_stream: T) -> BoxTokenStream<'a> {
        BoxTokenStream(Box::new(token_stream))
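A minimal, hedged illustration of the blanket `From` impl above: any concrete `TokenStream` can be type-erased with `.into()`. The toy stream below yields no tokens and exists only to show the conversion; it is not part of the diff.

```rust
use tantivy::tokenizer::{BoxTokenStream, Token, TokenStream};

/// A toy stream that never produces a token.
#[derive(Default)]
struct EmptyStream {
    token: Token,
}

impl TokenStream for EmptyStream {
    fn advance(&mut self) -> bool {
        false // no tokens to yield
    }
    fn token(&self) -> &Token {
        &self.token
    }
    fn token_mut(&mut self) -> &mut Token {
        &mut self.token
    }
}

fn boxed() -> BoxTokenStream<'static> {
    // Uses the `impl<'a, T: TokenStream + 'a> From<T> for BoxTokenStream<'a>` shown above.
    EmptyStream::default().into()
}
```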