mirror of https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Reduce typo count. (#2510)
22 CHANGELOG.md
@@ -29,8 +29,8 @@ Tantivy 0.23 will be backwards compatible with indices created with v0.22 and v0
 - modify fastfield range query heuristic [#2375](https://github.com/quickwit-oss/tantivy/pull/2375)(@trinity-1686a)
 - add FastFieldRangeQuery for explicit range queries on fast field (for `RangeQuery` it is autodetected) [#2477](https://github.com/quickwit-oss/tantivy/pull/2477)(@PSeitz)
 
-- add format backwards-compatibiliy tests [#2485](https://github.com/quickwit-oss/tantivy/pull/2485)(@PSeitz)
+- add format backwards-compatibility tests [#2485](https://github.com/quickwit-oss/tantivy/pull/2485)(@PSeitz)
-- add columnar format compatibiliy tests [#2433](https://github.com/quickwit-oss/tantivy/pull/2433)(@PSeitz)
+- add columnar format compatibility tests [#2433](https://github.com/quickwit-oss/tantivy/pull/2433)(@PSeitz)
 - Improved snippet ranges algorithm [#2474](https://github.com/quickwit-oss/tantivy/pull/2474)(@gezihuzi)
 - make find_field_with_default return json fields without path [#2476](https://github.com/quickwit-oss/tantivy/pull/2476)(@trinity-1686a)
 - feat(query): Make `BooleanQuery` support `minimum_number_should_match` [#2405](https://github.com/quickwit-oss/tantivy/pull/2405)(@LebranceBW)
@@ -74,7 +74,7 @@ Tantivy 0.22 will be able to read indices created with Tantivy 0.21.
 - Fix bug that can cause `get_docids_for_value_range` to panic. [#2295](https://github.com/quickwit-oss/tantivy/pull/2295)(@fulmicoton)
 - Avoid 1 document indices by increase min memory to 15MB for indexing [#2176](https://github.com/quickwit-oss/tantivy/pull/2176)(@PSeitz)
 - Fix merge panic for JSON fields [#2284](https://github.com/quickwit-oss/tantivy/pull/2284)(@PSeitz)
-- Fix bug occuring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton)
+- Fix bug occurring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton)
 - Fix empty DateHistogram gap bug [#2183](https://github.com/quickwit-oss/tantivy/pull/2183)(@PSeitz)
 - Fix range query end check (fields with less than 1 value per doc are affected) [#2226](https://github.com/quickwit-oss/tantivy/pull/2226)(@PSeitz)
 - Handle exclusive out of bounds ranges on fastfield range queries [#2174](https://github.com/quickwit-oss/tantivy/pull/2174)(@PSeitz)
@@ -92,7 +92,7 @@ Tantivy 0.22 will be able to read indices created with Tantivy 0.21.
 - Support to deserialize f64 from string [#2311](https://github.com/quickwit-oss/tantivy/pull/2311)(@PSeitz)
 - Add a top_hits aggregator [#2198](https://github.com/quickwit-oss/tantivy/pull/2198)(@ditsuke)
 - Support bool type in term aggregation [#2318](https://github.com/quickwit-oss/tantivy/pull/2318)(@PSeitz)
-- Support ip adresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz)
+- Support ip addresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz)
 - Support date type in term aggregation [#2172](https://github.com/quickwit-oss/tantivy/pull/2172)(@PSeitz)
 - Support escaped dot when addressing field [#2250](https://github.com/quickwit-oss/tantivy/pull/2250)(@PSeitz)
 
@@ -182,7 +182,7 @@ Tantivy 0.20
 - Add PhrasePrefixQuery [#1842](https://github.com/quickwit-oss/tantivy/issues/1842) (@trinity-1686a)
 - Add `coerce` option for text and numbers types (convert the value instead of returning an error during indexing) [#1904](https://github.com/quickwit-oss/tantivy/issues/1904) (@PSeitz)
 - Add regex tokenizer [#1759](https://github.com/quickwit-oss/tantivy/issues/1759)(@mkleen)
-- Move tokenizer API to seperate crate. Having a seperate crate with a stable API will allow us to use tokenizers with different tantivy versions. [#1767](https://github.com/quickwit-oss/tantivy/issues/1767) (@PSeitz)
+- Move tokenizer API to separate crate. Having a separate crate with a stable API will allow us to use tokenizers with different tantivy versions. [#1767](https://github.com/quickwit-oss/tantivy/issues/1767) (@PSeitz)
 - **Columnar crate**: New fast field handling (@fulmicoton @PSeitz) [#1806](https://github.com/quickwit-oss/tantivy/issues/1806)[#1809](https://github.com/quickwit-oss/tantivy/issues/1809)
 - Support for fast fields with optional values. Previously tantivy supported only single-valued and multi-value fast fields. The encoding of optional fast fields is now very compact.
 - Fast field Support for JSON (schemaless fast fields). Support multiple types on the same column. [#1876](https://github.com/quickwit-oss/tantivy/issues/1876) (@fulmicoton)
@@ -229,13 +229,13 @@ Tantivy 0.20
 - Auto downgrade index record option, instead of vint error [#1857](https://github.com/quickwit-oss/tantivy/issues/1857) (@PSeitz)
 - Enable range query on fast field for u64 compatible types [#1762](https://github.com/quickwit-oss/tantivy/issues/1762) (@PSeitz) [#1876]
 - sstable
-- Isolating sstable and stacker in independant crates. [#1718](https://github.com/quickwit-oss/tantivy/issues/1718) (@fulmicoton)
+- Isolating sstable and stacker in independent crates. [#1718](https://github.com/quickwit-oss/tantivy/issues/1718) (@fulmicoton)
 - New sstable format [#1943](https://github.com/quickwit-oss/tantivy/issues/1943)[#1953](https://github.com/quickwit-oss/tantivy/issues/1953) (@trinity-1686a)
-- Use DeltaReader directly to implement Dictionnary::ord_to_term [#1928](https://github.com/quickwit-oss/tantivy/issues/1928) (@trinity-1686a)
+- Use DeltaReader directly to implement Dictionary::ord_to_term [#1928](https://github.com/quickwit-oss/tantivy/issues/1928) (@trinity-1686a)
-- Use DeltaReader directly to implement Dictionnary::term_ord [#1925](https://github.com/quickwit-oss/tantivy/issues/1925) (@trinity-1686a)
+- Use DeltaReader directly to implement Dictionary::term_ord [#1925](https://github.com/quickwit-oss/tantivy/issues/1925) (@trinity-1686a)
-- Add seperate tokenizer manager for fast fields [#2019](https://github.com/quickwit-oss/tantivy/issues/2019) (@PSeitz)
+- Add separate tokenizer manager for fast fields [#2019](https://github.com/quickwit-oss/tantivy/issues/2019) (@PSeitz)
 - Make construction of LevenshteinAutomatonBuilder for FuzzyTermQuery instances lazy. [#1756](https://github.com/quickwit-oss/tantivy/issues/1756) (@adamreichold)
-- Added support for madvise when opening an mmaped Index [#2036](https://github.com/quickwit-oss/tantivy/issues/2036) (@fulmicoton)
+- Added support for madvise when opening an mmapped Index [#2036](https://github.com/quickwit-oss/tantivy/issues/2036) (@fulmicoton)
 - Rename `DatePrecision` to `DateTimePrecision` [#2051](https://github.com/quickwit-oss/tantivy/issues/2051) (@guilload)
 - Query Parser
 - Quotation mark can now be used for phrase queries. [#2050](https://github.com/quickwit-oss/tantivy/issues/2050) (@fulmicoton)
@@ -274,7 +274,7 @@ Tantivy 0.19
 - Add support for phrase slop in query language [#1393](https://github.com/quickwit-oss/tantivy/pull/1393) (@saroh)
 - Aggregation
 - Add aggregation support for date type [#1693](https://github.com/quickwit-oss/tantivy/pull/1693)(@PSeitz)
-- Add support for keyed parameter in range and histgram aggregations [#1424](https://github.com/quickwit-oss/tantivy/pull/1424) (@k-yomo)
+- Add support for keyed parameter in range and histogram aggregations [#1424](https://github.com/quickwit-oss/tantivy/pull/1424) (@k-yomo)
 - Add aggregation bucket limit [#1363](https://github.com/quickwit-oss/tantivy/pull/1363) (@PSeitz)
 - Faster indexing
 - [#1610](https://github.com/quickwit-oss/tantivy/pull/1610) (@PSeitz)
2 TODO.txt
@@ -1,7 +1,7 @@
 Make schema_builder API fluent.
 fix doc serialization and prevent compression problems
 
-u64 , etc. shoudl return Resutl<Option> now that we support optional missing a column is really not an error
+u64 , etc. should return Result<Option> now that we support optional missing a column is really not an error
 remove fastfield codecs
 ditch the first_or_default trick. if it is still useful, improve its implementation.
 rename FastFieldReaders::open to load
@@ -31,7 +31,7 @@ restriction on 50% of the values (e.g. a 64-bit hash). On the other hand, a lot
 # Columnar format
 
 This columnar format may have more than one column (with different types) associated to the same `column_name` (see [Coercion rules](#coercion-rules) above).
-The `(column_name, columne_type)` couple however uniquely identifies a column.
+The `(column_name, column_type)` couple however uniquely identifies a column.
 That couple is serialized as a column `column_key`. The format of that key is:
 `[column_name][ZERO_BYTE][column_type_header: u8]`
 
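The `column_key` layout quoted above is simple enough to make concrete. A minimal, self-contained sketch; the `type_header` value here is hypothetical, the real mapping lives in tantivy's columnar crate:

```rust
/// Sketch of the `[column_name][ZERO_BYTE][column_type_header: u8]`
/// layout described above. The `type_header` byte is illustrative only.
fn column_key(column_name: &str, type_header: u8) -> Vec<u8> {
    let mut key = Vec::with_capacity(column_name.len() + 2);
    key.extend_from_slice(column_name.as_bytes());
    key.push(0u8); // ZERO_BYTE separates the name from the type header
    key.push(type_header);
    key
}

fn main() {
    // Two columns may share a name, but never a (name, type) couple.
    let k1 = column_key("price", 1);
    let k2 = column_key("price", 2);
    assert_ne!(k1, k2);
}
```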
@@ -10,7 +10,7 @@
 
 # Perf and Size
 * remove alloc in `ord_to_term`
-+ multivaued range queries restrat frm the beginning all of the time.
++ multivaued range queries restart from the beginning all of the time.
 * re-add ZSTD compression for dictionaries
 no systematic monotonic mapping
 consider removing multilinear
@@ -30,7 +30,7 @@ investigate if should have better errors? io::Error is overused at the moment.
 rename rank/select in unit tests
 Review the public API via cargo doc
 go through TODOs
-remove all doc_id occurences -> row_id
+remove all doc_id occurrences -> row_id
 use the rank & select naming in unit tests branch.
 multi-linear -> blockwise
 linear codec -> simply a multiplication for the index column
@@ -43,5 +43,5 @@ isolate u128_based and uniform naming
 # Other
 fix enhance column-cli
 
-# Santa claus
+# Santa Claus
 autodetect datetime ipaddr, plug customizable tokenizer.
@@ -173,7 +173,7 @@ mod tests {
 .into();
 let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
 let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
-panic!("Excpected a multivalued index")
+panic!("Expected a multivalued index")
 };
 let mut output = Vec::new();
 serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
@@ -211,7 +211,7 @@ mod tests {
 
 let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
 let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
-panic!("Excpected a multivalued index")
+panic!("Expected a multivalued index")
 };
 let mut output = Vec::new();
 serialize_multivalued_index(&start_index_iterable, &mut output).unwrap();
@@ -28,7 +28,7 @@ pub enum ColumnIndex {
 Full,
 Optional(OptionalIndex),
 /// In addition, at index num_rows, an extra value is added
-/// containing the overal number of values.
+/// containing the overall number of values.
 Multivalued(MultiValueIndex),
 }
 
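To make the `Multivalued` variant concrete: with a start-offset array that carries the extra trailing entry, row `i`'s values live at `start[i]..start[i + 1]`. An illustrative sketch, not tantivy's actual `MultiValueIndex` API:

```rust
/// Illustrative only: a start-offset array for 3 rows. The 4th entry
/// (at index num_rows) holds the overall number of values, as the
/// fixed doc comment describes.
fn value_range(start_index: &[u32], row: usize) -> std::ops::Range<u32> {
    start_index[row]..start_index[row + 1]
}

fn main() {
    // Row 0 has 2 values, row 1 has 0, row 2 has 3; 5 values overall.
    let start_index = [0u32, 2, 2, 5];
    assert_eq!(value_range(&start_index, 0), 0..2);
    assert_eq!(value_range(&start_index, 1), 2..2);
    assert_eq!(value_range(&start_index, 2), 2..5);
}
```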
@@ -184,7 +184,7 @@ impl CompactSpaceBuilder {
 
 let mut covered_space = Vec::with_capacity(self.blanks.len());
 
-// begining of the blanks
+// beginning of the blanks
 if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start) {
 if *first_blank_start != 0 {
 covered_space.push(0..=first_blank_start - 1);
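The changed line sits in the code that inverts blank ranges into covered ranges. A toy version of that inversion, assuming sorted, non-overlapping blanks over `0..=max` (a sketch of the idea, not `CompactSpaceBuilder`'s implementation):

```rust
use std::ops::RangeInclusive;

/// Toy inversion of sorted, non-overlapping blank ranges into the
/// covered ranges over 0..=max.
fn covered_space(blanks: &[RangeInclusive<u64>], max: u64) -> Vec<RangeInclusive<u64>> {
    let mut covered = Vec::new();
    let mut cursor = 0u64;
    for blank in blanks {
        if *blank.start() > cursor {
            // everything before this blank is covered
            covered.push(cursor..=blank.start() - 1);
        }
        cursor = blank.end() + 1;
    }
    if cursor <= max {
        covered.push(cursor..=max);
    }
    covered
}

fn main() {
    // A blank [10..=20] over 0..=30 leaves [0..=9] and [21..=30] covered.
    assert_eq!(covered_space(&[10..=20], 30), vec![0..=9, 21..=30]);
}
```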
@@ -122,7 +122,7 @@ impl Line {
 line
 }
 
-/// Returns a line that attemps to approximate a function
+/// Returns a line that attempts to approximate a function
 /// f: i in 0..[ys.num_vals()) -> ys[i].
 ///
 /// - The approximation is always lower than the actual value. Or more rigorously, formally
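The invariant stated in that doc comment, that the line never exceeds the actual value, can be checked mechanically. A hedged sketch with plain integers standing in for tantivy's column types:

```rust
/// A toy linear approximation y = intercept + slope * i. The real Line
/// type works on u64 columns; this only demonstrates the lower-bound
/// invariant from the doc comment.
struct ToyLine {
    intercept: i64,
    slope: i64,
}

impl ToyLine {
    fn eval(&self, i: i64) -> i64 {
        self.intercept + self.slope * i
    }
}

fn main() {
    let ys = [3i64, 5, 9, 10];
    let line = ToyLine { intercept: 3, slope: 2 };
    // The approximation is always <= the actual value.
    assert!(ys.iter().enumerate().all(|(i, &y)| line.eval(i as i64) <= y));
}
```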
@@ -25,7 +25,7 @@ use crate::{
 /// After merge, all columns belonging to the same category are coerced to
 /// the same column type.
 ///
-/// In practise, today, only Numerical colummns are coerced into one type today.
+/// In practise, today, only Numerical columns are coerced into one type today.
 ///
 /// See also [README.md].
 ///
@@ -63,8 +63,8 @@ impl From<ColumnType> for ColumnTypeCategory {
 /// `require_columns` makes it possible to ensure that some columns will be present in the
 /// resulting columnar. When a required column is a numerical column type, one of two things can
 /// happen:
-/// - If the required column type is compatible with all of the input columnar, the resulsting
-/// merged columnar will simply coerce the input column and use the required column type.
+/// - If the required column type is compatible with all of the input columnar, the resulting merged
+/// columnar will simply coerce the input column and use the required column type.
 /// - If the required column type is incompatible with one of the input columnar, the merged will
 /// fail with an InvalidData error.
 ///
@@ -87,7 +87,7 @@ impl<V: SymbolValue> ColumnOperation<V> {
 minibuf
 }
 
-/// Deserialize a colummn operation.
+/// Deserialize a column operation.
 /// Returns None if the buffer is empty.
 ///
 /// Panics if the payload is invalid:
@@ -28,7 +28,7 @@ fn main() -> tantivy::Result<()> {
 let mut index_writer: IndexWriter = index.writer_with_num_threads(1, 50_000_000)?;
 index_writer.add_document(doc!(title => "The Old Man and the Sea"))?;
 index_writer.add_document(doc!(title => "Of Mice and Men"))?;
-index_writer.add_document(doc!(title => "The modern Promotheus"))?;
+index_writer.add_document(doc!(title => "The modern Prometheus"))?;
 index_writer.commit()?;
 
 let reader = index.reader()?;
@@ -833,7 +833,7 @@ fn aggregate_infallible_expressions(
 if early_operand {
 err.push(LenientErrorInternal {
 pos: 0,
-message: "Found unexpeted boolean operator before term".to_string(),
+message: "Found unexpected boolean operator before term".to_string(),
 });
 }
 
@@ -856,7 +856,7 @@ fn aggregate_infallible_expressions(
 _ => Some(Occur::Should),
 };
 if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
-// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+// if occur is MustNot *and* operation is OR, we synthesize a ShouldNot
 clauses.push(vec![(
 Some(Occur::Should),
 ast.clone().unary(Occur::MustNot),
@@ -872,7 +872,7 @@ fn aggregate_infallible_expressions(
 None => None,
 };
 if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
-// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+// if occur is MustNot *and* operation is OR, we synthesize a ShouldNot
 clauses.push(vec![(
 Some(Occur::Should),
 ast.clone().unary(Occur::MustNot),
@@ -897,7 +897,7 @@ fn aggregate_infallible_expressions(
 }
 Some(BinaryOperand::Or) => {
 if last_occur == Some(Occur::MustNot) {
-// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
+// if occur is MustNot *and* operation is OR, we synthesize a ShouldNot
 clauses.push(vec![(Some(Occur::Should), last_ast.unary(Occur::MustNot))]);
 } else {
 clauses.push(vec![(last_occur.or(Some(Occur::Should)), last_ast)]);
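The `synthesize a ShouldNot` comment recurs in three hunks; the underlying idea is that a `MustNot` clause under a default-OR operator is pushed as a `Should` clause wrapping the negated sub-query. A sketch with simplified stand-in types (not the query-grammar's real `Occur`/AST definitions):

```rust
/// Simplified stand-ins to illustrate the "ShouldNot" wrapping: under
/// OR, a MustNot clause becomes a Should clause whose body is the
/// negated sub-query.
#[derive(Debug, Clone, PartialEq)]
enum Occur { Should, MustNot }

#[derive(Debug, Clone, PartialEq)]
enum Ast { Term(String), Unary(Occur, Box<Ast>) }

fn push_clause(clauses: &mut Vec<(Option<Occur>, Ast)>, occur: Option<Occur>, ast: Ast) {
    if occur == Some(Occur::MustNot) {
        // synthesize a ShouldNot: Should(MustNot(ast))
        clauses.push((Some(Occur::Should), Ast::Unary(Occur::MustNot, Box::new(ast))));
    } else {
        clauses.push((occur.or(Some(Occur::Should)), ast));
    }
}

fn main() {
    let mut clauses = Vec::new();
    push_clause(&mut clauses, Some(Occur::MustNot), Ast::Term("b".into()));
    assert_eq!(
        clauses[0],
        (Some(Occur::Should),
         Ast::Unary(Occur::MustNot, Box::new(Ast::Term("b".into()))))
    );
}
```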
@@ -1057,7 +1057,7 @@ mod test {
 valid_parse("1", 1.0, "");
 valid_parse("0.234234 aaa", 0.234234f64, " aaa");
 error_parse(".3332");
-// TODO trinity-1686a: I disagree that it should fail, I think it should succeeed,
+// TODO trinity-1686a: I disagree that it should fail, I think it should succeed,
 // consuming only "1", and leave "." for the next thing (which will likely fail then)
 // error_parse("1.");
 error_parse("-1.");
@@ -1467,7 +1467,7 @@ mod test {
 }
 
 #[test]
-fn test_parse_query_to_triming_spaces() {
+fn test_parse_query_to_trimming_spaces() {
 test_parse_query_to_ast_helper(" abc", "abc");
 test_parse_query_to_ast_helper("abc ", "abc");
 test_parse_query_to_ast_helper("( a OR abc)", "(?a ?abc)");
@@ -267,7 +267,7 @@ impl fmt::Debug for UserInputAst {
 match *self {
 UserInputAst::Clause(ref subqueries) => {
 if subqueries.is_empty() {
-// TODO this will break ast reserialization, is writing "( )" enought?
+// TODO this will break ast reserialization, is writing "( )" enough?
 write!(formatter, "<emptyclause>")?;
 } else {
 write!(formatter, "(")?;
@@ -870,7 +870,7 @@ fn test_aggregation_on_json_object_mixed_types() {
 .add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0})))
 .unwrap();
 index_writer.commit().unwrap();
-// => Segment with all boolen
+// => Segment with all boolean
 index_writer
 .add_document(doc!(json => json!({"mixed_type": true, "mixed_price": "no_price"})))
 .unwrap();
@@ -25,7 +25,7 @@ use crate::aggregation::{format_date, Key};
 use crate::error::DataCorruption;
 use crate::TantivyError;
 
-/// Creates a bucket for every unique term and counts the number of occurences.
+/// Creates a bucket for every unique term and counts the number of occurrences.
 /// Note that doc_count in the response buckets equals term count here.
 ///
 /// If the text is untokenized and single value, that means one term per document and therefore it
@@ -158,7 +158,7 @@ pub struct TermsAggregation {
 /// when loading the text.
 /// Special Case 1:
 /// If we have multiple columns on one field, we need to have a union on the indices on both
-/// columns, to find docids without a value. That requires a special missing aggreggation.
+/// columns, to find docids without a value. That requires a special missing aggregation.
 /// Special Case 2: if the key is of type text and the column is numerical, we also need to use
 /// the special missing aggregation, since there is no mechanism in the numerical column to
 /// add text.
@@ -364,7 +364,7 @@ impl SegmentTermCollector {
 let term_buckets = TermBuckets::default();
 
 if let Some(custom_order) = req.order.as_ref() {
-// Validate sub aggregtion exists
+// Validate sub aggregation exists
 if let OrderTarget::SubAggregation(sub_agg_name) = &custom_order.target {
 let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
 
@@ -1685,7 +1685,7 @@ mod tests {
 res["my_texts"]["buckets"][2]["key"],
 serde_json::Value::Null
 );
-// text field with numner as missing fallback
+// text field with number as missing fallback
 assert_eq!(res["my_texts2"]["buckets"][0]["key"], "Hello Hello");
 assert_eq!(res["my_texts2"]["buckets"][0]["doc_count"], 5);
 assert_eq!(res["my_texts2"]["buckets"][1]["key"], 1337.0);
@@ -1859,7 +1859,7 @@ mod tests {
 res["my_texts"]["buckets"][2]["key"],
 serde_json::Value::Null
 );
-// text field with numner as missing fallback
+// text field with number as missing fallback
 assert_eq!(res["my_texts2"]["buckets"][0]["key"], "Hello Hello");
 assert_eq!(res["my_texts2"]["buckets"][0]["doc_count"], 4);
 assert_eq!(res["my_texts2"]["buckets"][1]["key"], 1337.0);
@@ -860,7 +860,7 @@ mod tests {
 }
 }
 
-fn get_intermediat_tree_with_ranges(
+fn get_intermediate_tree_with_ranges(
 data: &[(String, u64, String, u64)],
 ) -> IntermediateAggregationResults {
 let mut map = HashMap::new();
@@ -896,18 +896,18 @@ mod tests {
 
 #[test]
 fn test_merge_fruits_tree_1() {
-let mut tree_left = get_intermediat_tree_with_ranges(&[
+let mut tree_left = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 50, "1900".to_string(), 25),
 ("blue".to_string(), 30, "1900".to_string(), 30),
 ]);
-let tree_right = get_intermediat_tree_with_ranges(&[
+let tree_right = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 60, "1900".to_string(), 30),
 ("blue".to_string(), 25, "1900".to_string(), 50),
 ]);
 
 tree_left.merge_fruits(tree_right).unwrap();
 
-let tree_expected = get_intermediat_tree_with_ranges(&[
+let tree_expected = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 110, "1900".to_string(), 55),
 ("blue".to_string(), 55, "1900".to_string(), 80),
 ]);
@@ -917,18 +917,18 @@ mod tests {
 
 #[test]
 fn test_merge_fruits_tree_2() {
-let mut tree_left = get_intermediat_tree_with_ranges(&[
+let mut tree_left = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 50, "1900".to_string(), 25),
 ("blue".to_string(), 30, "1900".to_string(), 30),
 ]);
-let tree_right = get_intermediat_tree_with_ranges(&[
+let tree_right = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 60, "1900".to_string(), 30),
 ("green".to_string(), 25, "1900".to_string(), 50),
 ]);
 
 tree_left.merge_fruits(tree_right).unwrap();
 
-let tree_expected = get_intermediat_tree_with_ranges(&[
+let tree_expected = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 110, "1900".to_string(), 55),
 ("blue".to_string(), 30, "1900".to_string(), 30),
 ("green".to_string(), 25, "1900".to_string(), 50),
@@ -939,7 +939,7 @@ mod tests {
 
 #[test]
 fn test_merge_fruits_tree_empty() {
-let mut tree_left = get_intermediat_tree_with_ranges(&[
+let mut tree_left = get_intermediate_tree_with_ranges(&[
 ("red".to_string(), 50, "1900".to_string(), 25),
 ("blue".to_string(), 30, "1900".to_string(), 30),
 ]);
@@ -163,8 +163,8 @@ impl PartialEq for PercentilesCollector {
 }
 }
 
-fn format_percentil(percentil: f64) -> String {
-let mut out = percentil.to_string();
+fn format_percentile(percentile: f64) -> String {
+let mut out = percentile.to_string();
 // Slightly silly way to format trailing decimals
 if !out.contains('.') {
 out.push_str(".0");
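The renamed helper's behavior is easy to pin down: integral percentiles get a `.0` suffix. A standalone copy of the logic from the hunk above, with two illustrative cases:

```rust
// Standalone copy of format_percentile from the hunk above: integral
// percentiles get a ".0" suffix so keys serialize uniformly.
fn format_percentile(percentile: f64) -> String {
    let mut out = percentile.to_string();
    if !out.contains('.') {
        out.push_str(".0");
    }
    out
}

fn main() {
    assert_eq!(format_percentile(95.0), "95.0");
    assert_eq!(format_percentile(99.9), "99.9");
}
```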
@@ -197,7 +197,7 @@ impl PercentilesCollector {
 let values = if req.keyed {
 PercentileValues::HashMap(
 iter_quantile_and_values
-.map(|(val, quantil)| (format_percentil(val), quantil))
+.map(|(val, quantil)| (format_percentile(val), quantil))
 .collect(),
 )
 } else {
@@ -139,7 +139,7 @@ impl<'de> Deserialize<'de> for KeyOrder {
 }
 }
 
-// Tranform a glob (`pattern*`, for example) into a regex::Regex (`^pattern.*$`)
+// Transform a glob (`pattern*`, for example) into a regex::Regex (`^pattern.*$`)
 fn globbed_string_to_regex(glob: &str) -> Result<Regex, crate::TantivyError> {
 // Replace `*` glob with `.*` regex
 let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
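The fixed comment describes the whole glob-to-regex transformation. A standalone usage sketch of the same escape-then-substitute approach, calling the `regex` crate directly rather than tantivy's fallible wrapper:

```rust
use regex::Regex;

// Same approach as globbed_string_to_regex above: escape everything,
// then turn the escaped `\*` back into the regex wildcard `.*`.
fn glob_to_regex(glob: &str) -> Regex {
    let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
    Regex::new(&sanitized).expect("escaped pattern is always valid")
}

fn main() {
    let re = glob_to_regex("pattern*");
    assert_eq!(re.as_str(), "^pattern.*$");
    assert!(re.is_match("pattern_abc"));
    assert!(!re.is_match("other"));
}
```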
@@ -942,10 +942,10 @@ mod tests {
 
 let numbers = [100, 200, 300];
 let test_range = |range: RangeInclusive<u64>| {
-let expexted_count = numbers.iter().filter(|num| range.contains(num)).count();
+let expected_count = numbers.iter().filter(|num| range.contains(num)).count();
 let mut vec = vec![];
 field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
-assert_eq!(vec.len(), expexted_count);
+assert_eq!(vec.len(), expected_count);
 };
 test_range(50..=50);
 test_range(150..=150);
@@ -1020,10 +1020,10 @@ mod tests {
 
 let numbers = [1000, 1001, 1003];
 let test_range = |range: RangeInclusive<u64>| {
-let expexted_count = numbers.iter().filter(|num| range.contains(num)).count();
+let expected_count = numbers.iter().filter(|num| range.contains(num)).count();
 let mut vec = vec![];
 field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
-assert_eq!(vec.len(), expexted_count);
+assert_eq!(vec.len(), expected_count);
 };
 let test_range_variant = |start, stop| {
 let start_range = start..=stop;
@@ -70,13 +70,13 @@ impl FastFieldReaders {
 ///
 /// This function transforms `attributes.color` into a column key to be used in the `columnar`.
 ///
-/// The logic works as follows, first we identify which field is targetted by calling
+/// The logic works as follows, first we identify which field is targeted by calling
 /// `schema.find_field(..)`. This method will attempt to split the user splied fast field
 /// name by non-escaped dots, and find the longest matching schema field name.
 /// In our case, it would return the (attribute_field, "color").
 ///
 /// If no field is found, but a dynamic field is supplied, then we
-/// will simply assuem the user is targetting the dynamic field. (This feature is used in
+/// will simply assume the user is targeting the dynamic field. (This feature is used in
 /// Quickwit.)
 ///
 /// We then encode the `(field, path)` into the right `columnar_key`.
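The resolution described in that doc comment (split on dots, take the longest schema-field prefix, otherwise fall back to a dynamic field) can be sketched independently of tantivy's `Schema`. Everything below, names included, is illustrative, and escaped dots are ignored for brevity:

```rust
/// Illustrative resolver: given the schema's field names, find the
/// longest dot-separated prefix of `user_path` that names a field; the
/// remainder becomes the JSON path.
fn find_field<'a>(fields: &[&'a str], user_path: &'a str) -> Option<(&'a str, &'a str)> {
    let mut candidate = user_path;
    loop {
        if fields.contains(&candidate) {
            let rest = user_path[candidate.len()..].trim_start_matches('.');
            return Some((candidate, rest));
        }
        match candidate.rfind('.') {
            Some(pos) => candidate = &candidate[..pos],
            None => return None,
        }
    }
}

fn main() {
    let fields = ["attributes", "title"];
    // `attributes.color` resolves to (attributes, "color"), as in the doc.
    assert_eq!(find_field(&fields, "attributes.color"), Some(("attributes", "color")));
    // No match: a caller holding a dynamic field would fall back to it.
    assert_eq!(find_field(&fields, "unknown.path"), None);
}
```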
@@ -71,7 +71,7 @@ impl InvertedIndexReader {
 &self.termdict
 }
 
-/// Return the fields and types encoded in the dictionary in lexicographic oder.
+/// Return the fields and types encoded in the dictionary in lexicographic order.
 /// Only valid on JSON fields.
 ///
 /// Notice: This requires a full scan and therefore **very expensive**.
@@ -358,7 +358,7 @@ impl SegmentReader {
 .map(|(mut field_name, handle)| {
 json_path_sep_to_dot(&mut field_name);
 // map to canonical path, to avoid similar but different entries.
-// Eventually we should just accept '.' seperated for all cases.
+// Eventually we should just accept '.' separated for all cases.
 let field_name = map_to_canonical
 .get(&field_name)
 .unwrap_or(&field_name)
@@ -482,7 +482,7 @@ impl<D: Document> IndexWriter<D> {
 /// let index = Index::create_in_ram(schema.clone());
 ///
 /// let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
-/// index_writer.add_document(doc!(title => "The modern Promotheus"))?;
+/// index_writer.add_document(doc!(title => "The modern Prometheus"))?;
 /// index_writer.commit()?;
 ///
 /// let clear_res = index_writer.delete_all_documents().unwrap();
@@ -491,7 +491,7 @@ impl<D: Document> IndexWriter<D> {
 ///
 /// let searcher = index.reader()?.searcher();
 /// let query_parser = QueryParser::for_index(&index, vec![title]);
-/// let query_promo = query_parser.parse_query("Promotheus")?;
+/// let query_promo = query_parser.parse_query("Prometheus")?;
 /// let top_docs_promo = searcher.search(&query_promo, &TopDocs::with_limit(1))?;
 ///
 /// assert!(top_docs_promo.is_empty());
@@ -2093,7 +2093,7 @@ mod tests {
 //
 // Take half as sample
 let mut sample: Vec<_> = expected_ids_and_num_occurrences.iter().collect();
-sample.sort_by_key(|(k, _num_occurences)| *k);
+sample.sort_by_key(|(k, _num_occurrences)| *k);
 // sample.truncate(sample.len() / 2);
 if !sample.is_empty() {
 let (left_sample, right_sample) = sample.split_at(sample.len() / 2);
@@ -2102,7 +2102,7 @@ mod tests {
 sample
 .iter()
 .filter(|(id, _)| id_is_full_doc(**id))
-.map(|(_id, num_occurences)| **num_occurences)
+.map(|(_id, num_occurrences)| **num_occurrences)
 .sum::<u64>()
 };
 fn gen_query_inclusive<T1: ToString, T2: ToString>(
@@ -43,7 +43,7 @@ impl PathToUnorderedId {
 next_id
 }
 
-/// Retuns ids which reflect the lexical order of the paths.
+/// Returns ids which reflect the lexical order of the paths.
 ///
 /// The returned vec can be indexed with the unordered id to get the ordered id.
 pub(crate) fn unordered_id_to_ordered_id(&self) -> Vec<OrderedPathId> {
@@ -57,7 +57,7 @@ impl PathToUnorderedId {
 result
 }
 
-/// Retuns the paths so they can be queried by the ordered id (which is the index).
+/// Returns the paths so they can be queried by the ordered id (which is the index).
 pub(crate) fn ordered_id_to_path(&self) -> Vec<&str> {
 let mut paths = self.map.keys().map(String::as_str).collect::<Vec<_>>();
 paths.sort_unstable();
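A small sketch of the mapping these two renamed methods describe, with plain `usize` ids standing in for tantivy's id newtypes:

```rust
// Sketch of the unordered->ordered id mapping described above. Paths
// get an unordered id in insertion order; the ordered id is the path's
// rank in lexicographic order.
fn unordered_to_ordered(paths_in_insertion_order: &[&str]) -> Vec<usize> {
    let mut sorted: Vec<&str> = paths_in_insertion_order.to_vec();
    sorted.sort_unstable();
    paths_in_insertion_order
        .iter()
        .map(|path| sorted.binary_search(path).expect("path is present"))
        .collect()
}

fn main() {
    // Insertion order: "b" gets unordered id 0, "a" gets unordered id 1.
    let mapping = unordered_to_ordered(&["b", "a"]);
    // Indexed by unordered id, the vec yields the ordered (lexicographic) id.
    assert_eq!(mapping, vec![1, 0]);
}
```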
@@ -377,7 +377,7 @@ impl SegmentUpdater {
 if self.is_alive() {
 let index = &self.index;
 let directory = index.directory();
-let mut commited_segment_metas = self.segment_manager.committed_segment_metas();
+let mut committed_segment_metas = self.segment_manager.committed_segment_metas();
 
 // We sort segment_readers by number of documents.
 // This is an heuristic to make multithreading more efficient.
@@ -392,10 +392,10 @@ impl SegmentUpdater {
 // from the different drives.
 //
 // Segment 1 from disk 1, Segment 1 from disk 2, etc.
-commited_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
+committed_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
 let index_meta = IndexMeta {
 index_settings: index.settings().clone(),
-segments: commited_segment_metas,
+segments: committed_segment_metas,
 schema: index.schema(),
 opstamp,
 payload: commit_message,
@@ -209,7 +209,7 @@ impl MoreLikeThis {
 }
 };
 
-// TOOD: Validate these changed align with the HEAD branch.
+// TODO: Validate these changed align with the HEAD branch.
 for value in values {
 if let Some(text) = value.as_str() {
 let tokenizer = match &mut tokenizer_opt {
@@ -295,7 +295,7 @@ impl MoreLikeThis {
 self.stop_words.contains(&word)
 }
 
-/// Couputes the score for each term while ignoring not useful terms
+/// Computes the score for each term while ignoring not useful terms
 fn create_score_term(
 &self,
 searcher: &Searcher,
@@ -86,7 +86,7 @@ impl PhrasePrefixQuery {
 ///
 /// This function is the same as [`Query::weight()`] except it returns
 /// a specialized type [`PhraseQueryWeight`] instead of a Boxed trait.
-/// If the query was only one term long, this returns `None` wherease [`Query::weight`]
+/// If the query was only one term long, this returns `None` whereas [`Query::weight`]
 /// returns a boxed [`RangeWeight`]
 pub(crate) fn phrase_prefix_query_weight(
 &self,
@@ -58,7 +58,7 @@ pub enum QueryParserError {
 #[error("Invalid query: Only excluding terms given")]
 AllButQueryForbidden,
 /// If no default field is declared, running a query without any
-/// field specified is forbbidden.
+/// field specified is forbidden.
 #[error("No default field declared and no field specified in query")]
 NoDefaultFieldDeclared,
 /// The field searched for is not declared
@@ -822,7 +822,7 @@ impl QueryParser {
 };
 if lower == Bound::Unbounded && upper == Bound::Unbounded {
 // this range is useless, either because a user requested [* TO *], or because
-// we failed to parse something. Either way, there is no point emiting it
+// we failed to parse something. Either way, there is no point emitting it
 return (None, errors);
 }
 let logical_ast =
@@ -1307,7 +1307,7 @@ mod test {
 }
 
 #[test]
-fn test_json_field_query_with_espaced_dot() {
+fn test_json_field_query_with_escaped_dot() {
 assert_eq!(
 extract_query_term_json_path(r#"json.k8s.node.name:hello"#),
 "k8s\u{1}node\u{1}name\0shello"
@@ -244,7 +244,7 @@ fn search_on_json_numerical_field(
 bounds: BoundsRange<ValueBytes<Vec<u8>>>,
 boost: Score,
 ) -> crate::Result<Box<dyn Scorer>> {
-// Since we don't know which type was interpolated for the internal column whe
+// Since we don't know which type was interpolated for the internal column we
 // have to check for all numeric types (only one exists)
 let allowed_column_types: Option<&[ColumnType]> =
 Some(&[ColumnType::F64, ColumnType::I64, ColumnType::U64]);

@@ -254,7 +254,7 @@ fn search_on_json_numerical_field(
 else {
 return Ok(Box::new(EmptyScorer));
 };
-let actual_colum_type: NumericalType = col_type.numerical_type().unwrap_or_else(|| {
+let actual_column_type: NumericalType = col_type.numerical_type().unwrap_or_else(|| {
 panic!(
 "internal error: couldn't cast to numerical_type: {:?}",
 col_type

@@ -264,7 +264,7 @@ fn search_on_json_numerical_field(
 let bounds = match typ.numerical_type().unwrap() {
 NumericalType::I64 => {
 let bounds = bounds.map_bound(|term| (term.as_i64().unwrap()));
-match actual_colum_type {
+match actual_column_type {
 NumericalType::I64 => bounds.map_bound(|&term| term.to_u64()),
 NumericalType::U64 => {
 bounds.transform_inner(

@@ -288,7 +288,7 @@ fn search_on_json_numerical_field(
 }
 NumericalType::U64 => {
 let bounds = bounds.map_bound(|term| (term.as_u64().unwrap()));
-match actual_colum_type {
+match actual_column_type {
 NumericalType::U64 => bounds.map_bound(|&term| term.to_u64()),
 NumericalType::I64 => {
 bounds.transform_inner(

@@ -312,7 +312,7 @@ fn search_on_json_numerical_field(
 }
 NumericalType::F64 => {
 let bounds = bounds.map_bound(|term| (term.as_f64().unwrap()));
-match actual_colum_type {
+match actual_column_type {
 NumericalType::U64 => transform_from_f64_bounds::<u64>(&bounds),
 NumericalType::I64 => transform_from_f64_bounds::<i64>(&bounds),
 NumericalType::F64 => bounds.map_bound(|&term| term.to_u64()),
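The repeated `match actual_column_type` arms above all do the same job: re-express a parsed bound in the numerical type the column actually stores. A reduced sketch of one such coercion (toy function, not tantivy's `BoundsRange` machinery):

```rust
/// Toy version of the coercion in the hunks above: a lower bound parsed
/// as i64 is re-expressed for a column that actually stores u64.
/// Negative bounds clamp to 0, since no u64 value can lie below them.
fn coerce_i64_lower_bound_for_u64_column(bound: i64) -> u64 {
    u64::try_from(bound).unwrap_or(0)
}

fn main() {
    assert_eq!(coerce_i64_lower_bound_for_u64_column(5), 5);
    // A negative lower bound excludes nothing in a u64 column.
    assert_eq!(coerce_i64_lower_bound_for_u64_column(-3), 0);
}
```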
@@ -1844,7 +1844,7 @@ mod bench_ip {
 start..=end
 }
 
-fn excute_query(
+fn execute_query(
 field: &str,
 ip_range: RangeInclusive<Ipv6Addr>,
 suffix: &str,
@@ -1876,152 +1876,152 @@ mod bench_ip {
|
     fn bench_ip_range_hit_90_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_90_percent(), "", &index));
+        bench.iter(|| execute_query("ip", get_90_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_10_percent(), "", &index));
+        bench.iter(|| execute_query("ip", get_10_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_1_percent(), "", &index));
+        bench.iter(|| execute_query("ip", get_1_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent_intersect_with_10_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_10_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ip", get_10_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_10_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_1_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ip", get_1_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_90_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_1_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ip", get_1_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_1_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_1_percent(), "AND id:veryfew", &index));
+        bench.iter(|| execute_query("ip", get_1_percent(), "AND id:veryfew", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent_intersect_with_90_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_10_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ip", get_10_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_90_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_90_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ip", get_90_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_10_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_90_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ip", get_90_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_1_percent(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ip", get_90_percent(), "AND id:veryfew", &index));
+        bench.iter(|| execute_query("ip", get_90_percent(), "AND id:veryfew", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_90_percent(), "", &index));
+        bench.iter(|| execute_query("ips", get_90_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_10_percent(), "", &index));
+        bench.iter(|| execute_query("ips", get_10_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_1_percent(), "", &index));
+        bench.iter(|| execute_query("ips", get_1_percent(), "", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent_intersect_with_10_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_10_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ips", get_10_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_10_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_1_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ips", get_1_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_90_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
-        bench.iter(|| excute_query("ips", get_1_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ips", get_1_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_1_percent_intersect_with_1_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_1_percent(), "AND id:veryfew", &index));
+        bench.iter(|| execute_query("ips", get_1_percent(), "AND id:veryfew", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_10_percent_intersect_with_90_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_10_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ips", get_10_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_90_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_90_percent(), "AND id:many", &index));
+        bench.iter(|| execute_query("ips", get_90_percent(), "AND id:many", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_10_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_90_percent(), "AND id:few", &index));
+        bench.iter(|| execute_query("ips", get_90_percent(), "AND id:few", &index));
     }
 
     #[bench]
     fn bench_ip_range_hit_90_percent_intersect_with_1_percent_multi(bench: &mut Bencher) {
         let index = get_index_0_to_100();
 
-        bench.iter(|| excute_query("ips", get_90_percent(), "AND id:veryfew", &index));
+        bench.iter(|| execute_query("ips", get_90_percent(), "AND id:veryfew", &index));
     }
 }
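The functions above all follow Rust's nightly `test` benchmark harness pattern: an index fixture is built once per bench function, and `Bencher::iter` times the closure. A minimal, self-contained sketch of that harness (the workload is a placeholder, not tantivy code):

    #![feature(test)]
    extern crate test;

    #[cfg(test)]
    mod bench {
        use test::Bencher;

        fn placeholder_workload() -> u64 {
            // Stands in for execute_query(...); any deterministic work will do.
            (0..1_000u64).sum()
        }

        #[bench]
        fn bench_placeholder(bench: &mut Bencher) {
            // `iter` runs the closure many times and reports ns/iter.
            bench.iter(placeholder_workload);
        }
    }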
@@ -1,4 +1,4 @@
-//! Implementations of some of the core traits on varius types to improve the ergonomics
+//! Implementations of some of the core traits on various types to improve the ergonomics
 //! of the API when providing custom documents.
 //!
 //! This allows users a bit more freedom and ergonomics if they want a simple API
@@ -258,7 +258,7 @@ impl ValueDeserialize for serde_json::Value {
     }
 }
 
-/// A wrapper struct for an interator producing [Value]s.
+/// A wrapper struct for an iterator producing [Value]s.
 pub struct JsonObjectIter<'a>(pub(crate) serde_json::map::Iter<'a>);
 
 impl<'a> Iterator for JsonObjectIter<'a> {
@@ -16,7 +16,7 @@ pub struct JsonObjectOptions {
     indexing: Option<TextFieldIndexing>,
     // Store all field as fast fields with an optional tokenizer for text.
     fast: FastFieldTextOptions,
-    /// tantivy will generate pathes to the different nodes of the json object
+    /// tantivy will generate paths to the different nodes of the json object
     /// both in:
     /// - the inverted index (for the terms)
     /// - fast fields (for the column names).
@@ -26,7 +26,7 @@ pub struct JsonObjectOptions {
     ///
    /// By default, if an object key contains a `.`, we keep it as a `.` it as is.
     /// On the search side, users will then have to escape this `.` in the query parser
-    /// or when refering to a column name.
+    /// or when referring to a column name.
     ///
     /// For instance:
     /// `{"root": {"child.with.dot": "hello"}}`
@@ -35,7 +35,7 @@ pub struct JsonObjectOptions {
     /// `root.child\.with\.dot:hello`
     ///
     /// If `expand_dots_enabled` is set to true, we will treat this `.` in object keys
-    /// as json seperators. In other words, if set to true, our object will be
+    /// as json separators. In other words, if set to true, our object will be
     /// processed as if it was
     /// `{"root": {"child": {"with": {"dot": "hello"}}}}`
     /// and it can be search using the following query:
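To make the escaped and expanded behaviours concrete, here is a small sketch of enabling the option when declaring the field. The builder-style method names (`set_expand_dots_enabled` in particular) and the `From` conversion are assumptions about the current tantivy schema API, not something this diff shows:

    use tantivy::schema::{JsonObjectOptions, Schema, STORED, TEXT};

    fn schema_with_expanded_dots() -> Schema {
        let mut schema_builder = Schema::builder();
        // Assumed API: with expand_dots enabled, `{"child.with.dot": "hello"}`
        // is treated as `{"child": {"with": {"dot": "hello"}}}`, so the query
        // `root.child.with.dot:hello` needs no escaping.
        let json_options: JsonObjectOptions =
            JsonObjectOptions::from(TEXT | STORED).set_expand_dots_enabled();
        schema_builder.add_json_field("root", json_options);
        schema_builder.build()
    }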
@@ -5,7 +5,7 @@
 //! Tantivy has a very strict schema.
 //! The schema defines information about the fields your index contains, that is, for each field:
 //!
-//! - the field name (may contain any characted, can't start with a `-` and can't be empty. Some
+//! - the field name (may contain any character, can't start with a `-` and can't be empty. Some
 //!   characters may require escaping when using the query parser).
 //! - the type of the field (currently `text`, `u64`, `i64`, `f64`, `bool`, `date`, `IpAddr`,
 //!   facets, bytes and json are supported)
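As a quick illustration of the schema vocabulary described above, a minimal builder sketch (the field names are invented for the example; the builder calls follow tantivy's documented API):

    use tantivy::schema::{Schema, FAST, STORED, STRING, TEXT};

    fn example_schema() -> Schema {
        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field("title", TEXT | STORED); // tokenized and stored
        schema_builder.add_text_field("id", STRING); // indexed raw, untokenized
        schema_builder.add_u64_field("popularity", FAST); // columnar fast field
        schema_builder.build()
    }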
@@ -332,7 +332,7 @@ where B: AsRef<[u8]>
 
 /// ValueBytes represents a serialized value.
 /// The value can be of any type of [`Type`] (e.g. string, u64, f64, bool, date, JSON).
-/// The serialized representation matches the lexographical order of the type.
+/// The serialized representation matches the lexicographical order of the type.
 ///
 /// The `ValueBytes` format is as follow:
 /// `[type code: u8][serialized value]`
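The lexicographical property called out above is what allows serialized values to be ordered by plain byte comparison. A self-contained illustration of the idea (not the actual `ValueBytes` codec): a type-code byte followed by a big-endian integer compares byte-wise in the same order as the numeric values:

    fn encode_u64(type_code: u8, value: u64) -> Vec<u8> {
        // `[type code: u8][serialized value]`, big-endian so that byte order
        // coincides with numeric order within a single type.
        let mut bytes = vec![type_code];
        bytes.extend_from_slice(&value.to_be_bytes());
        bytes
    }

    fn main() {
        let small = encode_u64(2, 41);
        let big = encode_u64(2, 1_000);
        assert!(small < big); // byte-wise order agrees with 41 < 1_000
    }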
@@ -7,11 +7,11 @@ use lz4_flex::{compress_into, decompress_into};
 #[allow(clippy::uninit_vec)]
 pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
     compressed.clear();
-    let maximum_ouput_size =
+    let maximum_output_size =
         mem::size_of::<u32>() + lz4_flex::block::get_maximum_output_size(uncompressed.len());
-    compressed.reserve(maximum_ouput_size);
+    compressed.reserve(maximum_output_size);
     unsafe {
-        compressed.set_len(maximum_ouput_size);
+        compressed.set_len(maximum_output_size);
     }
     let bytes_written = compress_into(uncompressed, &mut compressed[4..])
         .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
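The `compressed[4..]` slice above leaves room for a 4-byte header in front of the LZ4 block, presumably the uncompressed length, so that a reader can size its output buffer up front. A hypothetical inverse under that assumption (the header's exact meaning and endianness are not shown in this diff):

    use std::io;

    use lz4_flex::block::decompress_into;

    fn decompress(compressed: &[u8], out: &mut Vec<u8>) -> io::Result<()> {
        // Assumed layout: [u32 uncompressed length][lz4 block].
        let (header, body) = compressed.split_at(4);
        let uncompressed_len = u32::from_le_bytes(header.try_into().unwrap()) as usize;
        out.clear();
        out.resize(uncompressed_len, 0u8);
        let written = decompress_into(body, &mut out[..])
            .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
        debug_assert_eq!(written, uncompressed_len);
        Ok(())
    }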
@@ -59,7 +59,7 @@ impl StoreWriter {
 
     /// Checks if the current block is full, and if so, compresses and flushes it.
     fn check_flush_block(&mut self) -> io::Result<()> {
-        // this does not count the VInt storing the index lenght itself, but it is negligible in
+        // this does not count the VInt storing the index length itself, but it is negligible in
         // front of everything else.
         let index_len = self.doc_pos.len() * std::mem::size_of::<usize>();
         if self.current_block.len() + index_len > self.block_size {
@@ -128,7 +128,7 @@ impl TermDictionary {
         if version != FST_VERSION {
             return Err(io::Error::new(
                 io::ErrorKind::Other,
-                format!("Unsuported fst version, expected {version}, found {FST_VERSION}",),
+                format!("Unsupported fst version, expected {version}, found {FST_VERSION}",),
             ));
         }
 
@@ -86,7 +86,7 @@ impl TermDictionary {
         let dict_type = DictionaryType::try_from(dict_type).map_err(|_| {
             io::Error::new(
                 io::ErrorKind::Other,
-                format!("Unsuported dictionary type, found {dict_type}"),
+                format!("Unsupported dictionary type, found {dict_type}"),
             )
         })?;
 
@@ -94,7 +94,7 @@ impl TermDictionary {
             return Err(io::Error::new(
                 io::ErrorKind::Other,
                 format!(
-                    "Unsuported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
+                    "Unsupported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
                      {dict_type:?}",
                 ),
             ));
@@ -192,7 +192,7 @@ impl<'a> TokenStream for NgramTokenStream<'a> {
 /// The elements are emitted in the order of appearance
 /// of `a` first, `b` then.
 ///
-/// See `test_stutterring_iterator` for an example of its
+/// See `test_stuttering_iterator` for an example of its
 /// output.
 struct StutteringIterator<T> {
     underlying: T,
@@ -461,14 +461,14 @@ mod tests {
     }
 
     #[test]
-    fn test_stutterring_iterator_empty() {
+    fn test_stuttering_iterator_empty() {
         let rg: Vec<usize> = vec![0];
         let mut it = StutteringIterator::new(rg.into_iter(), 1, 2);
         assert_eq!(it.next(), None);
     }
 
     #[test]
-    fn test_stutterring_iterator() {
+    fn test_stuterring_iterator() {
         let mut it = StutteringIterator::new(0..10, 1, 2);
         assert_eq!(it.next(), Some((0, 1)));
         assert_eq!(it.next(), Some((0, 2)));
@@ -238,7 +238,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
             _ => {
                 return Err(io::Error::new(
                     io::ErrorKind::Other,
-                    format!("Unsuported sstable version, expected one of [2, 3], found {version}"),
+                    format!("Unsupported sstable version, expected one of [2, 3], found {version}"),
                 ))
             }
         };
@@ -386,7 +386,7 @@ mod test {
         16, 17, 33, 18, 19, 17, 20, // data block
         0, 0, 0, 0, // no more block
         // index
-        0, 0, 0, 0, 0, 0, 0, 0, // fst lenght
+        0, 0, 0, 0, 0, 0, 0, 0, // fst length
         16, 0, 0, 0, 0, 0, 0, 0, // index start offset
         3, 0, 0, 0, 0, 0, 0, 0, // num term
         3, 0, 0, 0, // version
@@ -671,7 +671,7 @@ impl BlockAddrStoreWriter {
 }
 
 /// Given an iterator over (index, value), returns the slope, and number of bits needed to
-/// represente the error to a prediction made by this slope.
+/// represent the error to a prediction made by this slope.
 ///
 /// The iterator may be empty, but all indexes in it must be non-zero.
 fn find_best_slope(elements: impl Iterator<Item = (usize, u64)> + Clone) -> (u32, u8) {
@@ -702,7 +702,7 @@ fn find_best_slope(elements: impl Iterator<Item = (usize, u64)> + Clone) -> (u32
     // a point that appear earlier might have a high slope derivation, but a smaller absolute
     // derivation than a latter point.
     // The actual best values can be obtained by using the symplex method, but the improvement is
-    // likely minimal, and computation is way more complexe.
+    // likely minimal, and computation is way more complex.
     //
     // Assuming these point are the furthest up and down, we find the slope that would cause the
     // same positive derivation for the highest as negative derivation for the lowest.
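The doc comment and notes above describe fitting a linear predictor `value ≈ index * slope` and then storing only a small per-element correction. A simplified, hypothetical version of that computation (the fixed-point scaling and exact bit accounting of the real `find_best_slope` are not reproduced here):

    fn best_slope_sketch(elements: &[(usize, u64)]) -> (u64, u32) {
        // Take the slope through the last point as an integer approximation.
        let slope = match elements.last() {
            Some(&(idx, val)) if idx > 0 => val / idx as u64,
            _ => return (0, 0),
        };
        // Worst absolute deviation from the prediction `idx * slope`.
        let max_err = elements
            .iter()
            .map(|&(idx, val)| (idx as u64 * slope).abs_diff(val))
            .max()
            .unwrap_or(0);
        // Bits needed for a signed correction of magnitude `max_err`.
        let bits = if max_err == 0 { 0 } else { 65 - max_err.leading_zeros() };
        (slope, bits)
    }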
@@ -73,7 +73,7 @@ where
     /// Load no more data than what's required to to get `limit`
     /// matching entries.
     ///
-    /// The resulting [`Streamer`] can still return marginaly
+    /// The resulting [`Streamer`] can still return marginally
     /// more than `limit` elements.
     pub fn limit(mut self, limit: u64) -> Self {
         self.limit = Some(limit);
@@ -111,7 +111,7 @@ impl<'a> ExpUnrolledLinkedListWriter<'a> {
 }
 
 // The block size is 2^block_num + 2, but max 2^15= 32k
-// Inital size is 8, for the first block => block_num == 1
+// Initial size is 8, for the first block => block_num == 1
 #[inline]
 fn get_block_size(block_num: u16) -> u16 {
     1 << block_num.min(15)
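Per the comment above, blocks grow exponentially from 8 bytes (block 1) and are capped at 2^15 = 32 KB. An illustration of that growth policy; this is a re-derivation from the comment, not the crate's exact function, whose body is only partially visible in the hunk:

    fn block_size_sketch(block_num: u32) -> u32 {
        // 2^(block_num + 2), capped at 2^15 = 32768.
        1u32 << (block_num + 2).min(15)
    }

    fn main() {
        // block 1 -> 8, block 2 -> 16, ..., block 13 and above -> 32768.
        for n in 1..=14 {
            println!("block {n}: {} bytes", block_size_sketch(n));
        }
    }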
@@ -39,7 +39,7 @@ pub fn fast_short_slice_compare(left: &[u8], right: &[u8]) -> bool {
     double_check_trick::<8>(left, right)
 }
 
-// Note: The straigthforward left.chunks_exact(SIZE).zip(right.chunks_exact(SIZE)) produces slower
+// Note: The straightforward left.chunks_exact(SIZE).zip(right.chunks_exact(SIZE)) produces slower
 // assembly
 #[inline]
 pub fn fast_nbyte_slice_compare<const SIZE: usize>(left: &[u8], right: &[u8]) -> bool {
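`double_check_trick::<8>` above refers to a classic short-slice comparison trick: compare one SIZE-byte window at the front and one, possibly overlapping, SIZE-byte window at the back, so every byte is covered without a loop. A minimal sketch assuming equal lengths with SIZE <= len <= 2 * SIZE (the real implementation presumably relies on the compiler turning each window comparison into a single unaligned word compare):

    fn double_check_trick_sketch<const SIZE: usize>(left: &[u8], right: &[u8]) -> bool {
        debug_assert_eq!(left.len(), right.len());
        debug_assert!(SIZE <= left.len() && left.len() <= 2 * SIZE);
        let len = left.len();
        // The two windows cover every byte; the overlap is compared twice.
        left[..SIZE] == right[..SIZE] && left[len - SIZE..] == right[len - SIZE..]
    }

    fn main() {
        assert!(double_check_trick_sketch::<4>(b"abcdef", b"abcdef"));
        assert!(!double_check_trick_sketch::<4>(b"abcdef", b"abcdeX"));
    }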
@@ -1,5 +1,5 @@
 //! Tokenizer are in charge of chopping text into a stream of tokens
-//! ready for indexing. This is an seperate crate from tantivy, so implementors don't need to update
+//! ready for indexing. This is an separate crate from tantivy, so implementors don't need to update
 //! for each new tantivy version.
 //!
 //! To add support for a tokenizer, implement the [`Tokenizer`] trait.