Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-02 23:32:54 +00:00)

Compare commits: 24 commits, optional_c ... commit-cha
Commits: 1e50f96fb0, a05a0035f8, 976128a412, f27b3e312d, 56dea6f08d, 789d29cf45, a36b50d825, 09f65e5467, 2c2f5c3877, 96c93a6ba3, 11b01e4141, 3e8852c606, 725f1ecb80, afa27afe7d, 495824361a, 485a8f507e, 1119e59eae, ee1f2c1f28, 600548fd26, 9929c0c221, f53e65648b, 0281b22b77, a05c184830, 0b40a7fe43
```diff
@@ -1,9 +1,13 @@
 Tantivy 0.19
 ================================
+#### Bugfixes
+- Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz)
+- Fix interpolation overflow in the linear interpolation fastfield codec [#1480](https://github.com/quickwit-oss/tantivy/pull/1480) (@PSeitz @fulmicoton)
 
+#### Features/Improvements
+- Add support for `IN` in the query parser, e.g. `field: IN [val1 val2 val3]` [#1683](https://github.com/quickwit-oss/tantivy/pull/1683) (@trinity-1686a)
 - Skip score calculation when no scoring is required [#1646](https://github.com/quickwit-oss/tantivy/pull/1646) (@PSeitz)
 - Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz)
-- Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz)
 - Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
   The `DateTime` type has been updated to hold timestamps with microsecond precision.
   `DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used as a hint for fast-value compression; seconds precision is used everywhere else (i.e. terms, indexing). (@evanxg852000)
@@ -11,7 +15,6 @@ Tantivy 0.19
 - Add boolean field type [#1382](https://github.com/quickwit-oss/tantivy/pull/1382) (@boraarslan)
 - Remove Searcher pool and make `Searcher` cloneable. (@PSeitz)
 - Validate settings on create [#1570](https://github.com/quickwit-oss/tantivy/pull/1570) (@PSeitz)
-- Fix interpolation overflow in the linear interpolation fastfield codec [#1480](https://github.com/quickwit-oss/tantivy/pull/1480) (@PSeitz @fulmicoton)
 - Detect and apply gcd on fastfield codecs [#1418](https://github.com/quickwit-oss/tantivy/pull/1418) (@PSeitz)
 - Doc store
   - Use a separate thread to compress the block store [#1389](https://github.com/quickwit-oss/tantivy/pull/1389) [#1510](https://github.com/quickwit-oss/tantivy/pull/1510) (@PSeitz @fulmicoton)
@@ -21,6 +24,7 @@ Tantivy 0.19
 - Make `tantivy::TantivyError` cloneable [#1402](https://github.com/quickwit-oss/tantivy/pull/1402) (@PSeitz)
 - Add support for phrase slop in the query language [#1393](https://github.com/quickwit-oss/tantivy/pull/1393) (@saroh)
 - Aggregation
+  - Add aggregation support for the date type [#1693](https://github.com/quickwit-oss/tantivy/pull/1693) (@PSeitz)
   - Add support for the keyed parameter in range and histogram aggregations [#1424](https://github.com/quickwit-oss/tantivy/pull/1424) (@k-yomo)
   - Add aggregation bucket limit [#1363](https://github.com/quickwit-oss/tantivy/pull/1363) (@PSeitz)
 - Faster indexing
```
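To make the new `IN` query-parser syntax from #1683 concrete, here is a minimal sketch (not part of the diff): the schema, the field name `tags`, and the values are invented for illustration, and the exact expansion of `IN` should be checked against the tantivy 0.19 documentation.

```rust
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STRING};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // Hypothetical schema with a single raw (non-tokenized) text field.
    let mut schema_builder = Schema::builder();
    let tags = schema_builder.add_text_field("tags", STRING);
    let index = Index::create_in_ram(schema_builder.build());

    let parser = QueryParser::for_index(&index, vec![tags]);
    // New in 0.19: `IN [..]` matches documents whose field equals any of the
    // listed values, roughly like `tags:val1 OR tags:val2 OR tags:val3`.
    let query = parser.parse_query("tags: IN [val1 val2 val3]")?;
    println!("{query:?}");
    Ok(())
}
```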
Cargo.toml (17 changed lines):

```diff
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.19.0-dev"
+version = "0.19.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -25,7 +25,7 @@ tantivy-fst = "0.4.0"
 memmap2 = { version = "0.5.3", optional = true }
 lz4_flex = { version = "0.9.2", default-features = false, features = ["checked-decode"], optional = true }
 brotli = { version = "3.3.4", optional = true }
-zstd = { version = "0.11", optional = true, default-features = false }
+zstd = { version = "0.12", optional = true, default-features = false }
 snap = { version = "1.0.5", optional = true }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
@@ -36,11 +36,6 @@ fs2 = { version = "0.4.3", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
-tantivy-query-grammar = { version="0.18.0", path="./query-grammar" }
-tantivy-bitpacker = { version="0.2", path="./bitpacker" }
-common = { version = "0.3", path = "./common/", package = "tantivy-common" }
-fastfield_codecs = { version="0.2", path="./fastfield_codecs", default-features = false }
-ownedbytes = { version="0.3", path="./ownedbytes" }
 stable_deref_trait = "1.2.0"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
@@ -62,6 +57,12 @@ ciborium = { version = "0.2", optional = true}
 async-trait = "0.1.53"
 arc-swap = "1.5.0"
 
+tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
+tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
+common = { version= "0.4", path = "./common/", package = "tantivy-common" }
+fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
+ownedbytes = { version= "0.4", path="./ownedbytes" }
+
 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
 
@@ -73,7 +74,7 @@ pretty_assertions = "1.2.1"
 proptest = "1.0.0"
 criterion = "0.4"
 test-log = "0.2.10"
-env_logger = "0.9.0"
+env_logger = "0.10.0"
 pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
 futures = "0.3.21"
 
```
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
@@ -8,6 +8,8 @@ categories = []
 description = """Tantivy-sub crate: bitpacking"""
 repository = "https://github.com/quickwit-oss/tantivy"
 keywords = []
+documentation = "https://docs.rs/tantivy-bitpacker/latest/tantivy_bitpacker"
+homepage = "https://github.com/quickwit-oss/tantivy"
 
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
```
```diff
@@ -1,16 +1,20 @@
 [package]
 name = "tantivy-common"
-version = "0.3.0"
+version = "0.4.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
 description = "common traits and utility functions used by multiple tantivy subcrates"
+documentation = "https://docs.rs/tantivy_common/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"
+
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
 byteorder = "1.4.3"
-ownedbytes = { version="0.3", path="../ownedbytes" }
+ownedbytes = { version= "0.4", path="../ownedbytes" }
 
 [dev-dependencies]
 proptest = "1.0.0"
```
```diff
@@ -94,6 +94,20 @@ impl FixedSize for u32 {
     const SIZE_IN_BYTES: usize = 4;
 }
 
+impl BinarySerializable for u16 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        writer.write_u16::<Endianness>(*self)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
+        reader.read_u16::<Endianness>()
+    }
+}
+
+impl FixedSize for u16 {
+    const SIZE_IN_BYTES: usize = 2;
+}
+
 impl BinarySerializable for u64 {
     fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
         writer.write_u64::<Endianness>(*self)
```
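The new `u16` impl mirrors the existing `u64` one. A small sketch of the round trip it enables follows; the crate/module path of `BinarySerializable` in your own project is an assumption (inside the workspace it is the `common` package, published as `tantivy-common`), the rest uses only what the diff above shows.

```rust
use common::BinarySerializable; // assumed import path; `tantivy_common` when used externally

fn roundtrip_u16(value: u16) -> std::io::Result<u16> {
    // Serialize into an in-memory buffer...
    let mut buffer: Vec<u8> = Vec::new();
    value.serialize(&mut buffer)?;
    // ...FixedSize guarantees exactly 2 bytes were written...
    assert_eq!(buffer.len(), 2);
    // ...and read the value back from a byte slice.
    u16::deserialize(&mut &buffer[..])
}

fn main() -> std::io::Result<()> {
    assert_eq!(roundtrip_u16(4335)?, 4335);
    Ok(())
}
```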
```diff
@@ -118,7 +118,7 @@ fn main() -> tantivy::Result<()> {
         .into_iter()
         .collect();
 
-    let collector = AggregationCollector::from_aggs(agg_req_1, None);
+    let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());
 
     let searcher = reader.searcher();
     let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
```
```diff
@@ -9,7 +9,7 @@
 
 use std::sync::Arc;
 
-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
@@ -97,7 +97,7 @@ impl Collector for StatsCollector {
 }
 
 struct StatsSegmentCollector {
-    fast_field_reader: Arc<dyn OptionalColumn<u64>>,
+    fast_field_reader: Arc<dyn Column<u64>>,
     stats: Stats,
 }
 
@@ -105,12 +105,10 @@ impl SegmentCollector for StatsSegmentCollector {
     type Fruit = Option<Stats>;
 
     fn collect(&mut self, doc: u32, _score: Score) {
-        if let Some(value) = self.fast_field_reader.get_val(doc) {
-            let value = value as f64;
-            self.stats.count += 1;
-            self.stats.sum += value;
-            self.stats.squared_sum += value * value;
-        }
+        let value = self.fast_field_reader.get_val(doc) as f64;
+        self.stats.count += 1;
+        self.stats.sum += value;
+        self.stats.squared_sum += value * value;
     }
 
     fn harvest(self) -> <Self as SegmentCollector>::Fruit {
```
```diff
@@ -51,7 +51,7 @@ impl Warmer for DynamicPriceColumn {
         let product_id_reader = segment.fast_fields().u64(self.field)?;
         let product_ids: Vec<ProductId> = segment
             .doc_ids_alive()
-            .flat_map(|doc| product_id_reader.get_val(doc))
+            .map(|doc| product_id_reader.get_val(doc))
             .collect();
         let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
         let mut price_vals: Vec<Price> = Vec::new();
```
```diff
@@ -1,17 +1,20 @@
 [package]
 name = "fastfield_codecs"
-version = "0.2.0"
+version = "0.3.0"
 authors = ["Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
 description = "Fast field codecs used by tantivy"
+documentation = "https://docs.rs/fastfield_codecs/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-common = { version = "0.3", path = "../common/", package = "tantivy-common" }
-tantivy-bitpacker = { version="0.2", path = "../bitpacker/" }
-ownedbytes = { version = "0.3.0", path = "../ownedbytes" }
+common = { version = "0.4", path = "../common/", package = "tantivy-common" }
+tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
+ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
 prettytable-rs = {version="0.9.0", optional= true}
 rand = {version="0.8.3", optional= true}
 fastdivide = "0.4"
```
```diff
@@ -41,7 +41,7 @@ mod tests {
     ) -> Arc<dyn Column<T>> {
         let mut buffer = Vec::new();
         serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
-        open(OwnedBytes::new(buffer)).unwrap().to_full().unwrap()
+        open(OwnedBytes::new(buffer)).unwrap()
     }
 
     #[bench]
@@ -103,7 +103,7 @@ mod tests {
         let iter_gen = || data.iter().cloned();
         serialize_u128(iter_gen, data.len() as u32, &mut out).unwrap();
         let out = OwnedBytes::new(out);
-        open_u128::<u128>(out).unwrap().to_full().unwrap()
+        open_u128::<u128>(out).unwrap()
     }
 
     #[bench]
```
```diff
@@ -456,6 +456,8 @@ impl CompactSpaceDecompressor {
 mod tests {
 
     use super::*;
+    use crate::format_version::read_format_version;
+    use crate::null_index_footer::read_null_index_footer;
     use crate::serialize::U128Header;
     use crate::{open_u128, serialize_u128};
 
@@ -541,7 +543,10 @@ mod tests {
             .unwrap();
 
         let data = OwnedBytes::new(out);
+        let (data, _format_version) = read_format_version(data).unwrap();
+        let (data, _null_index_footer) = read_null_index_footer(data).unwrap();
         test_all(data.clone(), u128_vals);
 
         data
     }
 
@@ -559,6 +564,7 @@ mod tests {
             333u128,
         ];
         let mut data = test_aux_vals(vals);
+
         let _header = U128Header::deserialize(&mut data);
         let decomp = CompactSpaceDecompressor::open(data).unwrap();
         let complete_range = 0..vals.len() as u32;
@@ -731,10 +737,7 @@ mod tests {
         ];
         let mut out = Vec::new();
         serialize_u128(|| vals.iter().cloned(), vals.len() as u32, &mut out).unwrap();
-        let decomp = open_u128::<u128>(OwnedBytes::new(out))
-            .unwrap()
-            .to_full()
-            .unwrap();
+        let decomp = open_u128::<u128>(OwnedBytes::new(out)).unwrap();
         let complete_range = 0..vals.len() as u32;
 
         assert_eq!(
```
fastfield_codecs/src/format_version.rs (new file, 39 lines):

```rust
use std::io;

use common::BinarySerializable;
use ownedbytes::OwnedBytes;

const MAGIC_NUMBER: u16 = 4335u16;
const FASTFIELD_FORMAT_VERSION: u8 = 1;

pub(crate) fn append_format_version(output: &mut impl io::Write) -> io::Result<()> {
    FASTFIELD_FORMAT_VERSION.serialize(output)?;
    MAGIC_NUMBER.serialize(output)?;

    Ok(())
}

pub(crate) fn read_format_version(data: OwnedBytes) -> io::Result<(OwnedBytes, u8)> {
    let (data, magic_number_bytes) = data.rsplit(2);

    let magic_number = u16::deserialize(&mut magic_number_bytes.as_slice())?;
    if magic_number != MAGIC_NUMBER {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("magic number mismatch {} != {}", magic_number, MAGIC_NUMBER),
        ));
    }
    let (data, format_version_bytes) = data.rsplit(1);
    let format_version = u8::deserialize(&mut format_version_bytes.as_slice())?;
    if format_version > FASTFIELD_FORMAT_VERSION {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "Unsupported fastfield format version: {}. Max supported version: {}",
                format_version, FASTFIELD_FORMAT_VERSION
            ),
        ));
    }

    Ok((data, format_version))
}
```
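The trailer layout that `append_format_version`/`read_format_version` implement can be shown with a small standalone sketch that does not depend on the crate: the writer appends `[version: u8][magic: u16]` and the reader peels those bytes off the end of the data. The little-endian byte order below is an assumption made for the illustration; the real code goes through `BinarySerializable` and its `Endianness` alias.

```rust
const MAGIC_NUMBER: u16 = 4335;
const FORMAT_VERSION: u8 = 1;

// Append the version byte, then the 2-byte magic number, at the very end.
fn append_trailer(out: &mut Vec<u8>) {
    out.push(FORMAT_VERSION);
    out.extend_from_slice(&MAGIC_NUMBER.to_le_bytes());
}

// Strip the trailer again, validating the magic number, and return the payload.
fn read_trailer(data: &[u8]) -> Result<(&[u8], u8), String> {
    let (rest, magic) = data.split_at(data.len() - 2);
    if u16::from_le_bytes([magic[0], magic[1]]) != MAGIC_NUMBER {
        return Err("magic number mismatch".to_string());
    }
    let (payload, version) = rest.split_at(rest.len() - 1);
    Ok((payload, version[0]))
}

fn main() {
    let mut data = b"payload".to_vec();
    append_trailer(&mut data);
    let (payload, version) = read_trailer(&data).unwrap();
    assert_eq!(payload, b"payload");
    assert_eq!(version, 1);
}
```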
```diff
@@ -59,11 +59,11 @@ mod tests {
         crate::serialize(VecColumn::from(&vals), &mut buffer, &[codec_type])?;
         let buffer = OwnedBytes::new(buffer);
         let column = crate::open::<i64>(buffer.clone())?;
-        assert_eq!(column.get_val(0), Some(-4000i64));
-        assert_eq!(column.get_val(1), Some(-3000i64));
-        assert_eq!(column.get_val(2), Some(-2000i64));
-        assert_eq!(column.max_value(), Some((num_vals as i64 - 5) * 1000));
-        assert_eq!(column.min_value(), Some(-4000i64));
+        assert_eq!(column.get_val(0), -4000i64);
+        assert_eq!(column.get_val(1), -3000i64);
+        assert_eq!(column.get_val(2), -2000i64);
+        assert_eq!(column.max_value(), (num_vals as i64 - 5) * 1000);
+        assert_eq!(column.min_value(), -4000i64);
 
         // Can't apply gcd
         let mut buffer_without_gcd = Vec::new();
@@ -101,11 +101,11 @@ mod tests {
         crate::serialize(VecColumn::from(&vals), &mut buffer, &[codec_type])?;
         let buffer = OwnedBytes::new(buffer);
         let column = crate::open::<u64>(buffer.clone())?;
-        assert_eq!(column.get_val(0), Some(1000u64));
-        assert_eq!(column.get_val(1), Some(2000u64));
-        assert_eq!(column.get_val(2), Some(3000u64));
-        assert_eq!(column.max_value(), Some(num_vals as u64 * 1000));
-        assert_eq!(column.min_value(), Some(1000u64));
+        assert_eq!(column.get_val(0), 1000u64);
+        assert_eq!(column.get_val(1), 2000u64);
+        assert_eq!(column.get_val(2), 3000u64);
+        assert_eq!(column.max_value(), num_vals as u64 * 1000);
+        assert_eq!(column.min_value(), 1000u64);
 
         // Can't apply gcd
         let mut buffer_without_gcd = Vec::new();
```
```diff
@@ -20,28 +20,33 @@ use std::sync::Arc;
 
 use common::BinarySerializable;
 use compact_space::CompactSpaceDecompressor;
+use format_version::read_format_version;
 use monotonic_mapping::{
     StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
     StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
 };
-pub use optional_column::OptionalColumn;
-use optional_column::ToOptionalColumn;
+use null_index_footer::read_null_index_footer;
 use ownedbytes::OwnedBytes;
 use serialize::{Header, U128Header};
 
 mod bitpacked;
 mod blockwise_linear;
 mod compact_space;
+mod format_version;
 mod line;
 mod linear;
 mod monotonic_mapping;
 mod monotonic_mapping_u128;
-mod optional_column;
+mod null_index;
+mod null_index_footer;
 
 mod column;
 mod gcd;
 mod serialize;
 
+/// TODO: remove when codec is used
+pub use null_index::*;
+
 use self::bitpacked::BitpackedCodec;
 use self::blockwise_linear::BlockwiseLinearCodec;
 pub use self::column::{monotonic_map_column, Column, IterColumn, VecColumn};
```
```diff
@@ -132,23 +137,22 @@ impl U128FastFieldCodecType {
 
 /// Returns the correct codec reader wrapped in the `Arc` for the data.
 pub fn open_u128<Item: MonotonicallyMappableToU128>(
-    mut bytes: OwnedBytes,
-) -> io::Result<Arc<dyn OptionalColumn<Item>>> {
+    bytes: OwnedBytes,
+) -> io::Result<Arc<dyn Column<Item>>> {
+    let (bytes, _format_version) = read_format_version(bytes)?;
+    let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
     let header = U128Header::deserialize(&mut bytes)?;
     assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
     let reader = CompactSpaceDecompressor::open(bytes)?;
     let inverted: StrictlyMonotonicMappingInverter<StrictlyMonotonicMappingToInternal<Item>> =
         StrictlyMonotonicMappingToInternal::<Item>::new().into();
-    Ok(Arc::new(ToOptionalColumn::new(Arc::new(
-        monotonic_map_column(reader, inverted),
-    ))))
+    Ok(Arc::new(monotonic_map_column(reader, inverted)))
 }
 
 /// Returns the correct codec reader wrapped in the `Arc` for the data.
-pub fn open<T: MonotonicallyMappableToU64>(
-    mut bytes: OwnedBytes,
-) -> io::Result<Arc<dyn OptionalColumn<T>>> {
+pub fn open<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Arc<dyn Column<T>>> {
+    let (bytes, _format_version) = read_format_version(bytes)?;
+    let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
     let header = Header::deserialize(&mut bytes)?;
     match header.codec_type {
         FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
@@ -162,7 +166,7 @@ pub fn open<T: MonotonicallyMappableToU64>(
 fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
     bytes: OwnedBytes,
     header: &Header,
-) -> io::Result<Arc<dyn OptionalColumn<Item>>> {
+) -> io::Result<Arc<dyn Column<Item>>> {
     let normalized_header = header.normalized();
     let reader = C::open_from_bytes(bytes, normalized_header)?;
     let min_value = header.min_value;
@@ -170,16 +174,12 @@ fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
         let mapping = StrictlyMonotonicMappingInverter::from(
             StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd.get(), min_value),
         );
-        Ok(Arc::new(ToOptionalColumn::new(Arc::new(
-            monotonic_map_column(reader, mapping),
-        ))))
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
     } else {
         let mapping = StrictlyMonotonicMappingInverter::from(
             StrictlyMonotonicMappingToInternalBaseval::new(min_value),
        );
-        Ok(Arc::new(ToOptionalColumn::new(Arc::new(
-            monotonic_map_column(reader, mapping),
-        ))))
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
     }
 }
 
```
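With `open` returning a plain `Arc<dyn Column<T>>` again, `get_val` yields a value rather than an `Option`. A short usage sketch follows, assembled only from items that appear elsewhere in this diff (`serialize`, `open`, `VecColumn`, `ALL_CODEC_TYPES`, `Column`); it is a sketch of the intended call pattern, not code taken from the repository.

```rust
use std::sync::Arc;

use fastfield_codecs::{open, serialize, Column, VecColumn, ALL_CODEC_TYPES};
use ownedbytes::OwnedBytes;

fn main() -> std::io::Result<()> {
    let vals: Vec<u64> = vec![1000, 2000, 3000];
    let mut buffer: Vec<u8> = Vec::new();
    // Pick the best codec and write the column; after this change the writer
    // also appends the null-index footer and the format-version trailer.
    serialize(VecColumn::from(&vals), &mut buffer, &ALL_CODEC_TYPES)?;

    // `open` strips the trailer and footer again and hands back a `Column`,
    // so `get_val` returns `u64` instead of `Option<u64>`.
    let column: Arc<dyn Column<u64>> = open(OwnedBytes::new(buffer))?;
    assert_eq!(column.get_val(0), 1000);
    assert_eq!(column.min_value(), 1000);
    Ok(())
}
```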
```diff
@@ -250,9 +250,8 @@ mod tests {
         for (doc, orig_val) in data.iter().copied().enumerate() {
             let val = reader.get_val(doc as u32);
             assert_eq!(
-                val,
-                Some(orig_val),
-                "val `{val:?}` does not match orig_val {orig_val:?}, in data set {name}, data \
+                val, orig_val,
+                "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data \
                 `{data:?}`",
             );
         }
```
```diff
@@ -113,10 +113,7 @@ fn bench_ip() {
         (data.len() * 8) as f32 / dataset.len() as f32
     );
 
-    let decompressor = open_u128::<u128>(OwnedBytes::new(data))
-        .unwrap()
-        .to_full()
-        .unwrap();
+    let decompressor = open_u128::<u128>(OwnedBytes::new(data)).unwrap();
     // Sample some ranges
     let mut doc_values = Vec::new();
     for value in dataset.iter().take(1110).skip(1100).cloned() {
```
fastfield_codecs/src/null_index/dense.rs (new file, 453 lines):

```rust
use std::convert::TryInto;
use std::io::{self, Write};

use common::BinarySerializable;
use itertools::Itertools;
use ownedbytes::OwnedBytes;

use super::{get_bit_at, set_bit_at};

/// For the `DenseCodec`, `data` which contains the encoded blocks.
/// Each block consists of [u8; 12]. The first 8 bytes is a bitvec for 64 elements.
/// The last 4 bytes are the offset, the number of set bits so far.
///
/// When translating the original index to a dense index, the correct block can be computed
/// directly `orig_idx/64`. Inside the block the position is `orig_idx%64`.
///
/// When translating a dense index to the original index, we can use the offset to find the correct
/// block. Direct computation is not possible, but we can employ a linear or binary search.
pub struct DenseCodec {
    // data consists of blocks of 64 bits.
    //
    // The format is &[(u64, u32)]
    // u64 is the bitvec
    // u32 is the offset of the block, the number of set bits so far.
    //
    // At the end one block is appended, to store the number of values in the index in offset.
    data: OwnedBytes,
}
const ELEMENTS_PER_BLOCK: u32 = 64;
const BLOCK_BITVEC_SIZE: usize = 8;
const BLOCK_OFFSET_SIZE: usize = 4;
const SERIALIZED_BLOCK_SIZE: usize = BLOCK_BITVEC_SIZE + BLOCK_OFFSET_SIZE;

#[inline]
fn count_ones(bitvec: u64, pos_in_bitvec: u32) -> u32 {
    if pos_in_bitvec == 63 {
        bitvec.count_ones()
    } else {
        let mask = (1u64 << (pos_in_bitvec + 1)) - 1;
        let masked_bitvec = bitvec & mask;
        masked_bitvec.count_ones()
    }
}

#[derive(Clone, Copy)]
struct DenseIndexBlock {
    bitvec: u64,
    offset: u32,
}

impl From<[u8; SERIALIZED_BLOCK_SIZE]> for DenseIndexBlock {
    fn from(data: [u8; SERIALIZED_BLOCK_SIZE]) -> Self {
        let bitvec = u64::from_le_bytes(data[..BLOCK_BITVEC_SIZE].try_into().unwrap());
        let offset = u32::from_le_bytes(data[BLOCK_BITVEC_SIZE..].try_into().unwrap());
        Self { bitvec, offset }
    }
}

impl DenseCodec {
    /// Open the DenseCodec from OwnedBytes
    pub fn open(data: OwnedBytes) -> Self {
        Self { data }
    }
    #[inline]
    /// Check if value at position is not null.
    pub fn exists(&self, idx: u32) -> bool {
        let block_pos = idx / ELEMENTS_PER_BLOCK;
        let bitvec = self.dense_index_block(block_pos).bitvec;

        let pos_in_bitvec = idx % ELEMENTS_PER_BLOCK;

        get_bit_at(bitvec, pos_in_bitvec)
    }
    #[inline]
    fn dense_index_block(&self, block_pos: u32) -> DenseIndexBlock {
        dense_index_block(&self.data, block_pos)
    }

    /// Return the number of non-null values in an index
    pub fn num_non_null_vals(&self) -> u32 {
        let last_block = (self.data.len() / SERIALIZED_BLOCK_SIZE) - 1;
        self.dense_index_block(last_block as u32).offset
    }

    #[inline]
    /// Translate from the original index to the codec index.
    pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> {
        let block_pos = idx / ELEMENTS_PER_BLOCK;
        let index_block = self.dense_index_block(block_pos);
        let pos_in_block_bit_vec = idx % ELEMENTS_PER_BLOCK;
        let ones_in_block = count_ones(index_block.bitvec, pos_in_block_bit_vec);
        if get_bit_at(index_block.bitvec, pos_in_block_bit_vec) {
            // -1 is ok, since idx does exist, so there's at least one
            Some(index_block.offset + ones_in_block - 1)
        } else {
            None
        }
    }

    /// Translate positions from the codec index to the original index.
    ///
    /// # Panics
    ///
    /// May panic if any `idx` is greater than the column length.
    pub fn translate_codec_idx_to_original_idx<'a>(
        &'a self,
        iter: impl Iterator<Item = u32> + 'a,
    ) -> impl Iterator<Item = u32> + 'a {
        let mut block_pos = 0u32;
        iter.map(move |dense_idx| {
            // update block_pos to limit search scope
            block_pos = find_block(dense_idx, block_pos, &self.data);
            let index_block = self.dense_index_block(block_pos);

            // The next offset is higher than dense_idx and therefore:
            // dense_idx <= offset + num_set_bits in block
            let mut num_set_bits = 0;
            for idx_in_bitvec in 0..ELEMENTS_PER_BLOCK {
                if get_bit_at(index_block.bitvec, idx_in_bitvec) {
                    num_set_bits += 1;
                }
                if num_set_bits == (dense_idx - index_block.offset + 1) {
                    let orig_idx = block_pos * ELEMENTS_PER_BLOCK + idx_in_bitvec as u32;
                    return orig_idx;
                }
            }
            panic!("Internal Error: Offset calculation in dense idx seems to be wrong.");
        })
    }
}

#[inline]
fn dense_index_block(data: &[u8], block_pos: u32) -> DenseIndexBlock {
    let data_start_pos = block_pos as usize * SERIALIZED_BLOCK_SIZE;
    let block_data: [u8; SERIALIZED_BLOCK_SIZE] = data[data_start_pos..][..SERIALIZED_BLOCK_SIZE]
        .try_into()
        .unwrap();
    block_data.into()
}

#[inline]
/// Finds the block position containing the dense_idx.
///
/// # Correctness
/// dense_idx needs to be smaller than the number of values in the index
///
/// The last offset number is equal to the number of values in the index.
fn find_block(dense_idx: u32, mut block_pos: u32, data: &[u8]) -> u32 {
    loop {
        let offset = dense_index_block(data, block_pos).offset;
        if offset > dense_idx {
            return block_pos - 1;
        }
        block_pos += 1;
    }
}

/// Iterator over all values, true if set, otherwise false
pub fn serialize_dense_codec(
    iter: impl Iterator<Item = bool>,
    mut out: impl Write,
) -> io::Result<()> {
    let mut offset: u32 = 0;

    for chunk in &iter.chunks(ELEMENTS_PER_BLOCK as usize) {
        let mut block: u64 = 0;
        for (pos, is_bit_set) in chunk.enumerate() {
            if is_bit_set {
                set_bit_at(&mut block, pos as u64);
            }
        }

        block.serialize(&mut out)?;
        offset.serialize(&mut out)?;

        offset += block.count_ones() as u32;
    }
    // Add sentinal block for the offset
    let block: u64 = 0;
    block.serialize(&mut out)?;
    offset.serialize(&mut out)?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use proptest::prelude::{any, prop, *};
    use proptest::strategy::Strategy;
    use proptest::{prop_oneof, proptest};

    use super::*;

    fn random_bitvec() -> BoxedStrategy<Vec<bool>> {
        prop_oneof![
            1 => prop::collection::vec(proptest::bool::weighted(1.0), 0..100),
            1 => prop::collection::vec(proptest::bool::weighted(1.0), 0..64),
            1 => prop::collection::vec(proptest::bool::weighted(0.0), 0..100),
            1 => prop::collection::vec(proptest::bool::weighted(0.0), 0..64),
            8 => vec![any::<bool>()],
            2 => prop::collection::vec(any::<bool>(), 0..50),
        ]
        .boxed()
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(500))]
        #[test]
        fn test_with_random_bitvecs(bitvec1 in random_bitvec(), bitvec2 in random_bitvec(), bitvec3 in random_bitvec()) {
            let mut bitvec = Vec::new();
            bitvec.extend_from_slice(&bitvec1);
            bitvec.extend_from_slice(&bitvec2);
            bitvec.extend_from_slice(&bitvec3);
            test_null_index(bitvec);
        }
    }

    #[test]
    fn dense_codec_test_one_block_false() {
        let mut iter = vec![false; 64];
        iter.push(true);
        test_null_index(iter);
    }

    fn test_null_index(data: Vec<bool>) {
        let mut out = vec![];

        serialize_dense_codec(data.iter().cloned(), &mut out).unwrap();
        let null_index = DenseCodec::open(OwnedBytes::new(out));

        let orig_idx_with_value: Vec<u32> = data
            .iter()
            .enumerate()
            .filter(|(_pos, val)| **val)
            .map(|(pos, _val)| pos as u32)
            .collect();

        assert_eq!(
            null_index
                .translate_codec_idx_to_original_idx(0..orig_idx_with_value.len() as u32)
                .collect_vec(),
            orig_idx_with_value
        );

        for (dense_idx, orig_idx) in orig_idx_with_value.iter().enumerate() {
            assert_eq!(
                null_index.translate_to_codec_idx(*orig_idx),
                Some(dense_idx as u32)
            );
        }

        for (pos, value) in data.iter().enumerate() {
            assert_eq!(null_index.exists(pos as u32), *value);
        }
    }

    #[test]
    fn dense_codec_test_translation() {
        let mut out = vec![];

        let iter = ([true, false, true, false]).iter().cloned();
        serialize_dense_codec(iter, &mut out).unwrap();
        let null_index = DenseCodec::open(OwnedBytes::new(out));

        assert_eq!(
            null_index
                .translate_codec_idx_to_original_idx(0..2)
                .collect_vec(),
            vec![0, 2]
        );
    }

    #[test]
    fn dense_codec_translate() {
        let mut out = vec![];

        let iter = ([true, false, true, false]).iter().cloned();
        serialize_dense_codec(iter, &mut out).unwrap();
        let null_index = DenseCodec::open(OwnedBytes::new(out));
        assert_eq!(null_index.translate_to_codec_idx(0), Some(0));
        assert_eq!(null_index.translate_to_codec_idx(2), Some(1));
    }

    #[test]
    fn dense_codec_test_small() {
        let mut out = vec![];

        let iter = ([true, false, true, false]).iter().cloned();
        serialize_dense_codec(iter, &mut out).unwrap();
        let null_index = DenseCodec::open(OwnedBytes::new(out));
        assert!(null_index.exists(0));
        assert!(!null_index.exists(1));
        assert!(null_index.exists(2));
        assert!(!null_index.exists(3));
    }

    #[test]
    fn dense_codec_test_large() {
        let mut docs = vec![];
        docs.extend((0..1000).map(|_idx| false));
        docs.extend((0..=1000).map(|_idx| true));

        let iter = docs.iter().cloned();
        let mut out = vec![];
        serialize_dense_codec(iter, &mut out).unwrap();
        let null_index = DenseCodec::open(OwnedBytes::new(out));
        assert!(!null_index.exists(0));
        assert!(!null_index.exists(100));
        assert!(!null_index.exists(999));
        assert!(null_index.exists(1000));
        assert!(null_index.exists(1999));
        assert!(null_index.exists(2000));
        assert!(!null_index.exists(2001));
    }

    #[test]
    fn test_count_ones() {
        let mut block = 0;
        set_bit_at(&mut block, 0);
        set_bit_at(&mut block, 2);

        assert_eq!(count_ones(block, 0), 1);
        assert_eq!(count_ones(block, 1), 1);
        assert_eq!(count_ones(block, 2), 2);
    }
}

#[cfg(all(test, feature = "unstable"))]
mod bench {

    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use test::Bencher;

    use super::*;

    const TOTAL_NUM_VALUES: u32 = 1_000_000;
    fn gen_bools(fill_ratio: f64) -> DenseCodec {
        let mut out = Vec::new();
        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
        let bools: Vec<_> = (0..TOTAL_NUM_VALUES)
            .map(|_| rng.gen_bool(fill_ratio))
            .collect();
        serialize_dense_codec(bools.into_iter(), &mut out).unwrap();

        let codec = DenseCodec::open(OwnedBytes::new(out));
        codec
    }

    fn random_range_iterator(start: u32, end: u32, step_size: u32) -> impl Iterator<Item = u32> {
        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
        let mut current = start;
        std::iter::from_fn(move || {
            current += rng.gen_range(1..step_size + 1);
            if current >= end {
                None
            } else {
                Some(current)
            }
        })
    }

    fn walk_over_data(codec: &DenseCodec, max_step_size: u32) -> Option<u32> {
        walk_over_data_from_positions(
            codec,
            random_range_iterator(0, TOTAL_NUM_VALUES, max_step_size),
        )
    }

    fn walk_over_data_from_positions(
        codec: &DenseCodec,
        positions: impl Iterator<Item = u32>,
    ) -> Option<u32> {
        let mut dense_idx: Option<u32> = None;
        for idx in positions {
            dense_idx = dense_idx.or(codec.translate_to_codec_idx(idx));
        }
        dense_idx
    }

    #[bench]
    fn bench_dense_codec_translate_orig_to_dense_90percent_filled_random_stride(
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(0.9f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }

    #[bench]
    fn bench_dense_codec_translate_orig_to_dense_50percent_filled_random_stride(
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(0.5f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }

    #[bench]
    fn bench_dense_codec_translate_orig_to_dense_full_scan_10percent(bench: &mut Bencher) {
        let codec = gen_bools(0.1f64);
        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
    }

    #[bench]
    fn bench_dense_codec_translate_orig_to_dense_full_scan_90percent(bench: &mut Bencher) {
        let codec = gen_bools(0.9f64);
        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
    }

    #[bench]
    fn bench_dense_codec_translate_orig_to_dense_10percent_filled_random_stride(
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(0.1f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }

    #[bench]
    fn bench_dense_codec_translate_dense_to_orig_90percent_filled_random_stride_big_step(
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(0.9f64);
        let num_vals = codec.num_non_null_vals();
        bench.iter(|| {
            codec
                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 50_000))
                .last()
        });
    }

    #[bench]
    fn bench_dense_codec_translate_dense_to_orig_90percent_filled_random_stride(
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(0.9f64);
        let num_vals = codec.num_non_null_vals();
        bench.iter(|| {
            codec
                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 100))
                .last()
        });
    }

    #[bench]
    fn bench_dense_codec_translate_dense_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
        let codec = gen_bools(0.9f64);
        let num_vals = codec.num_non_null_vals();
        bench.iter(|| {
            codec
                .translate_codec_idx_to_original_idx(0..num_vals)
                .last()
        });
    }
}
```
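The block arithmetic the `DenseCodec` relies on is a classic rank/select scheme. The following self-contained sketch (no tantivy types; blocks are modelled as `(bitvec, offset)` tuples, as in the struct comment above) reproduces the `translate_to_codec_idx` and `find_block` logic so the "-1" and "offset" bookkeeping can be checked in isolation.

```rust
// Each block stores a 64-bit bitvec plus the number of ones in all previous
// blocks; a trailing sentinel block carries the total count, as in
// `serialize_dense_codec`.
fn rank(blocks: &[(u64, u32)], orig_idx: u32) -> Option<u32> {
    let (bitvec, offset) = blocks[(orig_idx / 64) as usize];
    let pos = orig_idx % 64;
    if bitvec & (1 << pos) == 0 {
        return None; // position is null
    }
    // Ones up to and including `pos` (mask wraps correctly for pos == 63).
    let ones_up_to_pos = (bitvec & ((1u64 << pos) << 1).wrapping_sub(1)).count_ones();
    Some(offset + ones_up_to_pos - 1)
}

fn select(blocks: &[(u64, u32)], dense_idx: u32) -> u32 {
    // Linear scan for the first block whose offset exceeds dense_idx,
    // mirroring `find_block`.
    let block_pos = blocks
        .iter()
        .position(|(_, offset)| *offset > dense_idx)
        .unwrap()
        - 1;
    let (bitvec, offset) = blocks[block_pos];
    let mut remaining = dense_idx - offset;
    for bit in 0..64u32 {
        if bitvec & (1 << bit) != 0 {
            if remaining == 0 {
                return block_pos as u32 * 64 + bit;
            }
            remaining -= 1;
        }
    }
    unreachable!("offset bookkeeping is inconsistent");
}

fn main() {
    // Bits 0 and 2 set in block 0, bit 1 set in block 1, sentinel total = 3.
    let blocks = [(0b101u64, 0u32), (0b010u64, 2u32), (0u64, 3u32)];
    assert_eq!(rank(&blocks, 2), Some(1)); // original idx 2 is the 2nd value
    assert_eq!(rank(&blocks, 1), None);    // original idx 1 is null
    assert_eq!(select(&blocks, 2), 65);    // 3rd value lives at original idx 65
}
```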
fastfield_codecs/src/null_index/mod.rs (new file, 13 lines):

```rust
pub use dense::{serialize_dense_codec, DenseCodec};

mod dense;

#[inline]
fn get_bit_at(input: u64, n: u32) -> bool {
    input & (1 << n) != 0
}

#[inline]
fn set_bit_at(input: &mut u64, n: u64) {
    *input |= 1 << n;
}
```
fastfield_codecs/src/null_index_footer.rs (new file, 144 lines):

```rust
use std::io::{self, Write};
use std::ops::Range;

use common::{BinarySerializable, CountingWriter, VInt};
use ownedbytes::OwnedBytes;

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum FastFieldCardinality {
    Single = 1,
}

impl BinarySerializable for FastFieldCardinality {
    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
        self.to_code().serialize(wrt)
    }

    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let code = u8::deserialize(reader)?;
        let codec_type: Self = Self::from_code(code)
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
        Ok(codec_type)
    }
}

impl FastFieldCardinality {
    pub(crate) fn to_code(self) -> u8 {
        self as u8
    }

    pub(crate) fn from_code(code: u8) -> Option<Self> {
        match code {
            1 => Some(Self::Single),
            _ => None,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NullIndexCodec {
    Full = 1,
}

impl BinarySerializable for NullIndexCodec {
    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
        self.to_code().serialize(wrt)
    }

    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let code = u8::deserialize(reader)?;
        let codec_type: Self = Self::from_code(code)
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
        Ok(codec_type)
    }
}

impl NullIndexCodec {
    pub(crate) fn to_code(self) -> u8 {
        self as u8
    }

    pub(crate) fn from_code(code: u8) -> Option<Self> {
        match code {
            1 => Some(Self::Full),
            _ => None,
        }
    }
}

#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) struct NullIndexFooter {
    pub(crate) cardinality: FastFieldCardinality,
    pub(crate) null_index_codec: NullIndexCodec,
    // Unused for NullIndexCodec::Full
    pub(crate) null_index_byte_range: Range<u64>,
}

impl BinarySerializable for NullIndexFooter {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        self.cardinality.serialize(writer)?;
        self.null_index_codec.serialize(writer)?;
        VInt(self.null_index_byte_range.start).serialize(writer)?;
        VInt(self.null_index_byte_range.end - self.null_index_byte_range.start)
            .serialize(writer)?;
        Ok(())
    }

    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let cardinality = FastFieldCardinality::deserialize(reader)?;
        let null_index_codec = NullIndexCodec::deserialize(reader)?;
        let null_index_byte_range_start = VInt::deserialize(reader)?.0;
        let null_index_byte_range_end = VInt::deserialize(reader)?.0 + null_index_byte_range_start;
        Ok(Self {
            cardinality,
            null_index_codec,
            null_index_byte_range: null_index_byte_range_start..null_index_byte_range_end,
        })
    }
}

pub(crate) fn append_null_index_footer(
    output: &mut impl io::Write,
    null_index_footer: NullIndexFooter,
) -> io::Result<()> {
    let mut counting_write = CountingWriter::wrap(output);
    null_index_footer.serialize(&mut counting_write)?;
    let footer_payload_len = counting_write.written_bytes();
    BinarySerializable::serialize(&(footer_payload_len as u16), &mut counting_write)?;

    Ok(())
}

pub(crate) fn read_null_index_footer(
    data: OwnedBytes,
) -> io::Result<(OwnedBytes, NullIndexFooter)> {
    let (data, null_footer_length_bytes) = data.rsplit(2);

    let footer_length = u16::deserialize(&mut null_footer_length_bytes.as_slice())?;
    let (data, null_index_footer_bytes) = data.rsplit(footer_length as usize);
    let null_index_footer = NullIndexFooter::deserialize(&mut null_index_footer_bytes.as_ref())?;

    Ok((data, null_index_footer))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn null_index_footer_deser_test() {
        let null_index_footer = NullIndexFooter {
            cardinality: FastFieldCardinality::Single,
            null_index_codec: NullIndexCodec::Full,
            null_index_byte_range: 100..120,
        };

        let mut out = vec![];
        null_index_footer.serialize(&mut out).unwrap();

        assert_eq!(
            null_index_footer,
            NullIndexFooter::deserialize(&mut &out[..]).unwrap()
        );
    }
}
```
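The footer above is length-suffixed: `append_null_index_footer` writes the footer bytes and then their length as a 2-byte integer, so `read_null_index_footer` can locate the footer by working backwards from the end of the data without knowing its size up front. A minimal standalone sketch of that pattern (little-endian length is an assumption for the illustration; the real code uses `BinarySerializable` for the `u16`):

```rust
// Write `footer` followed by its length so a reader can peel it off the end.
fn append_footer(data: &mut Vec<u8>, footer: &[u8]) {
    data.extend_from_slice(footer);
    data.extend_from_slice(&(footer.len() as u16).to_le_bytes());
}

// Split `data` into (payload, footer) by reading the trailing length first.
fn split_footer(data: &[u8]) -> (&[u8], &[u8]) {
    let (rest, len_bytes) = data.split_at(data.len() - 2);
    let footer_len = u16::from_le_bytes([len_bytes[0], len_bytes[1]]) as usize;
    rest.split_at(rest.len() - footer_len)
}

fn main() {
    let mut data = b"column bytes".to_vec();
    append_footer(&mut data, b"footer");
    let (payload, footer) = split_footer(&data);
    assert_eq!(payload, b"column bytes");
    assert_eq!(footer, b"footer");
}
```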
Deleted file (119 lines) — the `optional_column` module with the `OptionalColumn` trait and its `ToOptionalColumn` wrapper:

```rust
use std::ops::{Range, RangeInclusive};
use std::sync::Arc;

use crate::Column;

/// `OptionalColumn` provides columnar access on a field.
pub trait OptionalColumn<T: PartialOrd = u64>: Send + Sync {
    /// Return the value associated with the given idx.
    ///
    /// This accessor should return as fast as possible.
    ///
    /// # Panics
    ///
    /// May panic if `idx` is greater than the column length.
    fn get_val(&self, idx: u32) -> Option<T>;

    /// Fills an output buffer with the fast field values
    /// associated with the `DocId` going from
    /// `start` to `start + output.len()`.
    ///
    /// # Panics
    ///
    /// Must panic if `start + output.len()` is greater than
    /// the segment's `maxdoc`.
    fn get_range(&self, start: u64, output: &mut [Option<T>]) {
        for (out, idx) in output.iter_mut().zip(start..) {
            *out = self.get_val(idx as u32);
        }
    }

    /// Return the positions of values which are in the provided range.
    fn get_docids_for_value_range(
        &self,
        value_range: RangeInclusive<T>,
        doc_id_range: Range<u32>,
        positions: &mut Vec<u32>,
    ) {
        let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());

        for idx in doc_id_range {
            let val = self.get_val(idx);
            if let Some(val) = val {
                if value_range.contains(&val) {
                    positions.push(idx);
                }
            }
        }
    }

    /// Returns the minimum value for this fast field.
    ///
    /// This min_value may not be exact.
    /// For instance, the min value does not take in account of possible
    /// deleted document. All values are however guaranteed to be higher than
    /// `.min_value()`.
    fn min_value(&self) -> Option<T>;

    /// Returns the maximum value for this fast field.
    ///
    /// This max_value may not be exact.
    /// For instance, the max value does not take in account of possible
    /// deleted document. All values are however guaranteed to be higher than
    /// `.max_value()`.
    fn max_value(&self) -> Option<T>;

    /// The number of values including `None` in the column.
    fn num_vals(&self) -> u32;

    /// Returns a iterator over the data
    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Option<T>> + 'a> {
        Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
    }

    /// return full column if all values are set and is not empty
    fn to_full(&self) -> Option<Arc<dyn Column<T>>> {
        None
    }
}

/// Temporary wrapper to migrate to optional column
pub(crate) struct ToOptionalColumn<T> {
    column: Arc<dyn Column<T>>,
}

impl<T: PartialOrd> ToOptionalColumn<T> {
    pub(crate) fn new(column: Arc<dyn Column<T>>) -> Self {
        Self { column }
    }
}

impl<T: PartialOrd> OptionalColumn<T> for ToOptionalColumn<T> {
    #[inline]
    fn get_val(&self, idx: u32) -> Option<T> {
        let val = self.column.get_val(idx);
        Some(val)
    }

    fn min_value(&self) -> Option<T> {
        let min_value = self.column.min_value();
        Some(min_value)
    }

    fn max_value(&self) -> Option<T> {
        let max_value = self.column.max_value();
        Some(max_value)
    }

    fn num_vals(&self) -> u32 {
        self.column.num_vals()
    }

    fn iter(&self) -> Box<dyn Iterator<Item = Option<T>> + '_> {
        Box::new(self.column.iter().map(|el| Some(el)))
    }
    /// return full column if all values are set and is not empty
    fn to_full(&self) -> Option<Arc<dyn Column<T>>> {
        Some(self.column.clone())
    }
}
```
@@ -28,11 +28,15 @@ use ownedbytes::OwnedBytes;
 use crate::bitpacked::BitpackedCodec;
 use crate::blockwise_linear::BlockwiseLinearCodec;
 use crate::compact_space::CompactSpaceCompressor;
+use crate::format_version::append_format_version;
 use crate::linear::LinearCodec;
 use crate::monotonic_mapping::{
     StrictlyMonotonicFn, StrictlyMonotonicMappingToInternal,
     StrictlyMonotonicMappingToInternalGCDBaseval,
 };
+use crate::null_index_footer::{
+    append_null_index_footer, FastFieldCardinality, NullIndexCodec, NullIndexFooter,
+};
 use crate::{
     monotonic_map_column, Column, FastFieldCodec, FastFieldCodecType, MonotonicallyMappableToU64,
     U128FastFieldCodecType, VecColumn, ALL_CODEC_TYPES,
@@ -198,6 +202,14 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
     let compressor = CompactSpaceCompressor::train_from(iter_gen(), num_vals);
     compressor.compress_into(iter_gen(), output).unwrap();

+    let null_index_footer = NullIndexFooter {
+        cardinality: FastFieldCardinality::Single,
+        null_index_codec: NullIndexCodec::Full,
+        null_index_byte_range: 0..0,
+    };
+    append_null_index_footer(output, null_index_footer)?;
+    append_format_version(output)?;
+
     Ok(())
 }

@@ -221,6 +233,15 @@ pub fn serialize<T: MonotonicallyMappableToU64>(
     let normalized_column = header.normalize_column(column);
     assert_eq!(normalized_column.min_value(), 0u64);
     serialize_given_codec(normalized_column, header.codec_type, output)?;
+
+    let null_index_footer = NullIndexFooter {
+        cardinality: FastFieldCardinality::Single,
+        null_index_codec: NullIndexCodec::Full,
+        null_index_byte_range: 0..0,
+    };
+    append_null_index_footer(output, null_index_footer)?;
+    append_format_version(output)?;
+
     Ok(())
 }

@@ -278,10 +299,7 @@ pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
 ) -> Arc<dyn Column<T>> {
     let mut buffer = Vec::new();
     super::serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
-    super::open(OwnedBytes::new(buffer))
-        .unwrap()
-        .to_full()
-        .unwrap()
+    super::open(OwnedBytes::new(buffer)).unwrap()
 }

 #[cfg(test)]
@@ -313,7 +331,7 @@ mod tests {
         let col = VecColumn::from(&[false, true][..]);
         serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
         // 5 bytes of header, 1 byte of value, 7 bytes of padding.
-        assert_eq!(buffer.len(), 5 + 8);
+        assert_eq!(buffer.len(), 3 + 5 + 8 + 4 + 2);
     }

     #[test]
@@ -322,7 +340,7 @@ mod tests {
         let col = VecColumn::from(&[true][..]);
         serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
         // 5 bytes of header, 0 bytes of value, 7 bytes of padding.
-        assert_eq!(buffer.len(), 5 + 7);
+        assert_eq!(buffer.len(), 3 + 5 + 7 + 4 + 2);
     }

     #[test]
@@ -332,6 +350,6 @@ mod tests {
         let col = VecColumn::from(&vals[..]);
         serialize(col, &mut buffer, &[FastFieldCodecType::Bitpacked]).unwrap();
         // Values are stored over 3 bits.
-        assert_eq!(buffer.len(), 7 + (3 * 80 / 8) + 7);
+        assert_eq!(buffer.len(), 3 + 7 + (3 * 80 / 8) + 7 + 4 + 2);
     }
 }
@@ -1,10 +1,14 @@
 [package]
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 name = "ownedbytes"
-version = "0.3.0"
+version = "0.4.0"
 edition = "2021"
 description = "Expose data as static slice"
 license = "MIT"
+documentation = "https://docs.rs/ownedbytes/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
 [dependencies]
@@ -80,6 +80,21 @@ impl OwnedBytes {
         (left, right)
     }

+    /// Splits the OwnedBytes into two OwnedBytes `(left, right)`.
+    ///
+    /// Right will hold `split_len` bytes.
+    ///
+    /// This operation is cheap and does not require to copy any memory.
+    /// On the other hand, both `left` and `right` retain a handle over
+    /// the entire slice of memory. In other words, the memory will only
+    /// be released when both left and right are dropped.
+    #[inline]
+    #[must_use]
+    pub fn rsplit(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
+        let data_len = self.data.len();
+        self.split(data_len - split_len)
+    }
+
     /// Splits the right part of the `OwnedBytes` at the given offset.
     ///
     /// `self` is truncated to `split_len`, left with the remaining bytes.
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.18.0"
+version = "0.19.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -4,14 +4,14 @@ use std::rc::Rc;
 use std::sync::atomic::AtomicU32;
 use std::sync::Arc;

-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;

 use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
 use super::bucket::{HistogramAggregation, RangeAggregation, TermsAggregation};
 use super::metric::{AverageAggregation, StatsAggregation};
 use super::segment_agg_result::BucketCount;
 use super::VecWithNames;
-use crate::fastfield::{type_and_cardinality, FastType, MultiValuedFastFieldReader};
+use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader};
 use crate::schema::{Cardinality, Type};
 use crate::{InvertedIndexReader, SegmentReader, TantivyError};

@@ -37,16 +37,16 @@ impl AggregationsWithAccessor {
 #[derive(Clone)]
 pub(crate) enum FastFieldAccessor {
     Multi(MultiValuedFastFieldReader<u64>),
-    Single(Arc<dyn OptionalColumn<u64>>),
+    Single(Arc<dyn Column<u64>>),
 }
 impl FastFieldAccessor {
-    pub fn as_single(&self) -> Option<&dyn OptionalColumn<u64>> {
+    pub fn as_single(&self) -> Option<&dyn Column<u64>> {
         match self {
             FastFieldAccessor::Multi(_) => None,
             FastFieldAccessor::Single(reader) => Some(&**reader),
         }
     }
-    pub fn into_single(self) -> Option<Arc<dyn OptionalColumn<u64>>> {
+    pub fn into_single(self) -> Option<Arc<dyn Column<u64>>> {
         match self {
             FastFieldAccessor::Multi(_) => None,
             FastFieldAccessor::Single(reader) => Some(reader),
@@ -124,7 +124,7 @@ impl BucketAggregationWithAccessor {
 pub struct MetricAggregationWithAccessor {
     pub metric: MetricAggregation,
     pub field_type: Type,
-    pub accessor: Arc<dyn OptionalColumn>,
+    pub accessor: Arc<dyn Column>,
 }

 impl MetricAggregationWithAccessor {
@@ -194,13 +194,7 @@ fn get_ff_reader_and_validate(
         .ok_or_else(|| TantivyError::FieldNotFound(field_name.to_string()))?;
     let field_type = reader.schema().get_field_entry(field).field_type();

-    if let Some((ff_type, field_cardinality)) = type_and_cardinality(field_type) {
-        if ff_type == FastType::Date {
-            return Err(TantivyError::InvalidArgument(
-                "Unsupported field type date in aggregation".to_string(),
-            ));
-        }
-
+    if let Some((_ff_type, field_cardinality)) = type_and_cardinality(field_type) {
         if cardinality != field_cardinality {
             return Err(TantivyError::InvalidArgument(format!(
                 "Invalid field cardinality on field {} expected {:?}, but got {:?}",
@@ -12,6 +12,7 @@ use super::bucket::GetDocCount;
 use super::intermediate_agg_result::{IntermediateBucketResult, IntermediateMetricResult};
 use super::metric::{SingleMetricResult, Stats};
 use super::Key;
+use crate::schema::Schema;
 use crate::TantivyError;

 #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
@@ -129,9 +130,12 @@ pub enum BucketResult {
 }

 impl BucketResult {
-    pub(crate) fn empty_from_req(req: &BucketAggregationInternal) -> crate::Result<Self> {
+    pub(crate) fn empty_from_req(
+        req: &BucketAggregationInternal,
+        schema: &Schema,
+    ) -> crate::Result<Self> {
         let empty_bucket = IntermediateBucketResult::empty_from_req(&req.bucket_agg);
-        empty_bucket.into_final_bucket_result(req)
+        empty_bucket.into_final_bucket_result(req, schema)
     }
 }

@@ -174,6 +178,9 @@ pub enum BucketEntries<T> {
 /// ```
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct BucketEntry {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    /// The string representation of the bucket.
+    pub key_as_string: Option<String>,
     /// The identifier of the bucket.
     pub key: Key,
     /// Number of documents in the bucket.
@@ -238,4 +245,10 @@ pub struct RangeBucketEntry {
     /// The to range of the bucket. Equals `f64::MAX` when `None`.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub to: Option<f64>,
+    /// The optional string representation for the `from` range.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub from_as_string: Option<String>,
+    /// The optional string representation for the `to` range.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub to_as_string: Option<String>,
 }
@@ -1,7 +1,7 @@
 use std::cmp::Ordering;
 use std::fmt::Display;

-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};

@@ -10,12 +10,12 @@ use crate::aggregation::agg_req_with_accessor::{
     AggregationsWithAccessor, BucketAggregationWithAccessor,
 };
 use crate::aggregation::agg_result::BucketEntry;
-use crate::aggregation::f64_from_fastfield_u64;
 use crate::aggregation::intermediate_agg_result::{
     IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry,
 };
 use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector;
-use crate::schema::Type;
+use crate::aggregation::{f64_from_fastfield_u64, format_date};
+use crate::schema::{Schema, Type};
 use crate::{DocId, TantivyError};

 /// Histogram is a bucket aggregation, where buckets are created dynamically for given `interval`.
@@ -263,17 +263,13 @@ impl SegmentHistogramCollector {
         req: &HistogramAggregation,
         sub_aggregation: &AggregationsWithAccessor,
         field_type: Type,
-        accessor: &dyn OptionalColumn<u64>,
+        accessor: &dyn Column<u64>,
     ) -> crate::Result<Self> {
         req.validate()?;
-        let min_max_u64 = accessor.min_value().zip(accessor.max_value());
-        let min_max_f64 = min_max_u64.map(|(min, max)| {
-            let min = f64_from_fastfield_u64(min, &field_type);
-            let max = f64_from_fastfield_u64(max, &field_type);
-            (min, max)
-        });
+        let min = f64_from_fastfield_u64(accessor.min_value(), &field_type);
+        let max = f64_from_fastfield_u64(accessor.max_value(), &field_type);

-        let (min, max) = get_req_min_max(req, min_max_f64);
+        let (min, max) = get_req_min_max(req, Some((min, max)));

         // We compute and generate the buckets range (min, max) based on the request and the min
         // max in the fast field, but this is likely not ideal when this is a subbucket, where many
@@ -335,58 +331,47 @@ impl SegmentHistogramCollector {
             .expect("unexpected fast field cardinatility");
         let mut iter = doc.chunks_exact(4);
         for docs in iter.by_ref() {
-            if let Some(val) = accessor.get_val(docs[0]) {
-                let val = self.f64_from_fastfield_u64(val);
-                let bucket_pos = get_bucket_num(val);
-                self.increment_bucket_if_in_bounds(
-                    val,
-                    &bounds,
-                    bucket_pos,
-                    docs[0],
-                    &bucket_with_accessor.sub_aggregation,
-                )?;
-            }
-
-            if let Some(val) = accessor.get_val(docs[1]) {
-                let val = self.f64_from_fastfield_u64(val);
-                let bucket_pos = get_bucket_num(val);
-                self.increment_bucket_if_in_bounds(
-                    val,
-                    &bounds,
-                    bucket_pos,
-                    docs[1],
-                    &bucket_with_accessor.sub_aggregation,
-                )?;
-            }
-
-            if let Some(val) = accessor.get_val(docs[2]) {
-                let val = self.f64_from_fastfield_u64(val);
-                let bucket_pos = get_bucket_num(val);
-                self.increment_bucket_if_in_bounds(
-                    val,
-                    &bounds,
-                    bucket_pos,
-                    docs[2],
-                    &bucket_with_accessor.sub_aggregation,
-                )?;
-            }
-
-            if let Some(val) = accessor.get_val(docs[3]) {
-                let val = self.f64_from_fastfield_u64(val);
-                let bucket_pos = get_bucket_num(val);
-                self.increment_bucket_if_in_bounds(
-                    val,
-                    &bounds,
-                    bucket_pos,
-                    docs[3],
-                    &bucket_with_accessor.sub_aggregation,
-                )?;
-            }
+            let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0]));
+            let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1]));
+            let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2]));
+            let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3]));
+
+            let bucket_pos0 = get_bucket_num(val0);
+            let bucket_pos1 = get_bucket_num(val1);
+            let bucket_pos2 = get_bucket_num(val2);
+            let bucket_pos3 = get_bucket_num(val3);
+
+            self.increment_bucket_if_in_bounds(
+                val0,
+                &bounds,
+                bucket_pos0,
+                docs[0],
+                &bucket_with_accessor.sub_aggregation,
+            )?;
+            self.increment_bucket_if_in_bounds(
+                val1,
+                &bounds,
+                bucket_pos1,
+                docs[1],
+                &bucket_with_accessor.sub_aggregation,
+            )?;
+            self.increment_bucket_if_in_bounds(
+                val2,
+                &bounds,
+                bucket_pos2,
+                docs[2],
+                &bucket_with_accessor.sub_aggregation,
+            )?;
+            self.increment_bucket_if_in_bounds(
+                val3,
+                &bounds,
+                bucket_pos3,
+                docs[3],
+                &bucket_with_accessor.sub_aggregation,
+            )?;
         }
         for &doc in iter.remainder() {
-            let Some(val) = accessor.get_val(doc).map(|val|f64_from_fastfield_u64(val, &self.field_type)) else{
-                continue;
-            };
+            let val = f64_from_fastfield_u64(accessor.get_val(doc), &self.field_type);
             if !bounds.contains(val) {
                 continue;
             }
@@ -466,6 +451,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
     buckets: Vec<IntermediateHistogramBucketEntry>,
     histogram_req: &HistogramAggregation,
     sub_aggregation: &AggregationsInternal,
+    schema: &Schema,
 ) -> crate::Result<Vec<BucketEntry>> {
     // Generate the full list of buckets without gaps.
     //
@@ -506,7 +492,9 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
             sub_aggregation: empty_sub_aggregation.clone(),
         },
     })
-    .map(|intermediate_bucket| intermediate_bucket.into_final_bucket_entry(sub_aggregation))
+    .map(|intermediate_bucket| {
+        intermediate_bucket.into_final_bucket_entry(sub_aggregation, schema)
+    })
     .collect::<crate::Result<Vec<_>>>()
 }

@@ -515,20 +503,43 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
     buckets: Vec<IntermediateHistogramBucketEntry>,
     histogram_req: &HistogramAggregation,
     sub_aggregation: &AggregationsInternal,
+    schema: &Schema,
 ) -> crate::Result<Vec<BucketEntry>> {
-    if histogram_req.min_doc_count() == 0 {
+    let mut buckets = if histogram_req.min_doc_count() == 0 {
         // With min_doc_count != 0, we may need to add buckets, so that there are no
         // gaps, since intermediate result does not contain empty buckets (filtered to
         // reduce serialization size).

-        intermediate_buckets_to_final_buckets_fill_gaps(buckets, histogram_req, sub_aggregation)
+        intermediate_buckets_to_final_buckets_fill_gaps(
+            buckets,
+            histogram_req,
+            sub_aggregation,
+            schema,
+        )?
     } else {
         buckets
             .into_iter()
             .filter(|histogram_bucket| histogram_bucket.doc_count >= histogram_req.min_doc_count())
-            .map(|histogram_bucket| histogram_bucket.into_final_bucket_entry(sub_aggregation))
-            .collect::<crate::Result<Vec<_>>>()
-    }
+            .map(|histogram_bucket| {
+                histogram_bucket.into_final_bucket_entry(sub_aggregation, schema)
+            })
+            .collect::<crate::Result<Vec<_>>>()?
+    };
+
+    // If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc339
+    let field = schema
+        .get_field(&histogram_req.field)
+        .ok_or_else(|| TantivyError::FieldNotFound(histogram_req.field.to_string()))?;
+    if schema.get_field_entry(field).field_type().is_date() {
+        for bucket in buckets.iter_mut() {
+            if let crate::aggregation::Key::F64(val) = bucket.key {
+                let key_as_string = format_date(val as i64)?;
+                bucket.key_as_string = Some(key_as_string);
+            }
+        }
+    }
+
+    Ok(buckets)
 }

 /// Applies req extended_bounds/hard_bounds on the min_max value
@@ -1387,6 +1398,63 @@ mod tests {
         Ok(())
     }

+    #[test]
+    fn histogram_date_test_single_segment() -> crate::Result<()> {
+        histogram_date_test_with_opt(true)
+    }
+
+    #[test]
+    fn histogram_date_test_multi_segment() -> crate::Result<()> {
+        histogram_date_test_with_opt(false)
+    }
+
+    fn histogram_date_test_with_opt(merge_segments: bool) -> crate::Result<()> {
+        let index = get_test_index_2_segments(merge_segments)?;
+
+        let agg_req: Aggregations = vec![(
+            "histogram".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
+                    field: "date".to_string(),
+                    interval: 86400000000.0, // one day in microseconds
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_res = exec_request(agg_req, &index)?;
+
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+
+        assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][0]["key_as_string"],
+            "2019-01-01T00:00:00Z"
+        );
+        assert_eq!(res["histogram"]["buckets"][0]["doc_count"], 1);
+
+        assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][1]["key_as_string"],
+            "2019-01-02T00:00:00Z"
+        );
+
+        assert_eq!(res["histogram"]["buckets"][1]["doc_count"], 5);
+
+        assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][2]["key_as_string"],
+            "2019-01-03T00:00:00Z"
+        );
+
+        assert_eq!(res["histogram"]["buckets"][3], Value::Null);
+
+        Ok(())
+    }
+
     #[test]
     fn histogram_invalid_request() -> crate::Result<()> {
         let index = get_test_index_2_segments(true)?;
@@ -1,6 +1,7 @@
 use std::fmt::Debug;
 use std::ops::Range;

+use fastfield_codecs::MonotonicallyMappableToU64;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

@@ -11,7 +12,9 @@ use crate::aggregation::intermediate_agg_result::{
     IntermediateBucketResult, IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
 };
 use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
-use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey};
+use crate::aggregation::{
+    f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
+};
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

@@ -181,7 +184,7 @@ impl SegmentRangeCollector {
             .into_iter()
             .map(move |range_bucket| {
                 Ok((
-                    range_to_string(&range_bucket.range, &field_type),
+                    range_to_string(&range_bucket.range, &field_type)?,
                     range_bucket
                         .bucket
                         .into_intermediate_bucket_entry(&agg_with_accessor.sub_aggregation)?,
@@ -209,8 +212,8 @@ impl SegmentRangeCollector {
         let key = range
             .key
             .clone()
-            .map(Key::Str)
-            .unwrap_or_else(|| range_to_key(&range.range, &field_type));
+            .map(|key| Ok(Key::Str(key)))
+            .unwrap_or_else(|| range_to_key(&range.range, &field_type))?;
         let to = if range.range.end == u64::MAX {
             None
         } else {
@@ -228,6 +231,7 @@ impl SegmentRangeCollector {
                 sub_aggregation,
             )?)
         };
+
         Ok(SegmentRangeAndBucketEntry {
             range: range.range.clone(),
             bucket: SegmentRangeBucketEntry {
@@ -267,29 +271,20 @@ impl SegmentRangeCollector {
             let val2 = accessor.get_val(docs[1]);
             let val3 = accessor.get_val(docs[2]);
             let val4 = accessor.get_val(docs[3]);
-            if let Some(val) = val1 {
-                let bucket_pos = self.get_bucket_pos(val);
-                self.increment_bucket(bucket_pos, docs[0], &bucket_with_accessor.sub_aggregation)?;
-            }
-            if let Some(val) = val2 {
-                let bucket_pos = self.get_bucket_pos(val);
-                self.increment_bucket(bucket_pos, docs[1], &bucket_with_accessor.sub_aggregation)?;
-            }
-            if let Some(val) = val3 {
-                let bucket_pos = self.get_bucket_pos(val);
-                self.increment_bucket(bucket_pos, docs[2], &bucket_with_accessor.sub_aggregation)?;
-            }
-            if let Some(val) = val4 {
-                let bucket_pos = self.get_bucket_pos(val);
-                self.increment_bucket(bucket_pos, docs[3], &bucket_with_accessor.sub_aggregation)?;
-            }
+            let bucket_pos1 = self.get_bucket_pos(val1);
+            let bucket_pos2 = self.get_bucket_pos(val2);
+            let bucket_pos3 = self.get_bucket_pos(val3);
+            let bucket_pos4 = self.get_bucket_pos(val4);
+
+            self.increment_bucket(bucket_pos1, docs[0], &bucket_with_accessor.sub_aggregation)?;
+            self.increment_bucket(bucket_pos2, docs[1], &bucket_with_accessor.sub_aggregation)?;
+            self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?;
+            self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?;
         }
         for &doc in iter.remainder() {
             let val = accessor.get_val(doc);
-            if let Some(val) = val {
-                let bucket_pos = self.get_bucket_pos(val);
-                self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
-            }
+            let bucket_pos = self.get_bucket_pos(val);
+            self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
         }
         if force_flush {
             for bucket in &mut self.buckets {
@@ -411,34 +406,45 @@ fn extend_validate_ranges(
     Ok(converted_buckets)
 }

-pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
+pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> crate::Result<String> {
     // is_start is there for malformed requests, e.g. ig the user passes the range u64::MIN..0.0,
     // it should be rendered as "*-0" and not "*-*"
     let to_str = |val: u64, is_start: bool| {
         if (is_start && val == u64::MIN) || (!is_start && val == u64::MAX) {
-            "*".to_string()
+            Ok("*".to_string())
+        } else if *field_type == Type::Date {
+            let val = i64::from_u64(val);
+            format_date(val)
         } else {
-            f64_from_fastfield_u64(val, field_type).to_string()
+            Ok(f64_from_fastfield_u64(val, field_type).to_string())
         }
     };

-    format!("{}-{}", to_str(range.start, true), to_str(range.end, false))
+    Ok(format!(
+        "{}-{}",
+        to_str(range.start, true)?,
+        to_str(range.end, false)?
+    ))
 }

-pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
-    Key::Str(range_to_string(range, field_type))
+pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> crate::Result<Key> {
+    Ok(Key::Str(range_to_string(range, field_type)?))
 }

 #[cfg(test)]
 mod tests {

     use fastfield_codecs::MonotonicallyMappableToU64;
+    use serde_json::Value;

     use super::*;
     use crate::aggregation::agg_req::{
         Aggregation, Aggregations, BucketAggregation, BucketAggregationType,
     };
-    use crate::aggregation::tests::{exec_request_with_query, get_test_index_with_num_docs};
+    use crate::aggregation::tests::{
+        exec_request, exec_request_with_query, get_test_index_2_segments,
+        get_test_index_with_num_docs,
+    };

     pub fn get_collector_from_ranges(
         ranges: Vec<RangeAggregationRange>,
@@ -576,6 +582,77 @@ mod tests {
         Ok(())
     }

+    #[test]
+    fn range_date_test_single_segment() -> crate::Result<()> {
+        range_date_test_with_opt(true)
+    }
+
+    #[test]
+    fn range_date_test_multi_segment() -> crate::Result<()> {
+        range_date_test_with_opt(false)
+    }
+
+    fn range_date_test_with_opt(merge_segments: bool) -> crate::Result<()> {
+        let index = get_test_index_2_segments(merge_segments)?;
+
+        let agg_req: Aggregations = vec![(
+            "date_ranges".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Range(RangeAggregation {
+                    field: "date".to_string(),
+                    ranges: vec![
+                        RangeAggregationRange {
+                            key: None,
+                            from: None,
+                            to: Some(1546300800000000.0f64),
+                        },
+                        RangeAggregationRange {
+                            key: None,
+                            from: Some(1546300800000000.0f64),
+                            to: Some(1546387200000000.0f64),
+                        },
+                    ],
+                    keyed: false,
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_res = exec_request(agg_req, &index)?;
+
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+
+        assert_eq!(
+            res["date_ranges"]["buckets"][0]["from_as_string"],
+            Value::Null
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][0]["key"],
+            "*-2019-01-01T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][1]["from_as_string"],
+            "2019-01-01T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][1]["to_as_string"],
+            "2019-01-02T00:00:00Z"
+        );

+        assert_eq!(
+            res["date_ranges"]["buckets"][2]["from_as_string"],
+            "2019-01-02T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][2]["to_as_string"],
+            Value::Null
+        );
+
+        Ok(())
+    }
+
     #[test]
     fn range_custom_key_keyed_buckets_test() -> crate::Result<()> {
         let index = get_test_index_with_num_docs(false, 100)?;
@@ -7,6 +7,7 @@ use super::intermediate_agg_result::IntermediateAggregationResults;
 use super::segment_agg_result::SegmentAggregationResultsCollector;
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
+use crate::schema::Schema;
 use crate::{SegmentReader, TantivyError};

 /// The default max bucket count, before the aggregation fails.
@@ -16,6 +17,7 @@ pub const MAX_BUCKET_COUNT: u32 = 65000;
 ///
 /// The collector collects all aggregations by the underlying aggregation request.
 pub struct AggregationCollector {
+    schema: Schema,
     agg: Aggregations,
     max_bucket_count: u32,
 }
@@ -25,8 +27,9 @@ impl AggregationCollector {
     ///
     /// Aggregation fails when the total bucket count is higher than max_bucket_count.
     /// max_bucket_count will default to `MAX_BUCKET_COUNT` (65000) when unset
-    pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>) -> Self {
+    pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>, schema: Schema) -> Self {
         Self {
+            schema,
             agg,
             max_bucket_count: max_bucket_count.unwrap_or(MAX_BUCKET_COUNT),
         }
@@ -113,7 +116,7 @@ impl Collector for AggregationCollector {
         segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
     ) -> crate::Result<Self::Fruit> {
         let res = merge_fruits(segment_fruits)?;
-        res.into_final_bucket_result(self.agg.clone())
+        res.into_final_bucket_result(self.agg.clone(), &self.schema)
     }
 }

src/aggregation/date.rs (new file, 18 lines)
@@ -0,0 +1,18 @@
+use time::format_description::well_known::Rfc3339;
+use time::OffsetDateTime;
+
+use crate::TantivyError;
+
+pub(crate) fn format_date(val: i64) -> crate::Result<String> {
+    let datetime =
+        OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)).map_err(|err| {
+            TantivyError::InvalidArgument(format!(
+                "Could not convert {:?} to OffsetDateTime, err {:?}",
+                val, err
+            ))
+        })?;
+    let key_as_string = datetime
+        .format(&Rfc3339)
+        .map_err(|_err| TantivyError::InvalidArgument("Could not serialize date".to_string()))?;
+    Ok(key_as_string)
+}
@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};

 use super::agg_req::{
     Aggregations, AggregationsInternal, BucketAggregationInternal, BucketAggregationType,
-    MetricAggregation,
+    MetricAggregation, RangeAggregation,
 };
 use super::agg_result::{AggregationResult, BucketResult, RangeBucketEntry};
 use super::bucket::{
@@ -19,9 +19,11 @@ use super::bucket::{
 };
 use super::metric::{IntermediateAverage, IntermediateStats};
 use super::segment_agg_result::SegmentMetricResultCollector;
-use super::{Key, SerializedKey, VecWithNames};
+use super::{format_date, Key, SerializedKey, VecWithNames};
 use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry};
 use crate::aggregation::bucket::TermsAggregationInternal;
+use crate::schema::Schema;
+use crate::TantivyError;

 /// Contains the intermediate aggregation result, which is optimized to be merged with other
 /// intermediate results.
@@ -35,8 +37,12 @@ pub struct IntermediateAggregationResults {

 impl IntermediateAggregationResults {
     /// Convert intermediate result and its aggregation request to the final result.
-    pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> {
-        self.into_final_bucket_result_internal(&(req.into()))
+    pub fn into_final_bucket_result(
+        self,
+        req: Aggregations,
+        schema: &Schema,
+    ) -> crate::Result<AggregationResults> {
+        self.into_final_bucket_result_internal(&(req.into()), schema)
     }

     /// Convert intermediate result and its aggregation request to the final result.
@@ -46,6 +52,7 @@ impl IntermediateAggregationResults {
     pub(crate) fn into_final_bucket_result_internal(
         self,
         req: &AggregationsInternal,
+        schema: &Schema,
     ) -> crate::Result<AggregationResults> {
         // Important assumption:
         // When the tree contains buckets/metric, we expect it to have all buckets/metrics from the
@@ -53,11 +60,11 @@ impl IntermediateAggregationResults {
         let mut results: FxHashMap<String, AggregationResult> = FxHashMap::default();

         if let Some(buckets) = self.buckets {
-            convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets)?
+            convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets, schema)?
         } else {
             // When there are no buckets, we create empty buckets, so that the serialized json
             // format is constant
-            add_empty_final_buckets_to_result(&mut results, &req.buckets)?
+            add_empty_final_buckets_to_result(&mut results, &req.buckets, schema)?
         };

         if let Some(metrics) = self.metrics {
@@ -158,10 +165,12 @@ fn add_empty_final_metrics_to_result(
 fn add_empty_final_buckets_to_result(
     results: &mut FxHashMap<String, AggregationResult>,
     req_buckets: &VecWithNames<BucketAggregationInternal>,
+    schema: &Schema,
 ) -> crate::Result<()> {
     let requested_buckets = req_buckets.iter();
     for (key, req) in requested_buckets {
-        let empty_bucket = AggregationResult::BucketResult(BucketResult::empty_from_req(req)?);
+        let empty_bucket =
+            AggregationResult::BucketResult(BucketResult::empty_from_req(req, schema)?);
         results.insert(key.to_string(), empty_bucket);
     }
     Ok(())
@@ -171,12 +180,13 @@ fn convert_and_add_final_buckets_to_result(
     results: &mut FxHashMap<String, AggregationResult>,
     buckets: VecWithNames<IntermediateBucketResult>,
     req_buckets: &VecWithNames<BucketAggregationInternal>,
+    schema: &Schema,
 ) -> crate::Result<()> {
     assert_eq!(buckets.len(), req_buckets.len());

     let buckets_with_request = buckets.into_iter().zip(req_buckets.values());
     for ((key, bucket), req) in buckets_with_request {
-        let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req)?);
+        let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req, schema)?);
         results.insert(key, result);
     }
     Ok(())
@@ -266,13 +276,21 @@ impl IntermediateBucketResult {
     pub(crate) fn into_final_bucket_result(
         self,
         req: &BucketAggregationInternal,
+        schema: &Schema,
     ) -> crate::Result<BucketResult> {
         match self {
             IntermediateBucketResult::Range(range_res) => {
                 let mut buckets: Vec<RangeBucketEntry> = range_res
                     .buckets
                     .into_iter()
-                    .map(|(_, bucket)| bucket.into_final_bucket_entry(&req.sub_aggregation))
+                    .map(|(_, bucket)| {
+                        bucket.into_final_bucket_entry(
+                            &req.sub_aggregation,
+                            schema,
+                            req.as_range()
+                                .expect("unexpected aggregation, expected histogram aggregation"),
+                        )
+                    })
                     .collect::<crate::Result<Vec<_>>>()?;

                 buckets.sort_by(|left, right| {
@@ -303,6 +321,7 @@ impl IntermediateBucketResult {
                     req.as_histogram()
                         .expect("unexpected aggregation, expected histogram aggregation"),
                     &req.sub_aggregation,
+                    schema,
                 )?;

                 let buckets = if req.as_histogram().unwrap().keyed {
@@ -321,6 +340,7 @@ impl IntermediateBucketResult {
                 req.as_term()
                     .expect("unexpected aggregation, expected term aggregation"),
                 &req.sub_aggregation,
+                schema,
             ),
         }
     }
@@ -411,6 +431,7 @@ impl IntermediateTermBucketResult {
         self,
         req: &TermsAggregation,
         sub_aggregation_req: &AggregationsInternal,
+        schema: &Schema,
     ) -> crate::Result<BucketResult> {
         let req = TermsAggregationInternal::from_req(req);
         let mut buckets: Vec<BucketEntry> = self
@@ -419,11 +440,12 @@ impl IntermediateTermBucketResult {
             .filter(|bucket| bucket.1.doc_count >= req.min_doc_count)
             .map(|(key, entry)| {
                 Ok(BucketEntry {
+                    key_as_string: None,
                     key: Key::Str(key),
                     doc_count: entry.doc_count,
                     sub_aggregation: entry
                         .sub_aggregation
-                        .into_final_bucket_result_internal(sub_aggregation_req)?,
+                        .into_final_bucket_result_internal(sub_aggregation_req, schema)?,
                 })
             })
             .collect::<crate::Result<_>>()?;
@@ -528,13 +550,15 @@ impl IntermediateHistogramBucketEntry {
     pub(crate) fn into_final_bucket_entry(
         self,
         req: &AggregationsInternal,
+        schema: &Schema,
     ) -> crate::Result<BucketEntry> {
         Ok(BucketEntry {
+            key_as_string: None,
             key: Key::F64(self.key),
             doc_count: self.doc_count,
             sub_aggregation: self
                 .sub_aggregation
-                .into_final_bucket_result_internal(req)?,
+                .into_final_bucket_result_internal(req, schema)?,
         })
     }
 }
@@ -571,16 +595,38 @@ impl IntermediateRangeBucketEntry {
     pub(crate) fn into_final_bucket_entry(
         self,
         req: &AggregationsInternal,
+        schema: &Schema,
+        range_req: &RangeAggregation,
     ) -> crate::Result<RangeBucketEntry> {
-        Ok(RangeBucketEntry {
+        let mut range_bucket_entry = RangeBucketEntry {
             key: self.key,
             doc_count: self.doc_count,
             sub_aggregation: self
                 .sub_aggregation
-                .into_final_bucket_result_internal(req)?,
+                .into_final_bucket_result_internal(req, schema)?,
             to: self.to,
             from: self.from,
-        })
+            to_as_string: None,
+            from_as_string: None,
+        };
+
+        // If we have a date type on the histogram buckets, we add the `key_as_string` field as
+        // rfc339
+        let field = schema
+            .get_field(&range_req.field)
+            .ok_or_else(|| TantivyError::FieldNotFound(range_req.field.to_string()))?;
+        if schema.get_field_entry(field).field_type().is_date() {
+            if let Some(val) = range_bucket_entry.to {
+                let key_as_string = format_date(val as i64)?;
+                range_bucket_entry.to_as_string = Some(key_as_string);
+            }
+            if let Some(val) = range_bucket_entry.from {
+                let key_as_string = format_date(val as i64)?;
+                range_bucket_entry.from_as_string = Some(key_as_string);
+            }
+        }
+
+        Ok(range_bucket_entry)
     }
 }

@@ -1,6 +1,6 @@
 use std::fmt::Debug;

-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::f64_from_fastfield_u64;
@@ -57,33 +57,26 @@ impl SegmentAverageCollector {
             data: Default::default(),
         }
     }
-    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn OptionalColumn<u64>) {
+    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
         let mut iter = doc.chunks_exact(4);
         for docs in iter.by_ref() {
-            if let Some(val) = field.get_val(docs[0]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.data.collect(val);
-            }
-            if let Some(val) = field.get_val(docs[1]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.data.collect(val);
-            }
-            if let Some(val) = field.get_val(docs[2]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.data.collect(val);
-            }
-
-            if let Some(val) = field.get_val(docs[3]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.data.collect(val);
-            }
+            let val1 = field.get_val(docs[0]);
+            let val2 = field.get_val(docs[1]);
+            let val3 = field.get_val(docs[2]);
+            let val4 = field.get_val(docs[3]);
+            let val1 = f64_from_fastfield_u64(val1, &self.field_type);
+            let val2 = f64_from_fastfield_u64(val2, &self.field_type);
+            let val3 = f64_from_fastfield_u64(val3, &self.field_type);
+            let val4 = f64_from_fastfield_u64(val4, &self.field_type);
+            self.data.collect(val1);
+            self.data.collect(val2);
+            self.data.collect(val3);
+            self.data.collect(val4);
         }
         for &doc in iter.remainder() {
-            if let Some(val) = field.get_val(doc) {
+            let val = field.get_val(doc);
             let val = f64_from_fastfield_u64(val, &self.field_type);
             self.data.collect(val);
-            }
         }
     }
 }
@@ -1,4 +1,4 @@
-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::f64_from_fastfield_u64;
@@ -163,31 +163,26 @@ impl SegmentStatsCollector {
             stats: IntermediateStats::default(),
         }
     }
-    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn OptionalColumn<u64>) {
+    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
        let mut iter = doc.chunks_exact(4);
        for docs in iter.by_ref() {
-            if let Some(val) = field.get_val(docs[0]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.stats.collect(val);
-            }
-            if let Some(val) = field.get_val(docs[1]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.stats.collect(val);
-            }
-            if let Some(val) = field.get_val(docs[2]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.stats.collect(val);
-            }
-            if let Some(val) = field.get_val(docs[3]) {
-                let val = f64_from_fastfield_u64(val, &self.field_type);
-                self.stats.collect(val);
-            }
+            let val1 = field.get_val(docs[0]);
+            let val2 = field.get_val(docs[1]);
+            let val3 = field.get_val(docs[2]);
+            let val4 = field.get_val(docs[3]);
+            let val1 = f64_from_fastfield_u64(val1, &self.field_type);
+            let val2 = f64_from_fastfield_u64(val2, &self.field_type);
+            let val3 = f64_from_fastfield_u64(val3, &self.field_type);
+            let val4 = f64_from_fastfield_u64(val4, &self.field_type);
+            self.stats.collect(val1);
+            self.stats.collect(val2);
+            self.stats.collect(val3);
+            self.stats.collect(val4);
        }
        for &doc in iter.remainder() {
-            if let Some(val) = field.get_val(doc) {
+            let val = field.get_val(doc);
             let val = f64_from_fastfield_u64(val, &self.field_type);
             self.stats.collect(val);
-            }
        }
    }
 }
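The two `collect_block` hunks above lean on the `chunks_exact(4)` / `remainder()` split: the main loop only ever sees complete groups of four doc ids, and the tail loop picks up the zero to three docs left over, so every doc is visited exactly once. Purely as an illustrative sketch (the helper name and closure below are made up, not part of the patch):

```rust
/// Hypothetical stand-in for the unrolled collectors above: visit every doc id,
/// four at a time where possible, then the 0-3 leftovers.
fn visit_all(docs: &[u32], mut visit: impl FnMut(u32)) {
    let mut chunks = docs.chunks_exact(4);
    for chunk in chunks.by_ref() {
        // Always exactly four entries here, which is what makes the
        // manual unrolling in collect_block safe.
        visit(chunk[0]);
        visit(chunk[1]);
        visit(chunk[2]);
        visit(chunk[3]);
    }
    for &doc in chunks.remainder() {
        visit(doc);
    }
}
```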
@@ -227,7 +222,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let reader = index.reader()?;
        let searcher = reader.searcher();
@@ -305,7 +300,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
@@ -12,7 +12,7 @@
 //!
 //! ## Prerequisite
 //! Currently aggregations work only on [fast fields](`crate::fastfield`). Single value fast fields
-//! of type `u64`, `f64`, `i64` and fast fields on text fields.
+//! of type `u64`, `f64`, `i64`, `date` and fast fields on text fields.
 //!
 //! ## Usage
 //! To use aggregations, build an aggregation request by constructing
@@ -53,9 +53,10 @@
 //! use tantivy::query::AllQuery;
 //! use tantivy::aggregation::agg_result::AggregationResults;
 //! use tantivy::IndexReader;
+//! use tantivy::schema::Schema;
 //!
 //! # #[allow(dead_code)]
-//! fn aggregate_on_index(reader: &IndexReader) {
+//! fn aggregate_on_index(reader: &IndexReader, schema: Schema) {
 //!     let agg_req: Aggregations = vec![
 //!         (
 //!             "average".to_string(),
@@ -67,7 +68,7 @@
 //!     .into_iter()
 //!     .collect();
 //!
-//!     let collector = AggregationCollector::from_aggs(agg_req, None);
+//!     let collector = AggregationCollector::from_aggs(agg_req, None, schema);
 //!
 //!     let searcher = reader.searcher();
 //!     let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
@@ -157,6 +158,7 @@ mod agg_req_with_accessor;
 pub mod agg_result;
 pub mod bucket;
 mod collector;
+mod date;
 pub mod intermediate_agg_result;
 pub mod metric;
 mod segment_agg_result;
@@ -167,6 +169,7 @@ pub use collector::{
     AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector,
     MAX_BUCKET_COUNT,
 };
+pub(crate) use date::format_date;
 use fastfield_codecs::MonotonicallyMappableToU64;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
@@ -283,11 +286,11 @@ impl Display for Key {
 /// Inverse of `to_fastfield_u64`. Used to convert to `f64` for metrics.
 ///
 /// # Panics
-/// Only `u64`, `f64`, and `i64` are supported.
+/// Only `u64`, `f64`, `date`, and `i64` are supported.
 pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
     match field_type {
         Type::U64 => val as f64,
-        Type::I64 => i64::from_u64(val) as f64,
+        Type::I64 | Type::Date => i64::from_u64(val) as f64,
         Type::F64 => f64::from_u64(val),
         _ => {
             panic!("unexpected type {:?}. This should not happen", field_type)
@@ -295,10 +298,9 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
     }
 }

-/// Converts the `f64` value to fast field value space.
+/// Converts the `f64` value to fast field value space, which is always u64.
 ///
-/// If the fast field has `u64`, values are stored as `u64` in the fast field.
-/// A `f64` value of e.g. `2.0` therefore needs to be converted to `1u64`.
+/// If the fast field has `u64`, values are stored unchanged as `u64` in the fast field.
 ///
 /// If the fast field has `f64` values are converted and stored to `u64` using a
 /// monotonic mapping.
@@ -308,7 +310,7 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
 pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> {
     match field_type {
         Type::U64 => Some(val as u64),
-        Type::I64 => Some((val as i64).to_u64()),
+        Type::I64 | Type::Date => Some((val as i64).to_u64()),
         Type::F64 => Some(val.to_u64()),
         _ => None,
     }
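With `Type::Date` folded into the `i64` arms above, a date fast-field value travels through the column store as the monotonic `u64` mapping of its timestamp and comes back out unchanged. A minimal round-trip sketch, assuming only the `MonotonicallyMappableToU64` impl for `i64` that this module already imports (the helper function itself is hypothetical, not part of the patch):

```rust
use fastfield_codecs::MonotonicallyMappableToU64;

// Hypothetical helper: map a (microsecond) timestamp into fast-field space and back.
fn roundtrip(timestamp_micros: i64) -> i64 {
    let stored: u64 = timestamp_micros.to_u64(); // what the fast field keeps
    i64::from_u64(stored)                        // what the f64 conversion starts from
}

fn main() {
    assert_eq!(roundtrip(1_546_300_800_000_000), 1_546_300_800_000_000);
    assert_eq!(roundtrip(-1), -1); // pre-1970 timestamps survive the mapping too
}
```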
@@ -317,6 +319,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> {
 #[cfg(test)]
 mod tests {
     use serde_json::Value;
+    use time::OffsetDateTime;

     use super::agg_req::{Aggregation, Aggregations, BucketAggregation};
     use super::bucket::RangeAggregation;
@@ -332,7 +335,7 @@ mod tests {
     use crate::aggregation::DistributedAggregationCollector;
     use crate::query::{AllQuery, TermQuery};
     use crate::schema::{Cardinality, IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
+    use crate::{DateTime, Index, Term};

     fn get_avg_req(field_name: &str) -> Aggregation {
         Aggregation::Metric(MetricAggregation::Average(
@@ -358,7 +361,7 @@ mod tests {
         index: &Index,
         query: Option<(&str, &str)>,
     ) -> crate::Result<Value> {
-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

         let reader = index.reader()?;
         let searcher = reader.searcher();
@@ -552,10 +555,10 @@ mod tests {
            let searcher = reader.searcher();
            let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap();
            intermediate_agg_result
-                .into_final_bucket_result(agg_req)
+                .into_final_bucket_result(agg_req, &index.schema())
                .unwrap()
        } else {
-            let collector = AggregationCollector::from_aggs(agg_req, None);
+            let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
@@ -648,6 +651,7 @@ mod tests {
            .set_fast()
            .set_stored();
        let text_field = schema_builder.add_text_field("text", text_fieldtype);
+       let date_field = schema_builder.add_date_field("date", FAST);
        schema_builder.add_text_field("dummy_text", STRING);
        let score_fieldtype =
            crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
@@ -665,6 +669,7 @@ mod tests {
        // writing the segment
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800).unwrap()),
            score_field => 1u64,
            score_field_f64 => 1f64,
            score_field_i64 => 1i64,
@@ -673,6 +678,7 @@ mod tests {
        ))?;
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
            score_field => 3u64,
            score_field_f64 => 3f64,
            score_field_i64 => 3i64,
@@ -681,18 +687,21 @@ mod tests {
        ))?;
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
            score_field => 5u64,
            score_field_f64 => 5f64,
            score_field_i64 => 5i64,
        ))?;
        index_writer.add_document(doc!(
            text_field => "nohit",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
            score_field => 6u64,
            score_field_f64 => 6f64,
            score_field_i64 => 6i64,
        ))?;
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
            score_field => 7u64,
            score_field_f64 => 7f64,
            score_field_i64 => 7i64,
@@ -700,12 +709,14 @@ mod tests {
        index_writer.commit()?;
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
            score_field => 11u64,
            score_field_f64 => 11f64,
            score_field_i64 => 11i64,
        ))?;
        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
            score_field => 14u64,
            score_field_f64 => 14f64,
            score_field_i64 => 14i64,
@@ -713,6 +724,7 @@ mod tests {

        index_writer.add_document(doc!(
            text_field => "cool",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
            score_field => 44u64,
            score_field_f64 => 44.5f64,
            score_field_i64 => 44i64,
@@ -723,6 +735,7 @@ mod tests {
        // no hits segment
        index_writer.add_document(doc!(
            text_field => "nohit",
+           date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
            score_field => 44u64,
            score_field_f64 => 44.5f64,
            score_field_i64 => 44i64,
@@ -795,7 +808,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
@@ -995,9 +1008,10 @@ mod tests {
            // Test de/serialization roundtrip on intermediate_agg_result
            let res: IntermediateAggregationResults =
                serde_json::from_str(&serde_json::to_string(&res).unwrap()).unwrap();
-            res.into_final_bucket_result(agg_req.clone()).unwrap()
+            res.into_final_bucket_result(agg_req.clone(), &index.schema())
+                .unwrap()
        } else {
-            let collector = AggregationCollector::from_aggs(agg_req.clone(), None);
+            let collector = AggregationCollector::from_aggs(agg_req.clone(), None, index.schema());

            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
@@ -1055,7 +1069,7 @@ mod tests {
        );

        // Test empty result set
-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
        let searcher = reader.searcher();
        searcher.search(&query_with_no_hits, &collector).unwrap();

@@ -1120,7 +1134,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();

@@ -1233,7 +1247,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1264,7 +1278,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1295,7 +1309,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1334,7 +1348,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1363,7 +1377,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1392,7 +1406,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1429,7 +1443,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1464,7 +1478,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1503,7 +1517,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1533,7 +1547,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -1590,7 +1604,7 @@ mod tests {
            .into_iter()
            .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults =
@@ -130,9 +130,7 @@ where

        let fast_field_reader = segment_reader
            .fast_fields()
-            .typed_fast_field_reader(self.field)?
-            .to_full()
-            .expect("temp migration solution");
+            .typed_fast_field_reader(self.field)?;

        let segment_collector = self
            .collector
@@ -112,11 +112,7 @@ impl Collector for HistogramCollector {
        _segment_local_id: crate::SegmentOrdinal,
        segment: &crate::SegmentReader,
    ) -> crate::Result<Self::Child> {
-        let ff_reader = segment
-            .fast_fields()
-            .u64_lenient(self.field)?
-            .to_full()
-            .expect("temp migration solution");
+        let ff_reader = segment.fast_fields().u64_lenient(self.field)?;
        Ok(SegmentHistogramCollector {
            histogram_computer: HistogramComputer {
                counts: vec![0; self.num_buckets],
@@ -1,6 +1,6 @@
 use std::sync::Arc;

-use fastfield_codecs::OptionalColumn;
+use fastfield_codecs::Column;

 use super::*;
 use crate::collector::{Count, FilterCollector, TopDocs};
@@ -160,7 +160,7 @@ pub struct FastFieldTestCollector {

 pub struct FastFieldSegmentCollector {
     vals: Vec<u64>,
-    reader: Arc<dyn OptionalColumn<u64>>,
+    reader: Arc<dyn Column<u64>>,
 }

 impl FastFieldTestCollector {
@@ -202,9 +202,7 @@ impl SegmentCollector for FastFieldSegmentCollector {

     fn collect(&mut self, doc: DocId, _score: Score) {
         let val = self.reader.get_val(doc);
-        if let Some(val) = val {
-            self.vals.push(val);
-        }
+        self.vals.push(val);
     }

     fn harvest(self) -> Vec<u64> {
@@ -156,9 +156,7 @@ impl CustomScorer<u64> for ScorerByField {
        // The conversion will then happen only on the top-K docs.
        let ff_reader = segment_reader
            .fast_fields()
-            .typed_fast_field_reader(self.field)?
-            .to_full()
-            .expect("temp migration solution");
+            .typed_fast_field_reader(self.field)?;
        Ok(ScorerByFastFieldReader { ff_reader })
    }
 }
@@ -460,7 +458,7 @@ impl TopDocs {
    ///
    ///     // We can now define our actual scoring function
    ///     move |doc: DocId, original_score: Score| {
-   ///         let popularity: u64 = popularity_reader.get_val(doc).unwrap();
+   ///         let popularity: u64 = popularity_reader.get_val(doc);
    ///         // Well.. For the sake of the example we use a simple logarithm
    ///         // function.
    ///         let popularity_boost_score = ((2u64 + popularity) as Score).log2();
@@ -569,8 +567,8 @@ impl TopDocs {
    ///
    ///     // We can now define our actual scoring function
    ///     move |doc: DocId| {
-   ///         let popularity: u64 = popularity_reader.get_val(doc).unwrap();
-   ///         let boosted: u64 = boosted_reader.get_val(doc).unwrap();
+   ///         let popularity: u64 = popularity_reader.get_val(doc);
+   ///         let boosted: u64 = boosted_reader.get_val(doc);
    ///         // Score do not have to be `f64` in tantivy.
    ///         // Here we return a couple to get lexicographical order
    ///         // for free.
@@ -149,7 +149,8 @@ impl IndexBuilder {
    /// Creates a new index using the [`RamDirectory`].
    ///
    /// The index will be allocated in anonymous memory.
-   /// This should only be used for unit tests.
+   /// This is useful for indexing small set of documents
+   /// for instances like unit test or temporary in memory index.
    pub fn create_in_ram(self) -> Result<Index, TantivyError> {
        let ram_directory = RamDirectory::create();
        self.create(ram_directory)
@@ -30,8 +30,8 @@ pub use self::multivalued::{
    MultiValueIndex, MultiValueU128FastFieldWriter, MultiValuedFastFieldReader,
    MultiValuedFastFieldWriter, MultiValuedU128FastFieldReader,
 };
+pub(crate) use self::readers::type_and_cardinality;
 pub use self::readers::FastFieldReaders;
-pub(crate) use self::readers::{type_and_cardinality, FastType};
 pub use self::serializer::{Column, CompositeFastFieldSerializer};
 use self::writer::unexpected_value;
 pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
@@ -207,10 +207,10 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 25);
+        assert_eq!(file.len(), 34);
        let composite_file = CompositeFile::open(&file)?;
        let fast_field_bytes = composite_file.open_read(*FIELD).unwrap().read_bytes()?;
-        let fast_field_reader = open::<u64>(fast_field_bytes)?.to_full().unwrap();
+        let fast_field_reader = open::<u64>(fast_field_bytes)?;
        assert_eq!(fast_field_reader.get_val(0), 13u64);
        assert_eq!(fast_field_reader.get_val(1), 14u64);
        assert_eq!(fast_field_reader.get_val(2), 2u64);
@@ -256,14 +256,14 @@ mod tests {
            serializer.close()?;
        }
        let file = directory.open_read(path)?;
-        assert_eq!(file.len(), 53);
+        assert_eq!(file.len(), 62);
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite
                .open_read(*FIELD)
                .unwrap()
                .read_bytes()?;
-            let fast_field_reader = open::<u64>(data)?.to_full().unwrap();
+            let fast_field_reader = open::<u64>(data)?;
            assert_eq!(fast_field_reader.get_val(0), 4u64);
            assert_eq!(fast_field_reader.get_val(1), 14_082_001u64);
            assert_eq!(fast_field_reader.get_val(2), 3_052u64);
@@ -297,14 +297,14 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 26);
+        assert_eq!(file.len(), 35);
        {
            let fast_fields_composite = CompositeFile::open(&file).unwrap();
            let data = fast_fields_composite
                .open_read(*FIELD)
                .unwrap()
                .read_bytes()?;
-            let fast_field_reader = open::<u64>(data)?.to_full().unwrap();
+            let fast_field_reader = open::<u64>(data)?;
            for doc in 0..10_000 {
                assert_eq!(fast_field_reader.get_val(doc), 100_000u64);
            }
@@ -336,14 +336,14 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 80040);
+        assert_eq!(file.len(), 80049);
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite
                .open_read(*FIELD)
                .unwrap()
                .read_bytes()?;
-            let fast_field_reader = open::<u64>(data)?.to_full().unwrap();
+            let fast_field_reader = open::<u64>(data)?;
            assert_eq!(fast_field_reader.get_val(0), 0u64);
            for doc in 1..10_001 {
                assert_eq!(
@@ -378,7 +378,7 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 40_usize);
+        assert_eq!(file.len(), 49_usize);

        {
            let fast_fields_composite = CompositeFile::open(&file)?;
@@ -386,7 +386,7 @@ mod tests {
                .open_read(i64_field)
                .unwrap()
                .read_bytes()?;
-            let fast_field_reader = open::<i64>(data)?.to_full().unwrap();
+            let fast_field_reader = open::<i64>(data)?;

            assert_eq!(fast_field_reader.min_value(), -100i64);
            assert_eq!(fast_field_reader.max_value(), 9_999i64);
@@ -429,7 +429,7 @@ mod tests {
                .open_read(i64_field)
                .unwrap()
                .read_bytes()?;
-            let fast_field_reader = open::<i64>(data)?.to_full().unwrap();
+            let fast_field_reader = open::<i64>(data)?;
            assert_eq!(fast_field_reader.get_val(0), 0i64);
        }
        Ok(())
@@ -470,7 +470,7 @@ mod tests {
            .open_read(*FIELD)
            .unwrap()
            .read_bytes()?;
-        let fast_field_reader = open::<u64>(data)?.to_full().unwrap();
+        let fast_field_reader = open::<u64>(data)?;

        for a in 0..n {
            assert_eq!(fast_field_reader.get_val(a as u32), permutation[a as usize]);
@@ -763,28 +763,19 @@ mod tests {
        let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
        let mut dates = vec![];
        {
-            assert_eq!(
-                date_fast_field.get_val(0).unwrap().into_timestamp_micros(),
-                1i64
-            );
+            assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
            dates_fast_field.get_vals(0u32, &mut dates);
            assert_eq!(dates.len(), 2);
            assert_eq!(dates[0].into_timestamp_micros(), 2i64);
            assert_eq!(dates[1].into_timestamp_micros(), 3i64);
        }
        {
-            assert_eq!(
-                date_fast_field.get_val(1).unwrap().into_timestamp_micros(),
-                4i64
-            );
+            assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
            dates_fast_field.get_vals(1u32, &mut dates);
            assert!(dates.is_empty());
        }
        {
-            assert_eq!(
-                date_fast_field.get_val(2).unwrap().into_timestamp_micros(),
-                0i64
-            );
+            assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
            dates_fast_field.get_vals(2u32, &mut dates);
            assert_eq!(dates.len(), 2);
            assert_eq!(dates[0].into_timestamp_micros(), 5i64);
@@ -831,10 +822,10 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 24);
+        assert_eq!(file.len(), 33);
        let composite_file = CompositeFile::open(&file)?;
        let data = composite_file.open_read(field).unwrap().read_bytes()?;
-        let fast_field_reader = open::<bool>(data)?.to_full().unwrap();
+        let fast_field_reader = open::<bool>(data)?;
        assert_eq!(fast_field_reader.get_val(0), true);
        assert_eq!(fast_field_reader.get_val(1), false);
        assert_eq!(fast_field_reader.get_val(2), true);
@@ -869,10 +860,10 @@ mod tests {
            serializer.close().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 36);
+        assert_eq!(file.len(), 45);
        let composite_file = CompositeFile::open(&file)?;
        let data = composite_file.open_read(field).unwrap().read_bytes()?;
-        let fast_field_reader = open::<bool>(data)?.to_full().unwrap();
+        let fast_field_reader = open::<bool>(data)?;
        for i in 0..25 {
            assert_eq!(fast_field_reader.get_val(i * 2), true);
            assert_eq!(fast_field_reader.get_val(i * 2 + 1), false);
@@ -901,9 +892,9 @@ mod tests {
        }
        let file = directory.open_read(path).unwrap();
        let composite_file = CompositeFile::open(&file)?;
-        assert_eq!(file.len(), 23);
+        assert_eq!(file.len(), 32);
        let data = composite_file.open_read(field).unwrap().read_bytes()?;
-        let fast_field_reader = open::<bool>(data)?.to_full().unwrap();
+        let fast_field_reader = open::<bool>(data)?;
        assert_eq!(fast_field_reader.get_val(0), false);

        Ok(())
@@ -935,10 +926,10 @@ mod tests {
    pub fn test_gcd_date() -> crate::Result<()> {
        let size_prec_sec =
            test_gcd_date_with_codec(FastFieldCodecType::Bitpacked, DatePrecision::Seconds)?;
-        assert_eq!(size_prec_sec, 28 + (1_000 * 13) / 8); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
+        assert_eq!(size_prec_sec, 5 + 4 + 28 + (1_000 * 13) / 8); // 13 bits per val = ceil(log_2(number of seconds in 2hours);
        let size_prec_micro =
            test_gcd_date_with_codec(FastFieldCodecType::Bitpacked, DatePrecision::Microseconds)?;
-        assert_eq!(size_prec_micro, 26 + (1_000 * 33) / 8); // 33 bits per val = ceil(log_2(number of microsecsseconds in 2hours);
+        assert_eq!(size_prec_micro, 5 + 4 + 26 + (1_000 * 33) / 8); // 33 bits per val = ceil(log_2(number of microsecsseconds in 2hours);
        Ok(())
    }

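The two size assertions just above change only because the file length now includes a `5 + 4` byte header in front of the column data; the bit widths themselves are unchanged: 13 = ceil(log2(7_200)) distinct seconds in two hours, and 33 = ceil(log2(7_200_000_000)) microseconds. A quick check of that arithmetic (illustrative only, not part of the patch):

```rust
fn main() {
    let header = 5 + 4; // bytes newly accounted for in front of the column data
    assert_eq!(header + 28 + (1_000 * 13) / 8, 1_662); // seconds precision total
    assert_eq!(header + 26 + (1_000 * 33) / 8, 4_160); // microseconds precision total
}
```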
@@ -971,9 +962,7 @@ mod tests {
|
|||||||
let composite_file = CompositeFile::open(&file)?;
|
let composite_file = CompositeFile::open(&file)?;
|
||||||
let file = composite_file.open_read(*FIELD).unwrap();
|
let file = composite_file.open_read(*FIELD).unwrap();
|
||||||
let len = file.len();
|
let len = file.len();
|
||||||
let test_fastfield = open::<DateTime>(file.read_bytes()?)?
|
let test_fastfield = open::<DateTime>(file.read_bytes()?)?;
|
||||||
.to_full()
|
|
||||||
.expect("temp migration solution");
|
|
||||||
|
|
||||||
for (i, time) in times.iter().enumerate() {
|
for (i, time) in times.iter().enumerate() {
|
||||||
assert_eq!(test_fastfield.get_val(i as u32), time.truncate(precision));
|
assert_eq!(test_fastfield.get_val(i as u32), time.truncate(precision));
|
||||||
|
|||||||
@@ -533,17 +533,14 @@ mod bench {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
.read_bytes()
|
.read_bytes()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let idx_reader = fastfield_codecs::open(data_idx).unwrap().to_full().unwrap();
|
let idx_reader = fastfield_codecs::open(data_idx).unwrap();
|
||||||
|
|
||||||
let data_vals = fast_fields_composite
|
let data_vals = fast_fields_composite
|
||||||
.open_read_with_idx(field, 1)
|
.open_read_with_idx(field, 1)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.read_bytes()
|
.read_bytes()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let vals_reader = fastfield_codecs::open(data_vals)
|
let vals_reader = fastfield_codecs::open(data_vals).unwrap();
|
||||||
.unwrap()
|
|
||||||
.to_full()
|
|
||||||
.unwrap();
|
|
||||||
let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);
|
let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut sum = 0u64;
|
let mut sum = 0u64;
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::net::Ipv6Addr;
|
use std::net::Ipv6Addr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use fastfield_codecs::{open, open_u128, Column, OptionalColumn};
|
use fastfield_codecs::{open, open_u128, Column};
|
||||||
|
|
||||||
use super::multivalued::MultiValuedU128FastFieldReader;
|
use super::multivalued::MultiValuedU128FastFieldReader;
|
||||||
use crate::directory::{CompositeFile, FileSlice};
|
use crate::directory::{CompositeFile, FileSlice};
|
||||||
@@ -118,7 +118,7 @@ impl FastFieldReaders {
|
|||||||
&self,
|
&self,
|
||||||
field: Field,
|
field: Field,
|
||||||
index: usize,
|
index: usize,
|
||||||
) -> crate::Result<Arc<dyn OptionalColumn<TFastValue>>> {
|
) -> crate::Result<Arc<dyn Column<TFastValue>>> {
|
||||||
let fast_field_slice = self.fast_field_data(field, index)?;
|
let fast_field_slice = self.fast_field_data(field, index)?;
|
||||||
let bytes = fast_field_slice.read_bytes()?;
|
let bytes = fast_field_slice.read_bytes()?;
|
||||||
let column = fastfield_codecs::open(bytes)?;
|
let column = fastfield_codecs::open(bytes)?;
|
||||||
@@ -128,7 +128,7 @@ impl FastFieldReaders {
|
|||||||
pub(crate) fn typed_fast_field_reader<TFastValue: FastValue>(
|
pub(crate) fn typed_fast_field_reader<TFastValue: FastValue>(
|
||||||
&self,
|
&self,
|
||||||
field: Field,
|
field: Field,
|
||||||
) -> crate::Result<Arc<dyn OptionalColumn<TFastValue>>> {
|
) -> crate::Result<Arc<dyn Column<TFastValue>>> {
|
||||||
self.typed_fast_field_reader_with_idx(field, 0)
|
self.typed_fast_field_reader_with_idx(field, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,20 +138,13 @@ impl FastFieldReaders {
|
|||||||
) -> crate::Result<MultiValuedFastFieldReader<TFastValue>> {
|
) -> crate::Result<MultiValuedFastFieldReader<TFastValue>> {
|
||||||
let idx_reader = self.typed_fast_field_reader(field)?;
|
let idx_reader = self.typed_fast_field_reader(field)?;
|
||||||
let vals_reader = self.typed_fast_field_reader_with_idx(field, 1)?;
|
let vals_reader = self.typed_fast_field_reader_with_idx(field, 1)?;
|
||||||
Ok(MultiValuedFastFieldReader::open(
|
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
|
||||||
idx_reader
|
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fast field are always full"),
|
|
||||||
vals_reader
|
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fast field are always full"),
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the `u64` fast field reader reader associated with `field`.
|
/// Returns the `u64` fast field reader reader associated with `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a u64 fast field, this method returns an Error.
|
/// If `field` is not a u64 fast field, this method returns an Error.
|
||||||
pub fn u64(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<u64>>> {
|
pub fn u64(&self, field: Field) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||||
self.check_type(field, FastType::U64, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::U64, Cardinality::SingleValue)?;
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
@@ -159,7 +152,7 @@ impl FastFieldReaders {
|
|||||||
/// Returns the `ip` fast field reader reader associated to `field`.
|
/// Returns the `ip` fast field reader reader associated to `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a u128 fast field, this method returns an Error.
|
/// If `field` is not a u128 fast field, this method returns an Error.
|
||||||
pub fn ip_addr(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<Ipv6Addr>>> {
|
pub fn ip_addr(&self, field: Field) -> crate::Result<Arc<dyn Column<Ipv6Addr>>> {
|
||||||
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
||||||
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
||||||
Ok(open_u128::<Ipv6Addr>(bytes)?)
|
Ok(open_u128::<Ipv6Addr>(bytes)?)
|
||||||
@@ -173,15 +166,10 @@ impl FastFieldReaders {
|
|||||||
field: Field,
|
field: Field,
|
||||||
) -> crate::Result<MultiValuedU128FastFieldReader<Ipv6Addr>> {
|
) -> crate::Result<MultiValuedU128FastFieldReader<Ipv6Addr>> {
|
||||||
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
||||||
let idx_reader: Arc<dyn Column<u64>> = self
|
let idx_reader: Arc<dyn Column<u64>> = self.typed_fast_field_reader(field)?;
|
||||||
.typed_fast_field_reader(field)?
|
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fast fields are always full");
|
|
||||||
|
|
||||||
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
||||||
let vals_reader = open_u128::<Ipv6Addr>(bytes)?
|
let vals_reader = open_u128::<Ipv6Addr>(bytes)?;
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fields are always full");
|
|
||||||
|
|
||||||
Ok(MultiValuedU128FastFieldReader::open(
|
Ok(MultiValuedU128FastFieldReader::open(
|
||||||
idx_reader,
|
idx_reader,
|
||||||
@@ -191,9 +179,8 @@ impl FastFieldReaders {
|
|||||||
|
|
||||||
/// Returns the `u128` fast field reader reader associated to `field`.
|
/// Returns the `u128` fast field reader reader associated to `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a u128 base type fast field, this method returns an Error.
|
/// If `field` is not a u128 fast field, this method returns an Error.
|
||||||
/// Ip addresses use u128 as base type.
|
pub(crate) fn u128(&self, field: Field) -> crate::Result<Arc<dyn Column<u128>>> {
|
||||||
pub(crate) fn u128(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<u128>>> {
|
|
||||||
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::U128, Cardinality::SingleValue)?;
|
||||||
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
let bytes = self.fast_field_data(field, 0)?.read_bytes()?;
|
||||||
Ok(open_u128::<u128>(bytes)?)
|
Ok(open_u128::<u128>(bytes)?)
|
||||||
@@ -204,15 +191,10 @@ impl FastFieldReaders {
|
|||||||
/// If `field` is not a u128 multi-valued fast field, this method returns an Error.
|
/// If `field` is not a u128 multi-valued fast field, this method returns an Error.
|
||||||
pub fn u128s(&self, field: Field) -> crate::Result<MultiValuedU128FastFieldReader<u128>> {
|
pub fn u128s(&self, field: Field) -> crate::Result<MultiValuedU128FastFieldReader<u128>> {
|
||||||
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
|
||||||
let idx_reader: Arc<dyn Column<u64>> = self
|
let idx_reader: Arc<dyn Column<u64>> = self.typed_fast_field_reader(field)?;
|
||||||
.typed_fast_field_reader(field)?
|
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fast fields are always full");
|
|
||||||
|
|
||||||
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
|
||||||
let vals_reader = open_u128::<u128>(bytes)?
|
let vals_reader = open_u128::<u128>(bytes)?;
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fast fields are always full");
|
|
||||||
|
|
||||||
Ok(MultiValuedU128FastFieldReader::open(
|
Ok(MultiValuedU128FastFieldReader::open(
|
||||||
idx_reader,
|
idx_reader,
|
||||||
@@ -225,14 +207,14 @@ impl FastFieldReaders {
|
|||||||
///
|
///
|
||||||
/// If not, the fastfield reader will returns the u64-value associated with the original
|
/// If not, the fastfield reader will returns the u64-value associated with the original
|
||||||
/// FastValue.
|
/// FastValue.
|
||||||
pub fn u64_lenient(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<u64>>> {
|
pub fn u64_lenient(&self, field: Field) -> crate::Result<Arc<dyn Column<u64>>> {
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the `i64` fast field reader reader associated with `field`.
|
/// Returns the `i64` fast field reader reader associated with `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a i64 fast field, this method returns an Error.
|
/// If `field` is not a i64 fast field, this method returns an Error.
|
||||||
pub fn i64(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<i64>>> {
|
pub fn i64(&self, field: Field) -> crate::Result<Arc<dyn Column<i64>>> {
|
||||||
self.check_type(field, FastType::I64, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::I64, Cardinality::SingleValue)?;
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
@@ -240,7 +222,7 @@ impl FastFieldReaders {
|
|||||||
/// Returns the `date` fast field reader reader associated with `field`.
|
/// Returns the `date` fast field reader reader associated with `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a date fast field, this method returns an Error.
|
/// If `field` is not a date fast field, this method returns an Error.
|
||||||
pub fn date(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<DateTime>>> {
|
pub fn date(&self, field: Field) -> crate::Result<Arc<dyn Column<DateTime>>> {
|
||||||
self.check_type(field, FastType::Date, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::Date, Cardinality::SingleValue)?;
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
@@ -248,7 +230,7 @@ impl FastFieldReaders {
|
|||||||
/// Returns the `f64` fast field reader reader associated with `field`.
|
/// Returns the `f64` fast field reader reader associated with `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a f64 fast field, this method returns an Error.
|
/// If `field` is not a f64 fast field, this method returns an Error.
|
||||||
pub fn f64(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<f64>>> {
|
pub fn f64(&self, field: Field) -> crate::Result<Arc<dyn Column<f64>>> {
|
||||||
self.check_type(field, FastType::F64, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::F64, Cardinality::SingleValue)?;
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
@@ -256,7 +238,7 @@ impl FastFieldReaders {
|
|||||||
/// Returns the `bool` fast field reader reader associated with `field`.
|
/// Returns the `bool` fast field reader reader associated with `field`.
|
||||||
///
|
///
|
||||||
/// If `field` is not a bool fast field, this method returns an Error.
|
/// If `field` is not a bool fast field, this method returns an Error.
|
||||||
pub fn bool(&self, field: Field) -> crate::Result<Arc<dyn OptionalColumn<bool>>> {
|
pub fn bool(&self, field: Field) -> crate::Result<Arc<dyn Column<bool>>> {
|
||||||
self.check_type(field, FastType::Bool, Cardinality::SingleValue)?;
|
self.check_type(field, FastType::Bool, Cardinality::SingleValue)?;
|
||||||
self.typed_fast_field_reader(field)
|
self.typed_fast_field_reader(field)
|
||||||
}
|
}
|
||||||
@@ -327,12 +309,7 @@ impl FastFieldReaders {
|
|||||||
let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
let fast_field_idx_bytes = fast_field_idx_file.read_bytes()?;
|
||||||
let idx_reader = open(fast_field_idx_bytes)?;
|
let idx_reader = open(fast_field_idx_bytes)?;
|
||||||
let data = self.fast_field_data(field, 1)?;
|
let data = self.fast_field_data(field, 1)?;
|
||||||
BytesFastFieldReader::open(
|
BytesFastFieldReader::open(idx_reader, data)
|
||||||
idx_reader
|
|
||||||
.to_full()
|
|
||||||
.expect("multivalue fields are always full"),
|
|
||||||
data,
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
Err(FastFieldNotAvailableError::new(field_entry).into())
|
Err(FastFieldNotAvailableError::new(field_entry).into())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -465,9 +465,9 @@ mod tests_indexsorting {
         let my_number = index.schema().get_field("my_number").unwrap();

         let fast_field = fast_fields.u64(my_number).unwrap();
-        assert_eq!(fast_field.get_val(0), Some(10u64));
-        assert_eq!(fast_field.get_val(1), Some(20u64));
-        assert_eq!(fast_field.get_val(2), Some(30u64));
+        assert_eq!(fast_field.get_val(0), 10u64);
+        assert_eq!(fast_field.get_val(1), 20u64);
+        assert_eq!(fast_field.get_val(2), 30u64);

         let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
         let multifield = fast_fields.u64s(multi_numbers).unwrap();
@@ -1467,7 +1467,7 @@ mod tests {
         let fast_field_reader = segment_reader.fast_fields().u64(id_field)?;
         let in_order_alive_ids: Vec<u64> = segment_reader
             .doc_ids_alive()
-            .map(|doc| fast_field_reader.get_val(doc).unwrap())
+            .map(|doc| fast_field_reader.get_val(doc))
             .collect();
         assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]);
         Ok(())
@@ -1528,7 +1528,7 @@ mod tests {
         let fast_field_reader = segment_reader.fast_fields().u64(id_field)?;
         let in_order_alive_ids: Vec<u64> = segment_reader
             .doc_ids_alive()
-            .map(|doc| fast_field_reader.get_val(doc).unwrap())
+            .map(|doc| fast_field_reader.get_val(doc))
             .collect();
         assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 2, 0]);
         Ok(())
@@ -1777,12 +1777,7 @@ mod tests {
             .segment_readers()
             .iter()
             .flat_map(|segment_reader| {
-                let ff_reader = segment_reader
-                    .fast_fields()
-                    .u64(id_field)
-                    .unwrap()
-                    .to_full()
-                    .unwrap();
+                let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
                 segment_reader
                     .doc_ids_alive()
                     .map(move |doc| ff_reader.get_val(doc))
@@ -1793,12 +1788,7 @@ mod tests {
             .segment_readers()
             .iter()
             .flat_map(|segment_reader| {
-                let ff_reader = segment_reader
-                    .fast_fields()
-                    .u64(id_field)
-                    .unwrap()
-                    .to_full()
-                    .unwrap();
+                let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
                 segment_reader
                     .doc_ids_alive()
                     .map(move |doc| ff_reader.get_val(doc))
@@ -1874,7 +1864,7 @@ mod tests {
             .flat_map(|segment_reader| {
                 let ff_reader = segment_reader.fast_fields().ip_addr(ip_field).unwrap();
                 segment_reader.doc_ids_alive().flat_map(move |doc| {
-                    let val = ff_reader.get_val(doc).unwrap(); // TODO handle null
+                    let val = ff_reader.get_val(doc);
                     if val == Ipv6Addr::from_u128(0) {
                         // TODO Fix null handling
                         None
@@ -1931,7 +1921,7 @@ mod tests {
             ff_reader.get_vals(doc, &mut vals);
             assert_eq!(vals.len(), 2);
             assert_eq!(vals[0], vals[1]);
-            assert_eq!(id_reader.get_val(doc).unwrap(), vals[0]);
+            assert_eq!(id_reader.get_val(doc), vals[0]);

             let mut bool_vals = vec![];
             bool_ff_reader.get_vals(doc, &mut bool_vals);
@@ -2127,7 +2117,7 @@ mod tests {
             facet_reader
                 .facet_from_ord(facet_ords[0], &mut facet)
                 .unwrap();
-            let id = ff_reader.get_val(doc_id).unwrap();
+            let id = ff_reader.get_val(doc_id);
             let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string()));

             assert_eq!(facet, facet_expected);
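The hunks above move the single-value fast field accessors back from `OptionalColumn` to plain `Column` readers, so `get_val`, `min_value` and `max_value` hand back values directly instead of `Option`s. The following is a minimal sketch of the resulting call pattern, modeled on the tests in this diff; the field name and the writer heap budget are assumptions, and exact signatures may differ on this branch.

```rust
use tantivy::schema::{Schema, FAST};
use tantivy::{doc, Index};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut schema_builder = Schema::builder();
    // Hypothetical single-valued u64 fast field.
    let score = schema_builder.add_u64_field("score", FAST);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer = index.writer(15_000_000)?;
    writer.add_document(doc!(score => 10u64))?;
    writer.commit()?;
    let searcher = index.reader()?.searcher();
    let fast_field = searcher.segment_reader(0u32).fast_fields().u64(score)?;
    // After this change the reader yields plain values, not Option<u64>.
    assert_eq!(fast_field.get_val(0), 10u64);
    assert_eq!(fast_field.min_value(), 10u64);
    assert_eq!(fast_field.max_value(), 10u64);
    Ok(())
}
```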
@@ -67,11 +67,12 @@ pub(crate) fn index_json_values<'a>(
     doc: DocId,
     json_values: impl Iterator<Item = crate::Result<&'a serde_json::Map<String, serde_json::Value>>>,
     text_analyzer: &TextAnalyzer,
+    expand_dots_enabled: bool,
     term_buffer: &mut Term,
     postings_writer: &mut dyn PostingsWriter,
     ctx: &mut IndexingContext,
 ) -> crate::Result<()> {
-    let mut json_term_writer = JsonTermWriter::wrap(term_buffer);
+    let mut json_term_writer = JsonTermWriter::wrap(term_buffer, expand_dots_enabled);
     let mut positions_per_path: IndexingPositionsPerPath = Default::default();
     for json_value_res in json_values {
         let json_value = json_value_res?;
@@ -259,6 +260,7 @@ pub(crate) fn set_string_and_get_terms(
 pub struct JsonTermWriter<'a> {
     term_buffer: &'a mut Term,
     path_stack: Vec<usize>,
+    expand_dots_enabled: bool,
 }

 /// Splits a json path supplied to the query parser in such a way that
@@ -298,23 +300,25 @@ impl<'a> JsonTermWriter<'a> {
     pub fn from_field_and_json_path(
         field: Field,
         json_path: &str,
+        expand_dots_enabled: bool,
         term_buffer: &'a mut Term,
     ) -> Self {
         term_buffer.set_field_and_type(field, Type::Json);
-        let mut json_term_writer = Self::wrap(term_buffer);
+        let mut json_term_writer = Self::wrap(term_buffer, expand_dots_enabled);
         for segment in split_json_path(json_path) {
             json_term_writer.push_path_segment(&segment);
         }
         json_term_writer
     }

-    pub fn wrap(term_buffer: &'a mut Term) -> Self {
+    pub fn wrap(term_buffer: &'a mut Term, expand_dots_enabled: bool) -> Self {
         term_buffer.clear_with_type(Type::Json);
         let mut path_stack = Vec::with_capacity(10);
         path_stack.push(0);
         Self {
             term_buffer,
             path_stack,
+            expand_dots_enabled,
         }
     }

@@ -336,11 +340,24 @@ impl<'a> JsonTermWriter<'a> {
         self.trim_to_end_of_path();
         let buffer = self.term_buffer.value_bytes_mut();
         let buffer_len = buffer.len();

         if self.path_stack.len() > 1 {
             buffer[buffer_len - 1] = JSON_PATH_SEGMENT_SEP;
         }
-        self.term_buffer.append_bytes(segment.as_bytes());
-        self.term_buffer.append_bytes(&[JSON_PATH_SEGMENT_SEP]);
+        if self.expand_dots_enabled && segment.as_bytes().contains(&b'.') {
+            // We need to replace `.` by JSON_PATH_SEGMENT_SEP.
+            self.term_buffer
+                .append_bytes(segment.as_bytes())
+                .iter_mut()
+                .for_each(|byte| {
+                    if *byte == b'.' {
+                        *byte = JSON_PATH_SEGMENT_SEP;
+                    }
+                });
+        } else {
+            self.term_buffer.append_bytes(segment.as_bytes());
+        }
+        self.term_buffer.push_byte(JSON_PATH_SEGMENT_SEP);
         self.path_stack.push(self.term_buffer.len_bytes());
     }
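The `push_path_segment` change above is the heart of the expand-dots feature: when it is enabled, unescaped dots inside a JSON key are rewritten into the internal path separator. A self-contained sketch of that byte rewrite, assuming `\x01` as the separator (which is what the tests below check for):

```rust
const JSON_PATH_SEGMENT_SEP: u8 = 1u8;

fn expand_segment(segment: &str, expand_dots_enabled: bool) -> Vec<u8> {
    let mut bytes = segment.as_bytes().to_vec();
    if expand_dots_enabled {
        // Unescaped dots become path separators, so "k8s.node.id" indexes
        // like the nested object {"k8s": {"node": {"id": ...}}}.
        for byte in bytes.iter_mut() {
            if *byte == b'.' {
                *byte = JSON_PATH_SEGMENT_SEP;
            }
        }
    }
    bytes
}

fn main() {
    assert_eq!(expand_segment("color.hue", false), b"color.hue".to_vec());
    assert_eq!(expand_segment("color.hue", true), b"color\x01hue".to_vec());
}
```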
@@ -391,7 +408,7 @@ mod tests {
     fn test_json_writer() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("attributes");
         json_writer.push_path_segment("color");
         json_writer.set_str("red");
@@ -425,7 +442,7 @@ mod tests {
     fn test_string_term() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.set_str("red");
         assert_eq!(
@@ -438,7 +455,7 @@ mod tests {
     fn test_i64_term() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.set_fast_value(-4i64);
         assert_eq!(
@@ -451,7 +468,7 @@ mod tests {
     fn test_u64_term() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.set_fast_value(4u64);
         assert_eq!(
@@ -464,7 +481,7 @@ mod tests {
     fn test_f64_term() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.set_fast_value(4.0f64);
         assert_eq!(
@@ -477,7 +494,7 @@ mod tests {
     fn test_bool_term() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.set_fast_value(true);
         assert_eq!(
@@ -490,7 +507,7 @@ mod tests {
     fn test_push_after_set_path_segment() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("attribute");
         json_writer.set_str("something");
         json_writer.push_path_segment("color");
@@ -505,7 +522,7 @@ mod tests {
     fn test_pop_segment() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         json_writer.push_path_segment("hue");
         json_writer.pop_path_segment();
@@ -520,7 +537,7 @@ mod tests {
     fn test_json_writer_path() {
         let field = Field::from_field_id(1);
         let mut term = Term::with_type_and_field(Type::Json, field);
-        let mut json_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
         json_writer.push_path_segment("color");
         assert_eq!(json_writer.path(), b"color");
         json_writer.push_path_segment("hue");
@@ -529,6 +546,37 @@ mod tests {
         assert_eq!(json_writer.path(), b"color\x01hue");
     }

+    #[test]
+    fn test_json_path_expand_dots_disabled() {
+        let field = Field::from_field_id(1);
+        let mut term = Term::with_type_and_field(Type::Json, field);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, false);
+        json_writer.push_path_segment("color.hue");
+        assert_eq!(json_writer.path(), b"color.hue");
+    }
+
+    #[test]
+    fn test_json_path_expand_dots_enabled() {
+        let field = Field::from_field_id(1);
+        let mut term = Term::with_type_and_field(Type::Json, field);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, true);
+        json_writer.push_path_segment("color.hue");
+        assert_eq!(json_writer.path(), b"color\x01hue");
+    }
+
+    #[test]
+    fn test_json_path_expand_dots_enabled_pop_segment() {
+        let field = Field::from_field_id(1);
+        let mut term = Term::with_type_and_field(Type::Json, field);
+        let mut json_writer = JsonTermWriter::wrap(&mut term, true);
+        json_writer.push_path_segment("hello");
+        assert_eq!(json_writer.path(), b"hello");
+        json_writer.push_path_segment("color.hue");
+        assert_eq!(json_writer.path(), b"hello\x01color\x01hue");
+        json_writer.pop_path_segment();
+        assert_eq!(json_writer.path(), b"hello");
+    }
+
     #[test]
     fn test_split_json_path_simple() {
         let json_path = split_json_path("titi.toto");
@@ -401,15 +401,10 @@ impl IndexMerger {
             .readers
             .iter()
             .map(|reader| {
-                let u128_reader: Arc<dyn Column<u128>> = reader
-                    .fast_fields()
-                    .u128(field)
-                    .expect(
-                        "Failed to find a reader for single fast field. This is a tantivy bug and \
-                         it should never happen.",
-                    )
-                    .to_full()
-                    .expect("temp migration solution");
+                let u128_reader: Arc<dyn Column<u128>> = reader.fast_fields().u128(field).expect(
+                    "Failed to find a reader for single fast field. This is a tantivy bug and it \
+                     should never happen.",
+                );
                 u128_reader
             })
             .collect::<Vec<_>>();
@@ -470,11 +465,7 @@ impl IndexMerger {
         sort_by_field: &IndexSortByField,
     ) -> crate::Result<Arc<dyn Column>> {
         let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required
-        let value_accessor = reader
-            .fast_fields()
-            .u64_lenient(field_id)?
-            .to_full()
-            .expect("temp migration solution");
+        let value_accessor = reader.fast_fields().u64_lenient(field_id)?;
         Ok(value_accessor)
     }
     /// Collecting value_accessors into a vec to bind the lifetime.
@@ -1377,16 +1368,16 @@ mod tests {
             .fast_fields()
             .u64(score_field)
             .unwrap();
-        assert_eq!(score_field_reader.min_value(), Some(4000));
-        assert_eq!(score_field_reader.max_value(), Some(7000));
+        assert_eq!(score_field_reader.min_value(), 4000);
+        assert_eq!(score_field_reader.max_value(), 7000);

         let score_field_reader = searcher
             .segment_reader(1)
             .fast_fields()
             .u64(score_field)
             .unwrap();
-        assert_eq!(score_field_reader.min_value(), Some(1));
-        assert_eq!(score_field_reader.max_value(), Some(3));
+        assert_eq!(score_field_reader.min_value(), 1);
+        assert_eq!(score_field_reader.max_value(), 3);
     }
     {
         // merging the segments
@@ -1431,8 +1422,8 @@ mod tests {
             .fast_fields()
             .u64(score_field)
             .unwrap();
-        assert_eq!(score_field_reader.min_value(), Some(3));
-        assert_eq!(score_field_reader.max_value(), Some(7000));
+        assert_eq!(score_field_reader.min_value(), 3);
+        assert_eq!(score_field_reader.max_value(), 7000);
     }
     {
         // test a commit with only deletes
@@ -1478,8 +1469,8 @@ mod tests {
             .fast_fields()
             .u64(score_field)
             .unwrap();
-        assert_eq!(score_field_reader.min_value(), Some(3));
-        assert_eq!(score_field_reader.max_value(), Some(7000));
+        assert_eq!(score_field_reader.min_value(), 3);
+        assert_eq!(score_field_reader.max_value(), 7000);
     }
     {
         // Test merging a single segment in order to remove deletes.
@@ -1525,8 +1516,8 @@ mod tests {
             .fast_fields()
             .u64(score_field)
             .unwrap();
-        assert_eq!(score_field_reader.min_value(), Some(6000));
-        assert_eq!(score_field_reader.max_value(), Some(7000));
+        assert_eq!(score_field_reader.min_value(), 6000);
+        assert_eq!(score_field_reader.max_value(), 7000);
     }

     {
@@ -186,17 +186,17 @@ mod tests {

         let fast_fields = segment_reader.fast_fields();
         let fast_field = fast_fields.u64(int_field).unwrap();
-        assert_eq!(fast_field.get_val(5), Some(1u64));
-        assert_eq!(fast_field.get_val(4), Some(2u64));
-        assert_eq!(fast_field.get_val(3), Some(3u64));
+        assert_eq!(fast_field.get_val(5), 1u64);
+        assert_eq!(fast_field.get_val(4), 2u64);
+        assert_eq!(fast_field.get_val(3), 3u64);
         if force_disjunct_segment_sort_values {
-            assert_eq!(fast_field.get_val(2), Some(20u64));
-            assert_eq!(fast_field.get_val(1), Some(100u64));
+            assert_eq!(fast_field.get_val(2), 20u64);
+            assert_eq!(fast_field.get_val(1), 100u64);
         } else {
-            assert_eq!(fast_field.get_val(2), Some(10u64));
-            assert_eq!(fast_field.get_val(1), Some(20u64));
+            assert_eq!(fast_field.get_val(2), 10u64);
+            assert_eq!(fast_field.get_val(1), 20u64);
         }
-        assert_eq!(fast_field.get_val(0), Some(1_000u64));
+        assert_eq!(fast_field.get_val(0), 1_000u64);

         // test new field norm mapping
         {
@@ -373,12 +373,12 @@ mod tests {

         let fast_fields = segment_reader.fast_fields();
         let fast_field = fast_fields.u64(int_field).unwrap();
-        assert_eq!(fast_field.get_val(0), Some(1u64));
-        assert_eq!(fast_field.get_val(1), Some(2u64));
-        assert_eq!(fast_field.get_val(2), Some(3u64));
-        assert_eq!(fast_field.get_val(3), Some(10u64));
-        assert_eq!(fast_field.get_val(4), Some(20u64));
-        assert_eq!(fast_field.get_val(5), Some(1_000u64));
+        assert_eq!(fast_field.get_val(0), 1u64);
+        assert_eq!(fast_field.get_val(1), 2u64);
+        assert_eq!(fast_field.get_val(2), 3u64);
+        assert_eq!(fast_field.get_val(3), 10u64);
+        assert_eq!(fast_field.get_val(4), 20u64);
+        assert_eq!(fast_field.get_val(5), 1_000u64);

         let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
             let mut vals = vec![];
@@ -535,15 +535,11 @@ mod bench_sorted_index_merge {
         b.iter(|| {
             let sorted_doc_ids = doc_id_mapping.iter_old_doc_addrs().map(|doc_addr| {
                 let reader = &merger.readers[doc_addr.segment_ord as usize];
-                let u64_reader: Arc<dyn Column<u64>> = reader
-                    .fast_fields()
-                    .typed_fast_field_reader(field)
-                    .expect(
-                        "Failed to find a reader for single fast field. This is a tantivy bug and \
-                         it should never happen.",
-                    )
-                    .to_full()
-                    .unwrap();
+                let u64_reader: Arc<dyn Column<u64>> =
+                    reader.fast_fields().typed_fast_field_reader(field).expect(
+                        "Failed to find a reader for single fast field. This is a tantivy bug and \
+                         it should never happen.",
+                    );
                 (doc_addr.doc_id, reader, u64_reader)
             });
             // add values in order of the new doc_ids
@@ -60,7 +60,7 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
 mod tests_mmap {
     use crate::collector::Count;
     use crate::query::QueryParser;
-    use crate::schema::{Schema, STORED, TEXT};
+    use crate::schema::{JsonObjectOptions, Schema, TEXT};
     use crate::{Index, Term};

     #[test]
@@ -81,9 +81,9 @@ mod tests_mmap {
     }

     #[test]
-    fn test_json_field_espace() {
+    fn test_json_field_expand_dots_disabled_dot_escaped_required() {
         let mut schema_builder = Schema::builder();
-        let json_field = schema_builder.add_json_field("json", TEXT | STORED);
+        let json_field = schema_builder.add_json_field("json", TEXT);
         let index = Index::create_in_ram(schema_builder.build());
         let mut index_writer = index.writer_for_tests().unwrap();
         let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
@@ -99,4 +99,26 @@ mod tests_mmap {
         let num_docs = searcher.search(&query, &Count).unwrap();
         assert_eq!(num_docs, 1);
     }
+
+    #[test]
+    fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
+        let mut schema_builder = Schema::builder();
+        let json_options: JsonObjectOptions =
+            JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
+        let json_field = schema_builder.add_json_field("json", json_options);
+        let index = Index::create_in_ram(schema_builder.build());
+        let mut index_writer = index.writer_for_tests().unwrap();
+        let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
+        index_writer.add_document(doc!(json_field=>json)).unwrap();
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 1);
+        let parse_query = QueryParser::for_index(&index, Vec::new());
+        let query = parse_query
+            .parse_query(r#"json.k8s.container.name:prometheus"#)
+            .unwrap();
+        let num_docs = searcher.search(&query, &Count).unwrap();
+        assert_eq!(num_docs, 1);
+    }
 }
@@ -447,8 +447,8 @@ impl SegmentUpdater {
             let segment_entries = segment_updater.purge_deletes(opstamp)?;
             segment_updater.segment_manager.commit(segment_entries);
             segment_updater.save_metas(opstamp, payload)?;
-            let _ = garbage_collect_files(segment_updater.clone());
-            segment_updater.consider_merge_options();
+            // let _ = garbage_collect_files(segment_updater.clone());
+            // segment_updater.consider_merge_options();
             Ok(opstamp)
         })
     }
@@ -180,7 +180,7 @@ impl SegmentWriter {
             self.per_field_postings_writers.get_for_field_mut(field);
         term_buffer.clear_with_field_and_type(field_entry.field_type().value_type(), field);

-        match *field_entry.field_type() {
+        match field_entry.field_type() {
             FieldType::Facet(_) => {
                 for value in values {
                     let facet = value.as_facet().ok_or_else(make_schema_error)?;
@@ -307,7 +307,7 @@ impl SegmentWriter {
                     self.fieldnorms_writer.record(doc_id, field, num_vals);
                 }
             }
-            FieldType::JsonObject(_) => {
+            FieldType::JsonObject(json_options) => {
                 let text_analyzer = &self.per_field_text_analyzers[field.field_id() as usize];
                 let json_values_it =
                     values.map(|value| value.as_json().ok_or_else(make_schema_error));
@@ -315,6 +315,7 @@ impl SegmentWriter {
                     doc_id,
                     json_values_it,
                     text_analyzer,
+                    json_options.is_expand_dots_enabled(),
                     term_buffer,
                     postings_writer,
                     ctx,
@@ -557,7 +558,7 @@ mod tests {
         let mut term = Term::with_type_and_field(Type::Json, json_field);
         let mut term_stream = term_dict.stream().unwrap();

-        let mut json_term_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);

         json_term_writer.push_path_segment("bool");
         json_term_writer.set_fast_value(true);
@@ -648,7 +649,7 @@ mod tests {
         let segment_reader = searcher.segment_reader(0u32);
         let inv_index = segment_reader.inverted_index(json_field).unwrap();
         let mut term = Term::with_type_and_field(Type::Json, json_field);
-        let mut json_term_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
         json_term_writer.push_path_segment("mykey");
         json_term_writer.set_str("token");
         let term_info = inv_index
@@ -692,7 +693,7 @@ mod tests {
         let segment_reader = searcher.segment_reader(0u32);
         let inv_index = segment_reader.inverted_index(json_field).unwrap();
         let mut term = Term::with_type_and_field(Type::Json, json_field);
-        let mut json_term_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
         json_term_writer.push_path_segment("mykey");
         json_term_writer.set_str("two tokens");
         let term_info = inv_index
@@ -737,7 +738,7 @@ mod tests {
         let reader = index.reader().unwrap();
         let searcher = reader.searcher();
         let mut term = Term::with_type_and_field(Type::Json, json_field);
-        let mut json_term_writer = JsonTermWriter::wrap(&mut term);
+        let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
         json_term_writer.push_path_segment("mykey");
         json_term_writer.push_path_segment("field");
         json_term_writer.set_str("hello");
@@ -46,15 +46,11 @@ impl<'a> RemappedDocIdColumn<'a> {
         let (min_value, max_value) = readers
             .iter()
             .filter_map(|reader| {
-                let u64_reader: Arc<dyn Column<u64>> = reader
-                    .fast_fields()
-                    .typed_fast_field_reader(field)
-                    .expect(
-                        "Failed to find a reader for single fast field. This is a tantivy bug and \
-                         it should never happen.",
-                    )
-                    .to_full()
-                    .expect("temp migration solution");
+                let u64_reader: Arc<dyn Column<u64>> =
+                    reader.fast_fields().typed_fast_field_reader(field).expect(
+                        "Failed to find a reader for single fast field. This is a tantivy bug and \
+                         it should never happen.",
+                    );
                 compute_min_max_val(&*u64_reader, reader)
             })
             .reduce(|a, b| (a.0.min(b.0), a.1.max(b.1)))
@@ -63,15 +59,11 @@ impl<'a> RemappedDocIdColumn<'a> {
         let fast_field_readers = readers
             .iter()
             .map(|reader| {
-                let u64_reader: Arc<dyn Column<u64>> = reader
-                    .fast_fields()
-                    .typed_fast_field_reader(field)
-                    .expect(
-                        "Failed to find a reader for single fast field. This is a tantivy bug and \
-                         it should never happen.",
-                    )
-                    .to_full()
-                    .expect("temp migration solution");
+                let u64_reader: Arc<dyn Column<u64>> =
+                    reader.fast_fields().typed_fast_field_reader(field).expect(
+                        "Failed to find a reader for single fast field. This is a tantivy bug and \
+                         it should never happen.",
+                    );
                 u64_reader
             })
             .collect::<Vec<_>>();
@@ -1037,21 +1037,21 @@ pub mod tests {
             let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned);
             assert!(fast_field_reader_opt.is_ok());
             let fast_field_reader = fast_field_reader_opt.unwrap();
-            assert_eq!(fast_field_reader.get_val(0), Some(4u64))
+            assert_eq!(fast_field_reader.get_val(0), 4u64)
         }

         {
             let fast_field_reader_res = segment_reader.fast_fields().i64(fast_field_signed);
             assert!(fast_field_reader_res.is_ok());
             let fast_field_reader = fast_field_reader_res.unwrap();
-            assert_eq!(fast_field_reader.get_val(0), Some(4i64))
+            assert_eq!(fast_field_reader.get_val(0), 4i64)
         }

         {
             let fast_field_reader_res = segment_reader.fast_fields().f64(fast_field_float);
             assert!(fast_field_reader_res.is_ok());
             let fast_field_reader = fast_field_reader_res.unwrap();
-            assert_eq!(fast_field_reader.get_val(0), Some(4f64))
+            assert_eq!(fast_field_reader.get_val(0), 4f64)
         }
         Ok(())
     }
@@ -16,7 +16,7 @@ use crate::query::{
     TermQuery, TermSetQuery,
 };
 use crate::schema::{
-    Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, Schema, Term, Type,
+    Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions,
+    Schema, Term, Type,
 };
 use crate::time::format_description::well_known::Rfc3339;
 use crate::time::OffsetDateTime;
@@ -182,7 +183,6 @@ pub struct QueryParser {
     conjunction_by_default: bool,
     tokenizer_manager: TokenizerManager,
     boost: HashMap<Field, Score>,
-    field_names: HashMap<String, Field>,
 }

 fn all_negative(ast: &LogicalAst) -> bool {
@@ -195,31 +195,6 @@ fn all_negative(ast: &LogicalAst) -> bool {
     }
 }

-// Returns the position (in byte offsets) of the unescaped '.' in the `field_path`.
-//
-// This function operates directly on bytes (as opposed to codepoint), relying
-// on a encoding property of utf-8 for its correctness.
-fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
-    let mut splitting_dots_pos = Vec::new();
-    let mut escape_state = false;
-    for (pos, b) in field_path.bytes().enumerate() {
-        if escape_state {
-            escape_state = false;
-            continue;
-        }
-        match b {
-            b'\\' => {
-                escape_state = true;
-            }
-            b'.' => {
-                splitting_dots_pos.push(pos);
-            }
-            _ => {}
-        }
-    }
-    splitting_dots_pos
-}
-
 impl QueryParser {
     /// Creates a `QueryParser`, given
     /// * schema - index Schema
@@ -229,34 +204,19 @@ impl QueryParser {
         default_fields: Vec<Field>,
         tokenizer_manager: TokenizerManager,
     ) -> QueryParser {
-        let field_names = schema
-            .fields()
-            .map(|(field, field_entry)| (field_entry.name().to_string(), field))
-            .collect();
         QueryParser {
             schema,
             default_fields,
             tokenizer_manager,
             conjunction_by_default: false,
             boost: Default::default(),
-            field_names,
         }
     }

     // Splits a full_path as written in a query, into a field name and a
     // json path.
     pub(crate) fn split_full_path<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> {
-        if let Some(field) = self.field_names.get(full_path) {
-            return Some((*field, ""));
-        }
-        let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
-        while let Some(pos) = splitting_period_pos.pop() {
-            let (prefix, suffix) = full_path.split_at(pos);
-            if let Some(field) = self.field_names.get(prefix) {
-                return Some((*field, &suffix[1..]));
-            }
-        }
-        None
+        self.schema.find_field(full_path)
     }

     /// Creates a `QueryParser`, given
@@ -482,28 +442,14 @@ impl QueryParser {
                     .into_iter()
                     .collect())
             }
-            FieldType::JsonObject(ref json_options) => {
-                let option = json_options.get_text_indexing_options().ok_or_else(|| {
-                    // This should have been seen earlier really.
-                    QueryParserError::FieldNotIndexed(field_name.to_string())
-                })?;
-                let text_analyzer =
-                    self.tokenizer_manager
-                        .get(option.tokenizer())
-                        .ok_or_else(|| QueryParserError::UnknownTokenizer {
-                            field: field_name.to_string(),
-                            tokenizer: option.tokenizer().to_string(),
-                        })?;
-                let index_record_option = option.index_option();
-                generate_literals_for_json_object(
-                    field_name,
-                    field,
-                    json_path,
-                    phrase,
-                    &text_analyzer,
-                    index_record_option,
-                )
-            }
+            FieldType::JsonObject(ref json_options) => generate_literals_for_json_object(
+                field_name,
+                field,
+                json_path,
+                phrase,
+                &self.tokenizer_manager,
+                json_options,
+            ),
             FieldType::Facet(_) => match Facet::from_text(phrase) {
                 Ok(facet) => {
                     let facet_term = Term::from_facet(field, &facet);
@@ -767,17 +713,32 @@ fn generate_literals_for_json_object(
     field: Field,
     json_path: &str,
     phrase: &str,
-    text_analyzer: &TextAnalyzer,
-    index_record_option: IndexRecordOption,
+    tokenizer_manager: &TokenizerManager,
+    json_options: &JsonObjectOptions,
 ) -> Result<Vec<LogicalLiteral>, QueryParserError> {
+    let text_options = json_options.get_text_indexing_options().ok_or_else(|| {
+        // This should have been seen earlier really.
+        QueryParserError::FieldNotIndexed(field_name.to_string())
+    })?;
+    let text_analyzer = tokenizer_manager
+        .get(text_options.tokenizer())
+        .ok_or_else(|| QueryParserError::UnknownTokenizer {
+            field: field_name.to_string(),
+            tokenizer: text_options.tokenizer().to_string(),
+        })?;
+    let index_record_option = text_options.index_option();
     let mut logical_literals = Vec::new();
     let mut term = Term::with_capacity(100);
-    let mut json_term_writer =
-        JsonTermWriter::from_field_and_json_path(field, json_path, &mut term);
+    let mut json_term_writer = JsonTermWriter::from_field_and_json_path(
+        field,
+        json_path,
+        json_options.is_expand_dots_enabled(),
+        &mut term,
+    );
     if let Some(term) = convert_to_fast_value_and_get_term(&mut json_term_writer, phrase) {
         logical_literals.push(LogicalLiteral::Term(term));
     }
-    let terms = set_string_and_get_terms(&mut json_term_writer, phrase, text_analyzer);
+    let terms = set_string_and_get_terms(&mut json_term_writer, phrase, &text_analyzer);
     drop(json_term_writer);
     if terms.len() <= 1 {
         for (_, term) in terms {
@@ -1564,13 +1525,6 @@ mod test {
         assert_eq!(query_parser.split_full_path("firsty"), None);
     }

-    #[test]
-    fn test_locate_splitting_dots() {
-        assert_eq!(&super::locate_splitting_dots("a.b.c"), &[1, 3]);
-        assert_eq!(&super::locate_splitting_dots(r#"a\.b.c"#), &[4]);
-        assert_eq!(&super::locate_splitting_dots(r#"a\..b.c"#), &[3, 5]);
-    }
-
     #[test]
     pub fn test_phrase_slop() {
         test_parse_query_to_logical_ast_helper(
@@ -7,7 +7,7 @@ use std::ops::{Bound, RangeInclusive};
 use std::sync::Arc;

 use common::BinarySerializable;
-use fastfield_codecs::{MonotonicallyMappableToU128, OptionalColumn};
+use fastfield_codecs::{Column, MonotonicallyMappableToU128};

 use super::range_query::map_bound;
 use super::{ConstScorer, Explanation, Scorer, Weight};
@@ -45,10 +45,12 @@ impl Weight for IPFastFieldRangeWeight {
         match field_type.fastfield_cardinality().unwrap() {
             Cardinality::SingleValue => {
                 let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?;
-                let minmax = ip_addr_fast_field
-                    .min_value()
-                    .zip(ip_addr_fast_field.max_value());
-                let value_range = bound_to_value_range(&self.left_bound, &self.right_bound, minmax);
+                let value_range = bound_to_value_range(
+                    &self.left_bound,
+                    &self.right_bound,
+                    ip_addr_fast_field.min_value(),
+                    ip_addr_fast_field.max_value(),
+                );
                 let docset = IpRangeDocSet::new(
                     value_range,
                     IpFastFieldCardinality::SingleValue(ip_addr_fast_field),
@@ -60,10 +62,8 @@ impl Weight for IPFastFieldRangeWeight {
                 let value_range = bound_to_value_range(
                     &self.left_bound,
                     &self.right_bound,
-                    Some((
-                        ip_addr_fast_field.min_value(),
-                        ip_addr_fast_field.max_value(),
-                    )),
+                    ip_addr_fast_field.min_value(),
+                    ip_addr_fast_field.max_value(),
                 );
                 let docset = IpRangeDocSet::new(
                     value_range,
@@ -91,10 +91,9 @@ impl Weight for IPFastFieldRangeWeight {
 fn bound_to_value_range(
     left_bound: &Bound<Ipv6Addr>,
     right_bound: &Bound<Ipv6Addr>,
-    min_max: Option<(Ipv6Addr, Ipv6Addr)>,
+    min_value: Ipv6Addr,
+    max_value: Ipv6Addr,
 ) -> RangeInclusive<Ipv6Addr> {
-    let (min_value, max_value) =
-        min_max.unwrap_or((Ipv6Addr::from(u128::MIN), Ipv6Addr::from(u128::MAX)));
     let start_value = match left_bound {
         Bound::Included(ip_addr) => *ip_addr,
         Bound::Excluded(ip_addr) => Ipv6Addr::from(ip_addr.to_u128() + 1),
@@ -143,7 +142,7 @@ impl VecCursor {
 }

 pub(crate) enum IpFastFieldCardinality {
-    SingleValue(Arc<dyn OptionalColumn<Ipv6Addr>>),
+    SingleValue(Arc<dyn Column<Ipv6Addr>>),
     MultiValue(MultiValuedU128FastFieldReader<Ipv6Addr>),
 }
@@ -181,6 +181,11 @@ impl FieldType {
         matches!(self, FieldType::IpAddr(_))
     }

+    /// returns true if this is an date field
+    pub fn is_date(&self) -> bool {
+        matches!(self, FieldType::Date(_))
+    }
+
     /// returns true if the field is indexed.
     pub fn is_indexed(&self) -> bool {
         match *self {
@@ -13,6 +13,8 @@ pub struct JsonObjectOptions {
     // If set to some, int, date, f64 and text will be indexed.
     // Text will use the TextFieldIndexing setting for indexing.
     indexing: Option<TextFieldIndexing>,
+
+    expand_dots_enabled: bool,
 }

 impl JsonObjectOptions {
@@ -26,6 +28,29 @@ impl JsonObjectOptions {
         self.indexing.is_some()
     }

+    /// Returns `true` iff dots in json keys should be expanded.
+    ///
+    /// When expand_dots is enabled, json object like
+    /// `{"k8s.node.id": 5}` is processed as if it was
+    /// `{"k8s": {"node": {"id": 5}}}`.
+    /// It option has the merit of allowing users to
+    /// write queries like `k8s.node.id:5`.
+    /// On the other, enabling that feature can lead to
+    /// ambiguity.
+    ///
+    /// If disabled, the "." need to be escaped:
+    /// `k8s\.node\.id:5`.
+    pub fn is_expand_dots_enabled(&self) -> bool {
+        self.expand_dots_enabled
+    }
+
+    /// Sets `expands_dots` to true.
+    /// See `is_expand_dots_enabled` for more information.
+    pub fn set_expand_dots_enabled(mut self) -> Self {
+        self.expand_dots_enabled = true;
+        self
+    }
+
     /// Returns the text indexing options.
     ///
     /// If set to `Some` then both int and str values will be indexed.
@@ -55,6 +80,7 @@ impl From<StoredFlag> for JsonObjectOptions {
         JsonObjectOptions {
             stored: true,
             indexing: None,
+            expand_dots_enabled: false,
         }
     }
 }
@@ -69,10 +95,11 @@ impl<T: Into<JsonObjectOptions>> BitOr<T> for JsonObjectOptions {
     type Output = JsonObjectOptions;

     fn bitor(self, other: T) -> Self {
-        let other = other.into();
+        let other: JsonObjectOptions = other.into();
         JsonObjectOptions {
             indexing: self.indexing.or(other.indexing),
             stored: self.stored | other.stored,
+            expand_dots_enabled: self.expand_dots_enabled | other.expand_dots_enabled,
         }
     }
 }
@@ -93,6 +120,7 @@ impl From<TextOptions> for JsonObjectOptions {
         JsonObjectOptions {
             stored: text_options.is_stored(),
             indexing: text_options.get_indexing_options().cloned(),
+            expand_dots_enabled: false,
         }
     }
 }
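For reference, this is roughly how the new option is meant to be switched on when declaring a JSON field, mirroring the `tests_mmap` test earlier in this diff; the field name is illustrative and the snippet is a sketch, not part of the change itself.

```rust
use tantivy::schema::{JsonObjectOptions, Schema, TEXT};

fn main() {
    // TEXT converts into JsonObjectOptions; expand_dots is off by default.
    let json_options: JsonObjectOptions =
        JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
    assert!(json_options.is_expand_dots_enabled());

    let mut schema_builder = Schema::builder();
    let _json_field = schema_builder.add_json_field("attributes", json_options);
    let _schema = schema_builder.build();
}
```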
@@ -252,6 +252,31 @@ impl Eq for InnerSchema {}
 #[derive(Clone, Eq, PartialEq, Debug)]
 pub struct Schema(Arc<InnerSchema>);

+// Returns the position (in byte offsets) of the unescaped '.' in the `field_path`.
+//
+// This function operates directly on bytes (as opposed to codepoint), relying
+// on a encoding property of utf-8 for its correctness.
+fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
+    let mut splitting_dots_pos = Vec::new();
+    let mut escape_state = false;
+    for (pos, b) in field_path.bytes().enumerate() {
+        if escape_state {
+            escape_state = false;
+            continue;
+        }
+        match b {
+            b'\\' => {
+                escape_state = true;
+            }
+            b'.' => {
+                splitting_dots_pos.push(pos);
+            }
+            _ => {}
+        }
+    }
+    splitting_dots_pos
+}
+
 impl Schema {
     /// Return the `FieldEntry` associated with a `Field`.
     pub fn get_field_entry(&self, field: Field) -> &FieldEntry {
@@ -358,6 +383,28 @@ impl Schema {
         }
         Ok(doc)
     }
+
+    /// Searches for a full_path in the schema, returning the field name and a JSON path.
+    ///
+    /// This function works by checking if the field exists for the exact given full_path.
+    /// If it's not, it splits the full_path at non-escaped '.' chars and tries to match the
+    /// prefix with the field names, favoring the longest field names.
+    ///
+    /// This does not check if field is a JSON field. It is possible for this functions to
+    /// return a non-empty JSON path with a non-JSON field.
+    pub fn find_field<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> {
+        if let Some(field) = self.0.fields_map.get(full_path) {
+            return Some((*field, ""));
+        }
+        let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
+        while let Some(pos) = splitting_period_pos.pop() {
+            let (prefix, suffix) = full_path.split_at(pos);
+            if let Some(field) = self.0.fields_map.get(prefix) {
+                return Some((*field, &suffix[1..]));
+            }
+        }
+        None
+    }
 }

 impl Serialize for Schema {
@@ -436,6 +483,13 @@ mod tests {
     use crate::schema::schema::DocParsingError::InvalidJson;
     use crate::schema::*;

+    #[test]
+    fn test_locate_splitting_dots() {
+        assert_eq!(&super::locate_splitting_dots("a.b.c"), &[1, 3]);
+        assert_eq!(&super::locate_splitting_dots(r#"a\.b.c"#), &[4]);
+        assert_eq!(&super::locate_splitting_dots(r#"a\..b.c"#), &[3, 5]);
+    }
+
     #[test]
     pub fn is_indexed_test() {
         let mut schema_builder = Schema::builder();
@@ -936,4 +990,46 @@ mod tests {
         ]"#;
         assert_eq!(schema_json, expected);
     }
+
+    #[test]
+    fn test_find_field() {
+        let mut schema_builder = Schema::builder();
+        schema_builder.add_json_field("foo", STRING);
+
+        schema_builder.add_text_field("bar", STRING);
+        schema_builder.add_text_field("foo.bar", STRING);
+        schema_builder.add_text_field("foo.bar.baz", STRING);
+        schema_builder.add_text_field("bar.a.b.c", STRING);
+        let schema = schema_builder.build();
+
+        assert_eq!(
+            schema.find_field("foo.bar"),
+            Some((schema.get_field("foo.bar").unwrap(), ""))
+        );
+        assert_eq!(
+            schema.find_field("foo.bar.bar"),
+            Some((schema.get_field("foo.bar").unwrap(), "bar"))
+        );
+        assert_eq!(
+            schema.find_field("foo.bar.baz"),
+            Some((schema.get_field("foo.bar.baz").unwrap(), ""))
+        );
+        assert_eq!(
+            schema.find_field("foo.toto"),
+            Some((schema.get_field("foo").unwrap(), "toto"))
+        );
+        assert_eq!(
+            schema.find_field("foo.bar"),
+            Some((schema.get_field("foo.bar").unwrap(), ""))
+        );
+        assert_eq!(
+            schema.find_field("bar.toto.titi"),
+            Some((schema.get_field("bar").unwrap(), "toto.titi"))
+        );
+
+        assert_eq!(schema.find_field("hello"), None);
+        assert_eq!(schema.find_field(""), None);
+        assert_eq!(schema.find_field("thiswouldbeareallylongfieldname"), None);
+        assert_eq!(schema.find_field("baz.bar.foo"), None);
+    }
 }
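The new `Schema::find_field` above is what the query parser now delegates to. A hedged usage sketch of its resolution order as introduced in this diff (field names are hypothetical): an exact field-name match wins first, then the longest field-name prefix split on an unescaped dot, with the remainder returned as the JSON path.

```rust
use tantivy::schema::{Schema, STRING, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    let attributes = schema_builder.add_json_field("attributes", TEXT);
    schema_builder.add_text_field("attributes.color", STRING);
    let schema = schema_builder.build();

    // Exact field names take precedence over splitting on dots.
    assert_eq!(
        schema.find_field("attributes.color"),
        Some((schema.get_field("attributes.color").unwrap(), ""))
    );
    // Otherwise the longest matching prefix wins and the rest is the JSON path.
    assert_eq!(schema.find_field("attributes.size"), Some((attributes, "size")));
    assert_eq!(schema.find_field("unknown.size"), None);
}
```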
@@ -197,8 +197,19 @@ impl Term {
     }

     /// Appends value bytes to the Term.
-    pub fn append_bytes(&mut self, bytes: &[u8]) {
+    ///
+    /// This function returns the segment that has just been added.
+    #[inline]
+    pub fn append_bytes(&mut self, bytes: &[u8]) -> &mut [u8] {
+        let len_before = self.0.len();
         self.0.extend_from_slice(bytes);
+        &mut self.0[len_before..]
+    }
+
+    /// Appends a single byte to the term.
+    #[inline]
+    pub fn push_byte(&mut self, byte: u8) {
+        self.0.push(byte);
     }
 }
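`append_bytes` now returns a mutable view of exactly the bytes it appended, which is what lets `push_path_segment` rewrite dots in place without re-scanning the whole term buffer. A minimal stand-alone sketch of that pattern (a plain `Vec<u8>` stands in for `Term`; names and the `\x00`/`\x01` bytes are illustrative):

```rust
fn append_bytes<'a>(buffer: &'a mut Vec<u8>, bytes: &[u8]) -> &'a mut [u8] {
    let len_before = buffer.len();
    buffer.extend_from_slice(bytes);
    // Hand back only the freshly appended suffix.
    &mut buffer[len_before..]
}

fn main() {
    let mut buffer = b"json\x00".to_vec();
    // Rewrite dots in the appended segment without touching earlier bytes.
    append_bytes(&mut buffer, b"k8s.node.id")
        .iter_mut()
        .for_each(|byte| {
            if *byte == b'.' {
                *byte = 1u8; // JSON_PATH_SEGMENT_SEP
            }
        });
    assert_eq!(buffer.as_slice(), &b"json\x00k8s\x01node\x01id"[..]);
}
```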