mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3175f5341 | ||
|
|
203b0eebf1 | ||
|
|
eb37dbee26 | ||
|
|
c6e77d27c6 | ||
|
|
db6587ed9b |
18
Cargo.toml
18
Cargo.toml
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.22.0"
|
version = "0.24.2"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
keywords = ["search", "information", "retrieval"]
|
keywords = ["search", "information", "retrieval"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
rust-version = "1.75"
|
rust-version = "1.81"
|
||||||
exclude = ["benches/*.json", "benches/*.txt"]
|
exclude = ["benches/*.json", "benches/*.txt"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
@@ -57,13 +57,13 @@ measure_time = "0.9.0"
|
|||||||
arc-swap = "1.5.0"
|
arc-swap = "1.5.0"
|
||||||
bon = "3.3.1"
|
bon = "3.3.1"
|
||||||
|
|
||||||
columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
|
columnar = { version = "0.5", path = "./columnar", package = "tantivy-columnar" }
|
||||||
sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
|
sstable = { version = "0.5", path = "./sstable", package = "tantivy-sstable", optional = true }
|
||||||
stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
|
stacker = { version = "0.5", path = "./stacker", package = "tantivy-stacker" }
|
||||||
query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
|
query-grammar = { version = "0.24.0", path = "./query-grammar", package = "tantivy-query-grammar" }
|
||||||
tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
|
tantivy-bitpacker = { version = "0.8", path = "./bitpacker" }
|
||||||
common = { version = "0.7", path = "./common/", package = "tantivy-common" }
|
common = { version = "0.9", path = "./common/", package = "tantivy-common" }
|
||||||
tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
|
tokenizer-api = { version = "0.5", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
|
||||||
sketches-ddsketch = { version = "0.3.0", features = ["use_serde"] }
|
sketches-ddsketch = { version = "0.3.0", features = ["use_serde"] }
|
||||||
hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
|
hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
|
||||||
futures-util = { version = "0.3.28", optional = true }
|
futures-util = { version = "0.3.28", optional = true }
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-bitpacker"
|
name = "tantivy-bitpacker"
|
||||||
version = "0.6.0"
|
version = "0.8.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-columnar"
|
name = "tantivy-columnar"
|
||||||
version = "0.3.0"
|
version = "0.5.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
@@ -12,10 +12,10 @@ categories = ["database-implementations", "data-structures", "compression"]
|
|||||||
itertools = "0.14.0"
|
itertools = "0.14.0"
|
||||||
fastdivide = "0.4.0"
|
fastdivide = "0.4.0"
|
||||||
|
|
||||||
stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"}
|
stacker = { version= "0.5", path = "../stacker", package="tantivy-stacker"}
|
||||||
sstable = { version= "0.3", path = "../sstable", package = "tantivy-sstable" }
|
sstable = { version= "0.5", path = "../sstable", package = "tantivy-sstable" }
|
||||||
common = { version= "0.7", path = "../common", package = "tantivy-common" }
|
common = { version= "0.9", path = "../common", package = "tantivy-common" }
|
||||||
tantivy-bitpacker = { version= "0.6", path = "../bitpacker/" }
|
tantivy-bitpacker = { version= "0.8", path = "../bitpacker/" }
|
||||||
serde = "1.0.152"
|
serde = "1.0.152"
|
||||||
downcast-rs = "2.0.1"
|
downcast-rs = "2.0.1"
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-common"
|
name = "tantivy-common"
|
||||||
version = "0.7.0"
|
version = "0.9.0"
|
||||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
@@ -13,7 +13,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
byteorder = "1.4.3"
|
byteorder = "1.4.3"
|
||||||
ownedbytes = { version= "0.7", path="../ownedbytes" }
|
ownedbytes = { version= "0.9", path="../ownedbytes" }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
time = { version = "0.3.10", features = ["serde-well-known"] }
|
time = { version = "0.3.10", features = ["serde-well-known"] }
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||||
name = "ownedbytes"
|
name = "ownedbytes"
|
||||||
version = "0.7.0"
|
version = "0.9.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Expose data as static slice"
|
description = "Expose data as static slice"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-query-grammar"
|
name = "tantivy-query-grammar"
|
||||||
version = "0.22.0"
|
version = "0.24.0"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
|
|||||||
@@ -786,7 +786,7 @@ impl<Score, D, const R: bool> From<TopNComputerDeser<Score, D, R>> for TopNCompu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Score, D, const R: bool> TopNComputer<Score, D, R>
|
impl<Score, D, const REVERSE_ORDER: bool> TopNComputer<Score, D, REVERSE_ORDER>
|
||||||
where
|
where
|
||||||
Score: PartialOrd + Clone,
|
Score: PartialOrd + Clone,
|
||||||
D: Ord,
|
D: Ord,
|
||||||
@@ -807,7 +807,10 @@ where
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn push(&mut self, feature: Score, doc: D) {
|
pub fn push(&mut self, feature: Score, doc: D) {
|
||||||
if let Some(last_median) = self.threshold.clone() {
|
if let Some(last_median) = self.threshold.clone() {
|
||||||
if feature < last_median {
|
if !REVERSE_ORDER && feature > last_median {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if REVERSE_ORDER && feature < last_median {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -842,7 +845,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the top n elements in sorted order.
|
/// Returns the top n elements in sorted order.
|
||||||
pub fn into_sorted_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
|
pub fn into_sorted_vec(mut self) -> Vec<ComparableDoc<Score, D, REVERSE_ORDER>> {
|
||||||
if self.buffer.len() > self.top_n {
|
if self.buffer.len() > self.top_n {
|
||||||
self.truncate_top_n();
|
self.truncate_top_n();
|
||||||
}
|
}
|
||||||
@@ -853,7 +856,7 @@ where
|
|||||||
/// Returns the top n elements in stored order.
|
/// Returns the top n elements in stored order.
|
||||||
/// Useful if you do not need the elements in sorted order,
|
/// Useful if you do not need the elements in sorted order,
|
||||||
/// for example when merging the results of multiple segments.
|
/// for example when merging the results of multiple segments.
|
||||||
pub fn into_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
|
pub fn into_vec(mut self) -> Vec<ComparableDoc<Score, D, REVERSE_ORDER>> {
|
||||||
if self.buffer.len() > self.top_n {
|
if self.buffer.len() > self.top_n {
|
||||||
self.truncate_top_n();
|
self.truncate_top_n();
|
||||||
}
|
}
|
||||||
@@ -863,9 +866,11 @@ where
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use proptest::prelude::*;
|
||||||
|
|
||||||
use super::{TopDocs, TopNComputer};
|
use super::{TopDocs, TopNComputer};
|
||||||
use crate::collector::top_collector::ComparableDoc;
|
use crate::collector::top_collector::ComparableDoc;
|
||||||
use crate::collector::Collector;
|
use crate::collector::{Collector, DocSetCollector};
|
||||||
use crate::query::{AllQuery, Query, QueryParser};
|
use crate::query::{AllQuery, Query, QueryParser};
|
||||||
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
|
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
|
||||||
use crate::time::format_description::well_known::Rfc3339;
|
use crate::time::format_description::well_known::Rfc3339;
|
||||||
@@ -960,6 +965,44 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
proptest! {
|
||||||
|
#[test]
|
||||||
|
fn test_topn_computer_asc_prop(
|
||||||
|
limit in 0..10_usize,
|
||||||
|
docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize),
|
||||||
|
) {
|
||||||
|
let mut computer: TopNComputer<_, _, false> = TopNComputer::new(limit);
|
||||||
|
for (feature, doc) in &docs {
|
||||||
|
computer.push(*feature, *doc);
|
||||||
|
}
|
||||||
|
let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::<Vec<_>>();
|
||||||
|
comparable_docs.sort();
|
||||||
|
comparable_docs.truncate(limit);
|
||||||
|
prop_assert_eq!(
|
||||||
|
computer.into_sorted_vec(),
|
||||||
|
comparable_docs,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_topn_computer_desc_prop(
|
||||||
|
limit in 0..10_usize,
|
||||||
|
docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize),
|
||||||
|
) {
|
||||||
|
let mut computer: TopNComputer<_, _, true> = TopNComputer::new(limit);
|
||||||
|
for (feature, doc) in &docs {
|
||||||
|
computer.push(*feature, *doc);
|
||||||
|
}
|
||||||
|
let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::<Vec<_>>();
|
||||||
|
comparable_docs.sort();
|
||||||
|
comparable_docs.truncate(limit);
|
||||||
|
prop_assert_eq!(
|
||||||
|
computer.into_sorted_vec(),
|
||||||
|
comparable_docs,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {
|
fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {
|
||||||
let index = make_index()?;
|
let index = make_index()?;
|
||||||
@@ -1373,4 +1416,29 @@ mod tests {
|
|||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_topn_computer_asc() {
|
||||||
|
let mut computer: TopNComputer<u32, u32, false> = TopNComputer::new(2);
|
||||||
|
|
||||||
|
computer.push(1u32, 1u32);
|
||||||
|
computer.push(2u32, 2u32);
|
||||||
|
computer.push(3u32, 3u32);
|
||||||
|
computer.push(2u32, 4u32);
|
||||||
|
computer.push(4u32, 5u32);
|
||||||
|
computer.push(1u32, 6u32);
|
||||||
|
assert_eq!(
|
||||||
|
computer.into_sorted_vec(),
|
||||||
|
&[
|
||||||
|
ComparableDoc {
|
||||||
|
feature: 1u32,
|
||||||
|
doc: 1u32,
|
||||||
|
},
|
||||||
|
ComparableDoc {
|
||||||
|
feature: 1u32,
|
||||||
|
doc: 6u32,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-sstable"
|
name = "tantivy-sstable"
|
||||||
version = "0.3.0"
|
version = "0.5.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
@@ -10,10 +10,10 @@ categories = ["database-implementations", "data-structures", "compression"]
|
|||||||
description = "sstables for tantivy"
|
description = "sstables for tantivy"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
common = {version= "0.7", path="../common", package="tantivy-common"}
|
common = {version= "0.9", path="../common", package="tantivy-common"}
|
||||||
futures-util = "0.3.30"
|
futures-util = "0.3.30"
|
||||||
itertools = "0.14.0"
|
itertools = "0.14.0"
|
||||||
tantivy-bitpacker = { version= "0.6", path="../bitpacker" }
|
tantivy-bitpacker = { version= "0.8", path="../bitpacker" }
|
||||||
tantivy-fst = "0.5"
|
tantivy-fst = "0.5"
|
||||||
# experimental gives us access to Decompressor::upper_bound
|
# experimental gives us access to Decompressor::upper_bound
|
||||||
zstd = { version = "0.13", features = ["experimental"] }
|
zstd = { version = "0.13", features = ["experimental"] }
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-stacker"
|
name = "tantivy-stacker"
|
||||||
version = "0.3.0"
|
version = "0.5.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||||
@@ -9,7 +9,7 @@ description = "term hashmap used for indexing"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
murmurhash32 = "0.3"
|
murmurhash32 = "0.3"
|
||||||
common = { version = "0.7", path = "../common/", package = "tantivy-common" }
|
common = { version = "0.9", path = "../common/", package = "tantivy-common" }
|
||||||
ahash = { version = "0.8.11", default-features = false, optional = true }
|
ahash = { version = "0.8.11", default-features = false, optional = true }
|
||||||
rand_distr = "0.4.3"
|
rand_distr = "0.4.3"
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy-tokenizer-api"
|
name = "tantivy-tokenizer-api"
|
||||||
version = "0.3.0"
|
version = "0.5.0"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Tokenizer API of tantivy"
|
description = "Tokenizer API of tantivy"
|
||||||
|
|||||||
Reference in New Issue
Block a user