mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-29 13:32:54 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3175f5341 | ||
|
|
203b0eebf1 | ||
|
|
eb37dbee26 | ||
|
|
c6e77d27c6 | ||
|
|
db6587ed9b |
18
Cargo.toml
18
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy"
|
||||
version = "0.22.0"
|
||||
version = "0.24.2"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
categories = ["database-implementations", "data-structures"]
|
||||
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
readme = "README.md"
|
||||
keywords = ["search", "information", "retrieval"]
|
||||
edition = "2021"
|
||||
rust-version = "1.75"
|
||||
rust-version = "1.81"
|
||||
exclude = ["benches/*.json", "benches/*.txt"]
|
||||
|
||||
[dependencies]
|
||||
@@ -57,13 +57,13 @@ measure_time = "0.9.0"
|
||||
arc-swap = "1.5.0"
|
||||
bon = "3.3.1"
|
||||
|
||||
columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
|
||||
sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
|
||||
stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
|
||||
query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
|
||||
tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
|
||||
common = { version = "0.7", path = "./common/", package = "tantivy-common" }
|
||||
tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
|
||||
columnar = { version = "0.5", path = "./columnar", package = "tantivy-columnar" }
|
||||
sstable = { version = "0.5", path = "./sstable", package = "tantivy-sstable", optional = true }
|
||||
stacker = { version = "0.5", path = "./stacker", package = "tantivy-stacker" }
|
||||
query-grammar = { version = "0.24.0", path = "./query-grammar", package = "tantivy-query-grammar" }
|
||||
tantivy-bitpacker = { version = "0.8", path = "./bitpacker" }
|
||||
common = { version = "0.9", path = "./common/", package = "tantivy-common" }
|
||||
tokenizer-api = { version = "0.5", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
|
||||
sketches-ddsketch = { version = "0.3.0", features = ["use_serde"] }
|
||||
hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
|
||||
futures-util = { version = "0.3.28", optional = true }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.6.0"
|
||||
version = "0.8.0"
|
||||
edition = "2021"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
@@ -12,10 +12,10 @@ categories = ["database-implementations", "data-structures", "compression"]
|
||||
itertools = "0.14.0"
|
||||
fastdivide = "0.4.0"
|
||||
|
||||
stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"}
|
||||
sstable = { version= "0.3", path = "../sstable", package = "tantivy-sstable" }
|
||||
common = { version= "0.7", path = "../common", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.6", path = "../bitpacker/" }
|
||||
stacker = { version= "0.5", path = "../stacker", package="tantivy-stacker"}
|
||||
sstable = { version= "0.5", path = "../sstable", package = "tantivy-sstable" }
|
||||
common = { version= "0.9", path = "../common", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.8", path = "../bitpacker/" }
|
||||
serde = "1.0.152"
|
||||
downcast-rs = "2.0.1"
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-common"
|
||||
version = "0.7.0"
|
||||
version = "0.9.0"
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
@@ -13,7 +13,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
[dependencies]
|
||||
byteorder = "1.4.3"
|
||||
ownedbytes = { version= "0.7", path="../ownedbytes" }
|
||||
ownedbytes = { version= "0.9", path="../ownedbytes" }
|
||||
async-trait = "0.1"
|
||||
time = { version = "0.3.10", features = ["serde-well-known"] }
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
name = "ownedbytes"
|
||||
version = "0.7.0"
|
||||
version = "0.9.0"
|
||||
edition = "2021"
|
||||
description = "Expose data as static slice"
|
||||
license = "MIT"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
categories = ["database-implementations", "data-structures"]
|
||||
|
||||
@@ -786,7 +786,7 @@ impl<Score, D, const R: bool> From<TopNComputerDeser<Score, D, R>> for TopNCompu
|
||||
}
|
||||
}
|
||||
|
||||
impl<Score, D, const R: bool> TopNComputer<Score, D, R>
|
||||
impl<Score, D, const REVERSE_ORDER: bool> TopNComputer<Score, D, REVERSE_ORDER>
|
||||
where
|
||||
Score: PartialOrd + Clone,
|
||||
D: Ord,
|
||||
@@ -807,7 +807,10 @@ where
|
||||
#[inline]
|
||||
pub fn push(&mut self, feature: Score, doc: D) {
|
||||
if let Some(last_median) = self.threshold.clone() {
|
||||
if feature < last_median {
|
||||
if !REVERSE_ORDER && feature > last_median {
|
||||
return;
|
||||
}
|
||||
if REVERSE_ORDER && feature < last_median {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -842,7 +845,7 @@ where
|
||||
}
|
||||
|
||||
/// Returns the top n elements in sorted order.
|
||||
pub fn into_sorted_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
|
||||
pub fn into_sorted_vec(mut self) -> Vec<ComparableDoc<Score, D, REVERSE_ORDER>> {
|
||||
if self.buffer.len() > self.top_n {
|
||||
self.truncate_top_n();
|
||||
}
|
||||
@@ -853,7 +856,7 @@ where
|
||||
/// Returns the top n elements in stored order.
|
||||
/// Useful if you do not need the elements in sorted order,
|
||||
/// for example when merging the results of multiple segments.
|
||||
pub fn into_vec(mut self) -> Vec<ComparableDoc<Score, D, R>> {
|
||||
pub fn into_vec(mut self) -> Vec<ComparableDoc<Score, D, REVERSE_ORDER>> {
|
||||
if self.buffer.len() > self.top_n {
|
||||
self.truncate_top_n();
|
||||
}
|
||||
@@ -863,9 +866,11 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::{TopDocs, TopNComputer};
|
||||
use crate::collector::top_collector::ComparableDoc;
|
||||
use crate::collector::Collector;
|
||||
use crate::collector::{Collector, DocSetCollector};
|
||||
use crate::query::{AllQuery, Query, QueryParser};
|
||||
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
@@ -960,6 +965,44 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_topn_computer_asc_prop(
|
||||
limit in 0..10_usize,
|
||||
docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize),
|
||||
) {
|
||||
let mut computer: TopNComputer<_, _, false> = TopNComputer::new(limit);
|
||||
for (feature, doc) in &docs {
|
||||
computer.push(*feature, *doc);
|
||||
}
|
||||
let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::<Vec<_>>();
|
||||
comparable_docs.sort();
|
||||
comparable_docs.truncate(limit);
|
||||
prop_assert_eq!(
|
||||
computer.into_sorted_vec(),
|
||||
comparable_docs,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topn_computer_desc_prop(
|
||||
limit in 0..10_usize,
|
||||
docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize),
|
||||
) {
|
||||
let mut computer: TopNComputer<_, _, true> = TopNComputer::new(limit);
|
||||
for (feature, doc) in &docs {
|
||||
computer.push(*feature, *doc);
|
||||
}
|
||||
let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::<Vec<_>>();
|
||||
comparable_docs.sort();
|
||||
comparable_docs.truncate(limit);
|
||||
prop_assert_eq!(
|
||||
computer.into_sorted_vec(),
|
||||
comparable_docs,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {
|
||||
let index = make_index()?;
|
||||
@@ -1373,4 +1416,29 @@ mod tests {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_topn_computer_asc() {
|
||||
let mut computer: TopNComputer<u32, u32, false> = TopNComputer::new(2);
|
||||
|
||||
computer.push(1u32, 1u32);
|
||||
computer.push(2u32, 2u32);
|
||||
computer.push(3u32, 3u32);
|
||||
computer.push(2u32, 4u32);
|
||||
computer.push(4u32, 5u32);
|
||||
computer.push(1u32, 6u32);
|
||||
assert_eq!(
|
||||
computer.into_sorted_vec(),
|
||||
&[
|
||||
ComparableDoc {
|
||||
feature: 1u32,
|
||||
doc: 1u32,
|
||||
},
|
||||
ComparableDoc {
|
||||
feature: 1u32,
|
||||
doc: 6u32,
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
@@ -10,10 +10,10 @@ categories = ["database-implementations", "data-structures", "compression"]
|
||||
description = "sstables for tantivy"
|
||||
|
||||
[dependencies]
|
||||
common = {version= "0.7", path="../common", package="tantivy-common"}
|
||||
common = {version= "0.9", path="../common", package="tantivy-common"}
|
||||
futures-util = "0.3.30"
|
||||
itertools = "0.14.0"
|
||||
tantivy-bitpacker = { version= "0.6", path="../bitpacker" }
|
||||
tantivy-bitpacker = { version= "0.8", path="../bitpacker" }
|
||||
tantivy-fst = "0.5"
|
||||
# experimental gives us access to Decompressor::upper_bound
|
||||
zstd = { version = "0.13", features = ["experimental"] }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
@@ -9,7 +9,7 @@ description = "term hashmap used for indexing"
|
||||
|
||||
[dependencies]
|
||||
murmurhash32 = "0.3"
|
||||
common = { version = "0.7", path = "../common/", package = "tantivy-common" }
|
||||
common = { version = "0.9", path = "../common/", package = "tantivy-common" }
|
||||
ahash = { version = "0.8.11", default-features = false, optional = true }
|
||||
rand_distr = "0.4.3"
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy-tokenizer-api"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
description = "Tokenizer API of tantivy"
|
||||
|
||||
Reference in New Issue
Block a user