mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
Compare commits
2 Commits
issue/896
...
githubacti
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6eb7c7f419 | ||
|
|
e37ca8178a |
28
.github/workflows/ci.yml
vendored
Normal file
28
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
name: Tantivy CI
|
||||||
|
|
||||||
|
on: [push]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: Test Suite
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: actions-rs/toolchain@v1
|
||||||
|
with:
|
||||||
|
profile: minimal
|
||||||
|
toolchain: stable
|
||||||
|
override: true
|
||||||
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: test
|
||||||
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: fmt
|
||||||
|
args: --all -- --check
|
||||||
|
- run: rustup component add clippy
|
||||||
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: clippy
|
||||||
|
args: -- -D warnings
|
||||||
|
|
||||||
66
.github/workflows/coveralls.yml
vendored
Normal file
66
.github/workflows/coveralls.yml
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
on: [push]
|
||||||
|
|
||||||
|
name: Code coverage with grcov
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
grcov:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os:
|
||||||
|
- ubuntu-latest
|
||||||
|
#- macOS-latest
|
||||||
|
#- windows-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Install toolchain
|
||||||
|
uses: actions-rs/toolchain@v1
|
||||||
|
with:
|
||||||
|
toolchain: nightly
|
||||||
|
override: true
|
||||||
|
profile: minimal
|
||||||
|
|
||||||
|
- name: Execute tests
|
||||||
|
uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: test
|
||||||
|
args: --all --lib
|
||||||
|
env:
|
||||||
|
CARGO_INCREMENTAL: 0
|
||||||
|
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests"
|
||||||
|
|
||||||
|
# Note that `actions-rs/grcov` Action can install `grcov` too,
|
||||||
|
# but can't use faster installation methods yet.
|
||||||
|
# As a temporary experiment `actions-rs/install` Action plugged in here.
|
||||||
|
# Consider **NOT** to copy that into your workflow,
|
||||||
|
# but use `actions-rs/grcov` only
|
||||||
|
- name: Pre-installing grcov
|
||||||
|
uses: actions-rs/install@v0.1
|
||||||
|
with:
|
||||||
|
crate: grcov
|
||||||
|
use-tool-cache: true
|
||||||
|
|
||||||
|
- name: Gather coverage data
|
||||||
|
id: coverage
|
||||||
|
uses: actions-rs/grcov@v0.1
|
||||||
|
with:
|
||||||
|
coveralls-token: ${{ secrets.COVERALLS_TOKEN }}
|
||||||
|
|
||||||
|
- name: Coveralls upload
|
||||||
|
uses: coverallsapp/github-action@master
|
||||||
|
with:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
parallel: true
|
||||||
|
path-to-lcov: ${{ steps.coverage.outputs.report }}
|
||||||
|
|
||||||
|
grcov_finalize:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: grcov
|
||||||
|
steps:
|
||||||
|
- name: Coveralls finalization
|
||||||
|
uses: coverallsapp/github-action@master
|
||||||
|
with:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
parallel-finished: true
|
||||||
11
CHANGELOG.md
11
CHANGELOG.md
@@ -1,14 +1,3 @@
|
|||||||
Tantivy 0.13.2
|
|
||||||
===================
|
|
||||||
Bugfix. Acquiring a facet reader on a segment that does not contain any
|
|
||||||
doc with this facet returns `None`. (#896)
|
|
||||||
|
|
||||||
Tantivy 0.13.1
|
|
||||||
======================
|
|
||||||
Made `Query` and `Collector` `Send + Sync`.
|
|
||||||
Updated misc dependency versions.
|
|
||||||
|
|
||||||
|
|
||||||
Tantivy 0.13.0
|
Tantivy 0.13.0
|
||||||
======================
|
======================
|
||||||
Tantivy 0.13 introduce a change in the index format that will require
|
Tantivy 0.13 introduce a change in the index format that will require
|
||||||
|
|||||||
36
Cargo.toml
36
Cargo.toml
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.13.2"
|
version = "0.13.0"
|
||||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
categories = ["database-implementations", "data-structures"]
|
categories = ["database-implementations", "data-structures"]
|
||||||
@@ -13,21 +13,21 @@ keywords = ["search", "information", "retrieval"]
|
|||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = "0.12"
|
base64 = "0.12.0"
|
||||||
byteorder = "1"
|
byteorder = "1.0"
|
||||||
crc32fast = "1"
|
crc32fast = "1.2.0"
|
||||||
once_cell = "1"
|
once_cell = "1.0"
|
||||||
regex ={version = "1", default-features = false, features = ["std"]}
|
regex ={version = "1.3.0", default-features = false, features = ["std"]}
|
||||||
tantivy-fst = "0.3"
|
tantivy-fst = "0.3"
|
||||||
memmap = {version = "0.7", optional=true}
|
memmap = {version = "0.7", optional=true}
|
||||||
lz4 = {version="1", optional=true}
|
lz4 = {version="1.20", optional=true}
|
||||||
snap = "1"
|
snap = "1"
|
||||||
atomicwrites = {version="0.2", optional=true}
|
atomicwrites = {version="0.2.2", optional=true}
|
||||||
tempfile = "3"
|
tempfile = "3.0"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
serde = {version="1", features=["derive"]}
|
serde = {version="1.0", features=["derive"]}
|
||||||
serde_json = "1"
|
serde_json = "1.0"
|
||||||
num_cpus = "1"
|
num_cpus = "1.2"
|
||||||
fs2={version="0.4", optional=true}
|
fs2={version="0.4", optional=true}
|
||||||
levenshtein_automata = "0.2"
|
levenshtein_automata = "0.2"
|
||||||
notify = {version="4", optional=true}
|
notify = {version="4", optional=true}
|
||||||
@@ -35,20 +35,20 @@ uuid = { version = "0.8", features = ["v4", "serde"] }
|
|||||||
crossbeam = "0.7"
|
crossbeam = "0.7"
|
||||||
futures = {version = "0.3", features=["thread-pool"] }
|
futures = {version = "0.3", features=["thread-pool"] }
|
||||||
owning_ref = "0.4"
|
owning_ref = "0.4"
|
||||||
stable_deref_trait = "1"
|
stable_deref_trait = "1.0.0"
|
||||||
rust-stemmers = "1"
|
rust-stemmers = "1.2"
|
||||||
downcast-rs = "1"
|
downcast-rs = { version="1.0" }
|
||||||
tantivy-query-grammar = { version="0.13", path="./query-grammar" }
|
tantivy-query-grammar = { version="0.13", path="./query-grammar" }
|
||||||
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
|
bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
|
||||||
census = "0.4"
|
census = "0.4"
|
||||||
fnv = "1"
|
fnv = "1.0.6"
|
||||||
owned-read = "0.4"
|
owned-read = "0.4"
|
||||||
failure = "0.1"
|
failure = "0.1"
|
||||||
htmlescape = "0.3"
|
htmlescape = "0.3.1"
|
||||||
fail = "0.4"
|
fail = "0.4"
|
||||||
murmurhash32 = "0.2"
|
murmurhash32 = "0.2"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
smallvec = "1"
|
smallvec = "1.0"
|
||||||
rayon = "1"
|
rayon = "1"
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ fn main() -> tantivy::Result<()> {
|
|||||||
);
|
);
|
||||||
let top_docs_by_custom_score =
|
let top_docs_by_custom_score =
|
||||||
TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
|
TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
|
||||||
let ingredient_reader = segment_reader.facet_reader(ingredient).unwrap();
|
let mut ingredient_reader = segment_reader.facet_reader(ingredient).unwrap();
|
||||||
let facet_dict = ingredient_reader.facet_dict();
|
let facet_dict = ingredient_reader.facet_dict();
|
||||||
|
|
||||||
let query_ords: HashSet<u64> = facets
|
let query_ords: HashSet<u64> = facets
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ pub trait CustomScorer<TScore>: Sync {
|
|||||||
|
|
||||||
impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
|
impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
|
||||||
where
|
where
|
||||||
TCustomScorer: CustomScorer<TScore> + Send + Sync,
|
TCustomScorer: CustomScorer<TScore>,
|
||||||
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
||||||
{
|
{
|
||||||
type Fruit = Vec<(TScore, DocAddress)>;
|
type Fruit = Vec<(TScore, DocAddress)>;
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ impl<T> Fruit for T where T: Send + downcast_rs::Downcast {}
|
|||||||
/// The collection logic itself is in the `SegmentCollector`.
|
/// The collection logic itself is in the `SegmentCollector`.
|
||||||
///
|
///
|
||||||
/// Segments are not guaranteed to be visited in any specific order.
|
/// Segments are not guaranteed to be visited in any specific order.
|
||||||
pub trait Collector: Sync + Send {
|
pub trait Collector: Sync {
|
||||||
/// `Fruit` is the type for the result of our collection.
|
/// `Fruit` is the type for the result of our collection.
|
||||||
/// e.g. `usize` for the `Count` collector.
|
/// e.g. `usize` for the `Count` collector.
|
||||||
type Fruit: Fruit;
|
type Fruit: Fruit;
|
||||||
|
|||||||
@@ -324,7 +324,7 @@ impl TopDocs {
|
|||||||
where
|
where
|
||||||
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
||||||
TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
|
TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
|
||||||
TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker> + Send + Sync,
|
TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker>,
|
||||||
{
|
{
|
||||||
TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
|
TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
|
||||||
}
|
}
|
||||||
@@ -438,7 +438,7 @@ impl TopDocs {
|
|||||||
where
|
where
|
||||||
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
TScore: 'static + Send + Sync + Clone + PartialOrd,
|
||||||
TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
|
TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
|
||||||
TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer> + Send + Sync,
|
TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer>,
|
||||||
{
|
{
|
||||||
CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
|
CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ pub trait ScoreTweaker<TScore>: Sync {
|
|||||||
|
|
||||||
impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
|
impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
|
||||||
where
|
where
|
||||||
TScoreTweaker: ScoreTweaker<TScore> + Send + Sync,
|
TScoreTweaker: ScoreTweaker<TScore>,
|
||||||
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
TScore: 'static + PartialOrd + Clone + Send + Sync,
|
||||||
{
|
{
|
||||||
type Fruit = Vec<(TScore, DocAddress)>;
|
type Fruit = Vec<(TScore, DocAddress)>;
|
||||||
|
|||||||
@@ -112,10 +112,8 @@ impl SegmentReader {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let term_ords_reader = self.fast_fields().u64s(field)?;
|
let term_ords_reader = self.fast_fields().u64s(field)?;
|
||||||
let termdict = self.termdict_composite
|
let termdict_source = self.termdict_composite.open_read(field)?;
|
||||||
.open_read(field)
|
let termdict = TermDictionary::from_source(&termdict_source);
|
||||||
.map(|source| TermDictionary::from_source(&source))
|
|
||||||
.unwrap_or_else(TermDictionary::empty);
|
|
||||||
let facet_reader = FacetReader::new(term_ords_reader, termdict);
|
let facet_reader = FacetReader::new(term_ords_reader, termdict);
|
||||||
Some(facet_reader)
|
Some(facet_reader)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -73,52 +73,7 @@ impl FacetReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the list of facet ordinals associated to a document.
|
/// Return the list of facet ordinals associated to a document.
|
||||||
pub fn facet_ords(&self, doc: DocId, output: &mut Vec<u64>) {
|
pub fn facet_ords(&mut self, doc: DocId, output: &mut Vec<u64>) {
|
||||||
self.term_ords.get_vals(doc, output);
|
self.term_ords.get_vals(doc, output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::{Document, schema::{Facet, SchemaBuilder}};
|
|
||||||
use crate::Index;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
|
|
||||||
let mut schema_builder = SchemaBuilder::default();
|
|
||||||
let facet_field = schema_builder.add_facet_field("facet");
|
|
||||||
let schema = schema_builder.build();
|
|
||||||
let index = Index::create_in_ram(schema);
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
|
||||||
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b")));
|
|
||||||
index_writer.add_document(Document::default());
|
|
||||||
index_writer.commit()?;
|
|
||||||
let searcher = index.reader()?.searcher();
|
|
||||||
let facet_reader = searcher.segment_reader(0u32).facet_reader(facet_field).unwrap();
|
|
||||||
let mut facet_ords = Vec::new();
|
|
||||||
facet_reader.facet_ords(0u32, &mut facet_ords);
|
|
||||||
assert_eq!(&facet_ords, &[2u64]);
|
|
||||||
facet_reader.facet_ords(1u32, &mut facet_ords);
|
|
||||||
assert!(facet_ords.is_empty());
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
|
|
||||||
let mut schema_builder = SchemaBuilder::default();
|
|
||||||
let facet_field = schema_builder.add_facet_field("facet");
|
|
||||||
let schema = schema_builder.build();
|
|
||||||
let index = Index::create_in_ram(schema);
|
|
||||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
|
|
||||||
index_writer.add_document(Document::default());
|
|
||||||
index_writer.add_document(Document::default());
|
|
||||||
index_writer.commit()?;
|
|
||||||
let searcher = index.reader()?.searcher();
|
|
||||||
let facet_reader = searcher.segment_reader(0u32).facet_reader(facet_field).unwrap();
|
|
||||||
let mut facet_ords = Vec::new();
|
|
||||||
facet_reader.facet_ords(0u32, &mut facet_ords);
|
|
||||||
assert!(facet_ords.is_empty());
|
|
||||||
facet_reader.facet_ords(1u32, &mut facet_ords);
|
|
||||||
assert!(facet_ords.is_empty());
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -126,7 +126,6 @@ impl FastFieldsWriter {
|
|||||||
for field_writer in &self.single_value_writers {
|
for field_writer in &self.single_value_writers {
|
||||||
field_writer.serialize(serializer)?;
|
field_writer.serialize(serializer)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for field_writer in &self.multi_values_writers {
|
for field_writer in &self.multi_values_writers {
|
||||||
let field = field_writer.field();
|
let field = field_writer.field();
|
||||||
field_writer.serialize(serializer, mapping.get(&field))?;
|
field_writer.serialize(serializer, mapping.get(&field))?;
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ impl SegmentWriter {
|
|||||||
if let Some(unordered_term_id) = unordered_term_id_opt {
|
if let Some(unordered_term_id) = unordered_term_id_opt {
|
||||||
self.fast_field_writers
|
self.fast_field_writers
|
||||||
.get_multivalue_writer(field)
|
.get_multivalue_writer(field)
|
||||||
.expect("writer for facet missing")
|
.expect("multified writer for facet missing")
|
||||||
.add_val(unordered_term_id);
|
.add_val(unordered_term_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ use std::fmt;
|
|||||||
///
|
///
|
||||||
/// When implementing a new type of `Query`, it is normal to implement a
|
/// When implementing a new type of `Query`, it is normal to implement a
|
||||||
/// dedicated `Query`, `Weight` and `Scorer`.
|
/// dedicated `Query`, `Weight` and `Scorer`.
|
||||||
pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
|
pub trait Query: QueryClone + downcast_rs::Downcast + fmt::Debug {
|
||||||
/// Create the weight associated to a query.
|
/// Create the weight associated to a query.
|
||||||
///
|
///
|
||||||
/// If scoring is not required, setting `scoring_enabled` to `false`
|
/// If scoring is not required, setting `scoring_enabled` to `false`
|
||||||
|
|||||||
Reference in New Issue
Block a user