mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 00:02:55 +00:00
* fix windows build (#1) * Fix windows build * Add doc traits * Add field value iter * Add value and serialization * Adjust order * Fix bug * Correct type * Fix generic bugs * Reformat code * Add generic to index writer which I forgot about * Fix missing generics on single segment writer * Add missing type export * Add default methods for convenience * Cleanup * Fix more-like-this query to use standard types * Update API and fix tests * Add doc traits * Add field value iter * Add value and serialization * Adjust order * Fix bug * Correct type * Rebase main and fix conflicts * Reformat code * Merge upstream * Fix missing generics on single segment writer * Add missing type export * Add default methods for convenience * Cleanup * Fix more-like-this query to use standard types * Update API and fix tests * Add tokenizer improvements from previous commits * Add tokenizer improvements from previous commits * Reformat * Fix unit tests * Fix unit tests * Use enum in changes * Stage changes * Add new deserializer logic * Add serializer integration * Add document deserializer * Implement new (de)serialization api for existing types * Fix bugs and type errors * Add helper implementations * Fix errors * Reformat code * Add unit tests and some code organisation for serialization * Add unit tests to deserializer * Add some small docs * Add support for deserializing serde values * Reformat * Fix typo * Fix typo * Change repr of facet * Remove unused trait methods * Add child value type * Resolve comments * Fix build * Fix more build errors * Fix more build errors * Fix the tests I missed * Fix examples * fix numerical order, serialize PreTok Str * fix coverage * rename Document to TantivyDocument, rename DocumentAccess to Document add Binary prefix to binary de/serialization * fix coverage --------- Co-authored-by: Pascal Seitz <pascal.seitz@gmail.com>
106 lines
3.7 KiB
Rust
106 lines
3.7 KiB
Rust
// # Faceted Search With Tweak Score
|
|
//
|
|
// This example covers the faceted search functionalities of
|
|
// tantivy.
|
|
//
|
|
// We will :
|
|
// - define a stored text field "title" in our schema
|
|
// - define a facet field "ingredient" in our schema
|
|
|
|
use std::collections::HashSet;
|
|
|
|
use tantivy::collector::TopDocs;
|
|
use tantivy::query::BooleanQuery;
|
|
use tantivy::schema::*;
|
|
use tantivy::{doc, DocId, Index, IndexWriter, Score, SegmentReader};
|
|
|
|
fn main() -> tantivy::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
|
|
let title = schema_builder.add_text_field("title", STORED);
|
|
let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default());
|
|
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
|
|
let mut index_writer: IndexWriter = index.writer(30_000_000)?;
|
|
|
|
index_writer.add_document(doc!(
|
|
title => "Fried egg",
|
|
ingredient => Facet::from("/ingredient/egg"),
|
|
ingredient => Facet::from("/ingredient/oil"),
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
title => "Scrambled egg",
|
|
ingredient => Facet::from("/ingredient/egg"),
|
|
ingredient => Facet::from("/ingredient/butter"),
|
|
ingredient => Facet::from("/ingredient/milk"),
|
|
ingredient => Facet::from("/ingredient/salt"),
|
|
))?;
|
|
index_writer.add_document(doc!(
|
|
title => "Egg rolls",
|
|
ingredient => Facet::from("/ingredient/egg"),
|
|
ingredient => Facet::from("/ingredient/garlic"),
|
|
ingredient => Facet::from("/ingredient/salt"),
|
|
ingredient => Facet::from("/ingredient/oil"),
|
|
ingredient => Facet::from("/ingredient/tortilla-wrap"),
|
|
ingredient => Facet::from("/ingredient/mushroom"),
|
|
))?;
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
{
|
|
let facets = vec![
|
|
Facet::from("/ingredient/egg"),
|
|
Facet::from("/ingredient/oil"),
|
|
Facet::from("/ingredient/garlic"),
|
|
Facet::from("/ingredient/mushroom"),
|
|
];
|
|
let query = BooleanQuery::new_multiterms_query(
|
|
facets
|
|
.iter()
|
|
.map(|key| Term::from_facet(ingredient, key))
|
|
.collect(),
|
|
);
|
|
let top_docs_by_custom_score =
|
|
// Call TopDocs with a custom tweak score
|
|
TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
|
|
let ingredient_reader = segment_reader.facet_reader("ingredient").unwrap();
|
|
let facet_dict = ingredient_reader.facet_dict();
|
|
|
|
let query_ords: HashSet<u64> = facets
|
|
.iter()
|
|
.filter_map(|key| facet_dict.term_ord(key.encoded_str()).unwrap())
|
|
.collect();
|
|
|
|
move |doc: DocId, original_score: Score| {
|
|
// Update the original score with a tweaked score
|
|
let missing_ingredients = ingredient_reader
|
|
.facet_ords(doc)
|
|
.filter(|ord| !query_ords.contains(ord))
|
|
.count();
|
|
let tweak = 1.0 / 4_f32.powi(missing_ingredients as i32);
|
|
|
|
original_score * tweak
|
|
}
|
|
});
|
|
let top_docs = searcher.search(&query, &top_docs_by_custom_score)?;
|
|
|
|
let titles: Vec<String> = top_docs
|
|
.iter()
|
|
.map(|(_, doc_id)| {
|
|
searcher
|
|
.doc::<TantivyDocument>(*doc_id)
|
|
.unwrap()
|
|
.get_first(title)
|
|
.and_then(|v| v.as_str())
|
|
.unwrap()
|
|
.to_owned()
|
|
})
|
|
.collect();
|
|
assert_eq!(titles, vec!["Fried egg", "Egg rolls"]);
|
|
}
|
|
Ok(())
|
|
}
|