// Mirror of https://github.com/quickwit-oss/tantivy.git
// Synced 2026-05-31 07:30:39 +00:00
// Upstream file: 132 lines, 5.4 KiB, Rust
#[cfg(test)]
mod test {
    use maplit::hashmap;
    use tantivy::collector::TopDocs;
    use tantivy::query::FuzzyTermQuery;
    use tantivy::schema::{Schema, Value, STORED, TEXT};
    use tantivy::{doc, Index, TantivyDocument, Term};

    /// Indexes a fixed set of short texts, runs a fuzzy term query for
    /// "robin" with a maximum edit distance of 2, and checks both the number
    /// of hits and the score attached to every hit.
    ///
    /// # Panics
    ///
    /// Panics (i.e. fails the test) if indexing or searching fails, if the
    /// number of hits is not 17, if a hit is not listed in the expected-score
    /// table, or if a hit's score differs from the expected one.
    #[test]
    pub fn test_fuzzy_term() {
        // Texts to index. Each entry becomes one document whose only field is
        // the text field named "country" declared below.
        let docs = [
            "WENN ROT WIE RUBIN",
            "WENN ROT WIE ROBIN",
            "WHEN RED LIKE ROBIN",
            "WENN RED AS ROBIN",
            "WHEN ROYAL BLUE ROBIN",
            "IF RED LIKE RUBEN",
            "WHEN GREEN LIKE ROBIN",
            "WENN ROSE LIKE ROBIN",
            "IF PINK LIKE ROBIN",
            "WENN ROT WIE RABIN",
            "WENN BLU WIE ROBIN",
            "WHEN YELLOW LIKE RABBIT",
            "IF BLUE LIKE ROBIN",
            "WHEN ORANGE LIKE RIBBON",
            "WENN VIOLET WIE RUBIX",
            "WHEN INDIGO LIKE ROBBIE",
            "IF TEAL LIKE RUBY",
            "WHEN GOLD LIKE ROB",
            "WENN SILVER WIE ROBY",
            "IF BRONZE LIKE ROBE",
        ];

        // Expected score per matching document when querying "robin" with a
        // fuzziness of 2. The three documents missing from this table
        // (RABBIT, RIBBON, RUBY) are expected not to match at all.
        // NOTE(review): the values are consistent with 1 / (1 + edit distance)
        // of the closest token (1.0, 0.5, 1/3) -- confirm against the scorer.
        // The value type is spelled out so the literals do not depend on
        // float inference from the comparison further down.
        let expected_scores: std::collections::HashMap<&str, f32> = hashmap! {
            "WHEN GREEN LIKE ROBIN" => 1.0,
            "WENN RED AS ROBIN" => 1.0,
            "WHEN RED LIKE ROBIN" => 1.0,
            "WENN ROSE LIKE ROBIN" => 1.0,
            "WENN ROT WIE ROBIN" => 1.0,
            "WHEN ROYAL BLUE ROBIN" => 1.0,
            "IF PINK LIKE ROBIN" => 1.0,
            "IF BLUE LIKE ROBIN" => 1.0,
            "WENN BLU WIE ROBIN" => 1.0,
            "WENN ROT WIE RUBIN" => 0.5,
            "WENN ROT WIE RABIN" => 0.5,
            "IF RED LIKE RUBEN" => 0.33333334,
            "WENN VIOLET WIE RUBIX" => 0.33333334,
            "IF BRONZE LIKE ROBE" => 0.33333334,
            "WENN SILVER WIE ROBY" => 0.33333334,
            "WHEN GOLD LIKE ROB" => 0.33333334,
            "WHEN INDIGO LIKE ROBBIE" => 0.33333334,
        };

        // Schema with a single text field, "country", that is both indexed
        // (TEXT) and stored (STORED) so the original text can be read back
        // from search hits.
        let mut schema_builder = Schema::builder();
        let country_field = schema_builder.add_text_field("country", TEXT | STORED);
        let schema = schema_builder.build();

        // In-memory index; nothing is written to disk.
        let index = Index::create_in_ram(schema);
        {
            // Single-threaded writer with a 15 MB memory budget. The inner
            // scope drops the writer once the commit is done.
            let mut index_writer = index.writer_with_num_threads(1, 15_000_000).unwrap();

            for &text in &docs {
                index_writer
                    .add_document(doc!(country_field => text))
                    .unwrap();
            }

            // Documents only become visible to readers after the commit.
            index_writer.commit().unwrap();
        }

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();

        {
            // NOTE(review): the query term is lowercase while the indexed
            // texts are uppercase; presumably the tokenizer behind `TEXT`
            // lowercases tokens -- confirm.
            let term = Term::from_field_text(country_field, "robin");

            // Fuzzy query for "robin" with a maximum edit distance of 2. The
            // third argument is tantivy's `transposition_cost_one` flag (not a
            // prefix length): `true` makes swapping two adjacent characters
            // count as a single edit.
            let fuzzy_query = FuzzyTermQuery::new(term, 2, true);

            // Up to 100 hits, ordered by score.
            let top_docs = searcher
                .search(&fuzzy_query, &TopDocs::with_limit(100).order_by_score())
                .unwrap();

            // Load every hit from the doc store exactly once; both the
            // diagnostic printout and the score checks below reuse it.
            let hits: Vec<(f32, TantivyDocument)> = top_docs
                .iter()
                .map(|&(score, adr)| (score, searcher.doc(adr).expect("document")))
                .collect();

            // Print all hits before asserting anything, so a failing run
            // still shows what the query actually returned.
            for (score, doc) in &hits {
                println!(
                    "{score}, {:?}",
                    doc.field_values().next().unwrap().1.as_str()
                );
            }

            // Nothing with an edit distance larger than 2 may be returned,
            // which leaves exactly the 17 documents of `expected_scores`.
            assert_eq!(hits.len(), 17, "Expected 17 documents");

            for (score, doc) in &hits {
                // The schema has a single field, so the first field value is
                // the stored text.
                let doc_text = doc.field_values().next().unwrap().1.as_str().unwrap();

                // Fail with the offending text instead of a bare
                // "key not found" style panic from `HashMap` indexing.
                let expected = expected_scores
                    .get(doc_text)
                    .copied()
                    .unwrap_or_else(|| panic!("Unexpected document in results: {doc_text}"));

                assert!(
                    (*score - expected).abs() < f32::EPSILON,
                    "Unexpected score for document {}. Expected: {}, Actual: {}",
                    doc_text,
                    expected,
                    score
                );
            }
        }
    }
}