mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-28 21:12:54 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
94c73363e4 |
@@ -205,4 +205,332 @@ mod tests {
|
||||
assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
doc 0
|
||||
{
|
||||
"_index": "test",
|
||||
"_type": "_doc",
|
||||
"_id": "0",
|
||||
"matched": true,
|
||||
"explanation": {
|
||||
"value": 6.2610235,
|
||||
"description": "max of:",
|
||||
"details": [{
|
||||
"value": 6.1969156,
|
||||
"description": "sum of:",
|
||||
"details": [{
|
||||
"value": 6.1969156,
|
||||
"description": "weight(text:оксана in 561) [PerFieldSimilarity], result of:",
|
||||
"details": [{
|
||||
"value": 6.1969156,
|
||||
"description": "score(freq=1.0), product of:",
|
||||
"details": [{
|
||||
"value": 2.2,
|
||||
"description": "boost",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.65998,
|
||||
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||
"details": [{
|
||||
"value": 3,
|
||||
"description": "n, number of documents containing term",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1004,
|
||||
"description": "N, total number of documents with field",
|
||||
"details": []
|
||||
}]
|
||||
}, {
|
||||
"value": 0.49766606,
|
||||
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
|
||||
"details": [{
|
||||
"value": 1.0,
|
||||
"description": "freq, occurrences of term within document",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1.2,
|
||||
"description": "k1, term saturation parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 0.75,
|
||||
"description": "b, length normalization parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 19.0,
|
||||
"description": "dl, length of field",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 24.105577,
|
||||
"description": "avgdl, average length of field",
|
||||
"details": []
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}, {
|
||||
"value": 6.2610235,
|
||||
"description": "sum of:",
|
||||
"details": [{
|
||||
"value": 6.2610235,
|
||||
"description": "weight(title:оксана in 561) [PerFieldSimilarity], result of:",
|
||||
"details": [{
|
||||
"value": 6.2610235,
|
||||
"description": "score(freq=1.0), product of:",
|
||||
"details": [{
|
||||
"value": 2.2,
|
||||
"description": "boost",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.4086657,
|
||||
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||
"details": [{
|
||||
"value": 4,
|
||||
"description": "n, number of documents containing term",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1004,
|
||||
"description": "N, total number of documents with field",
|
||||
"details": []
|
||||
}]
|
||||
}, {
|
||||
"value": 0.52617776,
|
||||
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
|
||||
"details": [{
|
||||
"value": 1.0,
|
||||
"description": "freq, occurrences of term within document",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1.2,
|
||||
"description": "k1, term saturation parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 0.75,
|
||||
"description": "b, length normalization parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 4.0,
|
||||
"description": "dl, length of field",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.99502,
|
||||
"description": "avgdl, average length of field",
|
||||
"details": []
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
doc 2
|
||||
{
|
||||
"_index": "test",
|
||||
"_type": "_doc",
|
||||
"_id": "2",
|
||||
"matched": true,
|
||||
"explanation": {
|
||||
"value": 11.911896,
|
||||
"description": "max of:",
|
||||
"details": [{
|
||||
"value": 11.911896,
|
||||
"description": "sum of:",
|
||||
"details": [{
|
||||
"value": 5.4068284,
|
||||
"description": "weight(title:оксана in 0) [PerFieldSimilarity], result of:",
|
||||
"details": [{
|
||||
"value": 5.4068284,
|
||||
"description": "score(freq=1.0), product of:",
|
||||
"details": [{
|
||||
"value": 2.2,
|
||||
"description": "boost",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.4086657,
|
||||
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||
"details": [{
|
||||
"value": 4,
|
||||
"description": "n, number of documents containing term",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1004,
|
||||
"description": "N, total number of documents with field",
|
||||
"details": []
|
||||
}]
|
||||
}, {
|
||||
"value": 0.45439103,
|
||||
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
|
||||
"details": [{
|
||||
"value": 1.0,
|
||||
"description": "freq, occurrences of term within document",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1.2,
|
||||
"description": "k1, term saturation parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 0.75,
|
||||
"description": "b, length normalization parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 6.0,
|
||||
"description": "dl, length of field",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.99502,
|
||||
"description": "avgdl, average length of field",
|
||||
"details": []
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}, {
|
||||
"value": 6.505067,
|
||||
"description": "weight(title:лифенко in 0) [PerFieldSimilarity], result of:",
|
||||
"details": [{
|
||||
"value": 6.505067,
|
||||
"description": "score(freq=1.0), product of:",
|
||||
"details": [{
|
||||
"value": 2.2,
|
||||
"description": "boost",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 6.5072775,
|
||||
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
|
||||
"details": [{
|
||||
"value": 1,
|
||||
"description": "n, number of documents containing term",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1004,
|
||||
"description": "N, total number of documents with field",
|
||||
"details": []
|
||||
}]
|
||||
}, {
|
||||
"value": 0.45439103,
|
||||
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
|
||||
"details": [{
|
||||
"value": 1.0,
|
||||
"description": "freq, occurrences of term within document",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 1.2,
|
||||
"description": "k1, term saturation parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 0.75,
|
||||
"description": "b, length normalization parameter",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 6.0,
|
||||
"description": "dl, length of field",
|
||||
"details": []
|
||||
}, {
|
||||
"value": 5.99502,
|
||||
"description": "avgdl, average length of field",
|
||||
"details": []
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
*/
|
||||
// motivated by #554
|
||||
#[test]
fn test_bm25_several_fields() {
    // Scores a two-term query that the query parser expands over two default
    // fields (`title` and `text`), on an index where three documents mention
    // the first term and only one of them mentions the second.
    //
    // Exact score values are deliberately not pinned. NOTE(review): the
    // reference explain output kept next to this test applies a 2.2 boost and
    // a "max of" combination, neither of which this query sets up, so those
    // numbers cannot be asserted as-is; pin real values once they are agreed.
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let text = schema_builder.add_text_field("text", TEXT);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);

    let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
    index_writer.add_document(doc!(
        // tf (оксана, лифенко) = 1 0
        title => "Законы притяжения Оксана Кулакова",
        // tf (оксана, лифенко) = 1 0
        text => "Законы притяжения Оксана Кулакова] \n\nТема: Сексуальное искусство, Женственность\nТип товара: Запись вебинара (аудио)\nПродолжительность: 1,5 часа\n\nСсылка на вебинар:\n ",
    ));
    index_writer.add_document(doc!(
        // tf (оксана, лифенко) = 1 0
        title => "Любимые русские пироги (Оксана Путан)",
        // tf (оксана, лифенко) = 2 0
        text => "http://i95.fastpic.ru/big/2017/0628/9a/615b9c8504d94a3893d7f496ac53539a.jpg \n\nОт издателя\nОксана Путан профессиональный повар, автор кулинарных книг и известный кулинарный блогер. Ее рецепты отличаются практичностью, доступностью и пользуются огромной популярностью в русскоязычном интернете. Это третья книга автора о самом вкусном и ароматном настоящих русских пирогах и выпечке!\nДаже новички на кухне легко готовят по ее рецептам. Оксана описывает процесс приготовления настолько подробно и понятно, что вам остается только наслаждаться готовкой и не тратить время на лишние усилия. Готовьте легко и просто!\n\nhttps://www.ozon.ru/context/detail/id/139872462/"
    ));
    index_writer.add_document(doc!(
        // tf (оксана, лифенко) = 1 1
        title => "PDF Мастер Класс \"Морячок\" (Оксана Лифенко)",
        // tf (оксана, лифенко) = 0 0
        text => "https://i.ibb.co/pzvHrDN/I3d U T6 Gg TM.jpg\nhttps://i.ibb.co/NFrb6v6/N0ls Z9nwjb U.jpg\nВ описание входит штаны, кофта, берет, матросский воротник. Описание продается в формате PDF, состоит из 12 страниц формата А4 и может быть напечатано на любом принтере.\nОписание предназначено для кукол BJD RealPuki от FairyLand, но может подойти и другим подобным куклам. Также вы можете вязать этот наряд из обычной пряжи, и он подойдет для куколок побольше.\nhttps://vk.com/market 95724412?w=product 95724412_2212"
    ));
    // Filler documents that contain neither query term; they only pad the
    // document count and the average field lengths.
    for _ in 0..1_000 {
        index_writer.add_document(doc!(
            title => "a b d e f g",
            text => "maitre corbeau sur un arbre perche tenait dans son bec un fromage Maitre rnard par lodeur alleche lui tint a peu pres ce langage."
        ));
    }
    index_writer.commit().unwrap();

    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![title, text]);
    let query = query_parser.parse_query("Оксана Лифенко").unwrap();
    let weight = query.weight(&searcher, true).unwrap();
    // Like the sibling test, this reads segment 0 only and therefore assumes
    // the single-threaded writer produced one segment.
    let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();

    let mut scores = vec![];
    while scorer.advance() {
        scores.push(scorer.score());
    }

    // Per the tf annotations above, exactly the first three documents contain
    // a query term in at least one of the two fields.
    assert_eq!(
        scores.len(),
        3,
        "expected exactly three matching documents, got scores {:?}",
        scores
    );
    assert!(
        scores.iter().all(|score| score.is_finite()),
        "every score must be finite, got {:?}",
        scores
    );
}
|
||||
|
||||
// motivated by #554
|
||||
#[test]
fn test_bm25_several_fields_bbb() {
    // Single-field counterpart of the multi-field BM25 test: the same three
    // long documents, indexed into `text` only, plus 100 filler documents.
    let mut builder = Schema::builder();
    let text = builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(builder.build());

    let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
    writer.add_document(doc!(
        text => "Законы притяжения Оксана Кулакова] \n\nТема: Сексуальное искусство, Женственность\nТип товара: Запись вебинара (аудио)\nПродолжительность: 1,5 часа\n\nСсылка на вебинар:\n ",
    ));
    writer.add_document(doc!(
        text => "http://i95.fastpic.ru/big/2017/0628/9a/615b9c8504d94a3893d7f496ac53539a.jpg \n\nОт издателя\nОксана Путан профессиональный повар, автор кулинарных книг и известный кулинарный блогер. Ее рецепты отличаются практичностью, доступностью и пользуются огромной популярностью в русскоязычном интернете. Это третья книга автора о самом вкусном и ароматном настоящих русских пирогах и выпечке!\nДаже новички на кухне легко готовят по ее рецептам. Оксана описывает процесс приготовления настолько подробно и понятно, что вам остается только наслаждаться готовкой и не тратить время на лишние усилия. Готовьте легко и просто!\n\nhttps://www.ozon.ru/context/detail/id/139872462/"
    ));
    writer.add_document(doc!(
        text => "https://i.ibb.co/pzvHrDN/I3d U T6 Gg TM.jpg\nhttps://i.ibb.co/NFrb6v6/N0ls Z9nwjb U.jpg\nВ описание входит штаны, кофта, берет, матросский воротник. Описание продается в формате PDF, состоит из 12 страниц формата А4 и может быть напечатано на любом принтере.\nОписание предназначено для кукол BJD RealPuki от FairyLand, но может подойти и другим подобным куклам. Также вы можете вязать этот наряд из обычной пряжи, и он подойдет для куколок побольше.\nhttps://vk.com/market 95724412?w=product 95724412_2212"
    ));
    // Padding documents without any of the query terms.
    for _ in 0..100 {
        writer.add_document(doc!(
            text => "maitre corbeau sur un arbre perche tenait dans son bec un fromage Maitre rnard par lodeur alleche lui tint a peu pres ce langage."
        ));
    }
    writer.commit().unwrap();

    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let parser = QueryParser::for_index(&index, vec![text]);
    let query = parser.parse_query("Оксана Лифенко").unwrap();
    let weight = query.weight(&searcher, true).unwrap();
    let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();

    // Drain the scorer, recording one score per matching document in the
    // order the scorer yields them.
    let mut scores = vec![];
    loop {
        if !scorer.advance() {
            break;
        }
        let score = scorer.score();
        scores.push(score);
    }

    // NOTE(review): three scores are expected here, yet only the first two
    // documents appear to contain a query term in `text` — confirm these
    // reference values before relying on this assertion.
    assert_eq!(scores, &[0.8017307, 0.72233325, 1.0300813]);
    writer.commit().unwrap();
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user