mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-26 20:19:57 +00:00
Reuse the idf function from the bm25 module in MoreLikeThis, since both implemented the same logic
This commit is contained in:
@@ -8,8 +8,9 @@ Tantivy 0.15.0
|
||||
- Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
|
||||
- Date field support for range queries (@rihardsk) #516
|
||||
- Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
|
||||
- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDireotory -> RamDirectory. (@pmasurel)
|
||||
- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@pmasurel)
|
||||
- Simplified positions index format (@fulmicoton) #1022
|
||||
- Added support for more-like-this query in tantivy (@evanxg852000) #1011
|
||||
|
||||
Tantivy 0.14.0
|
||||
=========================
|
||||
@@ -25,6 +26,7 @@ Tantivy 0.14.0
|
||||
- Simplified the encoding of the skip reader struct. BlockWAND max tf is now encoded over a single byte. (@fulmicoton)
|
||||
- `FilterCollector` now supports all Fast Field value types (@barrotsteindev)
|
||||
- FastFields are not all loaded when opening the segment reader. (@fulmicoton)
|
||||
- Added an API to merge segments, see `tantivy::merge_segments` #1005. (@evanxg852000)
|
||||
|
||||
This version breaks compatibility and requires users to reindex everything.
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ use serde::Serialize;
|
||||
const K1: Score = 1.2;
|
||||
const B: Score = 0.75;
|
||||
|
||||
fn idf(doc_freq: u64, doc_count: u64) -> Score {
|
||||
pub(crate) fn idf(doc_freq: u64, doc_count: u64) -> Score {
|
||||
assert!(doc_count >= doc_freq, "{} >= {}", doc_count, doc_freq);
|
||||
let x = ((doc_count - doc_freq) as Score + 0.5) / (doc_freq as Score + 0.5);
|
||||
(1.0 + x).ln()
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::cmp::Reverse;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
|
||||
use crate::{
|
||||
query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery},
|
||||
query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery, bm25::idf},
|
||||
schema::{Field, FieldType, FieldValue, IndexRecordOption, Term, Value},
|
||||
tokenizer::{BoxTokenStream, FacetTokenizer, PreTokenizedStream, Tokenizer},
|
||||
DocAddress, Result, Searcher, TantivyError,
|
||||
@@ -358,7 +358,7 @@ impl MoreLikeThis {
|
||||
}
|
||||
|
||||
// compute similarity & score
|
||||
let idf = self.idf(doc_freq, num_docs);
|
||||
let idf = idf(doc_freq, num_docs);
|
||||
let score = (*term_frequency as f32) * idf;
|
||||
if let Some(limit) = self.max_query_terms {
|
||||
if score_terms.len() > limit {
|
||||
@@ -383,9 +383,4 @@ impl MoreLikeThis {
|
||||
Ok(score_terms_vec)
|
||||
}
|
||||
|
||||
/// Computes the similarity
|
||||
fn idf(&self, doc_freq: u64, doc_count: u64) -> f32 {
|
||||
let x = ((doc_count - doc_freq) as f32 + 0.5) / (doc_freq as f32 + 0.5);
|
||||
(1f32 + x).ln()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user