mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
Add fast field fallback for term query if not indexed (#2693)
* Add fast field fallback for term query if not indexed * only fallback without scores
This commit is contained in:
@@ -12,10 +12,14 @@ pub use self::range_query_fastfield::*;
|
||||
// TODO is this correct?
|
||||
pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
|
||||
match typ {
|
||||
Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date | Type::Json => {
|
||||
true
|
||||
}
|
||||
Type::IpAddr => true,
|
||||
Type::Str
|
||||
| Type::U64
|
||||
| Type::I64
|
||||
| Type::F64
|
||||
| Type::Bool
|
||||
| Type::Date
|
||||
| Type::Json
|
||||
| Type::IpAddr => true,
|
||||
Type::Facet | Type::Bytes => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ mod tests {
|
||||
use crate::docset::DocSet;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
use crate::query::{EnableScoring, Query, QueryParser, Scorer, TermQuery};
|
||||
use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT};
|
||||
use crate::schema::{Field, IndexRecordOption, Schema, FAST, STRING, TEXT};
|
||||
use crate::{assert_nearly_equals, DocAddress, Index, IndexWriter, Term, TERMINATED};
|
||||
|
||||
#[test]
|
||||
@@ -212,4 +212,232 @@ mod tests {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_query_fallback_to_fastfield() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
use crate::schema::FAST;
|
||||
|
||||
// Create a FAST-only numeric field (not indexed)
|
||||
let mut schema_builder = Schema::builder();
|
||||
let num_field = schema_builder.add_u64_field("num", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
{
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(num_field => 10u64))?;
|
||||
index_writer.add_document(doc!(num_field => 20u64))?;
|
||||
index_writer.add_document(doc!(num_field => 10u64))?;
|
||||
index_writer.commit()?;
|
||||
}
|
||||
|
||||
let reader = index.reader()?;
|
||||
let searcher = reader.searcher();
|
||||
|
||||
// TermQuery should fall back to a fastfield range query and match correctly.
|
||||
let tq_10 = TermQuery::new(
|
||||
Term::from_field_u64(num_field, 10u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_20 = TermQuery::new(
|
||||
Term::from_field_u64(num_field, 20u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_30 = TermQuery::new(
|
||||
Term::from_field_u64(num_field, 30u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
||||
let count_10 = searcher.search(&tq_10, &Count)?;
|
||||
let count_20 = searcher.search(&tq_20, &Count)?;
|
||||
let count_30 = searcher.search(&tq_30, &Count)?;
|
||||
|
||||
assert_eq!(count_10, 2);
|
||||
assert_eq!(count_20, 1);
|
||||
assert_eq!(count_30, 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_query_fallback_text_fast_only() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
|
||||
// FAST-only text field (not indexed)
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
{
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(text_field => "hello"))?;
|
||||
index_writer.add_document(doc!(text_field => "world"))?;
|
||||
index_writer.add_document(doc!(text_field => "hello"))?;
|
||||
index_writer.commit()?;
|
||||
}
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
let tq_hello = TermQuery::new(
|
||||
Term::from_field_text(text_field, "hello"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_world = TermQuery::new(
|
||||
Term::from_field_text(text_field, "world"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_missing = TermQuery::new(
|
||||
Term::from_field_text(text_field, "nope"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
||||
assert_eq!(searcher.search(&tq_hello, &Count)?, 2);
|
||||
assert_eq!(searcher.search(&tq_world, &Count)?, 1);
|
||||
assert_eq!(searcher.search(&tq_missing, &Count)?, 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_query_fallback_json_fast_only() -> crate::Result<()> {
|
||||
use crate::collector::Count;
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::schema::FAST;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let json_field = schema_builder.add_json_field("json", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
{
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(json_field => json!({"a": 10, "b": "x"})))?;
|
||||
index_writer.add_document(doc!(json_field => json!({"a": 20, "b": "y"})))?;
|
||||
index_writer.add_document(doc!(json_field => json!({"a": 10, "b": "z"})))?;
|
||||
index_writer.commit()?;
|
||||
}
|
||||
|
||||
fn json_term_fast<T: FastValue>(field: Field, path: &str, v: T) -> Term {
|
||||
let mut term = Term::from_field_json_path(field, path, true);
|
||||
term.append_type_and_fast_value(v);
|
||||
term
|
||||
}
|
||||
fn json_term_str(field: Field, path: &str, v: &str) -> Term {
|
||||
let mut term = Term::from_field_json_path(field, path, true);
|
||||
term.append_type_and_str(v);
|
||||
term
|
||||
}
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
// numeric path match
|
||||
let tq_a10 = TermQuery::new(
|
||||
json_term_fast(json_field, "a", 10u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_a20 = TermQuery::new(
|
||||
json_term_fast(json_field, "a", 20u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_a30 = TermQuery::new(
|
||||
json_term_fast(json_field, "a", 30u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
assert_eq!(searcher.search(&tq_a10, &Count)?, 2);
|
||||
assert_eq!(searcher.search(&tq_a20, &Count)?, 1);
|
||||
assert_eq!(searcher.search(&tq_a30, &Count)?, 0);
|
||||
|
||||
// string path match
|
||||
let tq_bx = TermQuery::new(
|
||||
json_term_str(json_field, "b", "x"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_by = TermQuery::new(
|
||||
json_term_str(json_field, "b", "y"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_bm = TermQuery::new(
|
||||
json_term_str(json_field, "b", "missing"),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
assert_eq!(searcher.search(&tq_bx, &Count)?, 1);
|
||||
assert_eq!(searcher.search(&tq_by, &Count)?, 1);
|
||||
assert_eq!(searcher.search(&tq_bm, &Count)?, 0);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_query_fallback_ip_fast_only() -> crate::Result<()> {
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::collector::Count;
|
||||
use crate::schema::{IntoIpv6Addr, FAST};
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let ip_field = schema_builder.add_ip_addr_field("ip", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
let ip1 = IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr();
|
||||
let ip2 = IpAddr::from_str("127.0.0.2").unwrap().into_ipv6_addr();
|
||||
{
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(ip_field => ip1))?;
|
||||
index_writer.add_document(doc!(ip_field => ip2))?;
|
||||
index_writer.add_document(doc!(ip_field => ip1))?;
|
||||
index_writer.commit()?;
|
||||
}
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
let tq_ip1 = TermQuery::new(
|
||||
Term::from_field_ip_addr(ip_field, ip1),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let tq_ip2 = TermQuery::new(
|
||||
Term::from_field_ip_addr(ip_field, ip2),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
let ip3 = IpAddr::from_str("127.0.0.3").unwrap().into_ipv6_addr();
|
||||
let tq_ip3 = TermQuery::new(
|
||||
Term::from_field_ip_addr(ip_field, ip3),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
||||
assert_eq!(searcher.search(&tq_ip1, &Count)?, 2);
|
||||
assert_eq!(searcher.search(&tq_ip2, &Count)?, 1);
|
||||
assert_eq!(searcher.search(&tq_ip3, &Count)?, 0);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_query_fallback_fastfield_with_scores_errors() -> crate::Result<()> {
|
||||
use crate::collector::TopDocs;
|
||||
|
||||
// FAST-only numeric field (not indexed) should error when scoring is required
|
||||
let mut schema_builder = Schema::builder();
|
||||
let num_field = schema_builder.add_u64_field("num", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
{
|
||||
let mut index_writer: IndexWriter = index.writer_for_tests()?;
|
||||
index_writer.add_document(doc!(num_field => 10u64))?;
|
||||
index_writer.add_document(doc!(num_field => 20u64))?;
|
||||
index_writer.commit()?;
|
||||
}
|
||||
|
||||
let searcher = index.reader()?.searcher();
|
||||
let tq = TermQuery::new(
|
||||
Term::from_field_u64(num_field, 10u64),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
||||
// Using TopDocs requires scoring; since the field is not indexed,
|
||||
// TermQuery cannot score and should return a SchemaError.
|
||||
let res = searcher.search(&tq, &TopDocs::with_limit(1));
|
||||
assert!(matches!(res, Err(crate::TantivyError::SchemaError(_))));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
use std::fmt;
|
||||
use std::ops::Bound;
|
||||
|
||||
use super::term_weight::TermWeight;
|
||||
use crate::query::bm25::Bm25Weight;
|
||||
use crate::query::{EnableScoring, Explanation, Query, Weight};
|
||||
use crate::query::range_query::is_type_valid_for_fastfield_range_query;
|
||||
use crate::query::{EnableScoring, Explanation, Query, RangeQuery, Weight};
|
||||
use crate::schema::IndexRecordOption;
|
||||
use crate::Term;
|
||||
|
||||
@@ -122,6 +124,24 @@ impl TermQuery {
|
||||
|
||||
impl Query for TermQuery {
|
||||
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
|
||||
// If the field is not indexed but is a suitable fast field, fall back to a range query
|
||||
// on the fast field matching exactly this term.
|
||||
//
|
||||
// Note: This is considerable slower since it requires to scan the entire fast field.
|
||||
// TODO: The range query would gain from having a single-value optimization
|
||||
let schema = enable_scoring.schema();
|
||||
let field_entry = schema.get_field_entry(self.term.field());
|
||||
if !field_entry.is_indexed()
|
||||
&& field_entry.is_fast()
|
||||
&& is_type_valid_for_fastfield_range_query(self.term.typ())
|
||||
&& !enable_scoring.is_scoring_enabled()
|
||||
{
|
||||
let range_query = RangeQuery::new(
|
||||
Bound::Included(self.term.clone()),
|
||||
Bound::Included(self.term.clone()),
|
||||
);
|
||||
return range_query.weight(enable_scoring);
|
||||
}
|
||||
Ok(Box::new(self.specialized_weight(enable_scoring)?))
|
||||
}
|
||||
fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
|
||||
|
||||
Reference in New Issue
Block a user