From 4fd2b22b69dc0faaae57b5fcf3dba4e1c1aa00b2 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Fri, 26 Jan 2024 17:31:37 +0100 Subject: [PATCH] Make allocating field names avoidable for range and exists queries. If the field names are statically known, `Cow::Borrowed(&'static str)` can handle them without allocations. The general case is still handled by `Cow::Owned(String)`. --- src/query/exist_query.rs | 22 ++++--- src/query/range_query/range_query.rs | 63 ++++++++++--------- .../range_query/range_query_ip_fastfield.rs | 11 +++- .../range_query/range_query_u64_fastfield.rs | 10 +-- 4 files changed, 58 insertions(+), 48 deletions(-) diff --git a/src/query/exist_query.rs b/src/query/exist_query.rs index 7de8ee513..1760e3065 100644 --- a/src/query/exist_query.rs +++ b/src/query/exist_query.rs @@ -1,4 +1,4 @@ -use core::fmt::Debug; +use std::borrow::Cow; use columnar::{ColumnIndex, DynamicColumn}; @@ -14,7 +14,7 @@ use crate::{DocId, Score, TantivyError}; /// All of the matched documents get the score 1.0. #[derive(Clone, Debug)] pub struct ExistsQuery { - field_name: String, + field: Cow<'static, str>, } impl ExistsQuery { @@ -23,40 +23,42 @@ impl ExistsQuery { /// This query matches all documents with at least one non-null value in the specified field. /// This constructor never fails, but executing the search with this query will return an /// error if the specified field doesn't exists or is not a fast field. 
- pub fn new_exists_query(field: String) -> ExistsQuery { - ExistsQuery { field_name: field } + pub fn new_exists_query<F: Into<Cow<'static, str>>>(field: F) -> ExistsQuery { + ExistsQuery { + field: field.into(), + } } } impl Query for ExistsQuery { fn weight(&self, enable_scoring: EnableScoring) -> crate::Result<Box<dyn Weight>> { let schema = enable_scoring.schema(); - let Some((field, _path)) = schema.find_field(&self.field_name) else { - return Err(TantivyError::FieldNotFound(self.field_name.clone())); + let Some((field, _path)) = schema.find_field(&self.field) else { + return Err(TantivyError::FieldNotFound(self.field.to_string())); }; let field_type = schema.get_field_entry(field).field_type(); if !field_type.is_fast() { return Err(TantivyError::SchemaError(format!( "Field {} is not a fast field.", - self.field_name + self.field ))); } Ok(Box::new(ExistsWeight { - field_name: self.field_name.clone(), + field: self.field.clone(), })) } } /// Weight associated with the `ExistsQuery` query. pub struct ExistsWeight { - field_name: String, + field: Cow<'static, str>, } impl Weight for ExistsWeight { fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> { let fast_field_reader = reader.fast_fields(); let dynamic_columns: crate::Result<Vec<DynamicColumn>> = fast_field_reader - .dynamic_column_handles(&self.field_name)? + .dynamic_column_handles(&self.field)? 
.into_iter() .map(|handle| handle.open().map_err(|io_error| io_error.into())) .collect(); diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 86ad4ac8f..f975de84b 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::io; use std::net::Ipv6Addr; use std::ops::{Bound, Range}; @@ -68,7 +69,7 @@ use crate::{DateTime, DocId, Score}; /// ``` #[derive(Clone, Debug)] pub struct RangeQuery { - field: String, + field: Cow<'static, str>, value_type: Type, lower_bound: Bound<Vec<u8>>, upper_bound: Bound<Vec<u8>>, @@ -80,15 +81,15 @@ impl RangeQuery { /// /// If the value type is not correct, something may go terribly wrong when /// the `Weight` object is created. - pub fn new_term_bounds( - field: String, + pub fn new_term_bounds<F: Into<Cow<'static, str>>>( + field: F, value_type: Type, lower_bound: &Bound<Term>, upper_bound: &Bound<Term>, ) -> RangeQuery { let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned(); RangeQuery { - field, + field: field.into(), value_type, lower_bound: map_bound(lower_bound, verify_and_unwrap_term), upper_bound: map_bound(upper_bound, verify_and_unwrap_term), @@ -100,7 +101,7 @@ impl RangeQuery { /// /// If the field is not of the type `i64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_i64(field: String, range: Range<i64>) -> RangeQuery { + pub fn new_i64<F: Into<Cow<'static, str>>>(field: F, range: Range<i64>) -> RangeQuery { RangeQuery::new_i64_bounds( field, Bound::Included(range.start), @@ -115,8 +116,8 @@ impl RangeQuery { /// /// If the field is not of the type `i64`, tantivy /// will panic when the `Weight` object is created. 
- pub fn new_i64_bounds( - field: String, + pub fn new_i64_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<i64>, upper_bound: Bound<i64>, ) -> RangeQuery { @@ -126,7 +127,7 @@ impl RangeQuery { .to_owned() }; RangeQuery { - field, + field: field.into(), value_type: Type::I64, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -138,7 +139,7 @@ impl RangeQuery { /// /// If the field is not of the type `f64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_f64(field: String, range: Range<f64>) -> RangeQuery { + pub fn new_f64<F: Into<Cow<'static, str>>>(field: F, range: Range<f64>) -> RangeQuery { RangeQuery::new_f64_bounds( field, Bound::Included(range.start), @@ -153,8 +154,8 @@ impl RangeQuery { /// /// If the field is not of the type `f64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_f64_bounds( - field: String, + pub fn new_f64_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<f64>, upper_bound: Bound<f64>, ) -> RangeQuery { @@ -164,7 +165,7 @@ impl RangeQuery { .to_owned() }; RangeQuery { - field, + field: field.into(), value_type: Type::F64, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -179,8 +180,8 @@ impl RangeQuery { /// /// If the field is not of the type `u64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_u64_bounds( - field: String, + pub fn new_u64_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<u64>, upper_bound: Bound<u64>, ) -> RangeQuery { @@ -190,7 +191,7 @@ impl RangeQuery { .to_owned() }; RangeQuery { - field, + field: field.into(), value_type: Type::U64, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -202,8 +203,8 @@ impl RangeQuery { /// /// If the field is not of the type `ip`, tantivy /// will panic when the `Weight` object is created. 
- pub fn new_ip_bounds( - field: String, + pub fn new_ip_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<Ipv6Addr>, upper_bound: Bound<Ipv6Addr>, ) -> RangeQuery { @@ -213,7 +214,7 @@ impl RangeQuery { .to_owned() }; RangeQuery { - field, + field: field.into(), value_type: Type::IpAddr, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -225,7 +226,7 @@ impl RangeQuery { /// /// If the field is not of the type `u64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_u64(field: String, range: Range<u64>) -> RangeQuery { + pub fn new_u64<F: Into<Cow<'static, str>>>(field: F, range: Range<u64>) -> RangeQuery { RangeQuery::new_u64_bounds( field, Bound::Included(range.start), @@ -240,8 +241,8 @@ impl RangeQuery { /// /// If the field is not of the type `date`, tantivy /// will panic when the `Weight` object is created. - pub fn new_date_bounds( - field: String, + pub fn new_date_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<DateTime>, upper_bound: Bound<DateTime>, ) -> RangeQuery { @@ -251,7 +252,7 @@ impl RangeQuery { .to_owned() }; RangeQuery { - field, + field: field.into(), value_type: Type::Date, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -263,7 +264,7 @@ impl RangeQuery { /// /// If the field is not of the type `date`, tantivy /// will panic when the `Weight` object is created. - pub fn new_date(field: String, range: Range<DateTime>) -> RangeQuery { + pub fn new_date<F: Into<Cow<'static, str>>>(field: F, range: Range<DateTime>) -> RangeQuery { RangeQuery::new_date_bounds( field, Bound::Included(range.start), @@ -278,14 +279,14 @@ impl RangeQuery { /// /// If the field is not of the type `Str`, tantivy /// will panic when the `Weight` object is created. 
- pub fn new_str_bounds( - field: String, + pub fn new_str_bounds<F: Into<Cow<'static, str>>>( + field: F, lower_bound: Bound<&str>, upper_bound: Bound<&str>, ) -> RangeQuery { let make_term_val = |val: &&str| val.as_bytes().to_vec(); RangeQuery { - field, + field: field.into(), value_type: Type::Str, lower_bound: map_bound(&lower_bound, make_term_val), upper_bound: map_bound(&upper_bound, make_term_val), @@ -297,7 +298,7 @@ impl RangeQuery { /// /// If the field is not of the type `Str`, tantivy /// will panic when the `Weight` object is created. - pub fn new_str(field: String, range: Range<&str>) -> RangeQuery { + pub fn new_str<F: Into<Cow<'static, str>>>(field: F, range: Range<&str>) -> RangeQuery { RangeQuery::new_str_bounds( field, Bound::Included(range.start), @@ -358,7 +359,7 @@ impl Query for RangeQuery { let lower_bound = map_bound_res(&self.lower_bound, parse_ip_from_bytes)?; let upper_bound = map_bound_res(&self.upper_bound, parse_ip_from_bytes)?; Ok(Box::new(IPFastFieldRangeWeight::new( - self.field.to_string(), + self.field.clone(), lower_bound, upper_bound, ))) @@ -373,14 +374,14 @@ impl Query for RangeQuery { let lower_bound = map_bound(&self.lower_bound, parse_from_bytes); let upper_bound = map_bound(&self.upper_bound, parse_from_bytes); Ok(Box::new(FastFieldRangeWeight::new_u64_lenient( - self.field.to_string(), + self.field.clone(), lower_bound, upper_bound, ))) } } else { Ok(Box::new(RangeWeight { - field: self.field.to_string(), + field: self.field.clone(), lower_bound: self.lower_bound.clone(), upper_bound: self.upper_bound.clone(), limit: self.limit, @@ -390,7 +391,7 @@ impl Query for RangeQuery { } pub struct RangeWeight { - field: String, + field: Cow<'static, str>, lower_bound: Bound<Vec<u8>>, upper_bound: Bound<Vec<u8>>, limit: Option<u64>, diff --git a/src/query/range_query/range_query_ip_fastfield.rs b/src/query/range_query/range_query_ip_fastfield.rs index 97f0cdb22..32e3985bf 100644 --- a/src/query/range_query/range_query_ip_fastfield.rs +++ b/src/query/range_query/range_query_ip_fastfield.rs @@ -2,6 +2,7 
@@ //! We use this variant only if the fastfield exists, otherwise the default in `range_query` is //! used, which uses the term dictionary + postings. +use std::borrow::Cow; use std::net::Ipv6Addr; use std::ops::{Bound, RangeInclusive}; @@ -13,14 +14,18 @@ use crate::{DocId, DocSet, Score, SegmentReader, TantivyError}; /// `IPFastFieldRangeWeight` uses the ip address fast field to execute range queries. pub struct IPFastFieldRangeWeight { - field: String, + field: Cow<'static, str>, lower_bound: Bound<Ipv6Addr>, upper_bound: Bound<Ipv6Addr>, } impl IPFastFieldRangeWeight { /// Creates a new IPFastFieldRangeWeight. - pub fn new(field: String, lower_bound: Bound<Ipv6Addr>, upper_bound: Bound<Ipv6Addr>) -> Self { + pub fn new( + field: Cow<'static, str>, + lower_bound: Bound<Ipv6Addr>, + upper_bound: Bound<Ipv6Addr>, + ) -> Self { Self { field, lower_bound, @@ -171,7 +176,7 @@ pub mod tests { writer.commit().unwrap(); let searcher = index.reader().unwrap().searcher(); let range_weight = IPFastFieldRangeWeight { - field: "ips".to_string(), + field: Cow::Borrowed("ips"), lower_bound: Bound::Included(ip_addrs[1]), upper_bound: Bound::Included(ip_addrs[2]), }; diff --git a/src/query/range_query/range_query_u64_fastfield.rs b/src/query/range_query/range_query_u64_fastfield.rs index 077b8dcb3..d9bf03979 100644 --- a/src/query/range_query/range_query_u64_fastfield.rs +++ b/src/query/range_query/range_query_u64_fastfield.rs @@ -2,6 +2,7 @@ //! We use this variant only if the fastfield exists, otherwise the default in `range_query` is //! used, which uses the term dictionary + postings. +use std::borrow::Cow; use std::ops::{Bound, RangeInclusive}; use columnar::{ColumnType, HasAssociatedColumnType, MonotonicallyMappableToU64}; @@ -14,7 +15,7 @@ use crate::{DocId, DocSet, Score, SegmentReader, TantivyError}; /// `FastFieldRangeWeight` uses the fast field to execute range queries. 
#[derive(Clone, Debug)] pub struct FastFieldRangeWeight { - field: String, + field: Cow<'static, str>, lower_bound: Bound<u64>, upper_bound: Bound<u64>, column_type_opt: Option<ColumnType>, @@ -23,7 +24,7 @@ pub struct FastFieldRangeWeight { impl FastFieldRangeWeight { /// Create a new FastFieldRangeWeight, using the u64 representation of any fast field. pub(crate) fn new_u64_lenient( - field: String, + field: Cow<'static, str>, lower_bound: Bound<u64>, upper_bound: Bound<u64>, ) -> Self { @@ -39,7 +40,7 @@ impl FastFieldRangeWeight { /// Create a new `FastFieldRangeWeight` for a range of a u64-mappable type . pub fn new<T: HasAssociatedColumnType + MonotonicallyMappableToU64>( - field: String, + field: Cow<'static, str>, lower_bound: Bound<T>, upper_bound: Bound<T>, ) -> Self { @@ -130,6 +131,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>( #[cfg(test)] pub mod tests { + use std::borrow::Cow; use std::ops::{Bound, RangeInclusive}; use proptest::prelude::*; @@ -214,7 +216,7 @@ pub mod tests { writer.commit().unwrap(); let searcher = index.reader().unwrap().searcher(); let range_query = FastFieldRangeWeight::new_u64_lenient( - "test_field".to_string(), + Cow::Borrowed("test_field"), Bound::Included(50_000), Bound::Included(50_002), );