diff --git a/src/aggregation/agg_req.rs b/src/aggregation/agg_req.rs index ebae69a5d..85269cf95 100644 --- a/src/aggregation/agg_req.rs +++ b/src/aggregation/agg_req.rs @@ -124,15 +124,6 @@ impl BucketAggregationInternal { } } -/// Extract all fields, where the term directory is used in the tree. -pub fn get_term_dict_field_names(aggs: &Aggregations) -> HashSet { - let mut term_dict_field_names = Default::default(); - for el in aggs.values() { - el.get_term_dict_field_names(&mut term_dict_field_names) - } - term_dict_field_names -} - /// Extract all fast field names used in the tree. pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet { let mut fast_field_names = Default::default(); @@ -155,16 +146,12 @@ pub enum Aggregation { } impl Aggregation { - fn get_term_dict_field_names(&self, term_field_names: &mut HashSet) { - if let Aggregation::Bucket(bucket) = self { - bucket.get_term_dict_field_names(term_field_names) - } - } - fn get_fast_field_names(&self, fast_field_names: &mut HashSet) { match self { Aggregation::Bucket(bucket) => bucket.get_fast_field_names(fast_field_names), - Aggregation::Metric(metric) => metric.get_fast_field_names(fast_field_names), + Aggregation::Metric(metric) => { + fast_field_names.insert(metric.get_fast_field_name().to_string()); + } } } } @@ -193,14 +180,9 @@ pub struct BucketAggregation { } impl BucketAggregation { - fn get_term_dict_field_names(&self, term_dict_field_names: &mut HashSet) { - if let BucketAggregationType::Terms(terms) = &self.bucket_agg { - term_dict_field_names.insert(terms.field.to_string()); - } - term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation)); - } fn get_fast_field_names(&self, fast_field_names: &mut HashSet) { - self.bucket_agg.get_fast_field_names(fast_field_names); + let fast_field_name = self.bucket_agg.get_fast_field_name(); + fast_field_names.insert(fast_field_name.to_string()); fast_field_names.extend(get_fast_field_names(&self.sub_aggregation)); } } @@ -220,14 +202,12 @@ pub enum BucketAggregationType { } impl BucketAggregationType { - fn get_fast_field_names(&self, fast_field_names: &mut HashSet) { + fn get_fast_field_name(&self) -> &str { match self { - BucketAggregationType::Terms(terms) => fast_field_names.insert(terms.field.to_string()), - BucketAggregationType::Range(range) => fast_field_names.insert(range.field.to_string()), - BucketAggregationType::Histogram(histogram) => { - fast_field_names.insert(histogram.field.to_string()) - } - }; + BucketAggregationType::Terms(terms) => terms.field.as_str(), + BucketAggregationType::Range(range) => range.field.as_str(), + BucketAggregationType::Histogram(histogram) => histogram.field.as_str(), + } } } @@ -262,16 +242,15 @@ pub enum MetricAggregation { } impl MetricAggregation { - fn get_fast_field_names(&self, fast_field_names: &mut HashSet) { - let fast_field_name = match self { + fn get_fast_field_name(&self) -> &str { + match self { MetricAggregation::Average(avg) => avg.field_name(), MetricAggregation::Count(count) => count.field_name(), MetricAggregation::Max(max) => max.field_name(), MetricAggregation::Min(min) => min.field_name(), MetricAggregation::Stats(stats) => stats.field_name(), MetricAggregation::Sum(sum) => sum.field_name(), - }; - fast_field_names.insert(fast_field_name.to_string()); + } } } diff --git a/src/aggregation/agg_tests.rs b/src/aggregation/agg_tests.rs index f1f2cfc5e..53afd5d16 100644 --- a/src/aggregation/agg_tests.rs +++ b/src/aggregation/agg_tests.rs @@ -1,8 +1,7 @@ use serde_json::Value; use crate::aggregation::agg_req::{ - get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation, BucketAggregationType, - MetricAggregation, + Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation, }; use crate::aggregation::agg_result::AggregationResults; use crate::aggregation::bucket::{RangeAggregation, TermsAggregation}; @@ -432,9 +431,6 @@ fn test_aggregation_level2( agg_req }; - let field_names = get_term_dict_field_names(&agg_req); - assert_eq!(field_names, vec!["text".to_string()].into_iter().collect()); - let agg_res: AggregationResults = if use_distributed_collector { let collector = DistributedAggregationCollector::from_aggs(agg_req.clone(), None); diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs index 570ef5a71..580c4a938 100644 --- a/src/aggregation/bucket/term_agg.rs +++ b/src/aggregation/bucket/term_agg.rs @@ -505,8 +505,7 @@ pub(crate) fn cut_off_buckets( mod tests { use super::*; use crate::aggregation::agg_req::{ - get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation, - BucketAggregationType, MetricAggregation, + Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation, }; use crate::aggregation::metric::{AverageAggregation, StatsAggregation}; use crate::aggregation::tests::{ @@ -607,12 +606,6 @@ mod tests { serde_json::Value::Null ); assert_eq!(res["my_texts"]["sum_other_doc_count"], 0); // TODO sum_other_doc_count with min_doc_count - - assert_eq!( - get_term_dict_field_names(&agg_req), - vec!["string_id".to_string(),].into_iter().collect() - ); - Ok(()) } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 3f5f03e4d..8a10788df 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -19,9 +19,8 @@ //! //! Read access performance is comparable to that of an array lookup. -use std::net::Ipv6Addr; - pub use columnar::Column; +use columnar::MonotonicallyMappableToU64; pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet}; pub use self::error::{FastFieldNotAvailableError, Result}; @@ -37,38 +36,9 @@ mod facet_reader; mod readers; mod writer; -/// Trait for types that provide a zero value. -/// -/// The resulting value is never used, just as placeholder, e.g. for `vec.resize()`. -pub trait MakeZero { - /// Build a default value. This default value is never used, so the value does not - /// really matter. - fn make_zero() -> Self; -} - -impl MakeZero for T { - fn make_zero() -> Self { - T::from_u64(0) - } -} - -impl MakeZero for u128 { - fn make_zero() -> Self { - 0 - } -} - -impl MakeZero for Ipv6Addr { - fn make_zero() -> Self { - Ipv6Addr::from(0u128.to_be_bytes()) - } -} - /// Trait for types that are allowed for fast fields: /// (u64, i64 and f64, bool, DateTime). -pub trait FastValue: - Copy + Send + Sync + columnar::MonotonicallyMappableToU64 + PartialOrd + 'static -{ +pub trait FastValue: MonotonicallyMappableToU64 { /// Returns the `schema::Type` for this FastValue. fn to_type() -> Type; } @@ -105,6 +75,7 @@ impl FastValue for DateTime { #[cfg(test)] mod tests { + use std::net::Ipv6Addr; use std::ops::{Range, RangeInclusive}; use std::path::Path; diff --git a/src/termdict/sstable_termdict/merger.rs b/src/termdict/sstable_termdict/merger.rs index e98c29656..a9cfda69d 100644 --- a/src/termdict/sstable_termdict/merger.rs +++ b/src/termdict/sstable_termdict/merger.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::collections::BinaryHeap; use crate::postings::TermInfo; -use crate::termdict::{TermOrdinal, TermStreamer}; +use crate::termdict::TermStreamer; pub struct HeapItem<'a> { pub streamer: TermStreamer<'a>, diff --git a/src/tokenizer/stop_word_filter/mod.rs b/src/tokenizer/stop_word_filter/mod.rs index a3599f3d9..daee693c5 100644 --- a/src/tokenizer/stop_word_filter/mod.rs +++ b/src/tokenizer/stop_word_filter/mod.rs @@ -18,7 +18,9 @@ use std::sync::Arc; use rustc_hash::FxHashSet; -use super::{BoxTokenStream, Language, Token, TokenFilter, TokenStream}; +#[cfg(feature = "stopwords")] +use super::Language; +use super::{BoxTokenStream, Token, TokenFilter, TokenStream}; /// `TokenFilter` that removes stop words from a token stream #[derive(Clone)]