Minor refactoring preparing for getting columnar integrated in quickwit. (#1911)

This commit is contained in:
Paul Masurel
2023-02-27 14:23:30 +09:00
committed by GitHub
parent 0a726a0897
commit 8ea97e7d6b
6 changed files with 22 additions and 81 deletions

View File

@@ -124,15 +124,6 @@ impl BucketAggregationInternal {
}
}
/// Extract all fields, where the term directory is used in the tree.
pub fn get_term_dict_field_names(aggs: &Aggregations) -> HashSet<String> {
let mut term_dict_field_names = Default::default();
for el in aggs.values() {
el.get_term_dict_field_names(&mut term_dict_field_names)
}
term_dict_field_names
}
/// Extract all fast field names used in the tree.
pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
let mut fast_field_names = Default::default();
@@ -155,16 +146,12 @@ pub enum Aggregation {
}
impl Aggregation {
fn get_term_dict_field_names(&self, term_field_names: &mut HashSet<String>) {
if let Aggregation::Bucket(bucket) = self {
bucket.get_term_dict_field_names(term_field_names)
}
}
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
match self {
Aggregation::Bucket(bucket) => bucket.get_fast_field_names(fast_field_names),
Aggregation::Metric(metric) => metric.get_fast_field_names(fast_field_names),
Aggregation::Metric(metric) => {
fast_field_names.insert(metric.get_fast_field_name().to_string());
}
}
}
}
@@ -193,14 +180,9 @@ pub struct BucketAggregation {
}
impl BucketAggregation {
fn get_term_dict_field_names(&self, term_dict_field_names: &mut HashSet<String>) {
if let BucketAggregationType::Terms(terms) = &self.bucket_agg {
term_dict_field_names.insert(terms.field.to_string());
}
term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation));
}
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
self.bucket_agg.get_fast_field_names(fast_field_names);
let fast_field_name = self.bucket_agg.get_fast_field_name();
fast_field_names.insert(fast_field_name.to_string());
fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
}
}
@@ -220,14 +202,12 @@ pub enum BucketAggregationType {
}
impl BucketAggregationType {
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
fn get_fast_field_name(&self) -> &str {
match self {
BucketAggregationType::Terms(terms) => fast_field_names.insert(terms.field.to_string()),
BucketAggregationType::Range(range) => fast_field_names.insert(range.field.to_string()),
BucketAggregationType::Histogram(histogram) => {
fast_field_names.insert(histogram.field.to_string())
}
};
BucketAggregationType::Terms(terms) => terms.field.as_str(),
BucketAggregationType::Range(range) => range.field.as_str(),
BucketAggregationType::Histogram(histogram) => histogram.field.as_str(),
}
}
}
@@ -262,16 +242,15 @@ pub enum MetricAggregation {
}
impl MetricAggregation {
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
let fast_field_name = match self {
fn get_fast_field_name(&self) -> &str {
match self {
MetricAggregation::Average(avg) => avg.field_name(),
MetricAggregation::Count(count) => count.field_name(),
MetricAggregation::Max(max) => max.field_name(),
MetricAggregation::Min(min) => min.field_name(),
MetricAggregation::Stats(stats) => stats.field_name(),
MetricAggregation::Sum(sum) => sum.field_name(),
};
fast_field_names.insert(fast_field_name.to_string());
}
}
}

View File

@@ -1,8 +1,7 @@
use serde_json::Value;
use crate::aggregation::agg_req::{
get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation, BucketAggregationType,
MetricAggregation,
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
};
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::bucket::{RangeAggregation, TermsAggregation};
@@ -432,9 +431,6 @@ fn test_aggregation_level2(
agg_req
};
let field_names = get_term_dict_field_names(&agg_req);
assert_eq!(field_names, vec!["text".to_string()].into_iter().collect());
let agg_res: AggregationResults = if use_distributed_collector {
let collector = DistributedAggregationCollector::from_aggs(agg_req.clone(), None);

View File

@@ -505,8 +505,7 @@ pub(crate) fn cut_off_buckets<T: GetDocCount + Debug>(
mod tests {
use super::*;
use crate::aggregation::agg_req::{
get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation,
BucketAggregationType, MetricAggregation,
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
};
use crate::aggregation::metric::{AverageAggregation, StatsAggregation};
use crate::aggregation::tests::{
@@ -607,12 +606,6 @@ mod tests {
serde_json::Value::Null
);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0); // TODO sum_other_doc_count with min_doc_count
assert_eq!(
get_term_dict_field_names(&agg_req),
vec!["string_id".to_string(),].into_iter().collect()
);
Ok(())
}

View File

@@ -19,9 +19,8 @@
//!
//! Read access performance is comparable to that of an array lookup.
use std::net::Ipv6Addr;
pub use columnar::Column;
use columnar::MonotonicallyMappableToU64;
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
pub use self::error::{FastFieldNotAvailableError, Result};
@@ -37,38 +36,9 @@ mod facet_reader;
mod readers;
mod writer;
/// Trait for types that provide a zero value.
///
/// The resulting value is never used, just as placeholder, e.g. for `vec.resize()`.
pub trait MakeZero {
/// Build a default value. This default value is never used, so the value does not
/// really matter.
fn make_zero() -> Self;
}
impl<T: FastValue> MakeZero for T {
fn make_zero() -> Self {
T::from_u64(0)
}
}
impl MakeZero for u128 {
fn make_zero() -> Self {
0
}
}
impl MakeZero for Ipv6Addr {
fn make_zero() -> Self {
Ipv6Addr::from(0u128.to_be_bytes())
}
}
/// Trait for types that are allowed for fast fields:
/// (u64, i64 and f64, bool, DateTime).
pub trait FastValue:
Copy + Send + Sync + columnar::MonotonicallyMappableToU64 + PartialOrd + 'static
{
pub trait FastValue: MonotonicallyMappableToU64 {
/// Returns the `schema::Type` for this FastValue.
fn to_type() -> Type;
}
@@ -105,6 +75,7 @@ impl FastValue for DateTime {
#[cfg(test)]
mod tests {
use std::net::Ipv6Addr;
use std::ops::{Range, RangeInclusive};
use std::path::Path;

View File

@@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::collections::BinaryHeap;
use crate::postings::TermInfo;
use crate::termdict::{TermOrdinal, TermStreamer};
use crate::termdict::TermStreamer;
pub struct HeapItem<'a> {
pub streamer: TermStreamer<'a>,

View File

@@ -18,7 +18,9 @@ use std::sync::Arc;
use rustc_hash::FxHashSet;
use super::{BoxTokenStream, Language, Token, TokenFilter, TokenStream};
#[cfg(feature = "stopwords")]
use super::Language;
use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
/// `TokenFilter` that removes stop words from a token stream
#[derive(Clone)]