mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 17:10:48 +00:00
Minor refactoring preparing for getting columnar integrated in quickwit. (#1911)
This commit is contained in:
@@ -124,15 +124,6 @@ impl BucketAggregationInternal {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract all fields, where the term directory is used in the tree.
|
||||
pub fn get_term_dict_field_names(aggs: &Aggregations) -> HashSet<String> {
|
||||
let mut term_dict_field_names = Default::default();
|
||||
for el in aggs.values() {
|
||||
el.get_term_dict_field_names(&mut term_dict_field_names)
|
||||
}
|
||||
term_dict_field_names
|
||||
}
|
||||
|
||||
/// Extract all fast field names used in the tree.
|
||||
pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
|
||||
let mut fast_field_names = Default::default();
|
||||
@@ -155,16 +146,12 @@ pub enum Aggregation {
|
||||
}
|
||||
|
||||
impl Aggregation {
|
||||
fn get_term_dict_field_names(&self, term_field_names: &mut HashSet<String>) {
|
||||
if let Aggregation::Bucket(bucket) = self {
|
||||
bucket.get_term_dict_field_names(term_field_names)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
|
||||
match self {
|
||||
Aggregation::Bucket(bucket) => bucket.get_fast_field_names(fast_field_names),
|
||||
Aggregation::Metric(metric) => metric.get_fast_field_names(fast_field_names),
|
||||
Aggregation::Metric(metric) => {
|
||||
fast_field_names.insert(metric.get_fast_field_name().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,14 +180,9 @@ pub struct BucketAggregation {
|
||||
}
|
||||
|
||||
impl BucketAggregation {
|
||||
fn get_term_dict_field_names(&self, term_dict_field_names: &mut HashSet<String>) {
|
||||
if let BucketAggregationType::Terms(terms) = &self.bucket_agg {
|
||||
term_dict_field_names.insert(terms.field.to_string());
|
||||
}
|
||||
term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation));
|
||||
}
|
||||
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
|
||||
self.bucket_agg.get_fast_field_names(fast_field_names);
|
||||
let fast_field_name = self.bucket_agg.get_fast_field_name();
|
||||
fast_field_names.insert(fast_field_name.to_string());
|
||||
fast_field_names.extend(get_fast_field_names(&self.sub_aggregation));
|
||||
}
|
||||
}
|
||||
@@ -220,14 +202,12 @@ pub enum BucketAggregationType {
|
||||
}
|
||||
|
||||
impl BucketAggregationType {
|
||||
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
|
||||
fn get_fast_field_name(&self) -> &str {
|
||||
match self {
|
||||
BucketAggregationType::Terms(terms) => fast_field_names.insert(terms.field.to_string()),
|
||||
BucketAggregationType::Range(range) => fast_field_names.insert(range.field.to_string()),
|
||||
BucketAggregationType::Histogram(histogram) => {
|
||||
fast_field_names.insert(histogram.field.to_string())
|
||||
}
|
||||
};
|
||||
BucketAggregationType::Terms(terms) => terms.field.as_str(),
|
||||
BucketAggregationType::Range(range) => range.field.as_str(),
|
||||
BucketAggregationType::Histogram(histogram) => histogram.field.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,16 +242,15 @@ pub enum MetricAggregation {
|
||||
}
|
||||
|
||||
impl MetricAggregation {
|
||||
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
|
||||
let fast_field_name = match self {
|
||||
fn get_fast_field_name(&self) -> &str {
|
||||
match self {
|
||||
MetricAggregation::Average(avg) => avg.field_name(),
|
||||
MetricAggregation::Count(count) => count.field_name(),
|
||||
MetricAggregation::Max(max) => max.field_name(),
|
||||
MetricAggregation::Min(min) => min.field_name(),
|
||||
MetricAggregation::Stats(stats) => stats.field_name(),
|
||||
MetricAggregation::Sum(sum) => sum.field_name(),
|
||||
};
|
||||
fast_field_names.insert(fast_field_name.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::aggregation::agg_req::{
|
||||
get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation, BucketAggregationType,
|
||||
MetricAggregation,
|
||||
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
|
||||
};
|
||||
use crate::aggregation::agg_result::AggregationResults;
|
||||
use crate::aggregation::bucket::{RangeAggregation, TermsAggregation};
|
||||
@@ -432,9 +431,6 @@ fn test_aggregation_level2(
|
||||
agg_req
|
||||
};
|
||||
|
||||
let field_names = get_term_dict_field_names(&agg_req);
|
||||
assert_eq!(field_names, vec!["text".to_string()].into_iter().collect());
|
||||
|
||||
let agg_res: AggregationResults = if use_distributed_collector {
|
||||
let collector = DistributedAggregationCollector::from_aggs(agg_req.clone(), None);
|
||||
|
||||
|
||||
@@ -505,8 +505,7 @@ pub(crate) fn cut_off_buckets<T: GetDocCount + Debug>(
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::aggregation::agg_req::{
|
||||
get_term_dict_field_names, Aggregation, Aggregations, BucketAggregation,
|
||||
BucketAggregationType, MetricAggregation,
|
||||
Aggregation, Aggregations, BucketAggregation, BucketAggregationType, MetricAggregation,
|
||||
};
|
||||
use crate::aggregation::metric::{AverageAggregation, StatsAggregation};
|
||||
use crate::aggregation::tests::{
|
||||
@@ -607,12 +606,6 @@ mod tests {
|
||||
serde_json::Value::Null
|
||||
);
|
||||
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0); // TODO sum_other_doc_count with min_doc_count
|
||||
|
||||
assert_eq!(
|
||||
get_term_dict_field_names(&agg_req),
|
||||
vec!["string_id".to_string(),].into_iter().collect()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@
|
||||
//!
|
||||
//! Read access performance is comparable to that of an array lookup.
|
||||
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
pub use columnar::Column;
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
|
||||
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
@@ -37,38 +36,9 @@ mod facet_reader;
|
||||
mod readers;
|
||||
mod writer;
|
||||
|
||||
/// Trait for types that provide a zero value.
|
||||
///
|
||||
/// The resulting value is never used, just as placeholder, e.g. for `vec.resize()`.
|
||||
pub trait MakeZero {
|
||||
/// Build a default value. This default value is never used, so the value does not
|
||||
/// really matter.
|
||||
fn make_zero() -> Self;
|
||||
}
|
||||
|
||||
impl<T: FastValue> MakeZero for T {
|
||||
fn make_zero() -> Self {
|
||||
T::from_u64(0)
|
||||
}
|
||||
}
|
||||
|
||||
impl MakeZero for u128 {
|
||||
fn make_zero() -> Self {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
impl MakeZero for Ipv6Addr {
|
||||
fn make_zero() -> Self {
|
||||
Ipv6Addr::from(0u128.to_be_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for types that are allowed for fast fields:
|
||||
/// (u64, i64 and f64, bool, DateTime).
|
||||
pub trait FastValue:
|
||||
Copy + Send + Sync + columnar::MonotonicallyMappableToU64 + PartialOrd + 'static
|
||||
{
|
||||
pub trait FastValue: MonotonicallyMappableToU64 {
|
||||
/// Returns the `schema::Type` for this FastValue.
|
||||
fn to_type() -> Type;
|
||||
}
|
||||
@@ -105,6 +75,7 @@ impl FastValue for DateTime {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use std::net::Ipv6Addr;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
use std::path::Path;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use crate::postings::TermInfo;
|
||||
use crate::termdict::{TermOrdinal, TermStreamer};
|
||||
use crate::termdict::TermStreamer;
|
||||
|
||||
pub struct HeapItem<'a> {
|
||||
pub streamer: TermStreamer<'a>,
|
||||
|
||||
@@ -18,7 +18,9 @@ use std::sync::Arc;
|
||||
|
||||
use rustc_hash::FxHashSet;
|
||||
|
||||
use super::{BoxTokenStream, Language, Token, TokenFilter, TokenStream};
|
||||
#[cfg(feature = "stopwords")]
|
||||
use super::Language;
|
||||
use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
|
||||
|
||||
/// `TokenFilter` that removes stop words from a token stream
|
||||
#[derive(Clone)]
|
||||
|
||||
Reference in New Issue
Block a user