mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 10:00:41 +00:00
Exposing empty column builder. (#1959)
This commit is contained in:
@@ -16,7 +16,7 @@ pub use serialize::{
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||
use crate::column_values::{monotonic_map_column, ColumnValues};
|
||||
use crate::{Cardinality, DocId, MonotonicallyMappableToU64, RowId};
|
||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Column<T = u64> {
|
||||
@@ -24,6 +24,15 @@ pub struct Column<T = u64> {
|
||||
pub values: Arc<dyn ColumnValues<T>>,
|
||||
}
|
||||
|
||||
impl<T: PartialOrd + Default> Column<T> {
|
||||
/// Builds a column that contains no values.
///
/// The returned column uses `ColumnIndex::Empty { num_docs }` as its index
/// and an `EmptyColumnValues` instance as its values.
pub fn build_empty_column(num_docs: u32) -> Column<T> {
|
||||
Column {
|
||||
idx: ColumnIndex::Empty { num_docs },
|
||||
values: Arc::new(EmptyColumnValues),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: MonotonicallyMappableToU64> Column<T> {
|
||||
pub fn to_u64_monotonic(self) -> Column<u64> {
|
||||
let values = Arc::new(monotonic_map_column(
|
||||
|
||||
@@ -110,20 +110,26 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
/// Returns a lower bound for this column of values.
|
||||
///
|
||||
/// This min_value may not be exact.
|
||||
/// For instance, the min value does not take into account possible
|
||||
/// deleted documents. All values are however guaranteed to be no lower than
|
||||
/// `.min_value()`.
|
||||
/// All values are guaranteed to be greater than or equal to `.min_value()`
|
||||
/// but this value is not necessarily the best boundary value.
|
||||
///
|
||||
/// We have
|
||||
/// ∀i < self.num_vals(), self.get_val(i) >= self.min_value()
|
||||
/// But we don't necessarily have
|
||||
/// ∃i < self.num_vals(), self.get_val(i) == self.min_value()
|
||||
fn min_value(&self) -> T;
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
/// Returns an upper bound for this column of values.
|
||||
///
|
||||
/// This max_value may not be exact.
|
||||
/// For instance, the max value does not take into account possible
|
||||
/// deleted documents. All values are however guaranteed to be no higher than
|
||||
/// `.max_value()`.
|
||||
/// All values are guaranteed to be lower than or equal to `.max_value()`
|
||||
/// but this value is not necessarily the best boundary value.
|
||||
///
|
||||
/// We have
|
||||
/// ∀i < self.num_vals(), self.get_val(i) <= self.max_value()
|
||||
/// But we don't necessarily have
|
||||
/// ∃i < self.num_vals(), self.get_val(i) == self.max_value()
|
||||
fn max_value(&self) -> T;
|
||||
|
||||
/// The number of values in the column.
|
||||
@@ -135,6 +141,27 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
|
||||
}
|
||||
}
|
||||
|
||||
/// Empty column of values.
///
/// It reports zero values through `num_vals()`, and both `min_value()` and
/// `max_value()` return `T::default()`. Calling `get_val()` panics.
|
||||
pub struct EmptyColumnValues;
|
||||
|
||||
impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
|
||||
// Always panics: `num_vals()` is 0, so there is no value to return at any index.
fn get_val(&self, _idx: u32) -> T {
|
||||
panic!("Internal Error: Called get_val of empty column.")
|
||||
}
|
||||
|
||||
// With no values to bound, the default value of `T` is returned.
fn min_value(&self) -> T {
|
||||
T::default()
|
||||
}
|
||||
|
||||
// With no values to bound, the default value of `T` is returned.
fn max_value(&self) -> T {
|
||||
T::default()
|
||||
}
|
||||
|
||||
// The column never holds any value.
fn num_vals(&self) -> u32 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
|
||||
#[inline(always)]
|
||||
fn get_val(&self, idx: u32) -> T {
|
||||
|
||||
@@ -23,7 +23,9 @@ mod value;
|
||||
pub use block_accessor::ColumnBlockAccessor;
|
||||
pub use column::{BytesColumn, Column, StrColumn};
|
||||
pub use column_index::ColumnIndex;
|
||||
pub use column_values::{ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
pub use column_values::{
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
};
|
||||
pub use columnar::{
|
||||
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
//! This will enhance the request tree with access to the fastfield and metadata.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use columnar::{Column, ColumnBlockAccessor, ColumnType, ColumnValues, StrColumn};
|
||||
use columnar::{Column, ColumnBlockAccessor, ColumnType, StrColumn};
|
||||
|
||||
use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
|
||||
use super::bucket::{
|
||||
@@ -163,31 +161,11 @@ fn get_ff_reader_and_validate(
|
||||
let ff_fields = reader.fast_fields();
|
||||
let ff_field_with_type = ff_fields
|
||||
.u64_lenient_with_type(field_name)?
|
||||
.unwrap_or_else(|| (build_empty_column(reader.num_docs()), ColumnType::U64));
|
||||
|
||||
.unwrap_or_else(|| {
|
||||
(
|
||||
Column::build_empty_column(reader.num_docs()),
|
||||
ColumnType::U64,
|
||||
)
|
||||
});
|
||||
Ok(ff_field_with_type)
|
||||
}
|
||||
|
||||
// Builds an empty `Column` whose index is `ColumnIndex::Empty { num_docs }`
// and whose values come from a function-local `EmptyValues` type.
|
||||
fn build_empty_column(num_docs: u32) -> Column {
|
||||
// Local value source for the empty column; it holds no data.
struct EmptyValues;
|
||||
impl ColumnValues for EmptyValues {
|
||||
// Hits `unimplemented!` (panics) when called.
fn get_val(&self, _idx: u32) -> u64 {
|
||||
unimplemented!("Internal Error: Called get_val of empty column.")
|
||||
}
|
||||
// Hits `unimplemented!` (panics) when called: no bound is provided here.
fn min_value(&self) -> u64 {
|
||||
unimplemented!("Internal Error: Called min_value of empty column.")
|
||||
}
|
||||
// Hits `unimplemented!` (panics) when called: no bound is provided here.
fn max_value(&self) -> u64 {
|
||||
unimplemented!("Internal Error: Called max_value of empty column.")
|
||||
}
|
||||
// The column never holds any value.
fn num_vals(&self) -> u32 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
Column {
|
||||
idx: columnar::ColumnIndex::Empty { num_docs },
|
||||
values: Arc::new(EmptyValues),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user