mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 09:32:54 +00:00
pass index info to serialize (#1719)
This commit is contained in:
@@ -7,6 +7,7 @@ use ownedbytes::OwnedBytes;
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||
pub(crate) enum FastFieldCardinality {
|
||||
Single = 1,
|
||||
Multi = 2,
|
||||
}
|
||||
|
||||
impl BinarySerializable for FastFieldCardinality {
|
||||
@@ -30,6 +31,7 @@ impl FastFieldCardinality {
|
||||
pub(crate) fn from_code(code: u8) -> Option<Self> {
|
||||
match code {
|
||||
1 => Some(Self::Single),
|
||||
2 => Some(Self::Multi),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,6 +193,68 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
|
||||
iter_gen: F,
|
||||
num_vals: u32,
|
||||
output: &mut impl io::Write,
|
||||
) -> io::Result<()> {
|
||||
serialize_u128_new(ValueIndexInfo::default(), iter_gen, num_vals, output)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub enum ValueIndexInfo {
|
||||
MultiValue(Box<dyn MultiValueIndexInfo>),
|
||||
SingleValue(Box<dyn SingleValueIndexInfo>),
|
||||
}
|
||||
|
||||
impl Default for ValueIndexInfo {
|
||||
fn default() -> Self {
|
||||
struct Dummy {}
|
||||
impl SingleValueIndexInfo for Dummy {
|
||||
fn num_vals(&self) -> u32 {
|
||||
todo!()
|
||||
}
|
||||
fn num_nulls(&self) -> u32 {
|
||||
todo!()
|
||||
}
|
||||
fn iter(&self) -> Box<dyn Iterator<Item = u32>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
Self::SingleValue(Box::new(Dummy {}))
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueIndexInfo {
|
||||
fn get_cardinality(&self) -> FastFieldCardinality {
|
||||
match self {
|
||||
ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi,
|
||||
ValueIndexInfo::SingleValue(_) => FastFieldCardinality::Single,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait MultiValueIndexInfo {
|
||||
/// The number of docs in the column.
|
||||
fn num_docs(&self) -> u32;
|
||||
/// The number of values in the column.
|
||||
fn num_vals(&self) -> u32;
|
||||
/// Return the start index of the values for each doc
|
||||
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
|
||||
}
|
||||
|
||||
pub trait SingleValueIndexInfo {
|
||||
/// The number of values including nulls in the column.
|
||||
fn num_vals(&self) -> u32;
|
||||
/// The number of nulls in the column.
|
||||
fn num_nulls(&self) -> u32;
|
||||
/// Return a iterator of the positions of docs with a value
|
||||
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
|
||||
}
|
||||
|
||||
/// Serializes u128 values with the compact space codec.
|
||||
pub fn serialize_u128_new<F: Fn() -> I, I: Iterator<Item = u128>>(
|
||||
value_index: ValueIndexInfo,
|
||||
iter_gen: F,
|
||||
num_vals: u32,
|
||||
output: &mut impl io::Write,
|
||||
) -> io::Result<()> {
|
||||
let header = U128Header {
|
||||
num_vals,
|
||||
@@ -203,7 +265,7 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
|
||||
compressor.compress_into(iter_gen(), output).unwrap();
|
||||
|
||||
let null_index_footer = NullIndexFooter {
|
||||
cardinality: FastFieldCardinality::Single,
|
||||
cardinality: value_index.get_cardinality(),
|
||||
null_index_codec: NullIndexCodec::Full,
|
||||
null_index_byte_range: 0..0,
|
||||
};
|
||||
@@ -218,6 +280,16 @@ pub fn serialize<T: MonotonicallyMappableToU64>(
|
||||
typed_column: impl Column<T>,
|
||||
output: &mut impl io::Write,
|
||||
codecs: &[FastFieldCodecType],
|
||||
) -> io::Result<()> {
|
||||
serialize_new(ValueIndexInfo::default(), typed_column, output, codecs)
|
||||
}
|
||||
|
||||
/// Serializes the column with the codec with the best estimate on the data.
|
||||
pub fn serialize_new<T: MonotonicallyMappableToU64>(
|
||||
value_index: ValueIndexInfo,
|
||||
typed_column: impl Column<T>,
|
||||
output: &mut impl io::Write,
|
||||
codecs: &[FastFieldCodecType],
|
||||
) -> io::Result<()> {
|
||||
let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::<T>::new());
|
||||
let header = Header::compute_header(&column, codecs).ok_or_else(|| {
|
||||
@@ -235,7 +307,7 @@ pub fn serialize<T: MonotonicallyMappableToU64>(
|
||||
serialize_given_codec(normalized_column, header.codec_type, output)?;
|
||||
|
||||
let null_index_footer = NullIndexFooter {
|
||||
cardinality: FastFieldCardinality::Single,
|
||||
cardinality: value_index.get_cardinality(),
|
||||
null_index_codec: NullIndexCodec::Full,
|
||||
null_index_byte_range: 0..0,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user