// Patch summary (two files under fastfield_codecs/src):
//
// * null_index_footer.rs: adds the `FastFieldCardinality::Multi = 2` variant and makes
//   `FastFieldCardinality::from_code` map code `2` to it.
// * serialize.rs: adds the `ValueIndexInfo` enum (`MultiValue` / `SingleValue`), the
//   `MultiValueIndexInfo` and `SingleValueIndexInfo` traits, and the functions
//   `serialize_u128_new` / `serialize_new`, which take a `ValueIndexInfo` as their first argument.
//   The existing `serialize_u128` and `serialize` become thin wrappers that forward their
//   arguments together with `ValueIndexInfo::default()`.
// * The `NullIndexFooter::cardinality` field is now filled from `value_index.get_cardinality()`
//   instead of the hard-coded `FastFieldCardinality::Single`.
// * `ValueIndexInfo::default()` returns `SingleValue` wrapping a local `Dummy` whose trait methods
//   are all `todo!()`, so calling any of them panics. In the hunks shown here only
//   `get_cardinality` is invoked on the value, which does not touch the boxed object.
//
// NOTE(review): angle-bracketed generic arguments appear to be missing throughout (`Option`,
// `Box`, `Box>`, `::::new()`, `serialize_u128 I, I: Iterator>(`) - presumably lost when this
// diff was extracted; confirm against the original commit before applying.
// NOTE(review): the context line `compressor.compress_into(iter_gen(), output).unwrap()` sits in
// a function returning `io::Result<()>`; presumably the error could be propagated instead of
// unwrapped - verify against the full file.
diff --git a/fastfield_codecs/src/null_index_footer.rs b/fastfield_codecs/src/null_index_footer.rs index 1ce3cfcda..272ddbc3f 100644 --- a/fastfield_codecs/src/null_index_footer.rs +++ b/fastfield_codecs/src/null_index_footer.rs @@ -7,6 +7,7 @@ use ownedbytes::OwnedBytes; #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub(crate) enum FastFieldCardinality { Single = 1, + Multi = 2, } impl BinarySerializable for FastFieldCardinality { @@ -30,6 +31,7 @@ impl FastFieldCardinality { pub(crate) fn from_code(code: u8) -> Option { match code { 1 => Some(Self::Single), + 2 => Some(Self::Multi), _ => None, } } diff --git a/fastfield_codecs/src/serialize.rs b/fastfield_codecs/src/serialize.rs index 33f18bada..47fa969ea 100644 --- a/fastfield_codecs/src/serialize.rs +++ b/fastfield_codecs/src/serialize.rs @@ -193,6 +193,68 @@ pub fn serialize_u128 I, I: Iterator>( iter_gen: F, num_vals: u32, output: &mut impl io::Write, +) -> io::Result<()> { + serialize_u128_new(ValueIndexInfo::default(), iter_gen, num_vals, output) +} + +#[allow(dead_code)] +pub enum ValueIndexInfo { + MultiValue(Box), + SingleValue(Box), +} + +impl Default for ValueIndexInfo { + fn default() -> Self { + struct Dummy {} + impl SingleValueIndexInfo for Dummy { + fn num_vals(&self) -> u32 { + todo!() + } + fn num_nulls(&self) -> u32 { + todo!() + } + fn iter(&self) -> Box> { + todo!() + } + } + + Self::SingleValue(Box::new(Dummy {})) + } +} + +impl ValueIndexInfo { + fn get_cardinality(&self) -> FastFieldCardinality { + match self { + ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi, + ValueIndexInfo::SingleValue(_) => FastFieldCardinality::Single, + } + } +} + +pub trait MultiValueIndexInfo { + /// The number of docs in the column. + fn num_docs(&self) -> u32; + /// The number of values in the column. 
+ fn num_vals(&self) -> u32; + /// Return the start index of the values for each doc + fn iter(&self) -> Box>; +} + +pub trait SingleValueIndexInfo { + /// The number of values including nulls in the column. + fn num_vals(&self) -> u32; + /// The number of nulls in the column. + fn num_nulls(&self) -> u32; + /// Return an iterator of the positions of docs with a value + fn iter(&self) -> Box>; +} + +/// Serializes u128 values with the compact space codec. +pub fn serialize_u128_new I, I: Iterator>( + value_index: ValueIndexInfo, + iter_gen: F, + num_vals: u32, + output: &mut impl io::Write, ) -> io::Result<()> { let header = U128Header { num_vals, @@ -203,7 +265,7 @@ pub fn serialize_u128 I, I: Iterator>( compressor.compress_into(iter_gen(), output).unwrap(); let null_index_footer = NullIndexFooter { - cardinality: FastFieldCardinality::Single, + cardinality: value_index.get_cardinality(), null_index_codec: NullIndexCodec::Full, null_index_byte_range: 0..0, }; @@ -218,6 +280,16 @@ pub fn serialize( typed_column: impl Column, output: &mut impl io::Write, codecs: &[FastFieldCodecType], +) -> io::Result<()> { + serialize_new(ValueIndexInfo::default(), typed_column, output, codecs) +} + +/// Serializes the column with the codec with the best estimate on the data. +pub fn serialize_new( + value_index: ValueIndexInfo, + typed_column: impl Column, + output: &mut impl io::Write, + codecs: &[FastFieldCodecType], ) -> io::Result<()> { let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::::new()); let header = Header::compute_header(&column, codecs).ok_or_else(|| { @@ -235,7 +307,7 @@ pub fn serialize( serialize_given_codec(normalized_column, header.codec_type, output)?; let null_index_footer = NullIndexFooter { - cardinality: FastFieldCardinality::Single, + cardinality: value_index.get_cardinality(), null_index_codec: NullIndexCodec::Full, null_index_byte_range: 0..0, };