From 821208480b54eea875ba54814b314cccc80dc1ac Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 26 Mar 2023 14:40:37 +0900 Subject: [PATCH] Adding Debug/Display impl. Refining the ColumnIndex::get_cardinality --- columnar/src/column/dictionary_encoded.rs | 16 ++++- columnar/src/column/mod.rs | 12 +++- columnar/src/column_index/mod.rs | 26 +++++++- .../src/column_index/multivalued_index.rs | 8 +++ .../src/column_index/optional_index/mod.rs | 9 +++ columnar/src/columnar/column_type.rs | 17 ++++++ columnar/src/columnar/reader/mod.rs | 28 ++++++++- columnar/src/dynamic_column.rs | 59 +++++++++++++++---- columnar/src/lib.rs | 13 +++- 9 files changed, 170 insertions(+), 18 deletions(-) diff --git a/columnar/src/column/dictionary_encoded.rs b/columnar/src/column/dictionary_encoded.rs index 5cfbf7140..f87603ee7 100644 --- a/columnar/src/column/dictionary_encoded.rs +++ b/columnar/src/column/dictionary_encoded.rs @@ -1,6 +1,6 @@ -use std::io; use std::ops::Deref; use std::sync::Arc; +use std::{fmt, io}; use sstable::{Dictionary, VoidSSTable}; @@ -21,6 +21,14 @@ pub struct BytesColumn { pub(crate) term_ord_column: Column, } +impl fmt::Debug for BytesColumn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BytesColumn") + .field("term_ord_column", &self.term_ord_column) + .finish() + } +} + impl BytesColumn { /// Fills the given `output` buffer with the term associated to the ordinal `ord`. /// @@ -56,6 +64,12 @@ impl BytesColumn { #[derive(Clone)] pub struct StrColumn(BytesColumn); +impl fmt::Debug for StrColumn { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self.term_ord_column) + } +} + impl From for BytesColumn { fn from(str_column: StrColumn) -> BytesColumn { str_column.0 diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index ef119de92..187377586 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -1,7 +1,7 @@ mod dictionary_encoded; mod serialize; -use std::fmt::Debug; +use std::fmt::{self, Debug}; use std::io::Write; use std::ops::{Deref, Range, RangeInclusive}; use std::sync::Arc; @@ -24,6 +24,16 @@ pub struct Column { pub values: Arc>, } +impl Debug for Column { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let num_docs = self.num_docs(); + let entries = (0..num_docs) + .map(|i| (i, self.values_for_doc(i).collect::>())) + .filter(|(_, vals)| !vals.is_empty()); + f.debug_map().entries(entries).finish() + } +} + impl Column { pub fn build_empty_column(num_docs: u32) -> Column { Column { diff --git a/columnar/src/column_index/mod.rs b/columnar/src/column_index/mod.rs index aba51dd8d..1a0e9073c 100644 --- a/columnar/src/column_index/mod.rs +++ b/columnar/src/column_index/mod.rs @@ -12,7 +12,7 @@ pub use serialize::{open_column_index, serialize_column_index, SerializableColum use crate::column_index::multivalued_index::MultiValueIndex; use crate::{Cardinality, DocId, RowId}; -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum ColumnIndex { Empty { num_docs: u32, @@ -37,11 +37,15 @@ impl From for ColumnIndex { } impl ColumnIndex { + // Returns the cardinality of the column index. + // + // By convention, if the column contains no docs, we consider that it is + // full. #[inline] pub fn get_cardinality(&self) -> Cardinality { match self { + ColumnIndex::Empty { num_docs: 0 } | ColumnIndex::Full => Cardinality::Full, ColumnIndex::Empty { .. } => Cardinality::Optional, - ColumnIndex::Full => Cardinality::Full, ColumnIndex::Optional(_) => Cardinality::Optional, ColumnIndex::Multivalued(_) => Cardinality::Multivalued, } @@ -152,3 +156,21 @@ impl ColumnIndex { } } } + +#[cfg(test)] +mod tests { + use crate::{Cardinality, ColumnIndex}; + + #[test] + fn test_column_index_get_cardinality() { + assert_eq!( + ColumnIndex::Empty { num_docs: 0 }.get_cardinality(), + Cardinality::Full + ); + assert_eq!(ColumnIndex::Full.get_cardinality(), Cardinality::Full); + assert_eq!( + ColumnIndex::Empty { num_docs: 1 }.get_cardinality(), + Cardinality::Optional + ); + } +} diff --git a/columnar/src/column_index/multivalued_index.rs b/columnar/src/column_index/multivalued_index.rs index 0052ce2e3..eab82a3e3 100644 --- a/columnar/src/column_index/multivalued_index.rs +++ b/columnar/src/column_index/multivalued_index.rs @@ -35,6 +35,14 @@ pub struct MultiValueIndex { pub start_index_column: Arc>, } +impl std::fmt::Debug for MultiValueIndex { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("MultiValuedIndex") + .field("num_rows", &self.start_index_column.num_vals()) + .finish_non_exhaustive() + } +} + impl From>> for MultiValueIndex { fn from(start_index_column: Arc>) -> Self { MultiValueIndex { start_index_column } diff --git a/columnar/src/column_index/optional_index/mod.rs b/columnar/src/column_index/optional_index/mod.rs index 241c0c1af..e885ee5bc 100644 --- a/columnar/src/column_index/optional_index/mod.rs +++ b/columnar/src/column_index/optional_index/mod.rs @@ -88,6 +88,15 @@ pub struct OptionalIndex { block_metas: Arc<[BlockMeta]>, } +impl std::fmt::Debug for OptionalIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OptionalIndex") + .field("num_rows", &self.num_rows) + .field("num_non_null_rows", &self.num_non_null_rows) + .finish_non_exhaustive() + } +} + /// Splits a value address into lower and upper 16bits. /// The lower 16 bits are the value in the block /// The upper 16 bits are the block index diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index 16da5e36b..36763089d 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::fmt::Debug; use std::net::Ipv6Addr; @@ -21,6 +22,22 @@ pub enum ColumnType { DateTime = 7u8, } +impl fmt::Display for ColumnType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let short_str = match self { + ColumnType::I64 => "i64", + ColumnType::U64 => "u64", + ColumnType::F64 => "f64", + ColumnType::Bytes => "bytes", + ColumnType::Str => "str", + ColumnType::Bool => "bool", + ColumnType::IpAddr => "ip", + ColumnType::DateTime => "datetime", + }; + write!(f, "{}", short_str) + } +} + // The order needs to match _exactly_ the order in the enum const COLUMN_TYPES: [ColumnType; 8] = [ ColumnType::I64, diff --git a/columnar/src/columnar/reader/mod.rs b/columnar/src/columnar/reader/mod.rs index d11a4dfb7..fb154abfd 100644 --- a/columnar/src/columnar/reader/mod.rs +++ b/columnar/src/columnar/reader/mod.rs @@ -1,4 +1,4 @@ -use std::{io, mem}; +use std::{fmt, io, mem}; use common::file_slice::FileSlice; use common::BinarySerializable; @@ -21,6 +21,32 @@ pub struct ColumnarReader { num_rows: RowId, } +impl fmt::Debug for ColumnarReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let num_rows = self.num_rows(); + let columns = self.list_columns().unwrap(); + let num_cols = columns.len(); + let mut debug_struct = f.debug_struct("Columnar"); + debug_struct + .field("num_rows", &num_rows) + .field("num_cols", &num_cols); + for (col_name, dynamic_column_handle) in columns.into_iter().take(5) { + let col = dynamic_column_handle.open().unwrap(); + if col.num_values() > 10 { + debug_struct.field(&col_name, &".."); + } else { + debug_struct.field(&col_name, &col); + } + } + if num_cols > 5 { + debug_struct.finish_non_exhaustive()?; + } else { + debug_struct.finish()?; + } + Ok(()) + } +} + /// Functions by both the async/sync code listing columns. /// It takes a stream from the column sstable and return the list of /// `DynamicColumn` available in it. diff --git a/columnar/src/dynamic_column.rs b/columnar/src/dynamic_column.rs index 08117e1e3..cfb31d0bf 100644 --- a/columnar/src/dynamic_column.rs +++ b/columnar/src/dynamic_column.rs @@ -1,6 +1,6 @@ -use std::io; use std::net::Ipv6Addr; use std::sync::Arc; +use std::{fmt, io}; use common::file_slice::FileSlice; use common::{ByteCount, DateTime, HasLen, OwnedBytes}; @@ -8,7 +8,7 @@ use common::{ByteCount, DateTime, HasLen, OwnedBytes}; use crate::column::{BytesColumn, Column, StrColumn}; use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn}; use crate::columnar::ColumnType; -use crate::{Cardinality, NumericalType}; +use crate::{Cardinality, ColumnIndex, NumericalType}; #[derive(Clone)] pub enum DynamicColumn { @@ -22,19 +22,54 @@ pub enum DynamicColumn { Str(StrColumn), } -impl DynamicColumn { - pub fn get_cardinality(&self) -> Cardinality { +impl fmt::Debug for DynamicColumn { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[{} {} |", self.get_cardinality(), self.column_type())?; match self { - DynamicColumn::Bool(c) => c.get_cardinality(), - DynamicColumn::I64(c) => c.get_cardinality(), - DynamicColumn::U64(c) => c.get_cardinality(), - DynamicColumn::F64(c) => c.get_cardinality(), - DynamicColumn::IpAddr(c) => c.get_cardinality(), - DynamicColumn::DateTime(c) => c.get_cardinality(), - DynamicColumn::Bytes(c) => c.ords().get_cardinality(), - DynamicColumn::Str(c) => c.ords().get_cardinality(), + DynamicColumn::Bool(col) => write!(f, " {:?}", col)?, + DynamicColumn::I64(col) => write!(f, " {:?}", col)?, + DynamicColumn::U64(col) => write!(f, " {:?}", col)?, + DynamicColumn::F64(col) => write!(f, "{:?}", col)?, + DynamicColumn::IpAddr(col) => write!(f, "{:?}", col)?, + DynamicColumn::DateTime(col) => write!(f, "{:?}", col)?, + DynamicColumn::Bytes(col) => write!(f, "{:?}", col)?, + DynamicColumn::Str(col) => write!(f, "{:?}", col)?, + } + write!(f, "]") + } +} + +impl DynamicColumn { + pub fn column_index(&self) -> &ColumnIndex { + match self { + DynamicColumn::Bool(c) => &c.index, + DynamicColumn::I64(c) => &c.index, + DynamicColumn::U64(c) => &c.index, + DynamicColumn::F64(c) => &c.index, + DynamicColumn::IpAddr(c) => &c.index, + DynamicColumn::DateTime(c) => &c.index, + DynamicColumn::Bytes(c) => &c.ords().index, + DynamicColumn::Str(c) => &c.ords().index, } } + + pub fn get_cardinality(&self) -> Cardinality { + self.column_index().get_cardinality() + } + + pub fn num_values(&self) -> u32 { + match self { + DynamicColumn::Bool(c) => c.values.num_vals(), + DynamicColumn::I64(c) => c.values.num_vals(), + DynamicColumn::U64(c) => c.values.num_vals(), + DynamicColumn::F64(c) => c.values.num_vals(), + DynamicColumn::IpAddr(c) => c.values.num_vals(), + DynamicColumn::DateTime(c) => c.values.num_vals(), + DynamicColumn::Bytes(c) => c.ords().values.num_vals(), + DynamicColumn::Str(c) => c.ords().values.num_vals(), + } + } + pub fn column_type(&self) -> ColumnType { match self { DynamicColumn::Bool(_) => ColumnType::Bool, diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 147ff67cb..a0c604f9a 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -7,6 +7,7 @@ extern crate more_asserts; #[cfg(all(test, feature = "unstable"))] extern crate test; +use std::fmt::Display; use std::io; mod block_accessor; @@ -75,6 +76,17 @@ pub enum Cardinality { Multivalued = 2, } +impl Display for Cardinality { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let short_str = match self { + Cardinality::Full => "full", + Cardinality::Optional => "opt", + Cardinality::Multivalued => "mult", + }; + write!(f, "{short_str}") + } +} + impl Cardinality { pub fn is_optional(&self) -> bool { matches!(self, Cardinality::Optional) @@ -85,7 +97,6 @@ impl Cardinality { pub(crate) fn to_code(self) -> u8 { self as u8 } - pub(crate) fn try_from_code(code: u8) -> Result { match code { 0 => Ok(Cardinality::Full),