Adding Debug/Display impl. Refining the ColumnIndex::get_cardinality

This commit is contained in:
Paul Masurel
2023-03-26 14:40:37 +09:00
parent a2e3c2ed5b
commit 821208480b
9 changed files with 170 additions and 18 deletions

View File

@@ -1,6 +1,6 @@
use std::io;
use std::ops::Deref;
use std::sync::Arc;
use std::{fmt, io};
use sstable::{Dictionary, VoidSSTable};
@@ -21,6 +21,14 @@ pub struct BytesColumn {
pub(crate) term_ord_column: Column<u64>,
}
impl fmt::Debug for BytesColumn {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("BytesColumn")
.field("term_ord_column", &self.term_ord_column)
.finish()
}
}
impl BytesColumn {
/// Fills the given `output` buffer with the term associated to the ordinal `ord`.
///
@@ -56,6 +64,12 @@ impl BytesColumn {
#[derive(Clone)]
pub struct StrColumn(BytesColumn);
impl fmt::Debug for StrColumn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self.term_ord_column)
}
}
impl From<StrColumn> for BytesColumn {
fn from(str_column: StrColumn) -> BytesColumn {
str_column.0

View File

@@ -1,7 +1,7 @@
mod dictionary_encoded;
mod serialize;
use std::fmt::Debug;
use std::fmt::{self, Debug};
use std::io::Write;
use std::ops::{Deref, Range, RangeInclusive};
use std::sync::Arc;
@@ -24,6 +24,16 @@ pub struct Column<T = u64> {
pub values: Arc<dyn ColumnValues<T>>,
}
impl<T: Debug + PartialOrd + Send + Sync + Copy + 'static> Debug for Column<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let num_docs = self.num_docs();
let entries = (0..num_docs)
.map(|i| (i, self.values_for_doc(i).collect::<Vec<_>>()))
.filter(|(_, vals)| !vals.is_empty());
f.debug_map().entries(entries).finish()
}
}
impl<T: PartialOrd + Default> Column<T> {
pub fn build_empty_column(num_docs: u32) -> Column<T> {
Column {

View File

@@ -12,7 +12,7 @@ pub use serialize::{open_column_index, serialize_column_index, SerializableColum
use crate::column_index::multivalued_index::MultiValueIndex;
use crate::{Cardinality, DocId, RowId};
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum ColumnIndex {
Empty {
num_docs: u32,
@@ -37,11 +37,15 @@ impl From<MultiValueIndex> for ColumnIndex {
}
impl ColumnIndex {
// Returns the cardinality of the column index.
//
// By convention, if the column contains no docs, we consider that it is
// full.
#[inline]
pub fn get_cardinality(&self) -> Cardinality {
match self {
ColumnIndex::Empty { num_docs: 0 } | ColumnIndex::Full => Cardinality::Full,
ColumnIndex::Empty { .. } => Cardinality::Optional,
ColumnIndex::Full => Cardinality::Full,
ColumnIndex::Optional(_) => Cardinality::Optional,
ColumnIndex::Multivalued(_) => Cardinality::Multivalued,
}
@@ -152,3 +156,21 @@ impl ColumnIndex {
}
}
}
#[cfg(test)]
mod tests {
use crate::{Cardinality, ColumnIndex};
#[test]
fn test_column_index_get_cardinality() {
assert_eq!(
ColumnIndex::Empty { num_docs: 0 }.get_cardinality(),
Cardinality::Full
);
assert_eq!(ColumnIndex::Full.get_cardinality(), Cardinality::Full);
assert_eq!(
ColumnIndex::Empty { num_docs: 1 }.get_cardinality(),
Cardinality::Optional
);
}
}

View File

@@ -35,6 +35,14 @@ pub struct MultiValueIndex {
pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
}
impl std::fmt::Debug for MultiValueIndex {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("MultiValuedIndex")
.field("num_rows", &self.start_index_column.num_vals())
.finish_non_exhaustive()
}
}
impl From<Arc<dyn ColumnValues<RowId>>> for MultiValueIndex {
fn from(start_index_column: Arc<dyn ColumnValues<RowId>>) -> Self {
MultiValueIndex { start_index_column }

View File

@@ -88,6 +88,15 @@ pub struct OptionalIndex {
block_metas: Arc<[BlockMeta]>,
}
impl std::fmt::Debug for OptionalIndex {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OptionalIndex")
.field("num_rows", &self.num_rows)
.field("num_non_null_rows", &self.num_non_null_rows)
.finish_non_exhaustive()
}
}
/// Splits a value address into lower and upper 16bits.
/// The lower 16 bits are the value in the block
/// The upper 16 bits are the block index

View File

@@ -1,3 +1,4 @@
use std::fmt;
use std::fmt::Debug;
use std::net::Ipv6Addr;
@@ -21,6 +22,22 @@ pub enum ColumnType {
DateTime = 7u8,
}
impl fmt::Display for ColumnType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let short_str = match self {
ColumnType::I64 => "i64",
ColumnType::U64 => "u64",
ColumnType::F64 => "f64",
ColumnType::Bytes => "bytes",
ColumnType::Str => "str",
ColumnType::Bool => "bool",
ColumnType::IpAddr => "ip",
ColumnType::DateTime => "datetime",
};
write!(f, "{}", short_str)
}
}
// The order needs to match _exactly_ the order in the enum
const COLUMN_TYPES: [ColumnType; 8] = [
ColumnType::I64,

View File

@@ -1,4 +1,4 @@
use std::{io, mem};
use std::{fmt, io, mem};
use common::file_slice::FileSlice;
use common::BinarySerializable;
@@ -21,6 +21,32 @@ pub struct ColumnarReader {
num_rows: RowId,
}
impl fmt::Debug for ColumnarReader {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let num_rows = self.num_rows();
let columns = self.list_columns().unwrap();
let num_cols = columns.len();
let mut debug_struct = f.debug_struct("Columnar");
debug_struct
.field("num_rows", &num_rows)
.field("num_cols", &num_cols);
for (col_name, dynamic_column_handle) in columns.into_iter().take(5) {
let col = dynamic_column_handle.open().unwrap();
if col.num_values() > 10 {
debug_struct.field(&col_name, &"..");
} else {
debug_struct.field(&col_name, &col);
}
}
if num_cols > 5 {
debug_struct.finish_non_exhaustive()?;
} else {
debug_struct.finish()?;
}
Ok(())
}
}
/// Functions by both the async/sync code listing columns.
/// It takes a stream from the column sstable and return the list of
/// `DynamicColumn` available in it.

View File

@@ -1,6 +1,6 @@
use std::io;
use std::net::Ipv6Addr;
use std::sync::Arc;
use std::{fmt, io};
use common::file_slice::FileSlice;
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
@@ -8,7 +8,7 @@ use common::{ByteCount, DateTime, HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
use crate::columnar::ColumnType;
use crate::{Cardinality, NumericalType};
use crate::{Cardinality, ColumnIndex, NumericalType};
#[derive(Clone)]
pub enum DynamicColumn {
@@ -22,19 +22,54 @@ pub enum DynamicColumn {
Str(StrColumn),
}
impl DynamicColumn {
pub fn get_cardinality(&self) -> Cardinality {
impl fmt::Debug for DynamicColumn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[{} {} |", self.get_cardinality(), self.column_type())?;
match self {
DynamicColumn::Bool(c) => c.get_cardinality(),
DynamicColumn::I64(c) => c.get_cardinality(),
DynamicColumn::U64(c) => c.get_cardinality(),
DynamicColumn::F64(c) => c.get_cardinality(),
DynamicColumn::IpAddr(c) => c.get_cardinality(),
DynamicColumn::DateTime(c) => c.get_cardinality(),
DynamicColumn::Bytes(c) => c.ords().get_cardinality(),
DynamicColumn::Str(c) => c.ords().get_cardinality(),
DynamicColumn::Bool(col) => write!(f, " {:?}", col)?,
DynamicColumn::I64(col) => write!(f, " {:?}", col)?,
DynamicColumn::U64(col) => write!(f, " {:?}", col)?,
DynamicColumn::F64(col) => write!(f, "{:?}", col)?,
DynamicColumn::IpAddr(col) => write!(f, "{:?}", col)?,
DynamicColumn::DateTime(col) => write!(f, "{:?}", col)?,
DynamicColumn::Bytes(col) => write!(f, "{:?}", col)?,
DynamicColumn::Str(col) => write!(f, "{:?}", col)?,
}
write!(f, "]")
}
}
impl DynamicColumn {
pub fn column_index(&self) -> &ColumnIndex {
match self {
DynamicColumn::Bool(c) => &c.index,
DynamicColumn::I64(c) => &c.index,
DynamicColumn::U64(c) => &c.index,
DynamicColumn::F64(c) => &c.index,
DynamicColumn::IpAddr(c) => &c.index,
DynamicColumn::DateTime(c) => &c.index,
DynamicColumn::Bytes(c) => &c.ords().index,
DynamicColumn::Str(c) => &c.ords().index,
}
}
pub fn get_cardinality(&self) -> Cardinality {
self.column_index().get_cardinality()
}
pub fn num_values(&self) -> u32 {
match self {
DynamicColumn::Bool(c) => c.values.num_vals(),
DynamicColumn::I64(c) => c.values.num_vals(),
DynamicColumn::U64(c) => c.values.num_vals(),
DynamicColumn::F64(c) => c.values.num_vals(),
DynamicColumn::IpAddr(c) => c.values.num_vals(),
DynamicColumn::DateTime(c) => c.values.num_vals(),
DynamicColumn::Bytes(c) => c.ords().values.num_vals(),
DynamicColumn::Str(c) => c.ords().values.num_vals(),
}
}
pub fn column_type(&self) -> ColumnType {
match self {
DynamicColumn::Bool(_) => ColumnType::Bool,

View File

@@ -7,6 +7,7 @@ extern crate more_asserts;
#[cfg(all(test, feature = "unstable"))]
extern crate test;
use std::fmt::Display;
use std::io;
mod block_accessor;
@@ -75,6 +76,17 @@ pub enum Cardinality {
Multivalued = 2,
}
impl Display for Cardinality {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let short_str = match self {
Cardinality::Full => "full",
Cardinality::Optional => "opt",
Cardinality::Multivalued => "mult",
};
write!(f, "{short_str}")
}
}
impl Cardinality {
pub fn is_optional(&self) -> bool {
matches!(self, Cardinality::Optional)
@@ -85,7 +97,6 @@ impl Cardinality {
pub(crate) fn to_code(self) -> u8 {
self as u8
}
pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidData> {
match code {
0 => Ok(Cardinality::Full),