mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 17:10:48 +00:00
Adding Debug/Display impl. Refining the ColumnIndex::get_cardinality
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use std::io;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io};
|
||||
|
||||
use sstable::{Dictionary, VoidSSTable};
|
||||
|
||||
@@ -21,6 +21,14 @@ pub struct BytesColumn {
|
||||
pub(crate) term_ord_column: Column<u64>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for BytesColumn {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("BytesColumn")
|
||||
.field("term_ord_column", &self.term_ord_column)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesColumn {
|
||||
/// Fills the given `output` buffer with the term associated to the ordinal `ord`.
|
||||
///
|
||||
@@ -56,6 +64,12 @@ impl BytesColumn {
|
||||
#[derive(Clone)]
|
||||
pub struct StrColumn(BytesColumn);
|
||||
|
||||
impl fmt::Debug for StrColumn {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:?}", self.term_ord_column)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StrColumn> for BytesColumn {
|
||||
fn from(str_column: StrColumn) -> BytesColumn {
|
||||
str_column.0
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
mod dictionary_encoded;
|
||||
mod serialize;
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::fmt::{self, Debug};
|
||||
use std::io::Write;
|
||||
use std::ops::{Deref, Range, RangeInclusive};
|
||||
use std::sync::Arc;
|
||||
@@ -24,6 +24,16 @@ pub struct Column<T = u64> {
|
||||
pub values: Arc<dyn ColumnValues<T>>,
|
||||
}
|
||||
|
||||
impl<T: Debug + PartialOrd + Send + Sync + Copy + 'static> Debug for Column<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let num_docs = self.num_docs();
|
||||
let entries = (0..num_docs)
|
||||
.map(|i| (i, self.values_for_doc(i).collect::<Vec<_>>()))
|
||||
.filter(|(_, vals)| !vals.is_empty());
|
||||
f.debug_map().entries(entries).finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialOrd + Default> Column<T> {
|
||||
pub fn build_empty_column(num_docs: u32) -> Column<T> {
|
||||
Column {
|
||||
|
||||
@@ -12,7 +12,7 @@ pub use serialize::{open_column_index, serialize_column_index, SerializableColum
|
||||
use crate::column_index::multivalued_index::MultiValueIndex;
|
||||
use crate::{Cardinality, DocId, RowId};
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ColumnIndex {
|
||||
Empty {
|
||||
num_docs: u32,
|
||||
@@ -37,11 +37,15 @@ impl From<MultiValueIndex> for ColumnIndex {
|
||||
}
|
||||
|
||||
impl ColumnIndex {
|
||||
/// Returns the cardinality of the column index.
|
||||
///
|
||||
/// By convention, if the column contains no docs, we consider that it is
|
||||
/// full.
|
||||
#[inline]
|
||||
pub fn get_cardinality(&self) -> Cardinality {
|
||||
match self {
|
||||
ColumnIndex::Empty { num_docs: 0 } | ColumnIndex::Full => Cardinality::Full,
|
||||
ColumnIndex::Empty { .. } => Cardinality::Optional,
|
||||
ColumnIndex::Full => Cardinality::Full,
|
||||
ColumnIndex::Optional(_) => Cardinality::Optional,
|
||||
ColumnIndex::Multivalued(_) => Cardinality::Multivalued,
|
||||
}
|
||||
@@ -152,3 +156,21 @@ impl ColumnIndex {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::{Cardinality, ColumnIndex};

    /// An `Empty` index over zero docs and a `Full` index both report `Full`;
    /// an `Empty` index that still covers a doc reports `Optional`.
    #[test]
    fn test_column_index_get_cardinality() {
        let cases = [
            (ColumnIndex::Empty { num_docs: 0 }, Cardinality::Full),
            (ColumnIndex::Full, Cardinality::Full),
            (ColumnIndex::Empty { num_docs: 1 }, Cardinality::Optional),
        ];
        for (column_index, expected) in cases {
            assert_eq!(column_index.get_cardinality(), expected);
        }
    }
}
|
||||
|
||||
@@ -35,6 +35,14 @@ pub struct MultiValueIndex {
|
||||
pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for MultiValueIndex {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
f.debug_struct("MultiValuedIndex")
|
||||
.field("num_rows", &self.start_index_column.num_vals())
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Arc<dyn ColumnValues<RowId>>> for MultiValueIndex {
|
||||
fn from(start_index_column: Arc<dyn ColumnValues<RowId>>) -> Self {
|
||||
MultiValueIndex { start_index_column }
|
||||
|
||||
@@ -88,6 +88,15 @@ pub struct OptionalIndex {
|
||||
block_metas: Arc<[BlockMeta]>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for OptionalIndex {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("OptionalIndex")
|
||||
.field("num_rows", &self.num_rows)
|
||||
.field("num_non_null_rows", &self.num_non_null_rows)
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a value address into lower and upper 16bits.
|
||||
/// The lower 16 bits are the value in the block
|
||||
/// The upper 16 bits are the block index
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::fmt;
|
||||
use std::fmt::Debug;
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
@@ -21,6 +22,22 @@ pub enum ColumnType {
|
||||
DateTime = 7u8,
|
||||
}
|
||||
|
||||
impl fmt::Display for ColumnType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let short_str = match self {
|
||||
ColumnType::I64 => "i64",
|
||||
ColumnType::U64 => "u64",
|
||||
ColumnType::F64 => "f64",
|
||||
ColumnType::Bytes => "bytes",
|
||||
ColumnType::Str => "str",
|
||||
ColumnType::Bool => "bool",
|
||||
ColumnType::IpAddr => "ip",
|
||||
ColumnType::DateTime => "datetime",
|
||||
};
|
||||
write!(f, "{}", short_str)
|
||||
}
|
||||
}
|
||||
|
||||
// The order needs to match _exactly_ the order in the enum
|
||||
const COLUMN_TYPES: [ColumnType; 8] = [
|
||||
ColumnType::I64,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::{io, mem};
|
||||
use std::{fmt, io, mem};
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use common::BinarySerializable;
|
||||
@@ -21,6 +21,32 @@ pub struct ColumnarReader {
|
||||
num_rows: RowId,
|
||||
}
|
||||
|
||||
impl fmt::Debug for ColumnarReader {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let num_rows = self.num_rows();
|
||||
let columns = self.list_columns().unwrap();
|
||||
let num_cols = columns.len();
|
||||
let mut debug_struct = f.debug_struct("Columnar");
|
||||
debug_struct
|
||||
.field("num_rows", &num_rows)
|
||||
.field("num_cols", &num_cols);
|
||||
for (col_name, dynamic_column_handle) in columns.into_iter().take(5) {
|
||||
let col = dynamic_column_handle.open().unwrap();
|
||||
if col.num_values() > 10 {
|
||||
debug_struct.field(&col_name, &"..");
|
||||
} else {
|
||||
debug_struct.field(&col_name, &col);
|
||||
}
|
||||
}
|
||||
if num_cols > 5 {
|
||||
debug_struct.finish_non_exhaustive()?;
|
||||
} else {
|
||||
debug_struct.finish()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Function used by both the async and sync code paths to list columns.
|
||||
/// It takes a stream from the column sstable and returns the list of
|
||||
/// `DynamicColumn` available in it.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::io;
|
||||
use std::net::Ipv6Addr;
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, io};
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
||||
@@ -8,7 +8,7 @@ use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
||||
use crate::column::{BytesColumn, Column, StrColumn};
|
||||
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::{Cardinality, NumericalType};
|
||||
use crate::{Cardinality, ColumnIndex, NumericalType};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DynamicColumn {
|
||||
@@ -22,19 +22,54 @@ pub enum DynamicColumn {
|
||||
Str(StrColumn),
|
||||
}
|
||||
|
||||
impl DynamicColumn {
|
||||
pub fn get_cardinality(&self) -> Cardinality {
|
||||
impl fmt::Debug for DynamicColumn {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[{} {} |", self.get_cardinality(), self.column_type())?;
|
||||
match self {
|
||||
DynamicColumn::Bool(c) => c.get_cardinality(),
|
||||
DynamicColumn::I64(c) => c.get_cardinality(),
|
||||
DynamicColumn::U64(c) => c.get_cardinality(),
|
||||
DynamicColumn::F64(c) => c.get_cardinality(),
|
||||
DynamicColumn::IpAddr(c) => c.get_cardinality(),
|
||||
DynamicColumn::DateTime(c) => c.get_cardinality(),
|
||||
DynamicColumn::Bytes(c) => c.ords().get_cardinality(),
|
||||
DynamicColumn::Str(c) => c.ords().get_cardinality(),
|
||||
DynamicColumn::Bool(col) => write!(f, " {:?}", col)?,
|
||||
DynamicColumn::I64(col) => write!(f, " {:?}", col)?,
|
||||
DynamicColumn::U64(col) => write!(f, " {:?}", col)?,
|
||||
DynamicColumn::F64(col) => write!(f, "{:?}", col)?,
|
||||
DynamicColumn::IpAddr(col) => write!(f, "{:?}", col)?,
|
||||
DynamicColumn::DateTime(col) => write!(f, "{:?}", col)?,
|
||||
DynamicColumn::Bytes(col) => write!(f, "{:?}", col)?,
|
||||
DynamicColumn::Str(col) => write!(f, "{:?}", col)?,
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
}
|
||||
|
||||
impl DynamicColumn {
|
||||
pub fn column_index(&self) -> &ColumnIndex {
|
||||
match self {
|
||||
DynamicColumn::Bool(c) => &c.index,
|
||||
DynamicColumn::I64(c) => &c.index,
|
||||
DynamicColumn::U64(c) => &c.index,
|
||||
DynamicColumn::F64(c) => &c.index,
|
||||
DynamicColumn::IpAddr(c) => &c.index,
|
||||
DynamicColumn::DateTime(c) => &c.index,
|
||||
DynamicColumn::Bytes(c) => &c.ords().index,
|
||||
DynamicColumn::Str(c) => &c.ords().index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cardinality reported by this column's index (see `column_index`).
pub fn get_cardinality(&self) -> Cardinality {
    let column_index = self.column_index();
    column_index.get_cardinality()
}
|
||||
|
||||
pub fn num_values(&self) -> u32 {
|
||||
match self {
|
||||
DynamicColumn::Bool(c) => c.values.num_vals(),
|
||||
DynamicColumn::I64(c) => c.values.num_vals(),
|
||||
DynamicColumn::U64(c) => c.values.num_vals(),
|
||||
DynamicColumn::F64(c) => c.values.num_vals(),
|
||||
DynamicColumn::IpAddr(c) => c.values.num_vals(),
|
||||
DynamicColumn::DateTime(c) => c.values.num_vals(),
|
||||
DynamicColumn::Bytes(c) => c.ords().values.num_vals(),
|
||||
DynamicColumn::Str(c) => c.ords().values.num_vals(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column_type(&self) -> ColumnType {
|
||||
match self {
|
||||
DynamicColumn::Bool(_) => ColumnType::Bool,
|
||||
|
||||
@@ -7,6 +7,7 @@ extern crate more_asserts;
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
extern crate test;
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::io;
|
||||
|
||||
mod block_accessor;
|
||||
@@ -75,6 +76,17 @@ pub enum Cardinality {
|
||||
Multivalued = 2,
|
||||
}
|
||||
|
||||
impl Display for Cardinality {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let short_str = match self {
|
||||
Cardinality::Full => "full",
|
||||
Cardinality::Optional => "opt",
|
||||
Cardinality::Multivalued => "mult",
|
||||
};
|
||||
write!(f, "{short_str}")
|
||||
}
|
||||
}
|
||||
|
||||
impl Cardinality {
|
||||
pub fn is_optional(&self) -> bool {
|
||||
matches!(self, Cardinality::Optional)
|
||||
@@ -85,7 +97,6 @@ impl Cardinality {
|
||||
/// Returns this cardinality as a `u8` code, obtained by casting the enum value itself.
pub(crate) fn to_code(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
|
||||
pub(crate) fn try_from_code(code: u8) -> Result<Cardinality, InvalidData> {
|
||||
match code {
|
||||
0 => Ok(Cardinality::Full),
|
||||
|
||||
Reference in New Issue
Block a user