From b63c6c27bcf0c097f0f2d549512a9837115d5e4c Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 9 Feb 2023 14:18:46 +0100 Subject: [PATCH] adding change from main --- columnar/src/columnar/column_type.rs | 39 +++++++------------ .../src/columnar/writer/column_writers.rs | 10 +++-- columnar/src/columnar/writer/mod.rs | 39 +++++++++++-------- 3 files changed, 41 insertions(+), 47 deletions(-) diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index cd0ae6dc1..001e57833 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -14,14 +14,14 @@ pub enum ColumnType { I64 = 0u8, U64 = 1u8, F64 = 2u8, - Bytes = 10u8, - Str = 14u8, - Bool = 18u8, - IpAddr = 22u8, - DateTime = 26u8, + Bytes = 3u8, + Str = 4u8, + Bool = 5u8, + IpAddr = 6u8, + DateTime = 7u8, } -#[cfg(test)] +// The order needs to match _exactly_ the order in the enum const COLUMN_TYPES: [ColumnType; 8] = [ ColumnType::I64, ColumnType::U64, @@ -39,18 +39,7 @@ impl ColumnType { } pub(crate) fn try_from_code(code: u8) -> Result { - use ColumnType::*; - match code { - 0u8 => Ok(I64), - 1u8 => Ok(U64), - 2u8 => Ok(F64), - 10u8 => Ok(Bytes), - 14u8 => Ok(Str), - 18u8 => Ok(Bool), - 22u8 => Ok(IpAddr), - 26u8 => Ok(Self::DateTime), - _ => Err(InvalidData), - } + COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData) } } @@ -178,21 +167,19 @@ impl From for ColumnTypeCategory { #[cfg(test)] mod tests { - use std::collections::HashSet; - use super::*; use crate::Cardinality; #[test] fn test_column_type_to_code() { - let mut column_type_set: HashSet = HashSet::new(); - for code in u8::MIN..=u8::MAX { - if let Ok(column_type) = ColumnType::try_from_code(code) { - assert_eq!(column_type.to_code(), code); - assert!(column_type_set.insert(column_type)); + for (code, expected_column_type) in super::COLUMN_TYPES.iter().copied().enumerate() { + if let Ok(column_type) = ColumnType::try_from_code(code as u8) { + assert_eq!(column_type, expected_column_type); } } - assert_eq!(column_type_set.len(), super::COLUMN_TYPES.len()); + for code in COLUMN_TYPES.len() as u8..=u8::MAX { + assert!(ColumnType::try_from_code(code as u8).is_err()); + } } #[test] diff --git a/columnar/src/columnar/writer/column_writers.rs b/columnar/src/columnar/writer/column_writers.rs index c07d3bde8..82c20db7d 100644 --- a/columnar/src/columnar/writer/column_writers.rs +++ b/columnar/src/columnar/writer/column_writers.rs @@ -210,10 +210,12 @@ impl CompatibleNumericalTypes { } impl NumericalColumnWriter { - pub fn column_type_and_cardinality(&self, num_docs: RowId) -> (NumericalType, Cardinality) { - let numerical_type = self.compatible_numerical_types.to_numerical_type(); - let cardinality = self.column_writer.get_cardinality(num_docs); - (numerical_type, cardinality) + pub fn numerical_type(&self) -> NumericalType { + self.compatible_numerical_types.to_numerical_type() + } + + pub fn cardinality(&self, num_docs: RowId) -> Cardinality { + self.column_writer.get_cardinality(num_docs) } pub fn record_numerical_value( diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 0e24bb8e1..6057cf6e2 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -327,35 +327,40 @@ impl ColumnarWriter { wrt: &mut dyn io::Write, ) -> io::Result<()> { let mut serializer = ColumnarSerializer::new(wrt); - let mut columns: Vec<(&[u8], ColumnTypeCategory, Addr)> = self + let mut columns: Vec<(&[u8], ColumnType, Addr)> = self .numerical_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Numerical, addr)) + .map(|(column_name, addr, _)| { + let numerical_column_writer: NumericalColumnWriter = + self.numerical_field_hash_map.read(addr); + let column_type = numerical_column_writer.numerical_type().into(); + (column_name, column_type, addr) + }) .collect(); columns.extend( self.bytes_field_hash_map .iter() - .map(|(term, addr, _)| (term, ColumnTypeCategory::Bytes, addr)), + .map(|(term, addr, _)| (term, ColumnType::Bytes, addr)), ); columns.extend( self.str_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Str, addr)), + .map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)), ); columns.extend( self.bool_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Bool, addr)), + .map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)), ); columns.extend( self.ip_addr_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::IpAddr, addr)), + .map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)), ); columns.extend( self.datetime_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::DateTime, addr)), + .map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)), ); columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type)); @@ -363,11 +368,11 @@ impl ColumnarWriter { let mut symbol_byte_buffer: Vec = Vec::new(); for (column_name, column_type, addr) in columns { match column_type { - ColumnTypeCategory::Bool => { + ColumnType::Bool => { let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr); let cardinality = column_writer.get_cardinality(num_docs); let mut column_serializer = - serializer.serialize_column(column_name, ColumnType::Bool); + serializer.serialize_column(column_name, column_type); serialize_bool_column( cardinality, num_docs, @@ -380,7 +385,7 @@ impl ColumnarWriter { &mut column_serializer, )?; } - ColumnTypeCategory::IpAddr => { + ColumnType::IpAddr => { let column_writer: ColumnWriter = self.ip_addr_field_hash_map.read(addr); let cardinality = column_writer.get_cardinality(num_docs); let mut column_serializer = @@ -397,11 +402,11 @@ impl ColumnarWriter { &mut column_serializer, )?; } - ColumnTypeCategory::Bytes | ColumnTypeCategory::Str => { + ColumnType::Bytes | ColumnType::Str => { let (column_type, str_or_bytes_column_writer): ( ColumnType, StrOrBytesColumnWriter, - ) = if column_type == ColumnTypeCategory::Bytes { + ) = if column_type == ColumnType::Bytes { (ColumnType::Bytes, self.bytes_field_hash_map.read(addr)) } else { (ColumnType::Str, self.str_field_hash_map.read(addr)) @@ -427,13 +432,13 @@ impl ColumnarWriter { &mut column_serializer, )?; } - ColumnTypeCategory::Numerical => { + ColumnType::F64 | ColumnType::I64 | ColumnType::U64 => { let numerical_column_writer: NumericalColumnWriter = self.numerical_field_hash_map.read(addr); - let (numerical_type, cardinality) = - numerical_column_writer.column_type_and_cardinality(num_docs); + let cardinality = numerical_column_writer.cardinality(num_docs); let mut column_serializer = - serializer.serialize_column(column_name, ColumnType::from(numerical_type)); + serializer.serialize_column(column_name, column_type); + let numerical_type = column_type.numerical_type().unwrap(); serialize_numerical_column( cardinality, num_docs, @@ -447,7 +452,7 @@ impl ColumnarWriter { &mut column_serializer, )?; } - ColumnTypeCategory::DateTime => { + ColumnType::DateTime => { let column_writer: ColumnWriter = self.datetime_field_hash_map.read(addr); let cardinality = column_writer.get_cardinality(num_docs); let mut column_serializer =