adding change from main

This commit is contained in:
Paul Masurel
2023-02-09 14:18:46 +01:00
parent bd5eea9852
commit b63c6c27bc
3 changed files with 41 additions and 47 deletions

View File

@@ -14,14 +14,14 @@ pub enum ColumnType {
I64 = 0u8,
U64 = 1u8,
F64 = 2u8,
Bytes = 10u8,
Str = 14u8,
Bool = 18u8,
IpAddr = 22u8,
DateTime = 26u8,
Bytes = 3u8,
Str = 4u8,
Bool = 5u8,
IpAddr = 6u8,
DateTime = 7u8,
}
#[cfg(test)]
// The order needs to match _exactly_ the order in the enum
const COLUMN_TYPES: [ColumnType; 8] = [
ColumnType::I64,
ColumnType::U64,
@@ -39,18 +39,7 @@ impl ColumnType {
}
pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
use ColumnType::*;
match code {
0u8 => Ok(I64),
1u8 => Ok(U64),
2u8 => Ok(F64),
10u8 => Ok(Bytes),
14u8 => Ok(Str),
18u8 => Ok(Bool),
22u8 => Ok(IpAddr),
26u8 => Ok(Self::DateTime),
_ => Err(InvalidData),
}
COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
}
}
@@ -178,21 +167,19 @@ impl From<ColumnType> for ColumnTypeCategory {
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use crate::Cardinality;
#[test]
fn test_column_type_to_code() {
let mut column_type_set: HashSet<ColumnType> = HashSet::new();
for code in u8::MIN..=u8::MAX {
if let Ok(column_type) = ColumnType::try_from_code(code) {
assert_eq!(column_type.to_code(), code);
assert!(column_type_set.insert(column_type));
for (code, expected_column_type) in super::COLUMN_TYPES.iter().copied().enumerate() {
if let Ok(column_type) = ColumnType::try_from_code(code as u8) {
assert_eq!(column_type, expected_column_type);
}
}
assert_eq!(column_type_set.len(), super::COLUMN_TYPES.len());
for code in COLUMN_TYPES.len() as u8..=u8::MAX {
assert!(ColumnType::try_from_code(code as u8).is_err());
}
}
#[test]

View File

@@ -210,10 +210,12 @@ impl CompatibleNumericalTypes {
}
impl NumericalColumnWriter {
pub fn column_type_and_cardinality(&self, num_docs: RowId) -> (NumericalType, Cardinality) {
let numerical_type = self.compatible_numerical_types.to_numerical_type();
let cardinality = self.column_writer.get_cardinality(num_docs);
(numerical_type, cardinality)
pub fn numerical_type(&self) -> NumericalType {
self.compatible_numerical_types.to_numerical_type()
}
pub fn cardinality(&self, num_docs: RowId) -> Cardinality {
self.column_writer.get_cardinality(num_docs)
}
pub fn record_numerical_value(

View File

@@ -327,35 +327,40 @@ impl ColumnarWriter {
wrt: &mut dyn io::Write,
) -> io::Result<()> {
let mut serializer = ColumnarSerializer::new(wrt);
let mut columns: Vec<(&[u8], ColumnTypeCategory, Addr)> = self
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
.numerical_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Numerical, addr))
.map(|(column_name, addr, _)| {
let numerical_column_writer: NumericalColumnWriter =
self.numerical_field_hash_map.read(addr);
let column_type = numerical_column_writer.numerical_type().into();
(column_name, column_type, addr)
})
.collect();
columns.extend(
self.bytes_field_hash_map
.iter()
.map(|(term, addr, _)| (term, ColumnTypeCategory::Bytes, addr)),
.map(|(term, addr, _)| (term, ColumnType::Bytes, addr)),
);
columns.extend(
self.str_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Str, addr)),
.map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)),
);
columns.extend(
self.bool_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Bool, addr)),
.map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)),
);
columns.extend(
self.ip_addr_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::IpAddr, addr)),
.map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)),
);
columns.extend(
self.datetime_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::DateTime, addr)),
.map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)),
);
columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
@@ -363,11 +368,11 @@ impl ColumnarWriter {
let mut symbol_byte_buffer: Vec<u8> = Vec::new();
for (column_name, column_type, addr) in columns {
match column_type {
ColumnTypeCategory::Bool => {
ColumnType::Bool => {
let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, ColumnType::Bool);
serializer.serialize_column(column_name, column_type);
serialize_bool_column(
cardinality,
num_docs,
@@ -380,7 +385,7 @@ impl ColumnarWriter {
&mut column_serializer,
)?;
}
ColumnTypeCategory::IpAddr => {
ColumnType::IpAddr => {
let column_writer: ColumnWriter = self.ip_addr_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =
@@ -397,11 +402,11 @@ impl ColumnarWriter {
&mut column_serializer,
)?;
}
ColumnTypeCategory::Bytes | ColumnTypeCategory::Str => {
ColumnType::Bytes | ColumnType::Str => {
let (column_type, str_or_bytes_column_writer): (
ColumnType,
StrOrBytesColumnWriter,
) = if column_type == ColumnTypeCategory::Bytes {
) = if column_type == ColumnType::Bytes {
(ColumnType::Bytes, self.bytes_field_hash_map.read(addr))
} else {
(ColumnType::Str, self.str_field_hash_map.read(addr))
@@ -427,13 +432,13 @@ impl ColumnarWriter {
&mut column_serializer,
)?;
}
ColumnTypeCategory::Numerical => {
ColumnType::F64 | ColumnType::I64 | ColumnType::U64 => {
let numerical_column_writer: NumericalColumnWriter =
self.numerical_field_hash_map.read(addr);
let (numerical_type, cardinality) =
numerical_column_writer.column_type_and_cardinality(num_docs);
let cardinality = numerical_column_writer.cardinality(num_docs);
let mut column_serializer =
serializer.serialize_column(column_name, ColumnType::from(numerical_type));
serializer.serialize_column(column_name, column_type);
let numerical_type = column_type.numerical_type().unwrap();
serialize_numerical_column(
cardinality,
num_docs,
@@ -447,7 +452,7 @@ impl ColumnarWriter {
&mut column_serializer,
)?;
}
ColumnTypeCategory::DateTime => {
ColumnType::DateTime => {
let column_writer: ColumnWriter = self.datetime_field_hash_map.read(addr);
let cardinality = column_writer.get_cardinality(num_docs);
let mut column_serializer =