diff --git a/columnar/compat_tests_data/v1.columnar b/columnar/compat_tests_data/v1.columnar new file mode 100644 index 000000000..512b4394e Binary files /dev/null and b/columnar/compat_tests_data/v1.columnar differ diff --git a/columnar/src/columnar/format_version.rs b/columnar/src/columnar/format_version.rs index 5254a43b1..a46913be8 100644 --- a/columnar/src/columnar/format_version.rs +++ b/columnar/src/columnar/format_version.rs @@ -1,3 +1,6 @@ +use core::fmt; +use std::fmt::{Display, Formatter}; + use crate::InvalidData; pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::(); @@ -20,12 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result fmt::Result { + match self { + Version::V1 => write!(f, "v1"), + } + } +} + impl Version { fn to_bytes(self) -> [u8; 4] { (self as u32).to_le_bytes() diff --git a/columnar/src/columnar/mod.rs b/columnar/src/columnar/mod.rs index 12a7084e7..bb9e485be 100644 --- a/columnar/src/columnar/mod.rs +++ b/columnar/src/columnar/mod.rs @@ -5,6 +5,7 @@ mod reader; mod writer; pub use column_type::{ColumnType, HasAssociatedColumnType}; +pub use format_version::{Version, CURRENT_VERSION}; #[cfg(test)] pub(crate) use merge::ColumnTypeCategory; pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder}; diff --git a/columnar/src/compat_tests.rs b/columnar/src/compat_tests.rs new file mode 100644 index 000000000..52950fe15 --- /dev/null +++ b/columnar/src/compat_tests.rs @@ -0,0 +1,85 @@ +use std::path::PathBuf; + +use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION}; + +const NUM_DOCS: u32 = u16::MAX as u32; + +fn generate_columnar(num_docs: u32) -> Vec { + use crate::ColumnarWriter; + + let mut columnar_writer = ColumnarWriter::default(); + + for i in 0..num_docs { + if i % 100 == 0 { + columnar_writer.record_numerical(i, "sparse", i as u64); + } + if i % 2 == 0 { + columnar_writer.record_numerical(i, "dense", i as u64); + } + columnar_writer.record_numerical(i, "full", i as u64); + columnar_writer.record_numerical(i, "multi", i as u64); + columnar_writer.record_numerical(i, "multi", i as u64); + } + + let mut wrt: Vec = Vec::new(); + columnar_writer.serialize(num_docs, None, &mut wrt).unwrap(); + + wrt +} + +#[test] +/// Writes a columnar for the CURRENT_VERSION to disk. +fn create_format() { + let version = CURRENT_VERSION.to_string(); + let file_path = path_for_version(&version); + if PathBuf::from(file_path.clone()).exists() { + return; + } + let columnar = generate_columnar(NUM_DOCS); + std::fs::write(file_path, columnar).unwrap(); +} + +fn path_for_version(version: &str) -> String { + format!("./compat_tests_data/{}.columnar", version) +} + +#[test] +fn test_format_v1() { + let path = path_for_version("v1"); + test_format(&path); +} + +fn test_format(path: &str) { + let file_content = std::fs::read(path).unwrap(); + let reader = ColumnarReader::open(file_content).unwrap(); + + let column = open_column(&reader, "full"); + assert_eq!(column.first(0).unwrap(), 0); + assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1); + + let column = open_column(&reader, "multi"); + assert_eq!(column.first(0).unwrap(), 0); + assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1); + + let column = open_column(&reader, "sparse"); + assert_eq!(column.first(0).unwrap(), 0); + assert_eq!(column.first(NUM_DOCS - 1), None); + assert_eq!(column.first(65000), Some(65000)); + + let column = open_column(&reader, "dense"); + assert_eq!(column.first(0).unwrap(), 0); + assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1); + assert_eq!(column.first(NUM_DOCS - 2), None); +} + +fn open_column(reader: &ColumnarReader, name: &str) -> Column { + let column = reader.read_columns(name).unwrap()[0] + .open() + .unwrap() + .coerce_numerical(crate::NumericalType::U64) + .unwrap(); + let DynamicColumn::U64(column) = column else { + panic!(); + }; + column +} diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 7236ea5bc..2b7a60b3a 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -48,7 +48,7 @@ pub use column_values::{ }; pub use columnar::{ merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType, - MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, + MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION, }; use sstable::VoidSSTable; pub use value::{NumericalType, NumericalValue}; @@ -131,3 +131,6 @@ impl Cardinality { #[cfg(test)] mod tests; + +#[cfg(test)] +mod compat_tests;