add columnar format compatibility tests (#2433)

* add columnar format compatibility tests

* always try to write current format
This commit is contained in:
PSeitz
2024-06-13 16:04:52 +09:00
committed by GitHub
parent e90e7a25ae
commit a141c3ec59
5 changed files with 103 additions and 1 deletions

Binary file not shown.

View File

@@ -1,3 +1,6 @@
use core::fmt;
use std::fmt::{Display, Formatter};
use crate::InvalidData;
/// Length in bytes of the version footer: the length of `MAGIC_BYTES` plus the size of one `u32`.
// NOTE(review): the `u32` is presumably the serialized format version — confirm against the writer.
pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();
@@ -20,12 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result<Vers
Version::try_from_bytes(footer_bytes[0..4].try_into().unwrap())
}
/// The format version designated as current; presently [`Version::V1`].
pub const CURRENT_VERSION: Version = Version::V1;
/// Known versions of the format, represented as a `u32` discriminant.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(u32)]
pub enum Version {
    /// First version; discriminant `1`.
    V1 = 1u32,
}

/// Renders the version as its short lowercase label (`"v1"` for [`Version::V1`]).
impl Display for Version {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Pick the label first, then emit it verbatim (width/padding flags are not applied).
        let label = match *self {
            Version::V1 => "v1",
        };
        f.write_str(label)
    }
}
impl Version {
fn to_bytes(self) -> [u8; 4] {
(self as u32).to_le_bytes()

View File

@@ -5,6 +5,7 @@ mod reader;
mod writer;
pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use format_version::{Version, CURRENT_VERSION};
#[cfg(test)]
pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};

View File

@@ -0,0 +1,85 @@
use std::path::PathBuf;
use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION};
/// Number of documents used for the compatibility fixture: `u16::MAX` (65_535).
const NUM_DOCS: u32 = u16::MAX as u32;
/// Builds a serialized columnar with `num_docs` documents and returns its bytes.
///
/// Four numerical columns are recorded, each storing the document id as a `u64`:
/// - `"sparse"`: only documents whose id is a multiple of 100,
/// - `"dense"`: only documents with an even id,
/// - `"full"`: every document,
/// - `"multi"`: every document, with the value recorded twice.
///
/// Panics if serialization fails.
fn generate_columnar(num_docs: u32) -> Vec<u8> {
    use crate::ColumnarWriter;

    let mut writer = ColumnarWriter::default();
    for doc in 0..num_docs {
        let value = doc as u64;
        if doc % 100 == 0 {
            writer.record_numerical(doc, "sparse", value);
        }
        if doc % 2 == 0 {
            writer.record_numerical(doc, "dense", value);
        }
        writer.record_numerical(doc, "full", value);
        // Two identical records per document for the "multi" column.
        for _ in 0..2 {
            writer.record_numerical(doc, "multi", value);
        }
    }

    let mut serialized = Vec::<u8>::new();
    writer.serialize(num_docs, None, &mut serialized).unwrap();
    serialized
}
/// Writes a columnar for the CURRENT_VERSION to disk.
///
/// The target path comes from `path_for_version`. If a file already exists there it is left
/// untouched; otherwise a columnar with `NUM_DOCS` documents is generated and written.
///
/// Panics if the file cannot be written.
#[test]
fn create_format() {
    let version = CURRENT_VERSION.to_string();
    // Convert to a `PathBuf` once and reuse it for both the existence check and the write,
    // instead of cloning the string just to probe the filesystem.
    let file_path = PathBuf::from(path_for_version(&version));
    if file_path.exists() {
        return;
    }
    let columnar = generate_columnar(NUM_DOCS);
    if let Err(err) = std::fs::write(&file_path, columnar) {
        panic!("failed to write {}: {}", file_path.display(), err);
    }
}
/// Returns the relative path of the fixture file for `version`:
/// `./compat_tests_data/<version>.columnar`.
fn path_for_version(version: &str) -> String {
    let mut path = String::from("./compat_tests_data/");
    path.push_str(version);
    path.push_str(".columnar");
    path
}
/// Checks that the stored `v1` fixture file can still be read.
#[test]
fn test_format_v1() {
    test_format(&path_for_version("v1"));
}
/// Reads the columnar file at `path` and spot-checks the first value of selected documents
/// in the `"full"`, `"multi"`, `"sparse"` and `"dense"` columns.
///
/// Panics if the file cannot be read or opened, or if any check fails.
fn test_format(path: &str) {
    let bytes = std::fs::read(path).unwrap();
    let reader = ColumnarReader::open(bytes).unwrap();

    // Highest document id in the fixture and the value expected to be stored for it.
    let last_doc = NUM_DOCS - 1;
    let last_value = NUM_DOCS as u64 - 1;

    let full = open_column(&reader, "full");
    assert_eq!(full.first(0).unwrap(), 0);
    assert_eq!(full.first(last_doc).unwrap(), last_value);

    let multi = open_column(&reader, "multi");
    assert_eq!(multi.first(0).unwrap(), 0);
    assert_eq!(multi.first(last_doc).unwrap(), last_value);

    // The last document is expected to have no value here, while document 65000 has one.
    let sparse = open_column(&reader, "sparse");
    assert_eq!(sparse.first(0).unwrap(), 0);
    assert_eq!(sparse.first(last_doc), None);
    assert_eq!(sparse.first(65000), Some(65000));

    // The last document has a value; the one right before it does not.
    let dense = open_column(&reader, "dense");
    assert_eq!(dense.first(0).unwrap(), 0);
    assert_eq!(dense.first(last_doc).unwrap(), last_value);
    assert_eq!(dense.first(NUM_DOCS - 2), None);
}
/// Opens the first column registered under `name` and returns it as a `Column<u64>`.
///
/// Panics if the column cannot be read or opened, if no column exists under `name`,
/// if it cannot be coerced to `u64`, or if the coerced column is not the `U64` variant.
fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
    let handles = reader.read_columns(name).unwrap();
    // Only the first handle is used; indexing panics when there is none.
    let dynamic_column = handles[0].open().unwrap();
    let coerced = dynamic_column
        .coerce_numerical(crate::NumericalType::U64)
        .unwrap();
    match coerced {
        DynamicColumn::U64(column) => column,
        _ => panic!(),
    }
}

View File

@@ -48,7 +48,7 @@ pub use column_values::{
};
pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
};
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};
@@ -131,3 +131,6 @@ impl Cardinality {
#[cfg(test)]
mod tests;
#[cfg(test)]
mod compat_tests;