mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-09 02:22:54 +00:00
add columnar format compatibility tests (#2433)
* add columnar format compatibility tests * always try to write current format
This commit is contained in:
BIN
columnar/compat_tests_data/v1.columnar
Normal file
BIN
columnar/compat_tests_data/v1.columnar
Normal file
Binary file not shown.
@@ -1,3 +1,6 @@
|
||||
use core::fmt;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use crate::InvalidData;
|
||||
|
||||
/// Length in bytes of the version footer: the length of `MAGIC_BYTES` plus the size of one `u32`.
pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();
|
||||
@@ -20,12 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result<Vers
|
||||
Version::try_from_bytes(footer_bytes[0..4].try_into().unwrap())
|
||||
}
|
||||
|
||||
/// The current columnar format version, presently [`Version::V1`].
pub const CURRENT_VERSION: Version = Version::V1;
|
||||
|
||||
/// Version tag of the columnar format.
///
/// The enum is `#[repr(u32)]`, so each variant's discriminant is the `u32`
/// obtained by casting the variant with `as u32`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(u32)]
pub enum Version {
    /// First format version; its discriminant is `1`.
    V1 = 1,
}
|
||||
|
||||
impl Display for Version {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Version::V1 => write!(f, "v1"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Version {
|
||||
fn to_bytes(self) -> [u8; 4] {
|
||||
(self as u32).to_le_bytes()
|
||||
|
||||
@@ -5,6 +5,7 @@ mod reader;
|
||||
mod writer;
|
||||
|
||||
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
||||
pub use format_version::{Version, CURRENT_VERSION};
|
||||
#[cfg(test)]
|
||||
pub(crate) use merge::ColumnTypeCategory;
|
||||
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||
|
||||
85
columnar/src/compat_tests.rs
Normal file
85
columnar/src/compat_tests.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION};
|
||||
|
||||
/// Number of documents used by the compatibility tests: `u16::MAX` (65535).
const NUM_DOCS: u32 = u16::MAX as u32;
|
||||
|
||||
fn generate_columnar(num_docs: u32) -> Vec<u8> {
|
||||
use crate::ColumnarWriter;
|
||||
|
||||
let mut columnar_writer = ColumnarWriter::default();
|
||||
|
||||
for i in 0..num_docs {
|
||||
if i % 100 == 0 {
|
||||
columnar_writer.record_numerical(i, "sparse", i as u64);
|
||||
}
|
||||
if i % 2 == 0 {
|
||||
columnar_writer.record_numerical(i, "dense", i as u64);
|
||||
}
|
||||
columnar_writer.record_numerical(i, "full", i as u64);
|
||||
columnar_writer.record_numerical(i, "multi", i as u64);
|
||||
columnar_writer.record_numerical(i, "multi", i as u64);
|
||||
}
|
||||
|
||||
let mut wrt: Vec<u8> = Vec::new();
|
||||
columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();
|
||||
|
||||
wrt
|
||||
}
|
||||
|
||||
/// Writes a columnar for the CURRENT_VERSION to disk.
///
/// Nothing is written when the file for that version already exists.
#[test]
fn create_format() {
    let target = PathBuf::from(path_for_version(&CURRENT_VERSION.to_string()));
    if !target.exists() {
        let bytes = generate_columnar(NUM_DOCS);
        std::fs::write(target, bytes).unwrap();
    }
}
|
||||
|
||||
/// Returns the relative path of the columnar file for `version`,
/// i.e. `./compat_tests_data/<version>.columnar`.
fn path_for_version(version: &str) -> String {
    const DIR_PREFIX: &str = "./compat_tests_data/";
    const EXTENSION: &str = ".columnar";
    [DIR_PREFIX, version, EXTENSION].concat()
}
|
||||
|
||||
/// Runs the format checks against the stored `v1` columnar file.
#[test]
fn test_format_v1() {
    test_format(&path_for_version("v1"));
}
|
||||
|
||||
fn test_format(path: &str) {
|
||||
let file_content = std::fs::read(path).unwrap();
|
||||
let reader = ColumnarReader::open(file_content).unwrap();
|
||||
|
||||
let column = open_column(&reader, "full");
|
||||
assert_eq!(column.first(0).unwrap(), 0);
|
||||
assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1);
|
||||
|
||||
let column = open_column(&reader, "multi");
|
||||
assert_eq!(column.first(0).unwrap(), 0);
|
||||
assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1);
|
||||
|
||||
let column = open_column(&reader, "sparse");
|
||||
assert_eq!(column.first(0).unwrap(), 0);
|
||||
assert_eq!(column.first(NUM_DOCS - 1), None);
|
||||
assert_eq!(column.first(65000), Some(65000));
|
||||
|
||||
let column = open_column(&reader, "dense");
|
||||
assert_eq!(column.first(0).unwrap(), 0);
|
||||
assert_eq!(column.first(NUM_DOCS - 1).unwrap(), NUM_DOCS as u64 - 1);
|
||||
assert_eq!(column.first(NUM_DOCS - 2), None);
|
||||
}
|
||||
|
||||
fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
|
||||
let column = reader.read_columns(name).unwrap()[0]
|
||||
.open()
|
||||
.unwrap()
|
||||
.coerce_numerical(crate::NumericalType::U64)
|
||||
.unwrap();
|
||||
let DynamicColumn::U64(column) = column else {
|
||||
panic!();
|
||||
};
|
||||
column
|
||||
}
|
||||
@@ -48,7 +48,7 @@ pub use column_values::{
|
||||
};
|
||||
pub use columnar::{
|
||||
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
|
||||
};
|
||||
use sstable::VoidSSTable;
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
@@ -131,3 +131,6 @@ impl Cardinality {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
#[cfg(test)]
|
||||
mod compat_tests;
|
||||
|
||||
Reference in New Issue
Block a user