add compat tests (#2485)

This commit is contained in:
PSeitz
2024-09-04 18:26:57 +08:00
committed by GitHub
parent dc5d31c116
commit a206c3ccd3
14 changed files with 150 additions and 7 deletions

80
src/compat_tests.rs Normal file
View File

@@ -0,0 +1,80 @@
use std::path::PathBuf;
use schema::*;
use crate::*;
fn create_index(path: &str) {
let mut schema_builder = Schema::builder();
let label = schema_builder.add_text_field("label", TEXT | STORED);
let date = schema_builder.add_date_field("date", INDEXED | STORED);
let schema = schema_builder.build();
std::fs::create_dir_all(path).unwrap();
let index = Index::create_in_dir(path, schema).unwrap();
let mut index_writer = index.writer_with_num_threads(1, 20_000_000).unwrap();
index_writer
.add_document(doc!(label => "dateformat", date => DateTime::from_timestamp_nanos(123456)))
.unwrap();
index_writer.commit().unwrap();
}
#[test]
/// Writes an Index for the current INDEX_FORMAT_VERSION to disk.
///
/// The target directory comes from `path_for_version`. If that path already
/// exists nothing is written, so an index that is already on disk is never
/// overwritten.
fn create_format() {
    let version = INDEX_FORMAT_VERSION.to_string();
    let file_path = path_for_version(&version);
    // Borrow the path string for the existence check instead of cloning it.
    if !PathBuf::from(&file_path).exists() {
        create_index(&file_path);
    }
}
/// Returns the relative directory path of the compat-test index for `version`.
///
/// The result has the form `./tests/compat_tests_data/index_v<version>/`,
/// including the trailing slash.
fn path_for_version(version: &str) -> String {
    format!("./tests/compat_tests_data/index_v{version}/")
}
/// Opens the index stored at the version "6" compat path and checks its date value.
///
/// Not compiled with the `quickwit` feature flag, because quickwit uses a
/// different dictionary type.
#[test]
#[cfg(not(feature = "quickwit"))]
fn test_format_6() {
    let v6_index = Index::open_in_dir(path_for_version("6")).expect("Failed to open index");
    // dates are truncated to Microseconds in v6
    assert_date_time_precision(&v6_index, DateTimePrecision::Microseconds);
}
/// Asserts that the date stored in `index` equals the reference timestamp
/// truncated to `precision`.
///
/// The helper searches the `label` field for `"dateformat"`, expects exactly one
/// hit, loads that document and compares the first value of its `date` field with
/// `DateTime::from_timestamp_nanos(123456).truncate(precision)`.
///
/// Panics if any step fails (reader creation, field lookup, query parsing, search,
/// document retrieval) or if the date does not match.
#[cfg(not(feature = "quickwit"))]
fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
    use collector::TopDocs;

    let reader = index.reader().expect("Failed to create reader");
    let searcher = reader.searcher();
    let schema = index.schema();

    // Find the single document through its label.
    let label = schema.get_field("label").expect("Field 'label' not found");
    let parser = query::QueryParser::for_index(index, vec![label]);
    let label_query = parser
        .parse_query("dateformat")
        .expect("Failed to parse query");
    let hits = searcher
        .search(&label_query, &TopDocs::with_limit(1))
        .expect("Search failed");
    assert_eq!(hits.len(), 1, "Expected 1 search result");

    // Each hit is a tuple; element 1 is the address passed to `Searcher::doc`.
    let address = hits[0].1;
    let stored_doc: TantivyDocument = searcher
        .doc(address)
        .expect("Failed to retrieve document");

    let date = schema.get_field("date").expect("Field 'date' not found");
    let actual = stored_doc
        .get_first(date)
        .expect("Date field not found in document")
        .as_datetime()
        .unwrap();

    let expected = DateTime::from_timestamp_nanos(123456).truncate(precision);
    assert_eq!(actual, expected);
}

View File

@@ -202,12 +202,15 @@ pub mod space_usage;
pub mod store;
pub mod termdict;
mod docset;
mod reader;
#[cfg(test)]
mod compat_tests;
pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
pub mod snippet;
mod docset;
use std::fmt;
pub use census::{Inventory, TrackedObject};
@@ -229,9 +232,9 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
pub use crate::schema::{Document, TantivyDocument, Term};
/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 6;
pub const INDEX_FORMAT_VERSION: u32 = 6;
/// Oldest index format version this tantivy version can read.
const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
/// Structure version for the index.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]

View File

@@ -471,7 +471,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>(
}
#[cfg(test)]
pub mod tests {
mod tests {
use std::ops::{Bound, RangeInclusive};
use common::bounds::BoundsRange;

View File

@@ -47,6 +47,7 @@ use self::termdict::{
pub use self::termdict::{TermMerger, TermStreamer};
use crate::postings::TermInfo;
#[derive(Debug, Eq, PartialEq)]
#[repr(u32)]
#[allow(dead_code)]
enum DictionaryType {
@@ -54,6 +55,18 @@ enum DictionaryType {
SSTable = 2,
}
impl TryFrom<u32> for DictionaryType {
type Error = &'static str;
fn try_from(value: u32) -> Result<Self, Self::Error> {
match value {
1 => Ok(DictionaryType::Fst),
2 => Ok(DictionaryType::SSTable),
_ => Err("Invalid value for DictionaryType"),
}
}
}
#[cfg(not(feature = "quickwit"))]
const CURRENT_TYPE: DictionaryType = DictionaryType::Fst;
@@ -70,13 +83,19 @@ impl TermDictionary {
let (main_slice, dict_type) = file.split_from_end(4);
let mut dict_type = dict_type.read_bytes()?;
let dict_type = u32::deserialize(&mut dict_type)?;
let dict_type = DictionaryType::try_from(dict_type).map_err(|_| {
io::Error::new(
io::ErrorKind::Other,
format!("Unsuported dictionary type, found {dict_type}"),
)
})?;
if dict_type != CURRENT_TYPE as u32 {
if dict_type != CURRENT_TYPE {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Unsuported dictionary type, expected {}, found {dict_type}",
CURRENT_TYPE as u32,
"Unsuported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
{dict_type:?}",
),
));
}

View File

@@ -0,0 +1 @@
["00000000000000000000000000000000.store","00000000000000000000000000000000.fast","00000000000000000000000000000000.fieldnorm","00000000000000000000000000000000.term","00000000000000000000000000000000.idx","meta.json","00000000000000000000000000000000.pos"]

View File

@@ -0,0 +1,40 @@
{
"index_settings": {
"docstore_compression": "lz4",
"docstore_blocksize": 16384
},
"segments": [
{
"segment_id": "00000000-0000-0000-0000-000000000000",
"max_doc": 1,
"deletes": null
}
],
"schema": [
{
"name": "label",
"type": "text",
"options": {
"indexing": {
"record": "position",
"fieldnorms": true,
"tokenizer": "default"
},
"stored": true,
"fast": false
}
},
{
"name": "date",
"type": "date",
"options": {
"indexed": true,
"fieldnorms": true,
"fast": false,
"stored": true,
"precision": "seconds"
}
}
],
"opstamp": 2
}