diff --git a/src/compat_tests.rs b/src/compat_tests.rs
new file mode 100644
index 000000000..6e75c5de2
--- /dev/null
+++ b/src/compat_tests.rs
@@ -0,0 +1,89 @@
+use std::path::Path;
+
+use schema::*;
+
+use crate::*;
+
+/// Builds a one-document index (a `label` text field and a `date` field) in the directory
+/// `path`, creating the directory if needed.
+fn create_index(path: &str) {
+    let mut schema_builder = Schema::builder();
+    let label = schema_builder.add_text_field("label", TEXT | STORED);
+    let date = schema_builder.add_date_field("date", INDEXED | STORED);
+    let schema = schema_builder.build();
+    std::fs::create_dir_all(path).unwrap();
+    let index = Index::create_in_dir(path, schema).unwrap();
+    let mut index_writer = index.writer_with_num_threads(1, 20_000_000).unwrap();
+    index_writer
+        .add_document(doc!(label => "dateformat", date => DateTime::from_timestamp_nanos(123456)))
+        .unwrap();
+    index_writer.commit().unwrap();
+}
+
+/// Writes an Index for the current INDEX_FORMAT_VERSION to disk.
+///
+/// Does nothing when the directory for that version already exists.
+#[test]
+fn create_format() {
+    let version = INDEX_FORMAT_VERSION.to_string();
+    let file_path = path_for_version(&version);
+    if Path::new(&file_path).exists() {
+        return;
+    }
+    create_index(&file_path);
+}
+
+/// Returns the directory that holds the test index for the given format `version`.
+fn path_for_version(version: &str) -> String {
+    format!("./tests/compat_tests_data/index_v{}/", version)
+}
+
+/// Opens the index stored for format version 6 and checks the stored date value.
+///
+/// Not compiled with the `quickwit` feature, which uses a different dictionary type.
+#[test]
+#[cfg(not(feature = "quickwit"))]
+fn test_format_6() {
+    let path = path_for_version("6");
+
+    let index = Index::open_in_dir(path).expect("Failed to open index");
+    // dates are truncated to Microseconds in v6
+    assert_date_time_precision(&index, DateTimePrecision::Microseconds);
+}
+
+/// Searches `index` for the `dateformat` document and asserts that its stored `date` equals
+/// the written timestamp truncated to `precision`.
+#[cfg(not(feature = "quickwit"))]
+fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
+    use collector::TopDocs;
+    let reader = index.reader().expect("Failed to create reader");
+    let searcher = reader.searcher();
+
+    let schema = index.schema();
+    let label_field = schema.get_field("label").expect("Field 'label' not found");
+    let query_parser = query::QueryParser::for_index(index, vec![label_field]);
+
+    let query = query_parser
+        .parse_query("dateformat")
+        .expect("Failed to parse query");
+    let top_docs = searcher
+        .search(&query, &TopDocs::with_limit(1))
+        .expect("Search failed");
+
+    assert_eq!(top_docs.len(), 1, "Expected 1 search result");
+
+    let doc_address = top_docs[0].1;
+    let retrieved_doc: TantivyDocument = searcher
+        .doc(doc_address)
+        .expect("Failed to retrieve document");
+
+    let date_field = schema.get_field("date").expect("Field 'date' not found");
+    let date_value = retrieved_doc
+        .get_first(date_field)
+        .expect("Date field not found in document")
+        .as_datetime()
+        .unwrap();
+
+    let expected = DateTime::from_timestamp_nanos(123456).truncate(precision);
+    assert_eq!(date_value, expected);
+}
diff --git a/src/lib.rs b/src/lib.rs
index 6ad2981c1..dd4fc4f52 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -202,12 +202,15 @@ pub mod space_usage;
 pub mod store;
 pub mod termdict;
 
+mod docset;
 mod reader;
 
+#[cfg(test)]
+mod compat_tests;
+
 pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
 pub mod snippet;
 
-mod docset;
 use std::fmt;
 
 pub use census::{Inventory, TrackedObject};
@@ -229,9 +232,9 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
 pub use crate::schema::{Document, TantivyDocument, Term};
 
 /// Index format version.
-const INDEX_FORMAT_VERSION: u32 = 6;
+pub const INDEX_FORMAT_VERSION: u32 = 6;
 /// Oldest index format version this tantivy version can read.
-const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
+pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
 
 /// Structure version for the index.
 #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs
index 0d9aa7bb4..4647587be 100644
--- a/src/query/range_query/range_query_fastfield.rs
+++ b/src/query/range_query/range_query_fastfield.rs
@@ -471,7 +471,7 @@ fn bound_to_value_range(
 }
 
 #[cfg(test)]
-pub mod tests {
+mod tests {
     use std::ops::{Bound, RangeInclusive};
 
     use common::bounds::BoundsRange;
diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs
index cb546c5fc..c5e08233d 100644
--- a/src/termdict/mod.rs
+++ b/src/termdict/mod.rs
@@ -47,6 +47,7 @@ use self::termdict::{
 pub use self::termdict::{TermMerger, TermStreamer};
 use crate::postings::TermInfo;
 
+#[derive(Debug, Eq, PartialEq)]
 #[repr(u32)]
 #[allow(dead_code)]
 enum DictionaryType {
@@ -54,6 +55,19 @@ enum DictionaryType {
     SSTable = 2,
 }
 
+impl TryFrom<u32> for DictionaryType {
+    type Error = &'static str;
+
+    /// Converts a raw `u32` tag into a dictionary type, rejecting unknown values.
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        match value {
+            1 => Ok(DictionaryType::Fst),
+            2 => Ok(DictionaryType::SSTable),
+            _ => Err("Invalid value for DictionaryType"),
+        }
+    }
+}
+
 #[cfg(not(feature = "quickwit"))]
 const CURRENT_TYPE: DictionaryType = DictionaryType::Fst;
 
@@ -70,13 +84,19 @@ impl TermDictionary {
         let (main_slice, dict_type) = file.split_from_end(4);
         let mut dict_type = dict_type.read_bytes()?;
         let dict_type = u32::deserialize(&mut dict_type)?;
+        let dict_type = DictionaryType::try_from(dict_type).map_err(|_| {
+            io::Error::new(
+                io::ErrorKind::Other,
+                format!("Unsupported dictionary type, found {dict_type}"),
+            )
+        })?;
 
-        if dict_type != CURRENT_TYPE as u32 {
+        if dict_type != CURRENT_TYPE {
             return Err(io::Error::new(
                 io::ErrorKind::Other,
                 format!(
-                    "Unsuported dictionary type, expected {}, found {dict_type}",
-                    CURRENT_TYPE as u32,
+                    "Unsupported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
+                     {dict_type:?}",
                 ),
             ));
         }
diff --git a/tests/compat_tests_data/index_v6/.managed.json b/tests/compat_tests_data/index_v6/.managed.json
new file mode 100644
index 000000000..95b76f568
--- /dev/null
+++ b/tests/compat_tests_data/index_v6/.managed.json
@@ -0,0 +1 @@
+["00000000000000000000000000000000.store","00000000000000000000000000000000.fast","00000000000000000000000000000000.fieldnorm","00000000000000000000000000000000.term","00000000000000000000000000000000.idx","meta.json","00000000000000000000000000000000.pos"]
diff --git a/tests/compat_tests_data/index_v6/.tantivy-meta.lock b/tests/compat_tests_data/index_v6/.tantivy-meta.lock
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/compat_tests_data/index_v6/.tantivy-writer.lock b/tests/compat_tests_data/index_v6/.tantivy-writer.lock
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast
new file mode 100644
index 000000000..f550fe3aa
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast differ
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm
new file mode 100644
index 000000000..08af09ea2
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm differ
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx
new file mode 100644
index 000000000..a881999d1
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx differ
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos
new file mode 100644
index 000000000..e1faa1081
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos differ
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store
new file mode 100644
index 000000000..97497908d
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store differ
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term
new file mode 100644
index 000000000..b2df64c6c
Binary files /dev/null and b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term differ
diff --git a/tests/compat_tests_data/index_v6/meta.json b/tests/compat_tests_data/index_v6/meta.json
new file mode 100644
index 000000000..6fa7c001f
--- /dev/null
+++ b/tests/compat_tests_data/index_v6/meta.json
@@ -0,0 +1,40 @@
+{
+  "index_settings": {
+    "docstore_compression": "lz4",
+    "docstore_blocksize": 16384
+  },
+  "segments": [
+    {
+      "segment_id": "00000000-0000-0000-0000-000000000000",
+      "max_doc": 1,
+      "deletes": null
+    }
+  ],
+  "schema": [
+    {
+      "name": "label",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "fieldnorms": true,
+          "tokenizer": "default"
+        },
+        "stored": true,
+        "fast": false
+      }
+    },
+    {
+      "name": "date",
+      "type": "date",
+      "options": {
+        "indexed": true,
+        "fieldnorms": true,
+        "fast": false,
+        "stored": true,
+        "precision": "seconds"
+      }
+    }
+  ],
+  "opstamp": 2
+}