diff --git a/CHANGELOG.md b/CHANGELOG.md index 0db60c6ec..94be9ef3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Tantivy 0.11.0 - API change around `Box`. See detail in #629 - Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock) - Add footer with some metadata to index files. #605 (@fdb-hiroshima) +- Add a method to check the compatibility of the footer in the index with the running version of tantivy (@petr-tik) - TopDocs collector: ensure stable sorting on equal score. #671 (@brainlock) - Added handling of pre-tokenized text fields (#642), which will enable users to load tokens created outside tantivy. See usage in examples/pre_tokenized_text. (@kkoziara) @@ -16,10 +17,11 @@ Tantivy 0.11.0 ## How to update? +- The index format is changed. You are required to reindex your data to use tantivy 0.11. - `Box` has been replaced by a `BoxedTokenizer` struct. - Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return an error and handling the `Result` is required. - +- `tantivy::version()` now returns a `Version` object. 
This object implements `ToString()` Tantivy 0.10.2 ===================== diff --git a/src/directory/error.rs b/src/directory/error.rs index 4cc509443..9a3ff44eb 100644 --- a/src/directory/error.rs +++ b/src/directory/error.rs @@ -1,3 +1,4 @@ +use crate::Version; use std::error::Error as StdError; use std::fmt; use std::io; @@ -156,6 +157,65 @@ impl StdError for OpenWriteError { } } +/// Type of index incompatibility between the library and the index found on disk +/// Used to catch and provide a hint to solve this incompatibility issue +pub enum Incompatibility { + /// This library cannot decompress the index found on disk + CompressionMismatch { + /// Compression algorithm used by the current version of tantivy + library_compression_format: String, + /// Compression algorithm that was used to serialise the index + index_compression_format: String, + }, + /// The index format found on disk isn't supported by this version of the library + IndexMismatch { + /// Version used by the library + library_version: Version, + /// Version the index was built with + index_version: Version, + }, +} + +impl fmt::Debug for Incompatibility { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + match self { + Incompatibility::CompressionMismatch { + library_compression_format, + index_compression_format, + } => { + let err = format!( + "Library was compiled with {:?} compression, index was compressed with {:?}", + library_compression_format, index_compression_format + ); + let advice = format!( + "Change the feature flag to {:?} and rebuild the library", + index_compression_format + ); + write!(f, "{}. 
{}", err, advice)?; + } + Incompatibility::IndexMismatch { + library_version, + index_version, + } => { + let err = format!( + "Library version: {}, index version: {}", + library_version.index_format_version, index_version.index_format_version + ); + // TODO make a more useful error message + // include the version range that supports this index_format_version + let advice = format!( + "Change tantivy to a version compatible with index format {} (e.g. {}.{}.x) \ + and rebuild your project.", + index_version.index_format_version, index_version.major, index_version.minor + ); + write!(f, "{}. {}", err, advice)?; + } + } + + Ok(()) + } +} + /// Error that may occur when accessing a file read #[derive(Debug)] pub enum OpenReadError { @@ -164,6 +224,8 @@ pub enum OpenReadError { /// Any kind of IO error that happens when /// interacting with the underlying IO device. IOError(IOError), + /// This library doesn't support the index version found on disk + IncompatibleIndex(Incompatibility), } impl From for OpenReadError { @@ -183,19 +245,9 @@ impl fmt::Display for OpenReadError { "an io error occurred while opening a file for reading: '{}'", err ), - } - } -} - -impl StdError for OpenReadError { - fn description(&self) -> &str { - "error occurred while opening a file for reading" - } - - fn cause(&self) -> Option<&dyn StdError> { - match *self { - OpenReadError::FileDoesNotExist(_) => None, - OpenReadError::IOError(ref err) => Some(err), + OpenReadError::IncompatibleIndex(ref footer) => { + write!(f, "Incompatible index format: {:?}", footer) + } } } } @@ -216,6 +268,12 @@ impl From for DeleteError { } } +impl From for OpenReadError { + fn from(incompatibility: Incompatibility) -> Self { + OpenReadError::IncompatibleIndex(incompatibility) + } +} + impl fmt::Display for DeleteError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { diff --git a/src/directory/footer.rs b/src/directory/footer.rs index 1cfe911e0..c1f471788 100644 --- 
a/src/directory/footer.rs +++ b/src/directory/footer.rs @@ -1,181 +1,175 @@ +use crate::common::{BinarySerializable, CountingWriter, FixedSize, VInt}; +use crate::directory::error::Incompatibility; use crate::directory::read_only_source::ReadOnlySource; use crate::directory::{AntiCallToken, TerminatingWrite}; -use byteorder::{ByteOrder, LittleEndian}; +use crate::Version; +use byteorder::{ByteOrder, LittleEndian, WriteBytesExt}; use crc32fast::Hasher; use std::io; use std::io::Write; -const COMMON_FOOTER_SIZE: usize = 4 * 5; - type CrcHashU32 = u32; #[derive(Debug, Clone, PartialEq)] pub struct Footer { - pub tantivy_version: (u32, u32, u32), + pub version: Version, pub meta: String, pub versioned_footer: VersionedFooter, } +/// Serialises the footer to a byte-array +/// - versioned_footer_len: VInt (variable length) +/// - versioned_footer: variable bytes +/// - meta_len: VInt (variable length) +/// - meta: variable bytes +/// - version_len: VInt (variable length) +/// - version json: variable bytes +impl BinarySerializable for Footer { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + BinarySerializable::serialize(&self.versioned_footer, writer)?; + BinarySerializable::serialize(&self.meta, writer)?; + let version_string = + serde_json::to_string(&self.version).map_err(|_err| io::ErrorKind::InvalidInput)?; + BinarySerializable::serialize(&version_string, writer)?; + Ok(()) + } + + fn deserialize(reader: &mut R) -> io::Result { + let versioned_footer = VersionedFooter::deserialize(reader)?; + let meta = String::deserialize(reader)?; + let version_json = String::deserialize(reader)?; + let version = serde_json::from_str(&version_json)?; + Ok(Footer { + version, + meta, + versioned_footer, + }) + } +} + impl Footer { pub fn new(versioned_footer: VersionedFooter) -> Self { - let tantivy_version = ( - env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), - env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), - env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(), - ); + let version = crate::VERSION.clone(); + let 
meta = version.to_string(); Footer { - tantivy_version, - meta: format!( - "tantivy v{}.{}.{}, index_format v{}", - tantivy_version.0, - tantivy_version.1, - tantivy_version.2, - versioned_footer.version() - ), + version, + meta, versioned_footer, } } - /// Serialises the footer to a byte-array - /// [ versioned_footer | meta | common_footer ] - /// [ 0..8 | 8..32 | 32..52 ] - pub fn to_bytes(&self) -> Vec { - let mut res = self.versioned_footer.to_bytes(); - res.extend_from_slice(self.meta.as_bytes()); - let len = res.len(); - res.resize(len + COMMON_FOOTER_SIZE, 0); - let mut common_footer = &mut res[len..]; - LittleEndian::write_u32(&mut common_footer, self.meta.len() as u32); - LittleEndian::write_u32(&mut common_footer[4..], self.tantivy_version.0); - LittleEndian::write_u32(&mut common_footer[8..], self.tantivy_version.1); - LittleEndian::write_u32(&mut common_footer[12..], self.tantivy_version.2); - LittleEndian::write_u32(&mut common_footer[16..], (len + COMMON_FOOTER_SIZE) as u32); - res - } - - pub fn from_bytes(data: &[u8]) -> Result { - let len = data.len(); - if len < COMMON_FOOTER_SIZE + 4 { - // 4 bytes for index version, stored in versioned footer - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - format!("File corrupted. The footer len must be over 24, while the entire file len is {}", len) - ) - ); - } - - let size = LittleEndian::read_u32(&data[len - 4..]) as usize; - if len < size as usize { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - format!( - "The footer len is {}, while the entire file len is {}. 
\ - Your index is either corrupted or was built using a tantivy version\ - anterior to 0.11.", - size, len - ), - )); - } - let footer = &data[len - size as usize..]; - let meta_len = LittleEndian::read_u32(&footer[size - COMMON_FOOTER_SIZE..]) as usize; - let tantivy_major = LittleEndian::read_u32(&footer[size - 16..]); - let tantivy_minor = LittleEndian::read_u32(&footer[size - 12..]); - let tantivy_patch = LittleEndian::read_u32(&footer[size - 8..]); - Ok(Footer { - tantivy_version: (tantivy_major, tantivy_minor, tantivy_patch), - meta: String::from_utf8_lossy( - &footer[size - meta_len - COMMON_FOOTER_SIZE..size - COMMON_FOOTER_SIZE], - ) - .into_owned(), - versioned_footer: VersionedFooter::from_bytes( - &footer[..size - meta_len - COMMON_FOOTER_SIZE], - )?, - }) + pub fn append_footer(&self, mut write: &mut W) -> io::Result<()> { + let mut counting_write = CountingWriter::wrap(&mut write); + self.serialize(&mut counting_write)?; + let written_len = counting_write.written_bytes(); + write.write_u32::(written_len as u32)?; + Ok(()) } pub fn extract_footer(source: ReadOnlySource) -> Result<(Footer, ReadOnlySource), io::Error> { - let footer = Footer::from_bytes(source.as_slice())?; - let reader = source.slice_to(source.as_slice().len() - footer.size()); - Ok((footer, reader)) + if source.len() < 4 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + format!( + "File corrupted. 
The file is smaller than 4 bytes (len={}).", + source.len() + ), + )); + } + let (body_footer, footer_len_bytes) = source.split_from_end(u32::SIZE_IN_BYTES); + let footer_len = LittleEndian::read_u32(footer_len_bytes.as_slice()) as usize; + let body_len = body_footer.len() - footer_len; + let (body, footer_data) = body_footer.split(body_len); + let mut cursor = footer_data.as_slice(); + let footer = Footer::deserialize(&mut cursor)?; + Ok((footer, body)) } - pub fn size(&self) -> usize { - self.versioned_footer.size() as usize + self.meta.len() + COMMON_FOOTER_SIZE + /// Confirms that the index will be read correctly by this version of tantivy + /// Has to be called after `extract_footer` to make sure it's not accessing uninitialised memory + pub fn is_compatible(&self) -> Result<(), Incompatibility> { + let library_version = crate::version(); + match &self.versioned_footer { + VersionedFooter::V1 { + crc32: _crc, + store_compression: compression, + } => { + if &library_version.store_compression != compression { + return Err(Incompatibility::CompressionMismatch { + library_compression_format: library_version.store_compression.to_string(), + index_compression_format: compression.to_string(), + }); + } + Ok(()) + } + VersionedFooter::UnknownVersion => Err(Incompatibility::IndexMismatch { + library_version: library_version.clone(), + index_version: self.version.clone(), + }), + } } } /// Footer that includes a crc32 hash that enables us to checksum files in the index #[derive(Debug, Clone, PartialEq)] pub enum VersionedFooter { - UnknownVersion { version: u32, size: u32 }, - V0(CrcHashU32), // crc + UnknownVersion, + V1 { + crc32: CrcHashU32, + store_compression: String, + }, +} + +impl BinarySerializable for VersionedFooter { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + let mut buf = Vec::new(); + match self { + VersionedFooter::V1 { + crc32, + store_compression: compression, + } => { + // Serializes a valid `VersionedFooter` or panics if the version 
is unknown + // [ version | crc_hash | compression_mode ] + // [ 0..4 | 4..8 | variable ] + BinarySerializable::serialize(&1u32, &mut buf)?; + BinarySerializable::serialize(crc32, &mut buf)?; + BinarySerializable::serialize(compression, &mut buf)?; + } + VersionedFooter::UnknownVersion => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot serialize an unknown versioned footer ", + )); + } + } + BinarySerializable::serialize(&VInt(buf.len() as u64), writer)?; + writer.write_all(&buf[..])?; + Ok(()) + } + + fn deserialize(reader: &mut R) -> io::Result { + let len = VInt::deserialize(reader)?.0 as usize; + let mut buf = vec![0u8; len]; + reader.read_exact(&mut buf[..])?; + let mut cursor = &buf[..]; + let version = u32::deserialize(&mut cursor)?; + if version == 1 { + let crc32 = u32::deserialize(&mut cursor)?; + let compression = String::deserialize(&mut cursor)?; + Ok(VersionedFooter::V1 { + crc32, + store_compression: compression, + }) + } else { + Ok(VersionedFooter::UnknownVersion) + } + } } impl VersionedFooter { - /// Serializes a valid `VersionedFooter` or panics if the version is unknown - /// [ version | crc_hash ] - /// [ 0..4 | 4..8 ] - pub fn to_bytes(&self) -> Vec { - match self { - VersionedFooter::V0(crc) => { - let mut buf = [0u8; 8]; - LittleEndian::write_u32(&mut buf[0..4], 0); - LittleEndian::write_u32(&mut buf[4..8], *crc); - buf.to_vec() - } - VersionedFooter::UnknownVersion { .. } => { - panic!("Unsupported index should never get serialized"); - } - } - } - - pub fn from_bytes(footer: &[u8]) -> Result { - assert!(footer.len() >= 4); - if footer.len() < 4 { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "Footer should be more than 4 bytes.", - )); - } - let version = LittleEndian::read_u32(footer); - match version { - // the first 4 bytes should be zeroed out thus returning a `0` - 0 => { - if footer.len() != 8 { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - format!( - "File corrupted. 
The versioned footer len is {}, while it should be 8", - footer.len() - ), - )); - } - Ok(VersionedFooter::V0(LittleEndian::read_u32(&footer[4..]))) - } - version => Ok(VersionedFooter::UnknownVersion { - version, - size: footer.len() as u32, - }), - } - } - - pub fn size(&self) -> u32 { - match self { - VersionedFooter::V0(_) => 8, - VersionedFooter::UnknownVersion { size, .. } => *size, - } - } - - pub fn version(&self) -> u32 { - match self { - VersionedFooter::V0(_) => 0, - VersionedFooter::UnknownVersion { version, .. } => *version, - } - } - pub fn crc(&self) -> Option { match self { - VersionedFooter::V0(crc) => Some(*crc), + VersionedFooter::V1 { crc32, .. } => Some(*crc32), VersionedFooter::UnknownVersion { .. } => None, } } @@ -211,10 +205,13 @@ impl Write for FooterProxy { impl TerminatingWrite for FooterProxy { fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> { - let crc = self.hasher.take().unwrap().finalize(); - let footer = Footer::new(VersionedFooter::V0(crc)).to_bytes(); + let crc32 = self.hasher.take().unwrap().finalize(); + let footer = Footer::new(VersionedFooter::V1 { + crc32, + store_compression: crate::store::COMPRESSION.to_string(), + }); let mut writer = self.writer.take().unwrap(); - writer.write_all(&footer)?; + footer.append_footer(&mut writer)?; writer.terminate() } } @@ -222,56 +219,121 @@ impl TerminatingWrite for FooterProxy { #[cfg(test)] mod tests { + use super::CrcHashU32; + use super::FooterProxy; + use crate::common::BinarySerializable; use crate::directory::footer::{Footer, VersionedFooter}; + use crate::directory::TerminatingWrite; + use byteorder::{ByteOrder, LittleEndian}; use regex::Regex; + #[test] + fn test_versioned_footer() { + let mut vec = Vec::new(); + let footer_proxy = FooterProxy::new(&mut vec); + assert!(footer_proxy.terminate().is_ok()); + assert_eq!(vec.len(), 167); + let footer = Footer::deserialize(&mut &vec[..]).unwrap(); + if let VersionedFooter::V1 { + crc32: _, + store_compression, + } = 
footer.versioned_footer + { + assert_eq!(store_compression, crate::store::COMPRESSION); + } else { + panic!("Versioned footer should be V1."); + } + assert_eq!(&footer.version, crate::version()); + } + #[test] fn test_serialize_deserialize_footer() { - let crc = 123456; - let footer = Footer::new(VersionedFooter::V0(crc)); - let footer_bytes = footer.to_bytes(); - assert_eq!(Footer::from_bytes(&footer_bytes).unwrap(), footer); + let mut buffer = Vec::new(); + let crc32 = 123456u32; + let footer: Footer = Footer::new(VersionedFooter::V1 { + crc32, + store_compression: "lz4".to_string(), + }); + footer.serialize(&mut buffer).unwrap(); + let footer_deser = Footer::deserialize(&mut &buffer[..]).unwrap(); + assert_eq!(footer_deser, footer); } #[test] fn footer_length() { - // test to make sure the ascii art in the doc-strings is correct - let crc = 1111111 as u32; - let versioned_footer = VersionedFooter::V0(crc); - assert_eq!(versioned_footer.size(), 8); + let crc32 = 1111111u32; + let versioned_footer = VersionedFooter::V1 { + crc32, + store_compression: "lz4".to_string(), + }; + let mut buf = Vec::new(); + versioned_footer.serialize(&mut buf).unwrap(); + assert_eq!(buf.len(), 13); let footer = Footer::new(versioned_footer); let regex_ptn = Regex::new( "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}", ) .unwrap(); - assert!(regex_ptn.find(&footer.meta).is_some()); + assert!(regex_ptn.is_match(&footer.meta)); } #[test] fn versioned_footer_from_bytes() { - use byteorder::{ByteOrder, LittleEndian}; - let v_footer_bytes = vec![0, 0, 0, 0, 12, 35, 89, 18]; - let versioned_footer = VersionedFooter::from_bytes(&v_footer_bytes).unwrap(); - let expected_versioned_footer = - VersionedFooter::V0(LittleEndian::read_u32(&[12, 35, 89, 18])); - assert_eq!(versioned_footer, expected_versioned_footer); - - assert_eq!(versioned_footer.to_bytes(), v_footer_bytes); - } - - #[should_panic(expected = "Unsupported index should never get serialized")] - 
#[test] - fn versioned_footer_panic() { - use byteorder::{ByteOrder, LittleEndian}; - let v_footer_bytes = vec![1; 8]; - let versioned_footer = VersionedFooter::from_bytes(&v_footer_bytes).unwrap(); - let expected_version = LittleEndian::read_u32(&[1, 1, 1, 1]); - let expected_versioned_footer = VersionedFooter::UnknownVersion { - version: expected_version, - size: v_footer_bytes.len() as u32, + let v_footer_bytes = vec![ + // versioned footer length + 12 | 128, + // index format version + 1, + 0, + 0, + 0, + // crc 32 + 12, + 35, + 89, + 18, + // compression format + 3 | 128, + b'l', + b'z', + b'4', + ]; + let mut cursor = &v_footer_bytes[..]; + let versioned_footer = VersionedFooter::deserialize(&mut cursor).unwrap(); + assert!(cursor.is_empty()); + let expected_crc: u32 = LittleEndian::read_u32(&v_footer_bytes[5..9]) as CrcHashU32; + let expected_versioned_footer: VersionedFooter = VersionedFooter::V1 { + crc32: expected_crc, + store_compression: "lz4".to_string(), }; assert_eq!(versioned_footer, expected_versioned_footer); + let mut buffer = Vec::new(); + assert!(versioned_footer.serialize(&mut buffer).is_ok()); + assert_eq!(&v_footer_bytes[..], &buffer[..]); + } - versioned_footer.to_bytes(); + #[test] + fn versioned_footer_panic() { + let v_footer_bytes = vec![6u8 | 128u8, 3u8, 0u8, 0u8, 1u8, 0u8, 0u8]; + let mut b = &v_footer_bytes[..]; + let versioned_footer = VersionedFooter::deserialize(&mut b).unwrap(); + assert!(b.is_empty()); + let expected_versioned_footer = VersionedFooter::UnknownVersion; + assert_eq!(versioned_footer, expected_versioned_footer); + let mut buf = Vec::new(); + assert!(versioned_footer.serialize(&mut buf).is_err()); + } + + #[test] + #[cfg(not(feature = "lz4"))] + fn compression_mismatch() { + let crc32 = 1111111u32; + let versioned_footer = VersionedFooter::V1 { + crc32, + store_compression: "lz4".to_string(), + }; + let footer = Footer::new(versioned_footer); + let res = footer.is_compatible(); + assert!(res.is_err()); } } diff 
--git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index 8351136f7..1874119d5 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs @@ -9,7 +9,7 @@ use crate::directory::{ReadOnlySource, WritePtr}; use crate::directory::{WatchCallback, WatchHandle}; use crate::error::DataCorruption; use crate::Directory; -use crate::Result; + use crc32fast::Hasher; use serde_json; use std::collections::HashSet; @@ -65,7 +65,7 @@ fn save_managed_paths( impl ManagedDirectory { /// Wraps a directory as managed directory. - pub fn wrap(directory: Dir) -> Result { + pub fn wrap(directory: Dir) -> crate::Result { match directory.atomic_read(&MANAGED_FILEPATH) { Ok(data) => { let managed_files_json = String::from_utf8_lossy(&data); @@ -88,6 +88,11 @@ impl ManagedDirectory { meta_informations: Arc::default(), }), Err(OpenReadError::IOError(e)) => Err(From::from(e)), + Err(OpenReadError::IncompatibleIndex(incompatibility)) => { + // For the moment, this should never happen: `meta.json` + // does not have any footer and cannot detect incompatibility. 
+ Err(crate::TantivyError::IncompatibleIndex(incompatibility)) + } } } @@ -261,8 +266,9 @@ impl ManagedDirectory { impl Directory for ManagedDirectory { fn open_read(&self, path: &Path) -> result::Result { let read_only_source = self.directory.open_read(path)?; - let (_footer, reader) = Footer::extract_footer(read_only_source) + let (footer, reader) = Footer::extract_footer(read_only_source) .map_err(|err| IOError::with_path(path.to_path_buf(), err))?; + footer.is_compatible()?; Ok(reader) } @@ -409,6 +415,8 @@ mod tests_mmap_specific { write.write_all(&[3u8, 4u8, 5u8]).unwrap(); write.terminate().unwrap(); + let read_source = managed_directory.open_read(test_path2).unwrap(); + assert_eq!(read_source.as_slice(), &[3u8, 4u8, 5u8]); assert!(managed_directory.list_damaged().unwrap().is_empty()); let mut corrupted_path = tempdir_path.clone(); diff --git a/src/directory/mod.rs b/src/directory/mod.rs index ceabbc3cc..c7a836909 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -81,6 +81,13 @@ impl TerminatingWrite for BufWriter { } } +#[cfg(test)] +impl<'a> TerminatingWrite for &'a mut Vec { + fn terminate_ref(&mut self, _a: AntiCallToken) -> io::Result<()> { + self.flush() + } +} + /// Write object for Directory. /// /// `WritePtr` are required to implement both Write diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index 1a49fc0aa..9949b9e77 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs @@ -70,6 +70,12 @@ impl ReadOnlySource { (left, right) } + /// Splits into 2 `ReadOnlySource`, at the offset `end - right_len`. + pub fn split_from_end(self, right_len: usize) -> (ReadOnlySource, ReadOnlySource) { + let left_len = self.len() - right_len; + self.split(left_len) + } + /// Creates a ReadOnlySource that is just a /// view over a slice of the data. 
/// diff --git a/src/error.rs b/src/error.rs index ac6d96216..c4752141b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,8 +2,8 @@ use std::io; -use crate::directory::error::LockError; use crate::directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError}; +use crate::directory::error::{Incompatibility, LockError}; use crate::fastfield::FastFieldNotAvailableError; use crate::query; use crate::schema; @@ -80,6 +80,9 @@ pub enum TantivyError { /// System error. (e.g.: We failed spawning a new thread) #[fail(display = "System error.'{}'", _0)] SystemError(String), + /// Index incompatible with current version of tantivy + #[fail(display = "{:?}", _0)] + IncompatibleIndex(Incompatibility), } impl From for TantivyError { @@ -129,6 +132,9 @@ impl From for TantivyError { match error { OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath), OpenReadError::IOError(io_error) => TantivyError::IOError(io_error), + OpenReadError::IncompatibleIndex(incompatibility) => { + TantivyError::IncompatibleIndex(incompatibility) + } } } } diff --git a/src/lib.rs b/src/lib.rs old mode 100755 new mode 100644 index e0328d200..de3f2705a --- a/src/lib.rs +++ b/src/lib.rs @@ -160,7 +160,6 @@ pub use self::snippet::{Snippet, SnippetGenerator}; mod docset; pub use self::docset::{DocSet, SkipResult}; - pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; pub use crate::core::SegmentComponent; pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta}; @@ -170,11 +169,58 @@ pub use crate::indexer::IndexWriter; pub use crate::postings::Postings; pub use crate::reader::LeasedItem; pub use crate::schema::{Document, Term}; +use std::fmt; -/// Expose the current version of tantivy, as well -/// whether it was compiled with the simd compression. -pub fn version() -> &'static str { - env!("CARGO_PKG_VERSION") +use once_cell::sync::Lazy; + +/// Index format version. 
+const INDEX_FORMAT_VERSION: u32 = 1; + +/// Structure version for the index. +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Version { + major: u32, + minor: u32, + patch: u32, + index_format_version: u32, + store_compression: String, +} + +impl fmt::Debug for Version { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.to_string()) + } +} + +static VERSION: Lazy = Lazy::new(|| Version { + major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), + minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), + patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(), + index_format_version: INDEX_FORMAT_VERSION, + store_compression: crate::store::COMPRESSION.to_string(), +}); + +impl ToString for Version { + fn to_string(&self) -> String { + format!( + "tantivy v{}.{}.{}, index_format v{}, store_compression: {}", + self.major, self.minor, self.patch, self.index_format_version, self.store_compression + ) + } +} + +static VERSION_STRING: Lazy = Lazy::new(|| VERSION.to_string()); + +/// Expose the current version of tantivy as found in Cargo.toml during compilation. +/// eg. "0.11.0" as well as the compression scheme used in the docstore. +pub fn version() -> &'static Version { + &VERSION +} + +/// Exposes the complete version of tantivy as found in Cargo.toml during compilation as a string. +/// eg. "tantivy v0.11.0, index_format v1, store_compression: lz4". 
+pub fn version_string() -> &'static str { + VERSION_STRING.as_str() } /// Defines tantivy's merging strategy @@ -287,6 +333,18 @@ mod tests { sample_with_seed(n, ratio, 4) } + #[test] + #[cfg(not(feature = "lz4"))] + fn test_version_string() { + use regex::Regex; + let regex_ptn = Regex::new( + "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}", + ) + .unwrap(); + let version = super::version().to_string(); + assert!(regex_ptn.find(&version).is_some()); + } + #[test] #[cfg(feature = "mmap")] fn test_indexing() { diff --git a/src/positions/reader.rs b/src/positions/reader.rs index 5c845c0b3..6737f4bbc 100644 --- a/src/positions/reader.rs +++ b/src/positions/reader.rs @@ -36,11 +36,10 @@ struct Positions { impl Positions { pub fn new(position_source: ReadOnlySource, skip_source: ReadOnlySource) -> Positions { - let skip_len = skip_source.len(); - let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES); + let (body, footer) = skip_source.split_from_end(u32::SIZE_IN_BYTES); let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted"); - let body_split = body.len() - u64::SIZE_IN_BYTES * (num_long_skips as usize); - let (skip_source, long_skip_source) = body.split(body_split); + let (skip_source, long_skip_source) = + body.split_from_end(u64::SIZE_IN_BYTES * (num_long_skips as usize)); Positions { bit_packer: BitPacker4x::new(), skip_source, diff --git a/src/store/compression_lz4.rs b/src/store/compression_lz4.rs index 533985327..07a1c9127 100644 --- a/src/store/compression_lz4.rs +++ b/src/store/compression_lz4.rs @@ -1,7 +1,10 @@ -extern crate lz4; - use std::io::{self, Read, Write}; +/// Name of the compression scheme used in the doc store. +/// +/// This name is appended to the version string of tantivy. 
+pub const COMPRESSION: &'static str = "lz4"; + pub fn compress(uncompressed: &[u8], compressed: &mut Vec) -> io::Result<()> { compressed.clear(); let mut encoder = lz4::EncoderBuilder::new().build(compressed)?; diff --git a/src/store/compression_snap.rs b/src/store/compression_snap.rs index b5cc2ded9..6a3182997 100644 --- a/src/store/compression_snap.rs +++ b/src/store/compression_snap.rs @@ -2,6 +2,11 @@ use snap; use std::io::{self, Read, Write}; +/// Name of the compression scheme used in the doc store. +/// +/// This name is appended to the version string of tantivy. +pub const COMPRESSION: &str = "snappy"; + pub fn compress(uncompressed: &[u8], compressed: &mut Vec) -> io::Result<()> { compressed.clear(); let mut encoder = snap::Writer::new(compressed); diff --git a/src/store/mod.rs b/src/store/mod.rs index 4f1347654..bb15301b7 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -42,12 +42,16 @@ pub use self::writer::StoreWriter; #[cfg(feature = "lz4")] mod compression_lz4; #[cfg(feature = "lz4")] -use self::compression_lz4::*; +pub use self::compression_lz4::COMPRESSION; +#[cfg(feature = "lz4")] +use self::compression_lz4::{compress, decompress}; #[cfg(not(feature = "lz4"))] mod compression_snap; #[cfg(not(feature = "lz4"))] -use self::compression_snap::*; +pub use self::compression_snap::COMPRESSION; +#[cfg(not(feature = "lz4"))] +use self::compression_snap::{compress, decompress}; #[cfg(test)] pub mod tests {