Make error handling richer in Footer::is_compatible (#724)

* WIP implemented is_compatible

hide Footer::from_bytes from public consumption - only found Footer::extract
used outside the module

Add a new error type for IncompatibleIndex
add a prototypical call to footer.is_compatible() in ManagedDirectory::open_read
to make sure we error before reading it further

* Make error handling more ergonomic

Add an error subtype for OpenReadError and converters to TantivyError

* Remove an unnecessary assert

it's follower by the same check that Errors instead of panicking

* Correct the compatibility check logic

Leave a defensive versioned footer check to make sure we add new logic handling
when we add possible footer versions

Restricted VersionedFooter::from_bytes to be used inside the crate only

remove a half-baked test

* WIP.

* Return an error if index incompatible - closes #662

Enrich the error type with incompatibility

Change return type to Result<bool, TantivyError>, instead of bool

Add an Incompatibility enum that enriches the IncompatibleIndex error variant
with information, which then allows us to generate a developer-friendly hint how
to upgrade library version or switch feature flags for a different compression
algorithm

Updated changelog

Change the signature of is_compatible

Added documentation to the Incompatibility
Added a conditional test on a Footer with lz4 erroring
This commit is contained in:
petr-tik
2019-12-14 00:14:33 +00:00
committed by Paul Masurel
parent 392abec420
commit 431c187a60
12 changed files with 423 additions and 205 deletions

View File

@@ -9,6 +9,7 @@ Tantivy 0.11.0
- API change around `Box<BoxableTokenizer>`. See detail in #629
- Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock)
- Add footer with some metadata to index files. #605 (@fdb-hiroshima)
- Add a method to check the compatibility of the footer in the index with the running version of tantivy (@petr-tik)
- TopDocs collector: ensure stable sorting on equal score. #671 (@brainlock)
- Added handling of pre-tokenized text fields (#642), which will enable users to
load tokens created outside tantivy. See usage in examples/pre_tokenized_text. (@kkoziara)
@@ -16,10 +17,11 @@ Tantivy 0.11.0
## How to update?
- The index format is changed. You are required to reindex your data to use tantivy 0.11.
- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
- Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return
an error and handling the `Result` is required.
- `tantivy::version()` now returns a `Version` object. This object implements `ToString()`
Tantivy 0.10.2
=====================

View File

@@ -1,3 +1,4 @@
use crate::Version;
use std::error::Error as StdError;
use std::fmt;
use std::io;
@@ -156,6 +157,65 @@ impl StdError for OpenWriteError {
}
}
/// Type of index incompatibility between the library and the index found on disk
/// Used to catch and provide a hint to solve this incompatibility issue
pub enum Incompatibility {
/// This library cannot decompress the index found on disk
CompressionMismatch {
/// Compression algorithm used by the current version of tantivy
library_compression_format: String,
/// Compression algorithm that was used to serialise the index
index_compression_format: String,
},
/// The index format found on disk isn't supported by this version of the library
IndexMismatch {
/// Version used by the library
library_version: Version,
/// Version the index was built with
index_version: Version,
},
}
impl fmt::Debug for Incompatibility {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
match self {
Incompatibility::CompressionMismatch {
library_compression_format,
index_compression_format,
} => {
let err = format!(
"Library was compiled with {:?} compression, index was compressed with {:?}",
library_compression_format, index_compression_format
);
let advice = format!(
"Change the feature flag to {:?} and rebuild the library",
index_compression_format
);
write!(f, "{}. {}", err, advice)?;
}
Incompatibility::IndexMismatch {
library_version,
index_version,
} => {
let err = format!(
"Library version: {}, index version: {}",
library_version.index_format_version, index_version.index_format_version
);
// TODO make a more useful error message
// include the version range that supports this index_format_version
let advice = format!(
"Change tantivy to a version compatible with index format {} (e.g. {}.{}.x) \
and rebuild your project.",
index_version.index_format_version, index_version.major, index_version.minor
);
write!(f, "{}. {}", err, advice)?;
}
}
Ok(())
}
}
/// Error that may occur when accessing a file read
#[derive(Debug)]
pub enum OpenReadError {
@@ -164,6 +224,8 @@ pub enum OpenReadError {
/// Any kind of IO error that happens when
/// interacting with the underlying IO device.
IOError(IOError),
/// This library doesn't support the index version found on disk
IncompatibleIndex(Incompatibility),
}
impl From<IOError> for OpenReadError {
@@ -183,19 +245,9 @@ impl fmt::Display for OpenReadError {
"an io error occurred while opening a file for reading: '{}'",
err
),
}
}
}
impl StdError for OpenReadError {
fn description(&self) -> &str {
"error occurred while opening a file for reading"
}
fn cause(&self) -> Option<&dyn StdError> {
match *self {
OpenReadError::FileDoesNotExist(_) => None,
OpenReadError::IOError(ref err) => Some(err),
OpenReadError::IncompatibleIndex(ref footer) => {
write!(f, "Incompatible index format: {:?}", footer)
}
}
}
}
@@ -216,6 +268,12 @@ impl From<IOError> for DeleteError {
}
}
impl From<Incompatibility> for OpenReadError {
fn from(incompatibility: Incompatibility) -> Self {
OpenReadError::IncompatibleIndex(incompatibility)
}
}
impl fmt::Display for DeleteError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {

View File

@@ -1,181 +1,175 @@
use crate::common::{BinarySerializable, CountingWriter, FixedSize, VInt};
use crate::directory::error::Incompatibility;
use crate::directory::read_only_source::ReadOnlySource;
use crate::directory::{AntiCallToken, TerminatingWrite};
use byteorder::{ByteOrder, LittleEndian};
use crate::Version;
use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
use crc32fast::Hasher;
use std::io;
use std::io::Write;
const COMMON_FOOTER_SIZE: usize = 4 * 5;
type CrcHashU32 = u32;
#[derive(Debug, Clone, PartialEq)]
pub struct Footer {
pub tantivy_version: (u32, u32, u32),
pub version: Version,
pub meta: String,
pub versioned_footer: VersionedFooter,
}
/// Serialises the footer to a byte-array
/// - versioned_footer_len : 4 bytes
///- versioned_footer: variable bytes
/// - meta_len: 4 bytes
/// - meta: variable bytes
/// - version_len: 4 bytes
/// - version json: variable bytes
impl BinarySerializable for Footer {
fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
BinarySerializable::serialize(&self.versioned_footer, writer)?;
BinarySerializable::serialize(&self.meta, writer)?;
let version_string =
serde_json::to_string(&self.version).map_err(|_err| io::ErrorKind::InvalidInput)?;
BinarySerializable::serialize(&version_string, writer)?;
Ok(())
}
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let versioned_footer = VersionedFooter::deserialize(reader)?;
let meta = String::deserialize(reader)?;
let version_json = String::deserialize(reader)?;
let version = serde_json::from_str(&version_json)?;
Ok(Footer {
version,
meta,
versioned_footer,
})
}
}
impl Footer {
pub fn new(versioned_footer: VersionedFooter) -> Self {
let tantivy_version = (
env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
);
let version = crate::VERSION.clone();
let meta = version.to_string();
Footer {
tantivy_version,
meta: format!(
"tantivy v{}.{}.{}, index_format v{}",
tantivy_version.0,
tantivy_version.1,
tantivy_version.2,
versioned_footer.version()
),
version,
meta,
versioned_footer,
}
}
/// Serialises the footer to a byte-array
/// [ versioned_footer | meta | common_footer ]
/// [ 0..8 | 8..32 | 32..52 ]
pub fn to_bytes(&self) -> Vec<u8> {
let mut res = self.versioned_footer.to_bytes();
res.extend_from_slice(self.meta.as_bytes());
let len = res.len();
res.resize(len + COMMON_FOOTER_SIZE, 0);
let mut common_footer = &mut res[len..];
LittleEndian::write_u32(&mut common_footer, self.meta.len() as u32);
LittleEndian::write_u32(&mut common_footer[4..], self.tantivy_version.0);
LittleEndian::write_u32(&mut common_footer[8..], self.tantivy_version.1);
LittleEndian::write_u32(&mut common_footer[12..], self.tantivy_version.2);
LittleEndian::write_u32(&mut common_footer[16..], (len + COMMON_FOOTER_SIZE) as u32);
res
}
pub fn from_bytes(data: &[u8]) -> Result<Self, io::Error> {
let len = data.len();
if len < COMMON_FOOTER_SIZE + 4 {
// 4 bytes for index version, stored in versioned footer
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
format!("File corrupted. The footer len must be over 24, while the entire file len is {}", len)
)
);
}
let size = LittleEndian::read_u32(&data[len - 4..]) as usize;
if len < size as usize {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
format!(
"The footer len is {}, while the entire file len is {}. \
Your index is either corrupted or was built using a tantivy version\
anterior to 0.11.",
size, len
),
));
}
let footer = &data[len - size as usize..];
let meta_len = LittleEndian::read_u32(&footer[size - COMMON_FOOTER_SIZE..]) as usize;
let tantivy_major = LittleEndian::read_u32(&footer[size - 16..]);
let tantivy_minor = LittleEndian::read_u32(&footer[size - 12..]);
let tantivy_patch = LittleEndian::read_u32(&footer[size - 8..]);
Ok(Footer {
tantivy_version: (tantivy_major, tantivy_minor, tantivy_patch),
meta: String::from_utf8_lossy(
&footer[size - meta_len - COMMON_FOOTER_SIZE..size - COMMON_FOOTER_SIZE],
)
.into_owned(),
versioned_footer: VersionedFooter::from_bytes(
&footer[..size - meta_len - COMMON_FOOTER_SIZE],
)?,
})
pub fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
let mut counting_write = CountingWriter::wrap(&mut write);
self.serialize(&mut counting_write)?;
let written_len = counting_write.written_bytes();
write.write_u32::<LittleEndian>(written_len as u32)?;
Ok(())
}
pub fn extract_footer(source: ReadOnlySource) -> Result<(Footer, ReadOnlySource), io::Error> {
let footer = Footer::from_bytes(source.as_slice())?;
let reader = source.slice_to(source.as_slice().len() - footer.size());
Ok((footer, reader))
if source.len() < 4 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
format!(
"File corrupted. The file is smaller than 4 bytes (len={}).",
source.len()
),
));
}
let (body_footer, footer_len_bytes) = source.split_from_end(u32::SIZE_IN_BYTES);
let footer_len = LittleEndian::read_u32(footer_len_bytes.as_slice()) as usize;
let body_len = body_footer.len() - footer_len;
let (body, footer_data) = body_footer.split(body_len);
let mut cursor = footer_data.as_slice();
let footer = Footer::deserialize(&mut cursor)?;
Ok((footer, body))
}
pub fn size(&self) -> usize {
self.versioned_footer.size() as usize + self.meta.len() + COMMON_FOOTER_SIZE
/// Confirms that the index will be read correctly by this version of tantivy
/// Has to be called after `extract_footer` to make sure it's not accessing uninitialised memory
pub fn is_compatible(&self) -> Result<(), Incompatibility> {
let library_version = crate::version();
match &self.versioned_footer {
VersionedFooter::V1 {
crc32: _crc,
store_compression: compression,
} => {
if &library_version.store_compression != compression {
return Err(Incompatibility::CompressionMismatch {
library_compression_format: library_version.store_compression.to_string(),
index_compression_format: compression.to_string(),
});
}
Ok(())
}
VersionedFooter::UnknownVersion => Err(Incompatibility::IndexMismatch {
library_version: library_version.clone(),
index_version: self.version.clone(),
}),
}
}
}
/// Footer that includes a crc32 hash that enables us to checksum files in the index
#[derive(Debug, Clone, PartialEq)]
pub enum VersionedFooter {
UnknownVersion { version: u32, size: u32 },
V0(CrcHashU32), // crc
UnknownVersion,
V1 {
crc32: CrcHashU32,
store_compression: String,
},
}
impl BinarySerializable for VersionedFooter {
fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
let mut buf = Vec::new();
match self {
VersionedFooter::V1 {
crc32,
store_compression: compression,
} => {
// Serializes a valid `VersionedFooter` or panics if the version is unknown
// [ version | crc_hash | compression_mode ]
// [ 0..4 | 4..8 | variable ]
BinarySerializable::serialize(&1u32, &mut buf)?;
BinarySerializable::serialize(crc32, &mut buf)?;
BinarySerializable::serialize(compression, &mut buf)?;
}
VersionedFooter::UnknownVersion => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"Cannot serialize an unknown versioned footer ",
));
}
}
BinarySerializable::serialize(&VInt(buf.len() as u64), writer)?;
writer.write_all(&buf[..])?;
Ok(())
}
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let len = VInt::deserialize(reader)?.0 as usize;
let mut buf = vec![0u8; len];
reader.read_exact(&mut buf[..])?;
let mut cursor = &buf[..];
let version = u32::deserialize(&mut cursor)?;
if version == 1 {
let crc32 = u32::deserialize(&mut cursor)?;
let compression = String::deserialize(&mut cursor)?;
Ok(VersionedFooter::V1 {
crc32,
store_compression: compression,
})
} else {
Ok(VersionedFooter::UnknownVersion)
}
}
}
impl VersionedFooter {
/// Serializes a valid `VersionedFooter` or panics if the version is unknown
/// [ version | crc_hash ]
/// [ 0..4 | 4..8 ]
pub fn to_bytes(&self) -> Vec<u8> {
match self {
VersionedFooter::V0(crc) => {
let mut buf = [0u8; 8];
LittleEndian::write_u32(&mut buf[0..4], 0);
LittleEndian::write_u32(&mut buf[4..8], *crc);
buf.to_vec()
}
VersionedFooter::UnknownVersion { .. } => {
panic!("Unsupported index should never get serialized");
}
}
}
pub fn from_bytes(footer: &[u8]) -> Result<Self, io::Error> {
assert!(footer.len() >= 4);
if footer.len() < 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Footer should be more than 4 bytes.",
));
}
let version = LittleEndian::read_u32(footer);
match version {
// the first 4 bytes should be zeroed out thus returning a `0`
0 => {
if footer.len() != 8 {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
format!(
"File corrupted. The versioned footer len is {}, while it should be 8",
footer.len()
),
));
}
Ok(VersionedFooter::V0(LittleEndian::read_u32(&footer[4..])))
}
version => Ok(VersionedFooter::UnknownVersion {
version,
size: footer.len() as u32,
}),
}
}
pub fn size(&self) -> u32 {
match self {
VersionedFooter::V0(_) => 8,
VersionedFooter::UnknownVersion { size, .. } => *size,
}
}
pub fn version(&self) -> u32 {
match self {
VersionedFooter::V0(_) => 0,
VersionedFooter::UnknownVersion { version, .. } => *version,
}
}
pub fn crc(&self) -> Option<CrcHashU32> {
match self {
VersionedFooter::V0(crc) => Some(*crc),
VersionedFooter::V1 { crc32, .. } => Some(*crc32),
VersionedFooter::UnknownVersion { .. } => None,
}
}
@@ -211,10 +205,13 @@ impl<W: TerminatingWrite> Write for FooterProxy<W> {
impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
let crc = self.hasher.take().unwrap().finalize();
let footer = Footer::new(VersionedFooter::V0(crc)).to_bytes();
let crc32 = self.hasher.take().unwrap().finalize();
let footer = Footer::new(VersionedFooter::V1 {
crc32,
store_compression: crate::store::COMPRESSION.to_string(),
});
let mut writer = self.writer.take().unwrap();
writer.write_all(&footer)?;
footer.append_footer(&mut writer)?;
writer.terminate()
}
}
@@ -222,56 +219,121 @@ impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
#[cfg(test)]
mod tests {
use super::CrcHashU32;
use super::FooterProxy;
use crate::common::BinarySerializable;
use crate::directory::footer::{Footer, VersionedFooter};
use crate::directory::TerminatingWrite;
use byteorder::{ByteOrder, LittleEndian};
use regex::Regex;
#[test]
fn test_versioned_footer() {
let mut vec = Vec::new();
let footer_proxy = FooterProxy::new(&mut vec);
assert!(footer_proxy.terminate().is_ok());
assert_eq!(vec.len(), 167);
let footer = Footer::deserialize(&mut &vec[..]).unwrap();
if let VersionedFooter::V1 {
crc32: _,
store_compression,
} = footer.versioned_footer
{
assert_eq!(store_compression, crate::store::COMPRESSION);
} else {
panic!("Versioned footer should be V1.");
}
assert_eq!(&footer.version, crate::version());
}
#[test]
fn test_serialize_deserialize_footer() {
let crc = 123456;
let footer = Footer::new(VersionedFooter::V0(crc));
let footer_bytes = footer.to_bytes();
assert_eq!(Footer::from_bytes(&footer_bytes).unwrap(), footer);
let mut buffer = Vec::new();
let crc32 = 123456u32;
let footer: Footer = Footer::new(VersionedFooter::V1 {
crc32,
store_compression: "lz4".to_string(),
});
footer.serialize(&mut buffer).unwrap();
let footer_deser = Footer::deserialize(&mut &buffer[..]).unwrap();
assert_eq!(footer_deser, footer);
}
#[test]
fn footer_length() {
// test to make sure the ascii art in the doc-strings is correct
let crc = 1111111 as u32;
let versioned_footer = VersionedFooter::V0(crc);
assert_eq!(versioned_footer.size(), 8);
let crc32 = 1111111u32;
let versioned_footer = VersionedFooter::V1 {
crc32,
store_compression: "lz4".to_string(),
};
let mut buf = Vec::new();
versioned_footer.serialize(&mut buf).unwrap();
assert_eq!(buf.len(), 13);
let footer = Footer::new(versioned_footer);
let regex_ptn = Regex::new(
"tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
)
.unwrap();
assert!(regex_ptn.find(&footer.meta).is_some());
assert!(regex_ptn.is_match(&footer.meta));
}
#[test]
fn versioned_footer_from_bytes() {
use byteorder::{ByteOrder, LittleEndian};
let v_footer_bytes = vec![0, 0, 0, 0, 12, 35, 89, 18];
let versioned_footer = VersionedFooter::from_bytes(&v_footer_bytes).unwrap();
let expected_versioned_footer =
VersionedFooter::V0(LittleEndian::read_u32(&[12, 35, 89, 18]));
assert_eq!(versioned_footer, expected_versioned_footer);
assert_eq!(versioned_footer.to_bytes(), v_footer_bytes);
}
#[should_panic(expected = "Unsupported index should never get serialized")]
#[test]
fn versioned_footer_panic() {
use byteorder::{ByteOrder, LittleEndian};
let v_footer_bytes = vec![1; 8];
let versioned_footer = VersionedFooter::from_bytes(&v_footer_bytes).unwrap();
let expected_version = LittleEndian::read_u32(&[1, 1, 1, 1]);
let expected_versioned_footer = VersionedFooter::UnknownVersion {
version: expected_version,
size: v_footer_bytes.len() as u32,
let v_footer_bytes = vec![
// versionned footer length
12 | 128,
// index format version
1,
0,
0,
0,
// crc 32
12,
35,
89,
18,
// compression format
3 | 128,
b'l',
b'z',
b'4',
];
let mut cursor = &v_footer_bytes[..];
let versioned_footer = VersionedFooter::deserialize(&mut cursor).unwrap();
assert!(cursor.is_empty());
let expected_crc: u32 = LittleEndian::read_u32(&v_footer_bytes[5..9]) as CrcHashU32;
let expected_versioned_footer: VersionedFooter = VersionedFooter::V1 {
crc32: expected_crc,
store_compression: "lz4".to_string(),
};
assert_eq!(versioned_footer, expected_versioned_footer);
let mut buffer = Vec::new();
assert!(versioned_footer.serialize(&mut buffer).is_ok());
assert_eq!(&v_footer_bytes[..], &buffer[..]);
}
versioned_footer.to_bytes();
#[test]
fn versioned_footer_panic() {
let v_footer_bytes = vec![6u8 | 128u8, 3u8, 0u8, 0u8, 1u8, 0u8, 0u8];
let mut b = &v_footer_bytes[..];
let versioned_footer = VersionedFooter::deserialize(&mut b).unwrap();
assert!(b.is_empty());
let expected_versioned_footer = VersionedFooter::UnknownVersion;
assert_eq!(versioned_footer, expected_versioned_footer);
let mut buf = Vec::new();
assert!(versioned_footer.serialize(&mut buf).is_err());
}
#[test]
#[cfg(not(feature = "lz4"))]
fn compression_mismatch() {
let crc32 = 1111111u32;
let versioned_footer = VersionedFooter::V1 {
crc32,
store_compression: "lz4".to_string(),
};
let footer = Footer::new(versioned_footer);
let res = footer.is_compatible();
assert!(res.is_err());
}
}

View File

@@ -9,7 +9,7 @@ use crate::directory::{ReadOnlySource, WritePtr};
use crate::directory::{WatchCallback, WatchHandle};
use crate::error::DataCorruption;
use crate::Directory;
use crate::Result;
use crc32fast::Hasher;
use serde_json;
use std::collections::HashSet;
@@ -65,7 +65,7 @@ fn save_managed_paths(
impl ManagedDirectory {
/// Wraps a directory as managed directory.
pub fn wrap<Dir: Directory>(directory: Dir) -> Result<ManagedDirectory> {
pub fn wrap<Dir: Directory>(directory: Dir) -> crate::Result<ManagedDirectory> {
match directory.atomic_read(&MANAGED_FILEPATH) {
Ok(data) => {
let managed_files_json = String::from_utf8_lossy(&data);
@@ -88,6 +88,11 @@ impl ManagedDirectory {
meta_informations: Arc::default(),
}),
Err(OpenReadError::IOError(e)) => Err(From::from(e)),
Err(OpenReadError::IncompatibleIndex(incompatibility)) => {
// For the moment, this should never happen `meta.json`
// do not have any footer and cannot detect incompatibility.
Err(crate::TantivyError::IncompatibleIndex(incompatibility))
}
}
}
@@ -261,8 +266,9 @@ impl ManagedDirectory {
impl Directory for ManagedDirectory {
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
let read_only_source = self.directory.open_read(path)?;
let (_footer, reader) = Footer::extract_footer(read_only_source)
let (footer, reader) = Footer::extract_footer(read_only_source)
.map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
footer.is_compatible()?;
Ok(reader)
}
@@ -409,6 +415,8 @@ mod tests_mmap_specific {
write.write_all(&[3u8, 4u8, 5u8]).unwrap();
write.terminate().unwrap();
let read_source = managed_directory.open_read(test_path2).unwrap();
assert_eq!(read_source.as_slice(), &[3u8, 4u8, 5u8]);
assert!(managed_directory.list_damaged().unwrap().is_empty());
let mut corrupted_path = tempdir_path.clone();

View File

@@ -81,6 +81,13 @@ impl<W: TerminatingWrite> TerminatingWrite for BufWriter<W> {
}
}
#[cfg(test)]
impl<'a> TerminatingWrite for &'a mut Vec<u8> {
fn terminate_ref(&mut self, _a: AntiCallToken) -> io::Result<()> {
self.flush()
}
}
/// Write object for Directory.
///
/// `WritePtr` are required to implement both Write

View File

@@ -70,6 +70,12 @@ impl ReadOnlySource {
(left, right)
}
/// Splits into 2 `ReadOnlySource`, at the offset `end - right_len`.
pub fn split_from_end(self, right_len: usize) -> (ReadOnlySource, ReadOnlySource) {
let left_len = self.len() - right_len;
self.split(left_len)
}
/// Creates a ReadOnlySource that is just a
/// view over a slice of the data.
///

View File

@@ -2,8 +2,8 @@
use std::io;
use crate::directory::error::LockError;
use crate::directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
use crate::directory::error::{Incompatibility, LockError};
use crate::fastfield::FastFieldNotAvailableError;
use crate::query;
use crate::schema;
@@ -80,6 +80,9 @@ pub enum TantivyError {
/// System error. (e.g.: We failed spawning a new thread)
#[fail(display = "System error.'{}'", _0)]
SystemError(String),
/// Index incompatible with current version of tantivy
#[fail(display = "{:?}", _0)]
IncompatibleIndex(Incompatibility),
}
impl From<DataCorruption> for TantivyError {
@@ -129,6 +132,9 @@ impl From<OpenReadError> for TantivyError {
match error {
OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath),
OpenReadError::IOError(io_error) => TantivyError::IOError(io_error),
OpenReadError::IncompatibleIndex(incompatibility) => {
TantivyError::IncompatibleIndex(incompatibility)
}
}
}
}

68
src/lib.rs Executable file → Normal file
View File

@@ -160,7 +160,6 @@ pub use self::snippet::{Snippet, SnippetGenerator};
mod docset;
pub use self::docset::{DocSet, SkipResult};
pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
pub use crate::core::SegmentComponent;
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
@@ -170,11 +169,58 @@ pub use crate::indexer::IndexWriter;
pub use crate::postings::Postings;
pub use crate::reader::LeasedItem;
pub use crate::schema::{Document, Term};
use std::fmt;
/// Expose the current version of tantivy, as well
/// whether it was compiled with the simd compression.
pub fn version() -> &'static str {
env!("CARGO_PKG_VERSION")
use once_cell::sync::Lazy;
/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 1;
/// Structure version for the index.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Version {
major: u32,
minor: u32,
patch: u32,
index_format_version: u32,
store_compression: String,
}
impl fmt::Debug for Version {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.to_string())
}
}
static VERSION: Lazy<Version> = Lazy::new(|| Version {
major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
index_format_version: INDEX_FORMAT_VERSION,
store_compression: crate::store::COMPRESSION.to_string(),
});
impl ToString for Version {
fn to_string(&self) -> String {
format!(
"tantivy v{}.{}.{}, index_format v{}, store_compression: {}",
self.major, self.minor, self.patch, self.index_format_version, self.store_compression
)
}
}
static VERSION_STRING: Lazy<String> = Lazy::new(|| VERSION.to_string());
/// Expose the current version of tantivy as found in Cargo.toml during compilation.
/// eg. "0.11.0" as well as the compression scheme used in the docstore.
pub fn version() -> &'static Version {
&VERSION
}
/// Exposes the complete version of tantivy as found in Cargo.toml during compilation as a string.
/// eg. "tantivy v0.11.0, index_format v1, store_compression: lz4".
pub fn version_string() -> &'static str {
VERSION_STRING.as_str()
}
/// Defines tantivy's merging strategy
@@ -287,6 +333,18 @@ mod tests {
sample_with_seed(n, ratio, 4)
}
#[test]
#[cfg(not(feature = "lz4"))]
fn test_version_string() {
use regex::Regex;
let regex_ptn = Regex::new(
"tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
)
.unwrap();
let version = super::version().to_string();
assert!(regex_ptn.find(&version).is_some());
}
#[test]
#[cfg(feature = "mmap")]
fn test_indexing() {

View File

@@ -36,11 +36,10 @@ struct Positions {
impl Positions {
pub fn new(position_source: ReadOnlySource, skip_source: ReadOnlySource) -> Positions {
let skip_len = skip_source.len();
let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES);
let (body, footer) = skip_source.split_from_end(u32::SIZE_IN_BYTES);
let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted");
let body_split = body.len() - u64::SIZE_IN_BYTES * (num_long_skips as usize);
let (skip_source, long_skip_source) = body.split(body_split);
let (skip_source, long_skip_source) =
body.split_from_end(u64::SIZE_IN_BYTES * (num_long_skips as usize));
Positions {
bit_packer: BitPacker4x::new(),
skip_source,

View File

@@ -1,7 +1,10 @@
extern crate lz4;
use std::io::{self, Read, Write};
/// Name of the compression scheme used in the doc store.
///
/// This name is appended to the version string of tantivy.
pub const COMPRESSION: &'static str = "lz4";
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
compressed.clear();
let mut encoder = lz4::EncoderBuilder::new().build(compressed)?;

View File

@@ -2,6 +2,11 @@ use snap;
use std::io::{self, Read, Write};
/// Name of the compression scheme used in the doc store.
///
/// This name is appended to the version string of tantivy.
pub const COMPRESSION: &str = "snappy";
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
compressed.clear();
let mut encoder = snap::Writer::new(compressed);

View File

@@ -42,12 +42,16 @@ pub use self::writer::StoreWriter;
#[cfg(feature = "lz4")]
mod compression_lz4;
#[cfg(feature = "lz4")]
use self::compression_lz4::*;
pub use self::compression_lz4::COMPRESSION;
#[cfg(feature = "lz4")]
use self::compression_lz4::{compress, decompress};
#[cfg(not(feature = "lz4"))]
mod compression_snap;
#[cfg(not(feature = "lz4"))]
use self::compression_snap::*;
pub use self::compression_snap::COMPRESSION;
#[cfg(not(feature = "lz4"))]
use self::compression_snap::{compress, decompress};
#[cfg(test)]
pub mod tests {