mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
Compare commits
8 Commits
column-rea
...
petr-tik-n
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
073cc99254 | ||
|
|
367c2102fc | ||
|
|
a57e3a8cc4 | ||
|
|
be26d54719 | ||
|
|
99d2b54273 | ||
|
|
5804a2c7e5 | ||
|
|
b34e660c84 | ||
|
|
660471a063 |
@@ -16,10 +16,11 @@ Tantivy 0.11.0
|
||||
|
||||
## How to update?
|
||||
|
||||
- The index format is changed. You are required to reindex your data to use tantivy 0.11.
|
||||
- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
|
||||
- Regexes are now compiled when the `RegexQuery` instance is built. As a result, building the query can now return
|
||||
an error and handling the `Result` is required.
|
||||
|
||||
- `tantivy::version()` now returns a `Version` object. This object implements `ToString`.
|
||||
|
||||
Tantivy 0.10.2
|
||||
=====================
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::directory::footer::Footer;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
@@ -164,6 +165,8 @@ pub enum OpenReadError {
|
||||
/// Any kind of IO error that happens when
|
||||
/// interacting with the underlying IO device.
|
||||
IOError(IOError),
|
||||
/// The version of tantivy trying to read the index doesn't support its format
|
||||
IncompatibleIndex(Footer),
|
||||
}
|
||||
|
||||
impl From<IOError> for OpenReadError {
|
||||
@@ -183,6 +186,9 @@ impl fmt::Display for OpenReadError {
|
||||
"an io error occurred while opening a file for reading: '{}'",
|
||||
err
|
||||
),
|
||||
OpenReadError::IncompatibleIndex(ref footer) => {
|
||||
write!(f, "Incompatible index format: {:?}", footer)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -196,6 +202,7 @@ impl StdError for OpenReadError {
|
||||
match *self {
|
||||
OpenReadError::FileDoesNotExist(_) => None,
|
||||
OpenReadError::IOError(ref err) => Some(err),
|
||||
OpenReadError::IncompatibleIndex(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,181 +1,174 @@
|
||||
use crate::common::{BinarySerializable, CountingWriter, FixedSize, VInt};
|
||||
use crate::directory::read_only_source::ReadOnlySource;
|
||||
use crate::directory::{AntiCallToken, TerminatingWrite};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use crate::Version;
|
||||
use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
|
||||
use crc32fast::Hasher;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
|
||||
const COMMON_FOOTER_SIZE: usize = 4 * 5;
|
||||
|
||||
type CrcHashU32 = u32;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Footer {
|
||||
pub tantivy_version: (u32, u32, u32),
|
||||
pub version: Version,
|
||||
pub meta: String,
|
||||
pub versioned_footer: VersionedFooter,
|
||||
}
|
||||
|
||||
/// Serialises the footer to a byte-array
|
||||
/// - versioned_footer_len : 4bytes
|
||||
///- versioned_footer: variable bytes
|
||||
/// - meta_len: 4 bytes
|
||||
/// - meta: variable bytes
|
||||
/// - version_len: 4bytes
|
||||
/// - version json: variable bytes
|
||||
impl BinarySerializable for Footer {
|
||||
fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
BinarySerializable::serialize(&self.versioned_footer, writer)?;
|
||||
BinarySerializable::serialize(&self.meta, writer)?;
|
||||
let version_string =
|
||||
serde_json::to_string(&self.version).map_err(|_err| io::ErrorKind::InvalidInput)?;
|
||||
BinarySerializable::serialize(&version_string, writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
|
||||
let versioned_footer = VersionedFooter::deserialize(reader)?;
|
||||
let meta = String::deserialize(reader)?;
|
||||
let version_json = String::deserialize(reader)?;
|
||||
let version = serde_json::from_str(&version_json)?;
|
||||
Ok(Footer {
|
||||
version,
|
||||
meta,
|
||||
versioned_footer,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Footer {
|
||||
pub fn new(versioned_footer: VersionedFooter) -> Self {
|
||||
let tantivy_version = (
|
||||
env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
|
||||
env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
|
||||
env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
|
||||
);
|
||||
let version = crate::VERSION.clone();
|
||||
let meta = version.to_string();
|
||||
Footer {
|
||||
tantivy_version,
|
||||
meta: format!(
|
||||
"tantivy v{}.{}.{}, index_format v{}",
|
||||
tantivy_version.0,
|
||||
tantivy_version.1,
|
||||
tantivy_version.2,
|
||||
versioned_footer.version()
|
||||
),
|
||||
version,
|
||||
meta,
|
||||
versioned_footer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialises the footer to a byte-array
|
||||
/// [ versioned_footer | meta | common_footer ]
|
||||
/// [ 0..8 | 8..32 | 32..52 ]
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
let mut res = self.versioned_footer.to_bytes();
|
||||
res.extend_from_slice(self.meta.as_bytes());
|
||||
let len = res.len();
|
||||
res.resize(len + COMMON_FOOTER_SIZE, 0);
|
||||
let mut common_footer = &mut res[len..];
|
||||
LittleEndian::write_u32(&mut common_footer, self.meta.len() as u32);
|
||||
LittleEndian::write_u32(&mut common_footer[4..], self.tantivy_version.0);
|
||||
LittleEndian::write_u32(&mut common_footer[8..], self.tantivy_version.1);
|
||||
LittleEndian::write_u32(&mut common_footer[12..], self.tantivy_version.2);
|
||||
LittleEndian::write_u32(&mut common_footer[16..], (len + COMMON_FOOTER_SIZE) as u32);
|
||||
res
|
||||
}
|
||||
|
||||
pub fn from_bytes(data: &[u8]) -> Result<Self, io::Error> {
|
||||
let len = data.len();
|
||||
if len < COMMON_FOOTER_SIZE + 4 {
|
||||
// 4 bytes for index version, stored in versioned footer
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
format!("File corrupted. The footer len must be over 24, while the entire file len is {}", len)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
let size = LittleEndian::read_u32(&data[len - 4..]) as usize;
|
||||
if len < size as usize {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
format!(
|
||||
"The footer len is {}, while the entire file len is {}. \
|
||||
Your index is either corrupted or was built using a tantivy version\
|
||||
anterior to 0.11.",
|
||||
size, len
|
||||
),
|
||||
));
|
||||
}
|
||||
let footer = &data[len - size as usize..];
|
||||
let meta_len = LittleEndian::read_u32(&footer[size - COMMON_FOOTER_SIZE..]) as usize;
|
||||
let tantivy_major = LittleEndian::read_u32(&footer[size - 16..]);
|
||||
let tantivy_minor = LittleEndian::read_u32(&footer[size - 12..]);
|
||||
let tantivy_patch = LittleEndian::read_u32(&footer[size - 8..]);
|
||||
Ok(Footer {
|
||||
tantivy_version: (tantivy_major, tantivy_minor, tantivy_patch),
|
||||
meta: String::from_utf8_lossy(
|
||||
&footer[size - meta_len - COMMON_FOOTER_SIZE..size - COMMON_FOOTER_SIZE],
|
||||
)
|
||||
.into_owned(),
|
||||
versioned_footer: VersionedFooter::from_bytes(
|
||||
&footer[..size - meta_len - COMMON_FOOTER_SIZE],
|
||||
)?,
|
||||
})
|
||||
pub fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
|
||||
let mut counting_write = CountingWriter::wrap(&mut write);
|
||||
self.serialize(&mut counting_write)?;
|
||||
let written_len = counting_write.written_bytes();
|
||||
write.write_u32::<LittleEndian>(written_len as u32)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn extract_footer(source: ReadOnlySource) -> Result<(Footer, ReadOnlySource), io::Error> {
|
||||
let footer = Footer::from_bytes(source.as_slice())?;
|
||||
let reader = source.slice_to(source.as_slice().len() - footer.size());
|
||||
Ok((footer, reader))
|
||||
if source.len() < 4 {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
format!(
|
||||
"File corrupted. The file is smaller than 4 bytes (len={}).",
|
||||
source.len()
|
||||
),
|
||||
));
|
||||
}
|
||||
let (body_footer, footer_len_bytes) = source.split_from_end(u32::SIZE_IN_BYTES);
|
||||
let footer_len = LittleEndian::read_u32(footer_len_bytes.as_slice()) as usize;
|
||||
let body_len = body_footer.len() - footer_len;
|
||||
let (body, footer_data) = body_footer.split(body_len);
|
||||
let mut cursor = footer_data.as_slice();
|
||||
let footer = Footer::deserialize(&mut cursor)?;
|
||||
Ok((footer, body))
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
self.versioned_footer.size() as usize + self.meta.len() + COMMON_FOOTER_SIZE
|
||||
/// Confirms that the index will be read correctly by this version of tantivy
|
||||
/// Has to be called after `extract_footer` to make sure it's not accessing uninitialised memory
|
||||
pub fn is_compatible(&self) -> bool {
|
||||
let version = &*crate::VERSION;
|
||||
match &self.versioned_footer {
|
||||
VersionedFooter::V1 {
|
||||
crc32: _,
|
||||
compression,
|
||||
} => {
|
||||
return compression == &version.store_compression;
|
||||
}
|
||||
VersionedFooter::UnknownVersion { version: _ } => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Footer that includes a crc32 hash that enables us to checksum files in the index
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum VersionedFooter {
|
||||
UnknownVersion { version: u32, size: u32 },
|
||||
V0(CrcHashU32), // crc
|
||||
UnknownVersion {
|
||||
version: u32,
|
||||
},
|
||||
V1 {
|
||||
crc32: CrcHashU32,
|
||||
compression: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl BinarySerializable for VersionedFooter {
|
||||
fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
let mut buf = Vec::new();
|
||||
BinarySerializable::serialize(&self.version(), &mut buf)?;
|
||||
match self {
|
||||
VersionedFooter::V1 { crc32, compression } => {
|
||||
// Serializes a valid `VersionedFooter` or panics if the version is unknown
|
||||
// [ version | crc_hash | compression_mode ]
|
||||
// [ 0..4 | 4..8 | variable ]
|
||||
BinarySerializable::serialize(crc32, &mut buf)?;
|
||||
BinarySerializable::serialize(compression, &mut buf)?;
|
||||
}
|
||||
VersionedFooter::UnknownVersion { version: _ } => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"Cannot serialize an unknown versionned footer ",
|
||||
));
|
||||
}
|
||||
}
|
||||
BinarySerializable::serialize(&VInt(buf.len() as u64), writer)?;
|
||||
writer.write_all(&buf[..])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
|
||||
let len = VInt::deserialize(reader)?.0 as usize;
|
||||
let mut buf = vec![0u8; len];
|
||||
reader.read_exact(&mut buf[..])?;
|
||||
let mut cursor = &buf[..];
|
||||
let version = u32::deserialize(&mut cursor)?;
|
||||
if version == 1 {
|
||||
let crc32 = u32::deserialize(&mut cursor)?;
|
||||
let compression = String::deserialize(&mut cursor)?;
|
||||
Ok(VersionedFooter::V1 { crc32, compression })
|
||||
} else {
|
||||
Ok(VersionedFooter::UnknownVersion { version })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VersionedFooter {
|
||||
/// Serializes a valid `VersionedFooter` or panics if the version is unknown
|
||||
/// [ version | crc_hash ]
|
||||
/// [ 0..4 | 4..8 ]
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
match self {
|
||||
VersionedFooter::V0(crc) => {
|
||||
let mut buf = [0u8; 8];
|
||||
LittleEndian::write_u32(&mut buf[0..4], 0);
|
||||
LittleEndian::write_u32(&mut buf[4..8], *crc);
|
||||
buf.to_vec()
|
||||
}
|
||||
VersionedFooter::UnknownVersion { .. } => {
|
||||
panic!("Unsupported index should never get serialized");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_bytes(footer: &[u8]) -> Result<Self, io::Error> {
|
||||
assert!(footer.len() >= 4);
|
||||
if footer.len() < 4 {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"Footer should be more than 4 bytes.",
|
||||
));
|
||||
}
|
||||
let version = LittleEndian::read_u32(footer);
|
||||
match version {
|
||||
// the first 4 bytes should be zeroed out thus returning a `0`
|
||||
0 => {
|
||||
if footer.len() != 8 {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
format!(
|
||||
"File corrupted. The versioned footer len is {}, while it should be 8",
|
||||
footer.len()
|
||||
),
|
||||
));
|
||||
}
|
||||
Ok(VersionedFooter::V0(LittleEndian::read_u32(&footer[4..])))
|
||||
}
|
||||
version => Ok(VersionedFooter::UnknownVersion {
|
||||
version,
|
||||
size: footer.len() as u32,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u32 {
|
||||
match self {
|
||||
VersionedFooter::V0(_) => 8,
|
||||
VersionedFooter::UnknownVersion { size, .. } => *size,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn version(&self) -> u32 {
|
||||
match self {
|
||||
VersionedFooter::V0(_) => 0,
|
||||
VersionedFooter::V1 {
|
||||
crc32: _,
|
||||
compression: _,
|
||||
} => 1u32,
|
||||
VersionedFooter::UnknownVersion { version, .. } => *version,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn crc(&self) -> Option<CrcHashU32> {
|
||||
match self {
|
||||
VersionedFooter::V0(crc) => Some(*crc),
|
||||
VersionedFooter::V1 {
|
||||
crc32,
|
||||
compression: _,
|
||||
} => Some(*crc32),
|
||||
VersionedFooter::UnknownVersion { .. } => None,
|
||||
}
|
||||
}
|
||||
@@ -211,10 +204,13 @@ impl<W: TerminatingWrite> Write for FooterProxy<W> {
|
||||
|
||||
impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
|
||||
fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
|
||||
let crc = self.hasher.take().unwrap().finalize();
|
||||
let footer = Footer::new(VersionedFooter::V0(crc)).to_bytes();
|
||||
let crc32 = self.hasher.take().unwrap().finalize();
|
||||
let footer = Footer::new(VersionedFooter::V1 {
|
||||
crc32,
|
||||
compression: crate::store::COMPRESSION.to_string(),
|
||||
});
|
||||
let mut writer = self.writer.take().unwrap();
|
||||
writer.write_all(&footer)?;
|
||||
footer.append_footer(&mut writer)?;
|
||||
writer.terminate()
|
||||
}
|
||||
}
|
||||
@@ -222,56 +218,106 @@ impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
|
||||
#[cfg(test)]
mod tests {

    use super::CrcHashU32;
    use super::FooterProxy;
    use crate::common::BinarySerializable;
    use crate::directory::footer::{Footer, VersionedFooter};
    use crate::directory::TerminatingWrite;
    use byteorder::{ByteOrder, LittleEndian};
    use regex::Regex;

    /// Terminating an empty `FooterProxy` writes nothing but the footer, which must
    /// describe the running build.
    #[test]
    fn test_footer_version() {
        let mut vec = Vec::new();
        let footer_proxy = FooterProxy::new(&mut vec);
        assert!(footer_proxy.terminate().is_ok());
        assert_eq!(vec.len(), 167);
        let footer = Footer::deserialize(&mut &vec[..]).unwrap();
        assert_eq!(
            footer.versioned_footer.version(),
            crate::INDEX_FORMAT_VERSION
        );
        assert_eq!(&footer.version, crate::version());
    }

    /// A footer survives a serialize / deserialize round trip unchanged.
    #[test]
    fn test_serialize_deserialize_footer() {
        let mut buffer = Vec::new();
        let crc32 = 123456u32;
        let footer: Footer = Footer::new(VersionedFooter::V1 {
            crc32,
            compression: "lz4".to_string(),
        });
        footer.serialize(&mut buffer).unwrap();
        let footer_deser = Footer::deserialize(&mut &buffer[..]).unwrap();
        assert_eq!(footer_deser, footer);
    }

    /// Checks the serialized size of a `V1` versioned footer and the shape of `meta`.
    #[test]
    fn footer_length() {
        let crc32 = 1111111u32;
        let versioned_footer = VersionedFooter::V1 {
            crc32,
            compression: "lz4".to_string(),
        };
        let mut buf = Vec::new();
        versioned_footer.serialize(&mut buf).unwrap();
        // 1 (payload length) + 4 (version) + 4 (crc32) + 1 (string length) + 3 ("lz4")
        assert_eq!(buf.len(), 13);
        let footer = Footer::new(versioned_footer);
        let regex_ptn = Regex::new(
            "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
        )
        .unwrap();
        assert!(regex_ptn.is_match(&footer.meta));
    }

    /// Hand-written bytes decode to the expected `V1` footer and re-encode identically.
    #[test]
    fn versioned_footer_from_bytes() {
        let v_footer_bytes = vec![
            // versionned footer length
            12 | 128,
            // index format version
            1,
            0,
            0,
            0,
            // crc 32
            12,
            35,
            89,
            18,
            // compression format
            3 | 128,
            b'l',
            b'z',
            b'4',
        ];
        let mut cursor = &v_footer_bytes[..];
        let versioned_footer = VersionedFooter::deserialize(&mut cursor).unwrap();
        assert!(cursor.is_empty());
        let expected_crc: u32 = LittleEndian::read_u32(&v_footer_bytes[5..9]) as CrcHashU32;
        let expected_versioned_footer: VersionedFooter = VersionedFooter::V1 {
            crc32: expected_crc,
            compression: "lz4".to_string(),
        };
        assert_eq!(versioned_footer, expected_versioned_footer);
        let mut buffer = Vec::new();
        assert!(versioned_footer.serialize(&mut buffer).is_ok());
        assert_eq!(&v_footer_bytes[..], &buffer[..]);
    }

    /// A footer with an unknown version can be read, but serializing it is an error.
    /// (Despite its name, this test no longer expects a panic.)
    #[test]
    fn versioned_footer_panic() {
        let v_footer_bytes = vec![6u8 | 128u8, 3u8, 0u8, 0u8, 1u8, 0u8, 0u8];
        let mut b = &v_footer_bytes[..];
        let versioned_footer = VersionedFooter::deserialize(&mut b).unwrap();
        assert!(b.is_empty());
        let expected_versioned_footer = VersionedFooter::UnknownVersion {
            version: 16_777_219u32,
        };
        assert_eq!(versioned_footer, expected_versioned_footer);
        let mut buf = Vec::new();
        assert!(versioned_footer.serialize(&mut buf).is_err());
    }
}
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::directory::{WatchCallback, WatchHandle};
|
||||
use crate::error::DataCorruption;
|
||||
use crate::Directory;
|
||||
use crate::Result;
|
||||
|
||||
use crc32fast::Hasher;
|
||||
use serde_json;
|
||||
use std::collections::HashSet;
|
||||
@@ -88,6 +89,9 @@ impl ManagedDirectory {
|
||||
meta_informations: Arc::default(),
|
||||
}),
|
||||
Err(OpenReadError::IOError(e)) => Err(From::from(e)),
|
||||
Err(OpenReadError::IncompatibleIndex(footer)) => {
|
||||
Err(crate::Error::IncompatibleIndex(format!("{:?}", footer)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -261,8 +265,11 @@ impl ManagedDirectory {
|
||||
impl Directory for ManagedDirectory {
|
||||
fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
|
||||
let read_only_source = self.directory.open_read(path)?;
|
||||
let (_footer, reader) = Footer::extract_footer(read_only_source)
|
||||
let (footer, reader) = Footer::extract_footer(read_only_source)
|
||||
.map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
|
||||
if !footer.is_compatible() {
|
||||
return Err(OpenReadError::IncompatibleIndex(footer));
|
||||
}
|
||||
Ok(reader)
|
||||
}
|
||||
|
||||
@@ -409,6 +416,8 @@ mod tests_mmap_specific {
|
||||
write.write_all(&[3u8, 4u8, 5u8]).unwrap();
|
||||
write.terminate().unwrap();
|
||||
|
||||
let read_source = managed_directory.open_read(test_path2).unwrap();
|
||||
assert_eq!(read_source.as_slice(), &[3u8, 4u8, 5u8]);
|
||||
assert!(managed_directory.list_damaged().unwrap().is_empty());
|
||||
|
||||
let mut corrupted_path = tempdir_path.clone();
|
||||
|
||||
@@ -78,6 +78,13 @@ impl<W: TerminatingWrite> TerminatingWrite for BufWriter<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test-only implementation: terminating an in-memory buffer only flushes it.
#[cfg(test)]
impl<'a> TerminatingWrite for &'a mut Vec<u8> {
    fn terminate_ref(&mut self, _token: AntiCallToken) -> io::Result<()> {
        io::Write::flush(self)
    }
}
|
||||
|
||||
/// Write object for Directory.
|
||||
///
|
||||
/// `WritePtr` are required to implement both Write
|
||||
|
||||
@@ -70,6 +70,12 @@ impl ReadOnlySource {
|
||||
(left, right)
|
||||
}
|
||||
|
||||
/// Splits into 2 `ReadOnlySource`, at the offset `end - right_len`.
|
||||
pub fn split_from_end(self, right_len: usize) -> (ReadOnlySource, ReadOnlySource) {
|
||||
let left_len = self.len() - right_len;
|
||||
self.split(left_len)
|
||||
}
|
||||
|
||||
/// Creates a ReadOnlySource that is just a
|
||||
/// view over a slice of the data.
|
||||
///
|
||||
|
||||
@@ -80,6 +80,12 @@ pub enum TantivyError {
|
||||
/// System error. (e.g.: We failed spawning a new thread)
|
||||
#[fail(display = "System error.'{}'", _0)]
|
||||
SystemError(String),
|
||||
/// Index incompatible with current version of tantivy
|
||||
#[fail(
|
||||
display = "Current version of tantivy is incompatible with index version: '{}'",
|
||||
_0
|
||||
)]
|
||||
IncompatibleIndex(String),
|
||||
}
|
||||
|
||||
impl From<DataCorruption> for TantivyError {
|
||||
@@ -129,6 +135,9 @@ impl From<OpenReadError> for TantivyError {
|
||||
match error {
|
||||
OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath),
|
||||
OpenReadError::IOError(io_error) => TantivyError::IOError(io_error),
|
||||
OpenReadError::IncompatibleIndex(tantivy_err) => {
|
||||
TantivyError::IncompatibleIndex(format!("{:?}", tantivy_err))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
67
src/lib.rs
Executable file → Normal file
67
src/lib.rs
Executable file → Normal file
@@ -160,7 +160,6 @@ pub use self::snippet::{Snippet, SnippetGenerator};
|
||||
|
||||
mod docset;
|
||||
pub use self::docset::{DocSet, SkipResult};
|
||||
|
||||
pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
|
||||
pub use crate::core::SegmentComponent;
|
||||
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
|
||||
@@ -170,11 +169,57 @@ pub use crate::indexer::IndexWriter;
|
||||
pub use crate::postings::Postings;
|
||||
pub use crate::reader::LeasedItem;
|
||||
pub use crate::schema::{Document, Term};
|
||||
use std::fmt;
|
||||
|
||||
/// Expose the current version of tantivy, as well
|
||||
/// whether it was compiled with the simd compression.
|
||||
pub fn version() -> &'static str {
|
||||
env!("CARGO_PKG_VERSION")
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
const INDEX_FORMAT_VERSION: u32 = 1;
|
||||
|
||||
/// Structure version for the index.
|
||||
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct Version {
|
||||
major: u32,
|
||||
minor: u32,
|
||||
patch: u32,
|
||||
index_format_version: u32,
|
||||
store_compression: String,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Version {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
static VERSION: Lazy<Version> = Lazy::new(|| Version {
|
||||
major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
|
||||
minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
|
||||
patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
|
||||
index_format_version: INDEX_FORMAT_VERSION,
|
||||
store_compression: crate::store::COMPRESSION.to_string(),
|
||||
});
|
||||
|
||||
impl ToString for Version {
|
||||
fn to_string(&self) -> String {
|
||||
format!(
|
||||
"tantivy v{}.{}.{}, index_format v{}, store_compression: {}",
|
||||
self.major, self.minor, self.patch, self.index_format_version, self.store_compression
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
static VERSION_STRING: Lazy<String> = Lazy::new(|| VERSION.to_string());
|
||||
|
||||
/// Expose the current version of tantivy as found in Cargo.toml during compilation.
|
||||
/// eg. "0.11.0" as well as the compression scheme used in the docstore.
|
||||
pub fn version() -> &'static Version {
|
||||
&VERSION
|
||||
}
|
||||
|
||||
/// Exposes the complete version of tantivy as found in Cargo.toml during compilation as a string.
|
||||
/// eg. "tantivy v0.11.0, index_format v1, store_compression: lz4".
|
||||
pub fn version_string() -> &'static str {
|
||||
VERSION_STRING.as_str()
|
||||
}
|
||||
|
||||
/// Defines tantivy's merging strategy
|
||||
@@ -287,6 +332,18 @@ mod tests {
|
||||
sample_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
/// The rendered version starts with the crate version followed by the index format version.
#[test]
#[cfg(not(feature = "lz4"))]
fn test_version_string() {
    use regex::Regex;
    let rendered = super::version().to_string();
    let pattern = Regex::new(
        "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
    )
    .unwrap();
    assert!(pattern.is_match(&rendered));
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "mmap")]
|
||||
fn test_indexing() {
|
||||
|
||||
@@ -36,11 +36,10 @@ struct Positions {
|
||||
|
||||
impl Positions {
|
||||
pub fn new(position_source: ReadOnlySource, skip_source: ReadOnlySource) -> Positions {
|
||||
let skip_len = skip_source.len();
|
||||
let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES);
|
||||
let (body, footer) = skip_source.split_from_end(u32::SIZE_IN_BYTES);
|
||||
let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted");
|
||||
let body_split = body.len() - u64::SIZE_IN_BYTES * (num_long_skips as usize);
|
||||
let (skip_source, long_skip_source) = body.split(body_split);
|
||||
let (skip_source, long_skip_source) =
|
||||
body.split_from_end(u64::SIZE_IN_BYTES * (num_long_skips as usize));
|
||||
Positions {
|
||||
bit_packer: BitPacker4x::new(),
|
||||
skip_source,
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
extern crate lz4;
|
||||
|
||||
use std::io::{self, Read, Write};
|
||||
|
||||
/// Name of the compression scheme used in the doc store.
|
||||
///
|
||||
/// This name is appended to the version string of tantivy.
|
||||
pub const COMPRESSION: &'static str = "lz4";
|
||||
|
||||
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
compressed.clear();
|
||||
let mut encoder = lz4::EncoderBuilder::new().build(compressed)?;
|
||||
|
||||
@@ -2,6 +2,11 @@ use snap;
|
||||
|
||||
use std::io::{self, Read, Write};
|
||||
|
||||
/// Name of the compression scheme used in the doc store.
|
||||
///
|
||||
/// This name is appended to the version string of tantivy.
|
||||
pub const COMPRESSION: &'static str = "snappy";
|
||||
|
||||
pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
|
||||
compressed.clear();
|
||||
let mut encoder = snap::Writer::new(compressed);
|
||||
|
||||
@@ -42,12 +42,16 @@ pub use self::writer::StoreWriter;
|
||||
#[cfg(feature = "lz4")]
|
||||
mod compression_lz4;
|
||||
#[cfg(feature = "lz4")]
|
||||
use self::compression_lz4::*;
|
||||
pub use self::compression_lz4::COMPRESSION;
|
||||
#[cfg(feature = "lz4")]
|
||||
use self::compression_lz4::{compress, decompress};
|
||||
|
||||
#[cfg(not(feature = "lz4"))]
|
||||
mod compression_snap;
|
||||
#[cfg(not(feature = "lz4"))]
|
||||
use self::compression_snap::*;
|
||||
pub use self::compression_snap::COMPRESSION;
|
||||
#[cfg(not(feature = "lz4"))]
|
||||
use self::compression_snap::{compress, decompress};
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
|
||||
Reference in New Issue
Block a user