Files
tantivy/src/store/mod.rs
petr-tik 431c187a60 Make error handling richer in Footer::is_compatible (#724)
* WIP implemented is_compatible

hide Footer::from_bytes from public consumption - only found Footer::extract
used outside the module

Add a new error type for IncompatibleIndex
add a prototypical call to footer.is_compatible() in ManagedDirectory::open_read
to make sure we error before reading it further

* Make error handling more ergonomic

Add an error subtype for OpenReadError and converters to TantivyError

* Remove an unnecessary assert

it's followed by the same check, which returns an error instead of panicking

* Correct the compatibility check logic

Leave a defensive versioned footer check to make sure we add new logic handling
when we add possible footer versions

Restricted VersionedFooter::from_bytes to be used inside the crate only

remove a half-baked test

* WIP.

* Return an error if index incompatible - closes #662

Enrich the error type with incompatibility

Change return type to Result<bool, TantivyError>, instead of bool

Add an Incompatibility enum that enriches the IncompatibleIndex error variant
with information, which then allows us to generate a developer-friendly hint how
to upgrade library version or switch feature flags for a different compression
algorithm

Updated changelog

Change the signature of is_compatible

Added documentation to the Incompatibility
Added a conditional test on a Footer with lz4 erroring
2019-12-14 09:14:33 +09:00

162 lines
5.4 KiB
Rust

/*!
Compressed/slow/row-oriented storage for documents.
A field needs to be marked as stored in the schema in
order to be handled in the `Store`.
Internally, documents (or rather their stored fields) are serialized to a buffer.
When the buffer exceeds 16K, the buffer is compressed (using `LZ4` when the
`lz4` feature is enabled, and the `compression_snap` codec otherwise)
and the resulting block is written to disk.
One can then request a specific `DocId`.
A skip list helps navigate to the right block, which is then
decompressed entirely in order to return the document within it.
If the last document requested was in the same block,
the reader is smart enough to avoid decompressing
the block a second time, but there is no real
*uncompressed block* cache.
A typical use case for the store is, once
the search result page has been computed, returning
the actual content of the 10 best documents.
# Usage
Most users should not access the `StoreReader` directly
and should rely on either
- at the segment level, the
[`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
- at the index level, the
[`Searcher`'s `doc` method](../struct.Searcher.html#method.doc)
!*/
mod reader;
mod skiplist;
mod writer;
pub use self::reader::StoreReader;
pub use self::writer::StoreWriter;
#[cfg(feature = "lz4")]
mod compression_lz4;
#[cfg(feature = "lz4")]
pub use self::compression_lz4::COMPRESSION;
#[cfg(feature = "lz4")]
use self::compression_lz4::{compress, decompress};
#[cfg(not(feature = "lz4"))]
mod compression_snap;
#[cfg(not(feature = "lz4"))]
pub use self::compression_snap::COMPRESSION;
#[cfg(not(feature = "lz4"))]
use self::compression_snap::{compress, decompress};
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::directory::{Directory, RAMDirectory, WritePtr};
    use crate::schema::{Document, FieldValue, Schema, TextOptions};
    use std::path::Path;

    /// Fills a store with `num_docs` documents and returns the schema they were built with.
    ///
    /// Every document carries two stored text fields:
    /// - `body`: the same lorem ipsum paragraph for all documents,
    /// - `title`: `"Doc {i}"`, where `i` is the position of the document (starting at 0).
    ///
    /// The store writer is closed before the function returns.
    ///
    /// # Panics
    ///
    /// Panics if storing a document or closing the writer fails.
    pub fn write_lorem_ipsum_store(writer: WritePtr, num_docs: usize) -> Schema {
        let mut builder = Schema::builder();
        let body = builder.add_text_field("body", TextOptions::default().set_stored());
        let title = builder.add_text_field("title", TextOptions::default().set_stored());
        let schema = builder.build();

        let lorem = String::from(
            "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \
             do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
             Ut enim ad minim veniam, quis nostrud exercitation ullamco \
             laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \
             dolor in reprehenderit in voluptate velit esse cillum dolore eu \
             fugiat nulla pariatur. Excepteur sint occaecat cupidatat non \
             proident, sunt in culpa qui officia deserunt mollit anim id est \
             laborum.",
        );

        let mut store_writer = StoreWriter::new(writer);
        for doc_idx in 0..num_docs {
            // Body first, then title: the same field order for every document.
            let fields: Vec<FieldValue> = vec![
                FieldValue::new(body, From::from(lorem.clone())),
                FieldValue::new(title, From::from(format!("Doc {}", doc_idx))),
            ];
            let doc = Document::from(fields);
            store_writer.store(&doc).unwrap();
        }
        store_writer.close().unwrap();

        schema
    }

    /// Writes 1,000 documents to an in-memory store, reads each one back by id
    /// and checks that its `title` field is the expected `"Doc {i}"`.
    #[test]
    fn test_store() {
        let path = Path::new("store");
        let mut directory = RAMDirectory::create();

        let schema = write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000);
        let field_title = schema.get_field("title").unwrap();

        let store = StoreReader::from_source(directory.open_read(path).unwrap());
        for i in 0..1_000 {
            let doc = store.get(i).unwrap();
            let title = doc.get_first(field_title).unwrap().text().unwrap();
            assert_eq!(*title, format!("Doc {}", i));
        }
    }
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
    use super::tests::write_lorem_ipsum_store;
    use crate::directory::Directory;
    use crate::directory::RAMDirectory;
    use crate::store::StoreReader;
    use std::path::Path;
    use test::Bencher;

    /// Measures writing a 1,000-document lorem ipsum store into a `RAMDirectory`.
    ///
    /// This benchmark only touches `RAMDirectory`, exactly like
    /// `bench_store_decode`, so it is not gated on the `mmap` feature.
    #[bench]
    fn bench_store_encode(b: &mut Bencher) {
        let mut directory = RAMDirectory::create();
        let path = Path::new("store");
        b.iter(|| {
            write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000);
            // Remove the file at the end of each iteration; presumably this lets the
            // next `open_write` on the same path start from a clean slate.
            directory.delete(path).unwrap();
        });
    }

    /// Measures fetching a single document (id 12) from a 1,000-document store.
    ///
    /// The store is written and opened once, outside of the measured closure.
    #[bench]
    fn bench_store_decode(b: &mut Bencher) {
        let mut directory = RAMDirectory::create();
        let path = Path::new("store");
        write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000);
        let store_source = directory.open_read(path).unwrap();
        let store = StoreReader::from_source(store_source);
        // Return the document to the bencher instead of discarding it, so the
        // read cannot be optimized away as unused work.
        b.iter(|| store.get(12).unwrap());
    }
}