store DateTime as nanoseconds in doc store (#2486)

* store DateTime as nanoseconds in doc store

Previously, the doc store DateTime was truncated to microseconds. This
removes that truncation while keeping backwards compatibility.

This is done by adding the trait `ConfigurableBinarySerializable`, which
works like `BinarySerializable` but takes a config that currently allows
serializing/deserializing with a different date time precision.

Bump the index format version to 7.
Add a compatibility test to check the date time truncation.

* remove configurable binary serialize, add enum for doc store version

* test doc store version ord
This commit is contained in:
PSeitz
2024-10-18 10:50:20 +08:00
committed by GitHub
parent d152e29687
commit 2f2db16ec1
22 changed files with 246 additions and 89 deletions

View File

@@ -20,7 +20,7 @@ pub use datetime::{DateTime, DateTimePrecision};
pub use group_by::GroupByIteratorExtended;
pub use json_path_writer::JsonPathWriter;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use serialize::*;
pub use vint::{
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
};

View File

@@ -74,14 +74,14 @@ impl FixedSize for () {
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
VInt(self.len() as u64).serialize(writer)?;
BinarySerializable::serialize(&VInt(self.len() as u64), writer)?;
for it in self {
it.serialize(writer)?;
}
Ok(())
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
let num_items = VInt::deserialize(reader)?.val();
let num_items = <VInt as BinarySerializable>::deserialize(reader)?.val();
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
let item = T::deserialize(reader)?;
@@ -236,12 +236,12 @@ impl FixedSize for bool {
impl BinarySerializable for String {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
let data: &[u8] = self.as_bytes();
VInt(data.len() as u64).serialize(writer)?;
BinarySerializable::serialize(&VInt(data.len() as u64), writer)?;
writer.write_all(data)
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
let string_length = VInt::deserialize(reader)?.val() as usize;
let string_length = <VInt as BinarySerializable>::deserialize(reader)?.val() as usize;
let mut result = String::with_capacity(string_length);
reader
.take(string_length as u64)
@@ -253,12 +253,12 @@ impl BinarySerializable for String {
impl<'a> BinarySerializable for Cow<'a, str> {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
let data: &[u8] = self.as_bytes();
VInt(data.len() as u64).serialize(writer)?;
BinarySerializable::serialize(&VInt(data.len() as u64), writer)?;
writer.write_all(data)
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
let string_length = VInt::deserialize(reader)?.val() as usize;
let string_length = <VInt as BinarySerializable>::deserialize(reader)?.val() as usize;
let mut result = String::with_capacity(string_length);
reader
.take(string_length as u64)
@@ -269,18 +269,18 @@ impl<'a> BinarySerializable for Cow<'a, str> {
impl<'a> BinarySerializable for Cow<'a, [u8]> {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
VInt(self.len() as u64).serialize(writer)?;
BinarySerializable::serialize(&VInt(self.len() as u64), writer)?;
for it in self.iter() {
it.serialize(writer)?;
BinarySerializable::serialize(it, writer)?;
}
Ok(())
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
let num_items = VInt::deserialize(reader)?.val();
let num_items = <VInt as BinarySerializable>::deserialize(reader)?.val();
let mut items: Vec<u8> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
let item = u8::deserialize(reader)?;
let item = <u8 as BinarySerializable>::deserialize(reader)?;
items.push(item);
}
Ok(Cow::Owned(items))

View File

@@ -44,8 +44,19 @@ fn test_format_6() {
assert_date_time_precision(&index, DateTimePrecision::Microseconds);
}
/// feature flag quickwit uses a different dictionary type
#[test]
#[cfg(not(feature = "quickwit"))]
fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
fn test_format_7() {
let path = path_for_version("7");
let index = Index::open_in_dir(path).expect("Failed to open index");
// dates are not truncated in v7 in the docstore
assert_date_time_precision(&index, DateTimePrecision::Nanoseconds);
}
#[cfg(not(feature = "quickwit"))]
fn assert_date_time_precision(index: &Index, doc_store_precision: DateTimePrecision) {
use collector::TopDocs;
let reader = index.reader().expect("Failed to create reader");
let searcher = reader.searcher();
@@ -75,6 +86,6 @@ fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
.as_datetime()
.unwrap();
let expected = DateTime::from_timestamp_nanos(123456).truncate(precision);
let expected = DateTime::from_timestamp_nanos(123456).truncate(doc_store_precision);
assert_eq!(date_value, expected,);
}

View File

@@ -232,7 +232,7 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
pub use crate::schema::{Document, TantivyDocument, Term};
/// Index format version.
pub const INDEX_FORMAT_VERSION: u32 = 6;
pub const INDEX_FORMAT_VERSION: u32 = 7;
/// Oldest index format version this tantivy version can read.
pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;

View File

@@ -22,6 +22,7 @@ use super::se::BinaryObjectSerializer;
use super::{OwnedValue, Value};
use crate::schema::document::type_codes;
use crate::schema::{Facet, Field};
use crate::store::DocStoreVersion;
use crate::tokenizer::PreTokenizedString;
#[derive(Debug, thiserror::Error, Clone)]
@@ -45,6 +46,9 @@ pub enum DeserializeError {
#[error("{0}")]
/// A custom error message.
Custom(String),
#[error("Version {0}, Max version supported: {1}")]
/// Unsupported version error.
UnsupportedVersion(u32, u32),
}
impl DeserializeError {
@@ -291,6 +295,7 @@ pub trait ObjectAccess<'de> {
pub struct BinaryDocumentDeserializer<'de, R> {
length: usize,
position: usize,
doc_store_version: DocStoreVersion,
reader: &'de mut R,
}
@@ -298,12 +303,16 @@ impl<'de, R> BinaryDocumentDeserializer<'de, R>
where R: Read
{
/// Attempts to create a new document deserializer from a given reader.
pub(crate) fn from_reader(reader: &'de mut R) -> Result<Self, DeserializeError> {
pub(crate) fn from_reader(
reader: &'de mut R,
doc_store_version: DocStoreVersion,
) -> Result<Self, DeserializeError> {
let length = VInt::deserialize(reader)?;
Ok(Self {
length: length.val() as usize,
position: 0,
doc_store_version,
reader,
})
}
@@ -329,8 +338,8 @@ where R: Read
}
let field = Field::deserialize(self.reader).map_err(DeserializeError::from)?;
let deserializer = BinaryValueDeserializer::from_reader(self.reader)?;
let deserializer =
BinaryValueDeserializer::from_reader(self.reader, self.doc_store_version)?;
let value = V::deserialize(deserializer)?;
self.position += 1;
@@ -344,13 +353,17 @@ where R: Read
pub struct BinaryValueDeserializer<'de, R> {
value_type: ValueType,
reader: &'de mut R,
doc_store_version: DocStoreVersion,
}
impl<'de, R> BinaryValueDeserializer<'de, R>
where R: Read
{
/// Attempts to create a new value deserializer from a given reader.
fn from_reader(reader: &'de mut R) -> Result<Self, DeserializeError> {
fn from_reader(
reader: &'de mut R,
doc_store_version: DocStoreVersion,
) -> Result<Self, DeserializeError> {
let type_code = <u8 as BinarySerializable>::deserialize(reader)?;
let value_type = match type_code {
@@ -391,7 +404,11 @@ where R: Read
}
};
Ok(Self { value_type, reader })
Ok(Self {
value_type,
reader,
doc_store_version,
})
}
fn validate_type(&self, expected_type: ValueType) -> Result<(), DeserializeError> {
@@ -438,7 +455,16 @@ where R: Read
fn deserialize_datetime(self) -> Result<DateTime, DeserializeError> {
self.validate_type(ValueType::DateTime)?;
<DateTime as BinarySerializable>::deserialize(self.reader).map_err(DeserializeError::from)
match self.doc_store_version {
DocStoreVersion::V1 => {
let timestamp_micros = <i64 as BinarySerializable>::deserialize(self.reader)?;
Ok(DateTime::from_timestamp_micros(timestamp_micros))
}
DocStoreVersion::V2 => {
let timestamp_nanos = <i64 as BinarySerializable>::deserialize(self.reader)?;
Ok(DateTime::from_timestamp_nanos(timestamp_nanos))
}
}
}
fn deserialize_facet(self) -> Result<Facet, DeserializeError> {
@@ -514,11 +540,13 @@ where R: Read
visitor.visit_pre_tokenized_string(val)
}
ValueType::Array => {
let access = BinaryArrayDeserializer::from_reader(self.reader)?;
let access =
BinaryArrayDeserializer::from_reader(self.reader, self.doc_store_version)?;
visitor.visit_array(access)
}
ValueType::Object => {
let access = BinaryObjectDeserializer::from_reader(self.reader)?;
let access =
BinaryObjectDeserializer::from_reader(self.reader, self.doc_store_version)?;
visitor.visit_object(access)
}
#[allow(deprecated)]
@@ -537,7 +565,8 @@ where R: Read
let out_rc = std::rc::Rc::new(out);
let mut slice: &[u8] = &out_rc;
let access = BinaryObjectDeserializer::from_reader(&mut slice)?;
let access =
BinaryObjectDeserializer::from_reader(&mut slice, self.doc_store_version)?;
visitor.visit_object(access)
}
@@ -551,19 +580,24 @@ pub struct BinaryArrayDeserializer<'de, R> {
length: usize,
position: usize,
reader: &'de mut R,
doc_store_version: DocStoreVersion,
}
impl<'de, R> BinaryArrayDeserializer<'de, R>
where R: Read
{
/// Attempts to create a new array deserializer from a given reader.
fn from_reader(reader: &'de mut R) -> Result<Self, DeserializeError> {
fn from_reader(
reader: &'de mut R,
doc_store_version: DocStoreVersion,
) -> Result<Self, DeserializeError> {
let length = <VInt as BinarySerializable>::deserialize(reader)?;
Ok(Self {
length: length.val() as usize,
position: 0,
reader,
doc_store_version,
})
}
@@ -587,7 +621,8 @@ where R: Read
return Ok(None);
}
let deserializer = BinaryValueDeserializer::from_reader(self.reader)?;
let deserializer =
BinaryValueDeserializer::from_reader(self.reader, self.doc_store_version)?;
let value = V::deserialize(deserializer)?;
// Advance the position cursor.
@@ -610,8 +645,11 @@ impl<'de, R> BinaryObjectDeserializer<'de, R>
where R: Read
{
/// Attempts to create a new object deserializer from a given reader.
fn from_reader(reader: &'de mut R) -> Result<Self, DeserializeError> {
let inner = BinaryArrayDeserializer::from_reader(reader)?;
fn from_reader(
reader: &'de mut R,
doc_store_version: DocStoreVersion,
) -> Result<Self, DeserializeError> {
let inner = BinaryArrayDeserializer::from_reader(reader, doc_store_version)?;
Ok(Self { inner })
}
}
@@ -819,6 +857,7 @@ mod tests {
use crate::schema::document::existing_type_impls::JsonObjectIter;
use crate::schema::document::se::BinaryValueSerializer;
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf};
use crate::store::DOC_STORE_VERSION;
fn serialize_value<'a>(value: ReferenceValue<'a, &'a serde_json::Value>) -> Vec<u8> {
let mut writer = Vec::new();
@@ -829,9 +868,19 @@ mod tests {
writer
}
fn serialize_owned_value<'a>(value: ReferenceValue<'a, &'a OwnedValue>) -> Vec<u8> {
let mut writer = Vec::new();
let mut serializer = BinaryValueSerializer::new(&mut writer);
serializer.serialize_value(value).expect("Serialize value");
writer
}
fn deserialize_value(buffer: Vec<u8>) -> crate::schema::OwnedValue {
let mut cursor = Cursor::new(buffer);
let deserializer = BinaryValueDeserializer::from_reader(&mut cursor).unwrap();
let deserializer =
BinaryValueDeserializer::from_reader(&mut cursor, DOC_STORE_VERSION).unwrap();
crate::schema::OwnedValue::deserialize(deserializer).expect("Deserialize value")
}
@@ -1010,6 +1059,17 @@ mod tests {
assert_eq!(value, expected_val);
}
#[test]
fn test_nested_date_precision() {
let object = OwnedValue::Object(vec![(
"my-date".into(),
OwnedValue::Date(DateTime::from_timestamp_nanos(323456)),
)]);
let result = serialize_owned_value((&object).as_value());
let value = deserialize_value(result);
assert_eq!(value, object);
}
#[test]
fn test_nested_serialize() {
let mut object = serde_json::Map::new();

View File

@@ -81,6 +81,15 @@ where W: Write
Self { writer }
}
fn serialize_with_type_code<T: BinarySerializable>(
&mut self,
code: u8,
val: &T,
) -> io::Result<()> {
self.write_type_code(code)?;
BinarySerializable::serialize(val, self.writer)
}
/// Attempts to serialize a given value and write the output
/// to the writer.
pub(crate) fn serialize_value<'a, V>(
@@ -94,56 +103,38 @@ where W: Write
ReferenceValue::Leaf(leaf) => match leaf {
ReferenceValueLeaf::Null => self.write_type_code(type_codes::NULL_CODE),
ReferenceValueLeaf::Str(val) => {
self.write_type_code(type_codes::TEXT_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
self.serialize_with_type_code(type_codes::TEXT_CODE, &Cow::Borrowed(val))
}
ReferenceValueLeaf::U64(val) => {
self.write_type_code(type_codes::U64_CODE)?;
val.serialize(self.writer)
self.serialize_with_type_code(type_codes::U64_CODE, &val)
}
ReferenceValueLeaf::I64(val) => {
self.write_type_code(type_codes::I64_CODE)?;
val.serialize(self.writer)
self.serialize_with_type_code(type_codes::I64_CODE, &val)
}
ReferenceValueLeaf::F64(val) => {
self.write_type_code(type_codes::F64_CODE)?;
f64_to_u64(val).serialize(self.writer)
self.serialize_with_type_code(type_codes::F64_CODE, &f64_to_u64(val))
}
ReferenceValueLeaf::Date(val) => {
self.write_type_code(type_codes::DATE_CODE)?;
val.serialize(self.writer)
}
ReferenceValueLeaf::Facet(val) => {
self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;
Cow::Borrowed(val).serialize(self.writer)
let timestamp_nanos: i64 = val.into_timestamp_nanos();
BinarySerializable::serialize(&timestamp_nanos, self.writer)
}
ReferenceValueLeaf::Facet(val) => self.serialize_with_type_code(
type_codes::HIERARCHICAL_FACET_CODE,
&Cow::Borrowed(val),
),
ReferenceValueLeaf::Bytes(val) => {
self.write_type_code(type_codes::BYTES_CODE)?;
let temp_val = Cow::Borrowed(val);
temp_val.serialize(self.writer)
self.serialize_with_type_code(type_codes::BYTES_CODE, &Cow::Borrowed(val))
}
ReferenceValueLeaf::IpAddr(val) => {
self.write_type_code(type_codes::IP_CODE)?;
val.to_u128().serialize(self.writer)
self.serialize_with_type_code(type_codes::IP_CODE, &val.to_u128())
}
ReferenceValueLeaf::Bool(val) => {
self.write_type_code(type_codes::BOOL_CODE)?;
val.serialize(self.writer)
self.serialize_with_type_code(type_codes::BOOL_CODE, &val)
}
ReferenceValueLeaf::PreTokStr(val) => {
self.write_type_code(type_codes::EXT_CODE)?;
self.write_type_code(type_codes::TOK_STR_EXT_CODE)?;
val.serialize(self.writer)
self.serialize_with_type_code(type_codes::TOK_STR_EXT_CODE, &*val)
}
},
ReferenceValue::Array(elements) => {
@@ -306,7 +297,6 @@ where W: Write
mod tests {
use std::collections::BTreeMap;
use common::DateTime;
use serde_json::Number;
use tokenizer_api::Token;
@@ -337,7 +327,10 @@ mod tests {
$ext_code.serialize(&mut writer).unwrap();
)?
$value.serialize(&mut writer).unwrap();
BinarySerializable::serialize(
&$value,
&mut writer,
).unwrap();
)*
writer
@@ -355,7 +348,10 @@ mod tests {
$ext_code.serialize(&mut writer).unwrap();
)?
$value.serialize(&mut writer).unwrap();
BinarySerializable::serialize(
&$value,
&mut writer,
).unwrap();
)*
writer
@@ -418,15 +414,6 @@ mod tests {
"Expected serialized value to match the binary representation"
);
let result = serialize_value(ReferenceValueLeaf::Date(DateTime::MAX).into());
let expected = binary_repr!(
type_codes::DATE_CODE => DateTime::MAX,
);
assert_eq!(
result, expected,
"Expected serialized value to match the binary representation"
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
let expected = binary_repr!(

View File

@@ -4,7 +4,7 @@ use std::io::{self, Read, Write};
use std::str;
use std::string::FromUtf8Error;
use common::BinarySerializable;
use common::*;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::de::Error as _;

View File

@@ -2,12 +2,13 @@ use std::io;
use common::{BinarySerializable, FixedSize, HasLen};
use super::{Decompressor, DOC_STORE_VERSION};
use super::{Decompressor, DocStoreVersion, DOC_STORE_VERSION};
use crate::directory::FileSlice;
#[derive(Debug, Clone, PartialEq)]
pub struct DocStoreFooter {
pub offset: u64,
pub doc_store_version: DocStoreVersion,
pub decompressor: Decompressor,
}
@@ -25,9 +26,11 @@ impl BinarySerializable for DocStoreFooter {
}
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let doc_store_version = u32::deserialize(reader)?;
if doc_store_version != DOC_STORE_VERSION {
panic!("actual doc store version: {doc_store_version}, expected: {DOC_STORE_VERSION}");
let doc_store_version = DocStoreVersion::deserialize(reader)?;
if doc_store_version > DOC_STORE_VERSION {
panic!(
"actual doc store version: {doc_store_version}, max_supported: {DOC_STORE_VERSION}"
);
}
let offset = u64::deserialize(reader)?;
let compressor_id = u8::deserialize(reader)?;
@@ -35,6 +38,7 @@ impl BinarySerializable for DocStoreFooter {
reader.read_exact(&mut skip_buf)?;
Ok(DocStoreFooter {
offset,
doc_store_version,
decompressor: Decompressor::from_id(compressor_id),
})
}
@@ -45,9 +49,14 @@ impl FixedSize for DocStoreFooter {
}
impl DocStoreFooter {
pub fn new(offset: u64, decompressor: Decompressor) -> Self {
pub fn new(
offset: u64,
decompressor: Decompressor,
doc_store_version: DocStoreVersion,
) -> Self {
DocStoreFooter {
offset,
doc_store_version,
decompressor,
}
}

View File

@@ -35,15 +35,16 @@ mod footer;
mod index;
mod reader;
mod writer;
pub use self::compressors::{Compressor, ZstdCompressor};
pub use self::decompressors::Decompressor;
pub(crate) use self::reader::DOCSTORE_CACHE_CAPACITY;
pub use self::reader::{CacheStats, StoreReader};
pub(crate) use self::reader::{DocStoreVersion, DOCSTORE_CACHE_CAPACITY};
pub use self::writer::StoreWriter;
mod store_compressor;
/// Doc store version in footer to handle format changes.
pub(crate) const DOC_STORE_VERSION: u32 = 1;
pub(crate) const DOC_STORE_VERSION: DocStoreVersion = DocStoreVersion::V2;
#[cfg(feature = "lz4-compression")]
mod compression_lz4_block;

View File

@@ -1,3 +1,4 @@
use std::fmt::Display;
use std::io;
use std::iter::Sum;
use std::num::NonZeroUsize;
@@ -25,9 +26,43 @@ pub(crate) const DOCSTORE_CACHE_CAPACITY: usize = 100;
type Block = OwnedBytes;
/// The format version of the document store.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
pub(crate) enum DocStoreVersion {
V1 = 1,
V2 = 2,
}
impl Display for DocStoreVersion {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DocStoreVersion::V1 => write!(f, "V1"),
DocStoreVersion::V2 => write!(f, "V2"),
}
}
}
impl BinarySerializable for DocStoreVersion {
fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
(*self as u32).serialize(writer)
}
fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
Ok(match u32::deserialize(reader)? {
1 => DocStoreVersion::V1,
2 => DocStoreVersion::V2,
v => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("Invalid doc store version {}", v),
))
}
})
}
}
/// Reads document off tantivy's [`Store`](./index.html)
pub struct StoreReader {
decompressor: Decompressor,
doc_store_version: DocStoreVersion,
data: FileSlice,
skip_index: Arc<SkipIndex>,
space_usage: StoreSpaceUsage,
@@ -129,6 +164,7 @@ impl StoreReader {
let skip_index = SkipIndex::open(index_data);
Ok(StoreReader {
decompressor: footer.decompressor,
doc_store_version: footer.doc_store_version,
data: data_file,
cache: BlockCache {
cache: NonZeroUsize::new(cache_num_blocks)
@@ -203,8 +239,9 @@ impl StoreReader {
pub fn get<D: DocumentDeserialize>(&self, doc_id: DocId) -> crate::Result<D> {
let mut doc_bytes = self.get_document_bytes(doc_id)?;
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
let deserializer =
BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version)
.map_err(crate::TantivyError::from)?;
D::deserialize(deserializer).map_err(crate::TantivyError::from)
}
@@ -244,8 +281,9 @@ impl StoreReader {
self.iter_raw(alive_bitset).map(|doc_bytes_res| {
let mut doc_bytes = doc_bytes_res?;
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
let deserializer =
BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version)
.map_err(crate::TantivyError::from)?;
D::deserialize(deserializer).map_err(crate::TantivyError::from)
})
}
@@ -391,8 +429,9 @@ impl StoreReader {
) -> crate::Result<D> {
let mut doc_bytes = self.get_document_bytes_async(doc_id, executor).await?;
let deserializer = BinaryDocumentDeserializer::from_reader(&mut doc_bytes)
.map_err(crate::TantivyError::from)?;
let deserializer =
BinaryDocumentDeserializer::from_reader(&mut doc_bytes, self.doc_store_version)
.map_err(crate::TantivyError::from)?;
D::deserialize(deserializer).map_err(crate::TantivyError::from)
}
}
@@ -414,6 +453,11 @@ mod tests {
doc.get_first(*field).and_then(|f| f.as_value().as_str())
}
#[test]
fn test_doc_store_version_ord() {
assert!(DocStoreVersion::V1 < DocStoreVersion::V2);
}
#[test]
fn test_store_lru_cache() -> crate::Result<()> {
let directory = RamDirectory::create();

View File

@@ -5,6 +5,7 @@ use std::{io, thread};
use common::{BinarySerializable, CountingWriter, TerminatingWrite};
use super::DOC_STORE_VERSION;
use crate::directory::WritePtr;
use crate::store::footer::DocStoreFooter;
use crate::store::index::{Checkpoint, SkipIndexBuilder};
@@ -143,8 +144,11 @@ impl BlockCompressorImpl {
fn close(mut self) -> io::Result<()> {
let header_offset: u64 = self.writer.written_bytes();
let docstore_footer =
DocStoreFooter::new(header_offset, Decompressor::from(self.compressor));
let docstore_footer = DocStoreFooter::new(
header_offset,
Decompressor::from(self.compressor),
DOC_STORE_VERSION,
);
self.offset_index_writer.serialize_into(&mut self.writer)?;
docstore_footer.serialize(&mut self.writer)?;
self.writer.terminate()

View File

@@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::io;
use std::io::{Read, Write};
use common::BinarySerializable;
use common::*;
use crate::tokenizer::{Token, TokenStream};

View File

@@ -0,0 +1 @@
["meta.json","000002f0000000000000000000000000.fieldnorm","000002f0000000000000000000000000.pos","000002f0000000000000000000000000.store","000002f0000000000000000000000000.term","000002f0000000000000000000000000.fast","000002f0000000000000000000000000.idx"]

View File

@@ -0,0 +1,40 @@
{
"index_settings": {
"docstore_compression": "lz4",
"docstore_blocksize": 16384
},
"segments": [
{
"segment_id": "000002f0-0000-0000-0000-000000000000",
"max_doc": 1,
"deletes": null
}
],
"schema": [
{
"name": "label",
"type": "text",
"options": {
"indexing": {
"record": "position",
"fieldnorms": true,
"tokenizer": "default"
},
"stored": true,
"fast": false
}
},
{
"name": "date",
"type": "date",
"options": {
"indexed": true,
"fieldnorms": true,
"fast": false,
"stored": true,
"precision": "seconds"
}
}
],
"opstamp": 2
}