mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
encode dictionary type in fst footer (#1968)
* encode additional footer for dictionary kind in fst
This commit is contained in:
@@ -87,16 +87,15 @@ Note: there is no ambiguity between both representation as Add is always guarant
|
||||
|
||||
### SSTFooter
|
||||
```
|
||||
+-------+-------+-----+-------------+---------+---------+------+
|
||||
| Block | Block | ... | IndexOffset | NumTerm | Version | Type |
|
||||
+-------+-------+-----+-------------+---------+---------+------+
|
||||
+-------+-------+-----+-------------+---------+---------+
|
||||
| Block | Block | ... | IndexOffset | NumTerm | Version |
|
||||
+-------+-------+-----+-------------+---------+---------+
|
||||
|----( # of blocks)---|
|
||||
```
|
||||
- Block(SSTBlock): uses IndexValue for its Values format
|
||||
- IndexOffset(u64): Offset to the start of the SSTFooter
|
||||
- NumTerm(u64): number of terms in the sstable
|
||||
- Version(u32): Currently defined to 0x00\_00\_00\_01
|
||||
- Type(u32): Defined to 0x00\_00\_00\_02
|
||||
|
||||
### IndexValue
|
||||
```
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::ops::{Bound, RangeBounds};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use common::{BinarySerializable, DictionaryFooter, OwnedBytes};
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
@@ -178,14 +178,22 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
|
||||
/// Opens a `TermDictionary`.
|
||||
pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> {
|
||||
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(24);
|
||||
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(20);
|
||||
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
|
||||
|
||||
let index_offset = u64::deserialize(&mut footer_len_bytes)?;
|
||||
let num_terms = u64::deserialize(&mut footer_len_bytes)?;
|
||||
|
||||
let footer = DictionaryFooter::deserialize(&mut footer_len_bytes)?;
|
||||
crate::FOOTER.verify_equal(&footer)?;
|
||||
let version = u32::deserialize(&mut footer_len_bytes)?;
|
||||
if version != crate::SSTABLE_VERSION {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
"Unsuported sstable version, expected {}, found {}",
|
||||
version,
|
||||
crate::SSTABLE_VERSION,
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
|
||||
let sstable_index_bytes = index_slice.read_bytes()?;
|
||||
|
||||
@@ -17,7 +17,7 @@ pub use dictionary::Dictionary;
|
||||
pub use streamer::{Streamer, StreamerBuilder};
|
||||
|
||||
mod block_reader;
|
||||
use common::{BinarySerializable, DictionaryFooter, DictionaryKind};
|
||||
use common::BinarySerializable;
|
||||
|
||||
pub use self::block_reader::BlockReader;
|
||||
pub use self::delta::{DeltaReader, DeltaWriter};
|
||||
@@ -28,10 +28,7 @@ use crate::value::{RangeValueReader, RangeValueWriter};
|
||||
pub type TermOrdinal = u64;
|
||||
|
||||
const DEFAULT_KEY_CAPACITY: usize = 50;
|
||||
const FOOTER: DictionaryFooter = DictionaryFooter {
|
||||
kind: DictionaryKind::SSTable,
|
||||
version: 1,
|
||||
};
|
||||
const SSTABLE_VERSION: u32 = 1;
|
||||
|
||||
/// Given two byte strings, returns the length of
|
||||
/// the longest common prefix.
|
||||
@@ -311,7 +308,7 @@ where
|
||||
wrt.write_all(&offset.to_le_bytes())?;
|
||||
wrt.write_all(&self.num_terms.to_le_bytes())?;
|
||||
|
||||
FOOTER.serialize(&mut wrt)?;
|
||||
SSTABLE_VERSION.serialize(&mut wrt)?;
|
||||
|
||||
let wrt = wrt.finish();
|
||||
Ok(wrt.into_inner()?)
|
||||
@@ -398,7 +395,6 @@ mod test {
|
||||
15, 0, 0, 0, 0, 0, 0, 0, // index start offset
|
||||
3, 0, 0, 0, 0, 0, 0, 0, // num_term
|
||||
1, 0, 0, 0, // version
|
||||
2, 0, 0, 0, // dictionary kind. sstable = 2
|
||||
]
|
||||
);
|
||||
let mut sstable_reader = VoidSSTable::reader(&buffer[..]);
|
||||
|
||||
Reference in New Issue
Block a user