encode dictionary type in fst footer (#1968)

* encode additional footer for dictionary kind in fst
This commit is contained in:
trinity-1686a
2023-04-12 09:43:01 +02:00
committed by GitHub
parent 4b01cc4c49
commit 205e8a0a92
11 changed files with 206 additions and 94 deletions

View File

@@ -87,16 +87,15 @@ Note: there is no ambiguity between both representation as Add is always guarant
### SSTFooter
```
+-------+-------+-----+-------------+---------+---------+------+
| Block | Block | ... | IndexOffset | NumTerm | Version | Type |
+-------+-------+-----+-------------+---------+---------+------+
+-------+-------+-----+-------------+---------+---------+
| Block | Block | ... | IndexOffset | NumTerm | Version |
+-------+-------+-----+-------------+---------+---------+
|----( # of blocks)---|
```
- Block(SSTBlock): uses IndexValue for its Values format
- IndexOffset(u64): Offset to the start of the SSTFooter
- NumTerm(u64): number of terms in the sstable
- Version(u32): Currently defined to 0x00\_00\_00\_01
- Type(u32): Defined to 0x00\_00\_00\_02
### IndexValue
```

View File

@@ -5,7 +5,7 @@ use std::ops::{Bound, RangeBounds};
use std::sync::Arc;
use common::file_slice::FileSlice;
use common::{BinarySerializable, DictionaryFooter, OwnedBytes};
use common::{BinarySerializable, OwnedBytes};
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton;
@@ -178,14 +178,22 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
/// Opens a `TermDictionary`.
pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> {
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(24);
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(20);
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
let index_offset = u64::deserialize(&mut footer_len_bytes)?;
let num_terms = u64::deserialize(&mut footer_len_bytes)?;
let footer = DictionaryFooter::deserialize(&mut footer_len_bytes)?;
crate::FOOTER.verify_equal(&footer)?;
let version = u32::deserialize(&mut footer_len_bytes)?;
if version != crate::SSTABLE_VERSION {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Unsuported sstable version, expected {}, found {}",
version,
crate::SSTABLE_VERSION,
),
));
}
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
let sstable_index_bytes = index_slice.read_bytes()?;

View File

@@ -17,7 +17,7 @@ pub use dictionary::Dictionary;
pub use streamer::{Streamer, StreamerBuilder};
mod block_reader;
use common::{BinarySerializable, DictionaryFooter, DictionaryKind};
use common::BinarySerializable;
pub use self::block_reader::BlockReader;
pub use self::delta::{DeltaReader, DeltaWriter};
@@ -28,10 +28,7 @@ use crate::value::{RangeValueReader, RangeValueWriter};
pub type TermOrdinal = u64;
const DEFAULT_KEY_CAPACITY: usize = 50;
const FOOTER: DictionaryFooter = DictionaryFooter {
kind: DictionaryKind::SSTable,
version: 1,
};
const SSTABLE_VERSION: u32 = 1;
/// Given two byte string returns the length of
/// the longest common prefix.
@@ -311,7 +308,7 @@ where
wrt.write_all(&offset.to_le_bytes())?;
wrt.write_all(&self.num_terms.to_le_bytes())?;
FOOTER.serialize(&mut wrt)?;
SSTABLE_VERSION.serialize(&mut wrt)?;
let wrt = wrt.finish();
Ok(wrt.into_inner()?)
@@ -398,7 +395,6 @@ mod test {
15, 0, 0, 0, 0, 0, 0, 0, // index start offset
3, 0, 0, 0, 0, 0, 0, 0, // num_term
1, 0, 0, 0, // version
2, 0, 0, 0, // dictionary kind. sstable = 2
]
);
let mut sstable_reader = VoidSSTable::reader(&buffer[..]);