Compare commits

..

21 Commits

Author SHA1 Message Date
Pascal Seitz
bb57e63522 Store List of Fields in Segment
Fields may be encoded in the columnar storage or in the inverted index
for JSON fields.
Add a new Segment file that contains the list of fields (schema +
encoded)
2023-12-13 15:52:41 +08:00
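
For orientation, the layout of the new field-list segment file, as implemented further down this diff in src/field_list/mod.rs (serialize_split_fields / read_split_fields), boils down to a version byte followed by a zstd-compressed payload of field entries. A minimal sketch of one payload entry; the helper name below is ours, not part of the PR:

// Sketch only; mirrors the payload layout written by `serialize_split_fields`:
//   header:  [format_version: u8 = 1]
//   payload (zstd-compressed): [num_fields: u32 LE], then per field:
//     [type code: u8][flags: u8 = indexed << 2 | stored << 1 | fast]
//     [name length: u16 LE][field name bytes]
fn encode_field_entry(type_code: u8, indexed: bool, stored: bool, fast: bool, name: &str, out: &mut Vec<u8>) {
    let flags = (indexed as u8) << 2 | (stored as u8) << 1 | (fast as u8);
    out.push(type_code);
    out.push(flags);
    out.extend_from_slice(&(name.len() as u16).to_le_bytes());
    out.extend_from_slice(name.as_bytes());
}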
PSeitz
bff7c58497 improve indexing benchmark (#2275) 2023-12-11 09:04:42 +01:00
trinity-1686a
9ebc5ed053 use fst for sstable index (#2268)
* read path for new fst based index

* implement BlockAddrStoreWriter

* extract slop/derivation computation

* use better linear approximator and allow negative correction to approximator

* document format and reorder some fields

* optimize single block sstable size

* plug backward compat
2023-12-04 15:13:15 +01:00
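
The bullets above hint at the approach: predict each block's byte offset with a linear function of its index and store only a small, possibly negative, correction per block. A generic illustration of that idea, assuming nothing about the actual BlockAddrStore layout or slope ("derivation") computation:

// Illustrative only: a crude linear approximator for monotonically increasing
// block offsets; the stored per-block corrections may be negative.
fn linear_approx_corrections(offsets: &[u64]) -> (f64, Vec<i64>) {
    assert!(offsets.len() >= 2, "sketch assumes at least two blocks");
    let slope = (offsets[offsets.len() - 1] - offsets[0]) as f64 / (offsets.len() - 1) as f64;
    let corrections = offsets
        .iter()
        .enumerate()
        .map(|(i, &off)| off as i64 - (offsets[0] as f64 + slope * i as f64).round() as i64)
        .collect();
    (slope, corrections)
}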
PSeitz
0b56c88e69 Revert "Preparing for 0.21.2 release." (#2258)
* Revert "Preparing for 0.21.2 release. (#2256)"

This reverts commit 9caab45136.

* bump version to 0.21.1

* set version to 0.22.0-dev
2023-12-01 13:46:12 +01:00
PSeitz
24841f0b2a update bitpacker dep (#2269) 2023-12-01 13:45:52 +01:00
PSeitz
1a9fc10be9 add fields_metadata to SegmentReader, add columnar docs (#2222)
* add fields_metadata to SegmentReader, add columnar docs

* use schema to resolve field, add test

* normalize paths

* merge for FieldsMetadata, add fields_metadata on Index

* Update src/core/segment_reader.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* merge code paths

* add Hash

* move function outside

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-11-22 12:29:53 +01:00
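
The API added here can be exercised roughly as follows; a minimal sketch that assumes serde_json is available, with the JSON content and the exact reported paths being illustrative:

use tantivy::schema::{Schema, FAST, TEXT};
use tantivy::{doc, Index, IndexWriter};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let json = schema_builder.add_json_field("json", TEXT | FAST);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000)?;
    let json_val: serde_json::Map<String, serde_json::Value> =
        serde_json::from_str(r#"{"severity": "INFO", "ts": 1700000000}"#).unwrap();
    writer.add_document(doc!(json => json_val))?;
    writer.commit()?;
    // Lists the schema fields plus the paths discovered inside JSON fields,
    // e.g. "json.severity" (Str) and "json.ts" (a numeric column).
    for field in index.fields_metadata()? {
        println!("{} {:?} indexed={} fast={}", field.field_name, field.typ, field.indexed, field.fast);
    }
    Ok(())
}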
PSeitz
07573a7f19 update fst (#2267)
update fst to 0.5 (deduplicates regex-syntax in the dep tree)
deps cleanup
2023-11-21 16:06:57 +01:00
BlackHoleFox
daad2dc151 Take string references instead of owned values when building Facet paths (#2265) 2023-11-20 09:40:44 +01:00
PSeitz
054f49dc31 support escaped dot, add agg test (#2250)
add agg test for nested JSON
allow escaping of dot
2023-11-20 03:00:57 +01:00
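
The escaping rule this commit adds is easiest to see in isolation: an unescaped '.' still separates JSON path segments, while '\.' denotes a literal dot inside a key, which is what lets the new aggregation test further down address fields such as "json\.blub.color\.dot". A small re-implementation for illustration only (the real helper is split_json_path, made public later in this diff):

// Sketch of the splitting rule; mirrors the documented behavior:
//   "k8s.node"   -> ["k8s", "node"]
//   "k8s\.node"  -> ["k8s.node"]
fn split_path(path: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut buffer = String::new();
    let mut escaped = false;
    for c in path.chars() {
        if escaped {
            buffer.push(c);
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else if c == '.' {
            segments.push(std::mem::take(&mut buffer));
        } else {
            buffer.push(c);
        }
    }
    segments.push(buffer);
    segments
}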
PSeitz
47009ed2d3 remove unused deps (#2264)
found with cargo machete
remove pprof (doesn't work)
2023-11-20 02:59:59 +01:00
PSeitz
0aae31d7d7 reduce number of allocations (#2257)
* reduce number of allocations

Explanation accounts for around 50% of all allocations (by allocation count, not runtime).
It is created during serialization even though it is never actually used.

- Make Explanation optional in BM25
- Avoid allocations when using Explanation

* use Cow
2023-11-16 13:47:36 +01:00
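
The gist of the change, sketched with placeholder types rather than tantivy's actual Explanation or BM25 code: the explanation becomes optional and borrows constant text via Cow instead of allocating it eagerly.

use std::borrow::Cow;

// Illustrative pattern only; names are not tantivy's.
struct ScoreExplanation {
    value: f32,
    description: Cow<'static, str>,
}

fn score(explain: bool) -> (f32, Option<ScoreExplanation>) {
    let value = 1.2;
    // Only build the explanation when it is actually requested,
    // and avoid allocating for constant descriptions.
    let explanation = explain.then(|| ScoreExplanation {
        value,
        description: Cow::Borrowed("bm25 score"),
    });
    (value, explanation)
}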
Paul Masurel
9caab45136 Preparing for 0.21.2 release. (#2256) 2023-11-15 10:43:36 +09:00
Chris Tam
6d9a7b7eb0 Derive Debug for SchemaBuilder (#2254) 2023-11-15 01:03:44 +01:00
dependabot[bot]
7a2c5804b1 Update itertools requirement from 0.11.0 to 0.12.0 (#2255)
Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version.
- [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-itertools/itertools/compare/v0.11.0...v0.12.0)

---
updated-dependencies:
- dependency-name: itertools
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-15 01:03:08 +01:00
François Massot
5319977171 Merge pull request #2253 from quickwit-oss/issue/2251-bug-merge-json-object-with-number
Fix bug occurring when merging a JSON object indexed with positions.
2023-11-14 17:28:29 +01:00
trinity-1686a
828632e8c4 rustfmt 2023-11-14 15:05:16 +01:00
Paul Masurel
6b59ec6fd5 Fix bug occurring when merging a JSON object indexed with positions.
In a JSON object field, the presence of term frequencies depends on the
value type.
Typically, a string indexed with positions will have positions,
while numbers won't.

The presence or absence of term freqs for a given term is unfortunately
encoded in a very passive way.

It is given by the presence of extra information in the skip info, or
the lack of term freqs after decoding vint blocks.

Before this change, a freshly written segment was encoded correctly
(without any term freq for numbers in a JSON object field).
During a merge, however, we would read back the default term freq = 1
(the default in the absence of encoded term freqs).

The merger would then proceed and attempt to decode 1 position when
there are in fact none.

This PR requires explicitly telling the postings serializer whether
term frequencies should be serialized for each new term.

Closes #2251
2023-11-14 22:41:48 +09:00
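
Condensed from the regression tests added further down in this diff: a JSON number is indexed without positions, so merging two such segments used to panic. A sketch (the tantivy::merge_policy path is an assumption about the public re-export of LogMergePolicy, and serde_json is assumed to be available):

use tantivy::schema::{Schema, TEXT};
use tantivy::{Index, IndexWriter, TantivyDocument};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let field = schema_builder.add_json_field("dynamic", TEXT);
    let index = Index::create_in_ram(schema_builder.build());
    let mut writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000)?;
    let mut merge_policy = tantivy::merge_policy::LogMergePolicy::default();
    merge_policy.set_min_num_segments(2);
    writer.set_merge_policy(Box::new(merge_policy));
    // A JSON number is indexed without positions, unlike text in the same field.
    let mut doc = TantivyDocument::default();
    doc.add_object(field, serde_json::from_str(r#"{"tenant_id":75}"#).unwrap());
    // Two commits -> two segments; the merger used to assume term freq = 1
    // and try to decode a position that was never written.
    writer.add_document(doc.clone())?;
    writer.commit()?;
    writer.add_document(doc)?;
    writer.commit()?;
    writer.wait_merging_threads()?;
    Ok(())
}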
PSeitz
b60d862150 docid deltas while indexing (#2249)
* docid deltas while indexing

storing deltas is especially helpful for repetitive data like logs.
In those cases, recording a doc on a term used to cost 4 bytes;
it now costs 1 byte.

HDFS Indexing 1.1GB Total memory consumption:
Before:  760 MB
Now:     590 MB

* use scan for delta decoding
2023-11-13 05:14:27 +01:00
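
For context, a toy illustration of the saving (not tantivy's actual recorder code): once doc ids are stored as deltas, the dense hits typical of log data shrink to single-byte vints.

// Illustrative only. With repetitive data, consecutive docs hit the same term,
// so the delta is usually small (often 1) and fits in one vint byte,
// whereas the raw doc id needed up to 4 bytes.
fn write_vint(mut val: u32, out: &mut Vec<u8>) {
    loop {
        let byte = (val & 0x7f) as u8;
        val >>= 7;
        if val == 0 {
            out.push(byte);
            return;
        }
        out.push(byte | 0x80);
    }
}

fn encode_doc_ids_as_deltas(doc_ids: &[u32], out: &mut Vec<u8>) {
    let mut previous = 0u32;
    for &doc in doc_ids {
        write_vint(doc - previous, out);
        previous = doc;
    }
}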
PSeitz
4837c7811a add missing inlines (#2245) 2023-11-10 08:00:42 +01:00
PSeitz
5a2397d57e add sstable ord_to_term benchmark (#2242) 2023-11-10 07:27:48 +01:00
PSeitz
927b4432c9 Perf: use term hashmap in fastfield (#2243)
* add shared arena hashmap

* bench fastfield indexing

* use shared arena hashmap in columnar

lower minimum resize in hashtable

* clippy

* add comments
2023-11-09 13:44:02 +01:00
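
The building block behind this change is the shared-arena hashmap from tantivy-stacker (imported as `stacker`), which the columnar DictionaryBuilder now uses as shown in the diff below. A minimal sketch of the same pattern, using only the calls visible in that diff:

use stacker::{MemoryArena, SharedArenaHashMap};

// Returns an auto-incremented id per distinct term, keeping keys and values
// in the shared MemoryArena (mirrors DictionaryBuilder::get_or_allocate_id).
fn get_or_allocate_id(dict: &mut SharedArenaHashMap, term: &[u8], arena: &mut MemoryArena) -> u32 {
    let next_id = dict.len() as u32;
    dict.mutate_or_create(term, arena, |current: Option<u32>| current.unwrap_or(next_id))
}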
68 changed files with 3357 additions and 1345 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.21.0"
version = "0.22.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -22,10 +22,10 @@ crc32fast = "1.3.2"
once_cell = "1.10.0"
regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
aho-corasick = "1.0"
tantivy-fst = "0.4.0"
tantivy-fst = "0.5"
memmap2 = { version = "0.9.0", optional = true }
lz4_flex = { version = "0.11", default-features = false, optional = true }
zstd = { version = "0.13", optional = true, default-features = false }
zstd = { version = "0.13", default-features = false }
tempfile = { version = "3.3.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
@@ -37,21 +37,19 @@ uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
rust-stemmers = "1.2.0"
downcast-rs = "1.2.0"
bitpacking = { git = "https://github.com/quickwit-oss/bitpacking", rev = "f730b75", default-features = false, features = ["bitpacker4x"] }
bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
census = "0.4.0"
rustc-hash = "1.1.0"
thiserror = "1.0.30"
htmlescape = "0.3.1"
fail = { version = "0.5.0", optional = true }
murmurhash32 = "0.3.0"
time = { version = "0.3.10", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.12.0"
fastdivide = "0.4.0"
itertools = "0.11.0"
itertools = "0.12.0"
measure_time = "0.8.2"
async-trait = "0.1.53"
arc-swap = "1.5.0"
columnar = { version= "0.2", path="./columnar", package ="tantivy-columnar" }
@@ -75,15 +73,13 @@ matches = "0.1.9"
pretty_assertions = "1.2.1"
proptest = "1.0.0"
test-log = "0.2.10"
env_logger = "0.10.0"
futures = "0.3.21"
paste = "1.0.11"
more-asserts = "0.3.1"
rand_distr = "0.4.3"
[target.'cfg(not(windows))'.dev-dependencies]
criterion = "0.5"
pprof = { git = "https://github.com/PSeitz/pprof-rs/", rev = "53af24b", features = ["flamegraph", "criterion"] } # temp fork that works with criterion 0.5
criterion = { version = "0.5", default-features = false }
[dev-dependencies.fail]
version = "0.5.0"
@@ -109,7 +105,7 @@ mmap = ["fs4", "tempfile", "memmap2"]
stopwords = []
lz4-compression = ["lz4_flex"]
zstd-compression = ["zstd"]
zstd-compression = []
failpoints = ["fail", "fail/failpoints"]
unstable = [] # useful for benches.

View File

@@ -1,14 +1,99 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use pprof::criterion::{Output, PProfProfiler};
use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion, Throughput};
use tantivy::schema::{TantivyDocument, FAST, INDEXED, STORED, STRING, TEXT};
use tantivy::{Index, IndexWriter};
use tantivy::{tokenizer, Index, IndexWriter};
const HDFS_LOGS: &str = include_str!("hdfs.json");
const GH_LOGS: &str = include_str!("gh.json");
const WIKI: &str = include_str!("wiki.json");
fn get_lines(input: &str) -> Vec<&str> {
input.trim().split('\n').collect()
fn benchmark(
b: &mut Bencher,
input: &str,
schema: tantivy::schema::Schema,
commit: bool,
parse_json: bool,
is_dynamic: bool,
) {
if is_dynamic {
benchmark_dynamic_json(b, input, schema, commit, parse_json)
} else {
_benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
TantivyDocument::parse_json(&schema, doc_json).unwrap()
})
}
}
fn get_index(schema: tantivy::schema::Schema) -> Index {
let mut index = Index::create_in_ram(schema.clone());
let ff_tokenizer_manager = tokenizer::TokenizerManager::default();
ff_tokenizer_manager.register(
"raw",
tokenizer::TextAnalyzer::builder(tokenizer::RawTokenizer::default())
.filter(tokenizer::RemoveLongFilter::limit(255))
.build(),
);
index.set_fast_field_tokenizers(ff_tokenizer_manager.clone());
index
}
fn _benchmark(
b: &mut Bencher,
input: &str,
schema: tantivy::schema::Schema,
commit: bool,
include_json_parsing: bool,
create_doc: impl Fn(&tantivy::schema::Schema, &str) -> TantivyDocument,
) {
if include_json_parsing {
let lines: Vec<&str> = input.trim().split('\n').collect();
b.iter(|| {
let index = get_index(schema.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let doc = create_doc(&schema, doc_json);
index_writer.add_document(doc).unwrap();
}
if commit {
index_writer.commit().unwrap();
}
})
} else {
let docs: Vec<_> = input
.trim()
.split('\n')
.map(|doc_json| create_doc(&schema, doc_json))
.collect();
b.iter_batched(
|| docs.clone(),
|docs| {
let index = get_index(schema.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc in docs {
index_writer.add_document(doc).unwrap();
}
if commit {
index_writer.commit().unwrap();
}
},
BatchSize::SmallInput,
)
}
}
fn benchmark_dynamic_json(
b: &mut Bencher,
input: &str,
schema: tantivy::schema::Schema,
commit: bool,
parse_json: bool,
) {
let json_field = schema.get_field("json").unwrap();
_benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
tantivy::doc!(json_field=>json_val)
})
}
pub fn hdfs_index_benchmark(c: &mut Criterion) {
@@ -19,7 +104,14 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
schema_builder.add_text_field("severity", STRING);
schema_builder.build()
};
let schema_with_store = {
let schema_only_fast = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_u64_field("timestamp", FAST);
schema_builder.add_text_field("body", FAST);
schema_builder.add_text_field("severity", FAST);
schema_builder.build()
};
let _schema_with_store = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_u64_field("timestamp", INDEXED | STORED);
schema_builder.add_text_field("body", TEXT | STORED);
@@ -28,77 +120,39 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
};
let dynamic_schema = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_json_field("json", TEXT);
schema_builder.add_json_field("json", TEXT | FAST);
schema_builder.build()
};
let mut group = c.benchmark_group("index-hdfs");
group.throughput(Throughput::Bytes(HDFS_LOGS.len() as u64));
group.sample_size(20);
group.bench_function("index-hdfs-no-commit", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema.clone());
let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
index_writer.add_document(doc).unwrap();
let benches = [
("only-indexed-".to_string(), schema, false),
//("stored-".to_string(), _schema_with_store, false),
("only-fast-".to_string(), schema_only_fast, false),
("dynamic-".to_string(), dynamic_schema, true),
];
for (prefix, schema, is_dynamic) in benches {
for commit in [false, true] {
let suffix = if commit { "with-commit" } else { "no-commit" };
for parse_json in [false] {
// for parse_json in [false, true] {
let suffix = if parse_json {
format!("{}-with-json-parsing", suffix)
} else {
format!("{}", suffix)
};
let bench_name = format!("{}{}", prefix, suffix);
group.bench_function(bench_name, |b| {
benchmark(b, HDFS_LOGS, schema.clone(), commit, parse_json, is_dynamic)
});
}
})
});
group.bench_function("index-hdfs-with-commit", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema_with_store.clone());
let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
})
});
group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(schema_with_store.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
let lines = get_lines(HDFS_LOGS);
b.iter(|| {
let index = Index::create_in_ram(dynamic_schema.clone());
let json_field = dynamic_schema.get_field("json").unwrap();
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
});
}
}
}
pub fn gh_index_benchmark(c: &mut Criterion) {
@@ -107,39 +161,24 @@ pub fn gh_index_benchmark(c: &mut Criterion) {
schema_builder.add_json_field("json", TEXT | FAST);
schema_builder.build()
};
let dynamic_schema_fast = {
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
schema_builder.add_json_field("json", FAST);
schema_builder.build()
};
let mut group = c.benchmark_group("index-gh");
group.throughput(Throughput::Bytes(GH_LOGS.len() as u64));
group.bench_function("index-gh-no-commit", |b| {
let lines = get_lines(GH_LOGS);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
})
benchmark_dynamic_json(b, GH_LOGS, dynamic_schema.clone(), false, false)
});
group.bench_function("index-gh-with-commit", |b| {
let lines = get_lines(GH_LOGS);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
group.bench_function("index-gh-fast", |b| {
benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), false, false)
});
group.bench_function("index-gh-fast-with-commit", |b| {
benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), true, false)
});
}
@@ -154,34 +193,10 @@ pub fn wiki_index_benchmark(c: &mut Criterion) {
group.throughput(Throughput::Bytes(WIKI.len() as u64));
group.bench_function("index-wiki-no-commit", |b| {
let lines = get_lines(WIKI);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
})
benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), false, false)
});
group.bench_function("index-wiki-with-commit", |b| {
let lines = get_lines(WIKI);
b.iter(|| {
let json_field = dynamic_schema.get_field("json").unwrap();
let index = Index::create_in_ram(dynamic_schema.clone());
let mut index_writer: IndexWriter =
index.writer_with_num_threads(1, 100_000_000).unwrap();
for doc_json in &lines {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let doc = tantivy::doc!(json_field=>json_val);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
})
benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), true, false)
});
}
@@ -192,12 +207,12 @@ criterion_group! {
}
criterion_group! {
name = gh_benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
config = Criterion::default();
targets = gh_index_benchmark
}
criterion_group! {
name = wiki_benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
config = Criterion::default();
targets = wiki_index_benchmark
}
criterion_main!(benches, gh_benches, wiki_benches);

View File

@@ -15,7 +15,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bitpacking = {version="0.8", default-features=false, features = ["bitpacker1x"]}
bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }
[dev-dependencies]
rand = "0.8"

View File

@@ -9,8 +9,7 @@ description = "column oriented storage for tantivy"
categories = ["database-implementations", "data-structures", "compression"]
[dependencies]
itertools = "0.11.0"
fnv = "1.0.7"
itertools = "0.12.0"
fastdivide = "0.4.0"
stacker = { version= "0.2", path = "../stacker", package="tantivy-stacker"}

View File

@@ -8,7 +8,6 @@ license = "MIT"
columnar = {path="../", package="tantivy-columnar"}
serde_json = "1"
serde_json_borrow = {git="https://github.com/PSeitz/serde_json_borrow/"}
serde = "1"
[workspace]
members = []

View File

@@ -58,7 +58,7 @@ impl ColumnType {
self == &ColumnType::DateTime
}
pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
pub fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
}
}

View File

@@ -269,7 +269,8 @@ impl StrOrBytesColumnWriter {
dictionaries: &mut [DictionaryBuilder],
arena: &mut MemoryArena,
) {
let unordered_id = dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes);
let unordered_id =
dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes, arena);
self.column_writer.record(doc, unordered_id, arena);
}

View File

@@ -333,7 +333,7 @@ impl ColumnarWriter {
num_docs: RowId,
old_to_new_row_ids: Option<&[RowId]>,
wrt: &mut dyn io::Write,
) -> io::Result<()> {
) -> io::Result<Vec<(String, ColumnType)>> {
let mut serializer = ColumnarSerializer::new(wrt);
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
.numerical_field_hash_map
@@ -374,7 +374,9 @@ impl ColumnarWriter {
let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
let mut symbol_byte_buffer: Vec<u8> = Vec::new();
for (column_name, column_type, addr) in columns {
for (column_name, column_type, addr) in columns.iter() {
let column_type = *column_type;
let addr = *addr;
match column_type {
ColumnType::Bool => {
let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
@@ -437,6 +439,7 @@ impl ColumnarWriter {
&mut symbol_byte_buffer,
),
buffers,
&self.arena,
&mut column_serializer,
)?;
column_serializer.finalize()?;
@@ -484,12 +487,21 @@ impl ColumnarWriter {
};
}
serializer.finalize(num_docs)?;
Ok(())
Ok(columns
.into_iter()
.map(|(column_name, column_type, _)| {
(
String::from_utf8_lossy(column_name).to_string(),
column_type,
)
})
.collect())
}
}
// Serialize [Dictionary, Column, dictionary num bytes U32::LE]
// Column: [Column Index, Column Values, column index num bytes U32::LE]
#[allow(clippy::too_many_arguments)]
fn serialize_bytes_or_str_column(
cardinality: Cardinality,
num_docs: RowId,
@@ -497,6 +509,7 @@ fn serialize_bytes_or_str_column(
dictionary_builder: &DictionaryBuilder,
operation_it: impl Iterator<Item = ColumnOperation<UnorderedId>>,
buffers: &mut SpareBuffers,
arena: &MemoryArena,
wrt: impl io::Write,
) -> io::Result<()> {
let SpareBuffers {
@@ -505,7 +518,8 @@ fn serialize_bytes_or_str_column(
..
} = buffers;
let mut counting_writer = CountingWriter::wrap(wrt);
let term_id_mapping: TermIdMapping = dictionary_builder.serialize(&mut counting_writer)?;
let term_id_mapping: TermIdMapping =
dictionary_builder.serialize(arena, &mut counting_writer)?;
let dictionary_num_bytes: u32 = counting_writer.written_bytes() as u32;
let mut wrt = counting_writer.finish();
let operation_iterator = operation_it.map(|symbol: ColumnOperation<UnorderedId>| {

View File

@@ -1,7 +1,7 @@
use std::io;
use fnv::FnvHashMap;
use sstable::SSTable;
use stacker::{MemoryArena, SharedArenaHashMap};
pub(crate) struct TermIdMapping {
unordered_to_ord: Vec<OrderedId>,
@@ -31,29 +31,38 @@ pub struct OrderedId(pub u32);
/// mapping.
#[derive(Default)]
pub(crate) struct DictionaryBuilder {
dict: FnvHashMap<Vec<u8>, UnorderedId>,
memory_consumption: usize,
dict: SharedArenaHashMap,
}
impl DictionaryBuilder {
/// Get or allocate an unordered id.
/// (This ID is simply an auto-incremented id.)
pub fn get_or_allocate_id(&mut self, term: &[u8]) -> UnorderedId {
if let Some(term_id) = self.dict.get(term) {
return *term_id;
}
let new_id = UnorderedId(self.dict.len() as u32);
self.dict.insert(term.to_vec(), new_id);
self.memory_consumption += term.len();
self.memory_consumption += 40; // Term Metadata + HashMap overhead
new_id
pub fn get_or_allocate_id(&mut self, term: &[u8], arena: &mut MemoryArena) -> UnorderedId {
let next_id = self.dict.len() as u32;
let unordered_id = self
.dict
.mutate_or_create(term, arena, |unordered_id: Option<u32>| {
if let Some(unordered_id) = unordered_id {
unordered_id
} else {
next_id
}
});
UnorderedId(unordered_id)
}
/// Serialize the dictionary into an fst, and returns the
/// `UnorderedId -> TermOrdinal` map.
pub fn serialize<'a, W: io::Write + 'a>(&self, wrt: &mut W) -> io::Result<TermIdMapping> {
let mut terms: Vec<(&[u8], UnorderedId)> =
self.dict.iter().map(|(k, v)| (k.as_slice(), *v)).collect();
pub fn serialize<'a, W: io::Write + 'a>(
&self,
arena: &MemoryArena,
wrt: &mut W,
) -> io::Result<TermIdMapping> {
let mut terms: Vec<(&[u8], UnorderedId)> = self
.dict
.iter(arena)
.map(|(k, v)| (k, arena.read(v)))
.collect();
terms.sort_unstable_by_key(|(key, _)| *key);
// TODO Remove the allocation.
let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
@@ -68,7 +77,7 @@ impl DictionaryBuilder {
}
pub(crate) fn mem_usage(&self) -> usize {
self.memory_consumption
self.dict.mem_usage()
}
}
@@ -78,12 +87,13 @@ mod tests {
#[test]
fn test_dictionary_builder() {
let mut arena = MemoryArena::default();
let mut dictionary_builder = DictionaryBuilder::default();
let hello_uid = dictionary_builder.get_or_allocate_id(b"hello");
let happy_uid = dictionary_builder.get_or_allocate_id(b"happy");
let tax_uid = dictionary_builder.get_or_allocate_id(b"tax");
let hello_uid = dictionary_builder.get_or_allocate_id(b"hello", &mut arena);
let happy_uid = dictionary_builder.get_or_allocate_id(b"happy", &mut arena);
let tax_uid = dictionary_builder.get_or_allocate_id(b"tax", &mut arena);
let mut buffer = Vec::new();
let id_mapping = dictionary_builder.serialize(&mut buffer).unwrap();
let id_mapping = dictionary_builder.serialize(&arena, &mut buffer).unwrap();
assert_eq!(id_mapping.to_ord(hello_uid), OrderedId(1));
assert_eq!(id_mapping.to_ord(happy_uid), OrderedId(0));
assert_eq!(id_mapping.to_ord(tax_uid), OrderedId(2));

View File

@@ -1,3 +1,22 @@
//! # Tantivy-Columnar
//!
//! `tantivy-columnar` provides columnar storage for tantivy.
//! The crate allows for efficient read operations on specific columns rather than entire records.
//!
//! ## Overview
//!
//! - **columnar**: Reading, writing, and merging multiple columns:
//! - **[ColumnarWriter]**: Makes it possible to create a new columnar.
//! - **[ColumnarReader]**: The ColumnarReader makes it possible to access a set of columns
//! associated to field names.
//! - **[merge_columnar]**: Contains the functionalities to merge multiple ColumnarReader or
//! segments into a single one.
//!
//! - **column**: A single column, which contains
//! - [column_index]: Resolves the rows for a document id. Manages the cardinality of the
//! column.
//! - [column_values]: Stores the values of a column in a dense format.
#![cfg_attr(all(feature = "unstable", test), feature(test))]
#[cfg(test)]

View File

@@ -26,7 +26,7 @@ fn test_dataframe_writer_str() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 99);
assert_eq!(cols[0].num_bytes(), 73);
}
#[test]
@@ -40,7 +40,7 @@ fn test_dataframe_writer_bytes() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 99);
assert_eq!(cols[0].num_bytes(), 73);
}
#[test]

View File

@@ -6,7 +6,7 @@ use ownedbytes::OwnedBytes;
use crate::ByteCount;
#[derive(Clone, Copy, Eq, PartialEq)]
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct TinySet(u64);
impl fmt::Debug for TinySet {

View File

@@ -1,5 +1,5 @@
use std::convert::TryInto;
use std::ops::Deref;
use std::ops::{Deref, Range};
use std::sync::Arc;
use std::{fmt, io};
@@ -37,7 +37,7 @@ impl OwnedBytes {
/// creates a fileslice that is just a view over a slice of the data.
#[must_use]
#[inline]
pub fn slice(&self, range: impl std::slice::SliceIndex<[u8], Output = [u8]>) -> Self {
pub fn slice(&self, range: Range<usize>) -> Self {
OwnedBytes {
data: &self.data[range],
box_stable_deref: self.box_stable_deref.clone(),
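
A usage note on this signature change, as a small sketch: callers that relied on open-ended slice forms now have to pass a concrete Range<usize>.

use ownedbytes::OwnedBytes;

fn main() {
    let bytes = OwnedBytes::new(vec![0u8, 1, 2, 3, 4]);
    // `bytes.slice(2..)` or `bytes.slice(..)` no longer compile;
    // the range has to be spelled out.
    let tail = bytes.slice(2..bytes.len());
    assert_eq!(tail.as_slice(), &[2u8, 3, 4][..]);
}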

View File

@@ -624,6 +624,65 @@ fn test_aggregation_on_json_object() {
);
}
#[test]
fn test_aggregation_on_nested_json_object() {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json.blub", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
index_writer
.add_document(doc!(json => json!({"color.dot": "red", "color": {"nested":"red"} })))
.unwrap();
index_writer
.add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let agg: Aggregations = serde_json::from_value(json!({
"jsonagg1": {
"terms": {
"field": "json\\.blub.color\\.dot",
}
},
"jsonagg2": {
"terms": {
"field": "json\\.blub.color.nested",
}
}
}))
.unwrap();
let aggregation_collector = get_collector(agg);
let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
assert_eq!(
&aggregation_res_json,
&serde_json::json!({
"jsonagg1": {
"buckets": [
{"doc_count": 1, "key": "blue"},
{"doc_count": 1, "key": "red"}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
},
"jsonagg2": {
"buckets": [
{"doc_count": 1, "key": "blue"},
{"doc_count": 1, "key": "red"}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
})
);
}
#[test]
fn test_aggregation_on_json_object_empty_columns() {
let mut schema_builder = Schema::builder();

View File

@@ -23,6 +23,7 @@ use crate::reader::{IndexReader, IndexReaderBuilder};
use crate::schema::document::Document;
use crate::schema::{Field, FieldType, Schema};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::{merge_field_meta_data, FieldMetadata, SegmentReader};
fn load_metas(
directory: &dyn Directory,
@@ -489,6 +490,28 @@ impl Index {
self.inventory.all()
}
/// Returns the list of fields that have been indexed in the Index.
/// The field list includes the field defined in the schema as well as the fields
/// that have been indexed as a part of a JSON field.
/// The returned field name is the full field name, including the name of the JSON field.
///
/// The returned field names can be used in queries.
///
/// Notice: If your data contains JSON fields this is **very expensive**, as it requires
/// browsing through the inverted index term dictionary and the columnar field dictionary.
///
/// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a json
/// field that is neither indexed nor a fast field but is stored, it is possible for the field
/// to not be listed.
pub fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
let segments = self.searchable_segments()?;
let fields_metadata: Vec<Vec<FieldMetadata>> = segments
.into_iter()
.map(|segment| SegmentReader::open(&segment)?.fields_metadata())
.collect::<Result<_, _>>()?;
Ok(merge_field_meta_data(fields_metadata, &self.schema()))
}
/// Creates a new segment_meta (Advanced user only).
///
/// As long as the `SegmentMeta` lives, the files associated with the

View File

@@ -142,6 +142,7 @@ impl SegmentMeta {
SegmentComponent::FastFields => ".fast".to_string(),
SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
SegmentComponent::FieldList => ".fieldlist".to_string(),
});
PathBuf::from(path)
}

View File

@@ -70,12 +70,12 @@ impl InvertedIndexReader {
&self.termdict
}
/// Return the fields and types encoded in the dictionary in lexicographic oder.
/// Return the fields and types encoded in the dictionary in lexicographic order.
/// Only valid on JSON fields.
///
/// Notice: This requires a full scan and is therefore **very expensive**.
/// TODO: Move to sstable to use the index.
pub fn list_fields(&self) -> io::Result<Vec<(String, Type)>> {
pub fn list_encoded_fields(&self) -> io::Result<Vec<(String, Type)>> {
let mut stream = self.termdict.stream()?;
let mut fields = Vec::new();
let mut fields_set = FnvHashSet::default();

View File

@@ -1,4 +1,4 @@
use columnar::MonotonicallyMappableToU64;
use columnar::{ColumnType, MonotonicallyMappableToU64};
use common::{replace_in_place, JsonPathWriter};
use rustc_hash::FxHashMap;
@@ -62,6 +62,14 @@ impl IndexingPositionsPerPath {
}
}
/// Convert JSON_PATH_SEGMENT_SEP to a dot.
pub fn json_path_sep_to_dot(path: &mut str) {
// This is safe since we are replacing an ASCII character with another ASCII character.
unsafe {
replace_in_place(JSON_PATH_SEGMENT_SEP, b'.', path.as_bytes_mut());
}
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn index_json_values<'a, V: Value<'a>>(
doc: DocId,
@@ -145,7 +153,7 @@ fn index_json_value<'a, V: Value<'a>>(
let mut token_stream = text_analyzer.token_stream(val);
let unordered_id = ctx
.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str());
.get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::Str);
// TODO: make sure the chain position works out.
set_path_id(term_buffer, unordered_id);
@@ -163,7 +171,7 @@ fn index_json_value<'a, V: Value<'a>>(
set_path_id(
term_buffer,
ctx.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str()),
.get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::U64),
);
term_buffer.append_type_and_fast_value(val);
postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -172,7 +180,7 @@ fn index_json_value<'a, V: Value<'a>>(
set_path_id(
term_buffer,
ctx.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str()),
.get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::I64),
);
term_buffer.append_type_and_fast_value(val);
postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -181,7 +189,7 @@ fn index_json_value<'a, V: Value<'a>>(
set_path_id(
term_buffer,
ctx.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str()),
.get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::F64),
);
term_buffer.append_type_and_fast_value(val);
postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -190,7 +198,7 @@ fn index_json_value<'a, V: Value<'a>>(
set_path_id(
term_buffer,
ctx.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str()),
.get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::Bool),
);
term_buffer.append_type_and_fast_value(val);
postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -198,8 +206,10 @@ fn index_json_value<'a, V: Value<'a>>(
ReferenceValueLeaf::Date(val) => {
set_path_id(
term_buffer,
ctx.path_to_unordered_id
.get_or_allocate_unordered_id(json_path_writer.as_str()),
ctx.path_to_unordered_id.get_or_allocate_unordered_id(
json_path_writer.as_str(),
ColumnType::DateTime,
),
);
term_buffer.append_type_and_fast_value(val);
postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -320,7 +330,7 @@ pub struct JsonTermWriter<'a> {
/// In other words,
/// - `k8s.node` ends up as `["k8s", "node"]`.
/// - `k8s\.node` ends up as `["k8s.node"]`.
fn split_json_path(json_path: &str) -> Vec<String> {
pub fn split_json_path(json_path: &str) -> Vec<String> {
let mut escaped_state: bool = false;
let mut json_path_segments = Vec::new();
let mut buffer = String::new();

View File

@@ -25,7 +25,7 @@ pub use self::searcher::{Searcher, SearcherGeneration};
pub use self::segment::Segment;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::SegmentReader;
pub use self::segment_reader::{merge_field_meta_data, FieldMetadata, SegmentReader};
pub use self::single_segment_index_writer::SingleSegmentIndexWriter;
/// The meta file contains all the information about the list of segments and the schema

View File

@@ -27,12 +27,14 @@ pub enum SegmentComponent {
/// Bitset describing which document of the segment is alive.
/// (It was representing deleted docs but changed to represent alive docs from v0.17)
Delete,
/// Field list describing the fields in the segment.
FieldList,
}
impl SegmentComponent {
/// Iterates through the components.
pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
static SEGMENT_COMPONENTS: [SegmentComponent; 9] = [
SegmentComponent::Postings,
SegmentComponent::Positions,
SegmentComponent::FastFields,
@@ -41,6 +43,7 @@ impl SegmentComponent {
SegmentComponent::Store,
SegmentComponent::TempStore,
SegmentComponent::Delete,
SegmentComponent::FieldList,
];
SEGMENT_COMPONENTS.iter()
}

View File

@@ -1,11 +1,15 @@
use std::collections::HashMap;
use std::ops::BitOrAssign;
use std::sync::{Arc, RwLock};
use std::{fmt, io};
use itertools::Itertools;
use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
use crate::directory::{CompositeFile, FileSlice};
use crate::error::DataCorruption;
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
use crate::field_list::read_split_fields;
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
use crate::schema::{Field, IndexRecordOption, Schema, Type};
use crate::space_usage::SegmentSpaceUsage;
@@ -39,6 +43,7 @@ pub struct SegmentReader {
fast_fields_readers: FastFieldReaders,
fieldnorm_readers: FieldNormReaders,
list_fields_file: Option<FileSlice>, // Optional field list file for backwards compatibility
store_file: FileSlice,
alive_bitset_opt: Option<AliveBitSet>,
schema: Schema,
@@ -148,6 +153,7 @@ impl SegmentReader {
let termdict_composite = CompositeFile::open(&termdict_file)?;
let store_file = segment.open_read(SegmentComponent::Store)?;
let list_fields_file = segment.open_read(SegmentComponent::FieldList).ok();
crate::fail_point!("SegmentReader::open#middle");
@@ -196,6 +202,7 @@ impl SegmentReader {
segment_id: segment.id(),
delete_opstamp: segment.meta().delete_opstamp(),
store_file,
list_fields_file,
alive_bitset_opt,
positions_composite,
schema,
@@ -280,6 +287,41 @@ impl SegmentReader {
Ok(inv_idx_reader)
}
/// Returns the list of fields that have been indexed in the segment.
/// The field list includes the field defined in the schema as well as the fields
/// that have been indexed as a part of a JSON field.
/// The returned field name is the full field name, including the name of the JSON field.
///
/// The returned field names can be used in queries.
///
/// Notice: If your data contains JSON fields this is **very expensive**, as it requires
/// browsing through the inverted index term dictionary and the columnar field dictionary.
///
/// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a json
/// field that is neither indexed nor a fast field but is stored, it is possible for the field
/// to not be listed.
pub fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
if let Some(list_fields_file) = self.list_fields_file.as_ref() {
let file = list_fields_file.read_bytes()?;
let fields_metadata =
read_split_fields(file)?.collect::<io::Result<Vec<FieldMetadata>>>();
fields_metadata.map_err(|e| e.into())
} else {
// Schema fallback
Ok(self
.schema()
.fields()
.map(|(_field, entry)| FieldMetadata {
field_name: entry.name().to_string(),
typ: entry.field_type().value_type(),
indexed: entry.is_indexed(),
stored: entry.is_stored(),
fast: entry.is_fast(),
})
.collect())
}
}
/// Returns the segment id
pub fn segment_id(&self) -> SegmentId {
self.segment_id
@@ -330,6 +372,65 @@ impl SegmentReader {
}
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
/// FieldMetadata
pub struct FieldMetadata {
/// The field name
// Notice: Don't reorder the declaration of 1.field_name 2.typ, as it is used for ordering by
// field_name then typ.
pub field_name: String,
/// The field type
// Notice: Don't reorder the declaration of 1.field_name 2.typ, as it is used for ordering by
// field_name then typ.
pub typ: Type,
/// Is the field indexed for search
pub indexed: bool,
/// Is the field stored in the doc store
pub stored: bool,
/// Is the field stored in the columnar storage
pub fast: bool,
}
impl BitOrAssign for FieldMetadata {
fn bitor_assign(&mut self, rhs: Self) {
assert!(self.field_name == rhs.field_name);
assert!(self.typ == rhs.typ);
self.indexed |= rhs.indexed;
self.stored |= rhs.stored;
self.fast |= rhs.fast;
}
}
// Maybe too slow for the high cardinality case
fn is_field_stored(field_name: &str, schema: &Schema) -> bool {
schema
.find_field(field_name)
.map(|(field, _path)| schema.get_field_entry(field).is_stored())
.unwrap_or(false)
}
/// Helper to merge the field metadata from multiple segments.
pub fn merge_field_meta_data(
field_metadatas: Vec<Vec<FieldMetadata>>,
schema: &Schema,
) -> Vec<FieldMetadata> {
let mut merged_field_metadata = Vec::new();
for (_key, mut group) in &field_metadatas
.into_iter()
.kmerge_by(|left, right| left < right)
// TODO: Remove allocation
.group_by(|el| (el.field_name.to_string(), el.typ))
{
let mut merged: FieldMetadata = group.next().unwrap();
for el in group {
merged |= el;
}
// Currently is_field_stored is maybe too slow for the high cardinality case
merged.stored = is_field_stored(&merged.field_name, schema);
merged_field_metadata.push(merged);
}
merged_field_metadata
}
fn intersect_alive_bitset(
left_opt: Option<AliveBitSet>,
right_opt: Option<AliveBitSet>,
@@ -353,9 +454,127 @@ impl fmt::Debug for SegmentReader {
#[cfg(test)]
mod test {
use super::*;
use crate::core::Index;
use crate::schema::{Schema, Term, STORED, TEXT};
use crate::{DocId, IndexWriter};
use crate::schema::{Schema, SchemaBuilder, Term, STORED, TEXT};
use crate::{DocId, FieldMetadata, IndexWriter};
#[test]
fn test_merge_field_meta_data_same() {
let schema = SchemaBuilder::new().build();
let field_metadata1 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: true,
};
let field_metadata2 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: true,
};
let res = merge_field_meta_data(
vec![vec![field_metadata1.clone()], vec![field_metadata2]],
&schema,
);
assert_eq!(res, vec![field_metadata1]);
}
#[test]
fn test_merge_field_meta_data_different() {
let schema = SchemaBuilder::new().build();
let field_metadata1 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: false,
stored: false,
fast: true,
};
let field_metadata2 = FieldMetadata {
field_name: "b".to_string(),
typ: crate::schema::Type::Str,
indexed: false,
stored: false,
fast: true,
};
let field_metadata3 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: false,
};
let res = merge_field_meta_data(
vec![
vec![field_metadata1.clone(), field_metadata2.clone()],
vec![field_metadata3],
],
&schema,
);
let field_metadata_expected1 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: true,
};
assert_eq!(res, vec![field_metadata_expected1, field_metadata2.clone()]);
}
#[test]
fn test_merge_field_meta_data_merge() {
use pretty_assertions::assert_eq;
let get_meta_data = |name: &str, typ: Type| FieldMetadata {
field_name: name.to_string(),
typ,
indexed: false,
stored: false,
fast: true,
};
let schema = SchemaBuilder::new().build();
let mut metas = vec![get_meta_data("d", Type::Str), get_meta_data("e", Type::U64)];
metas.sort();
let res = merge_field_meta_data(vec![vec![get_meta_data("e", Type::Str)], metas], &schema);
assert_eq!(
res,
vec![
get_meta_data("d", Type::Str),
get_meta_data("e", Type::Str),
get_meta_data("e", Type::U64),
]
);
}
#[test]
fn test_merge_field_meta_data_bitxor() {
let field_metadata1 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: false,
stored: false,
fast: true,
};
let field_metadata2 = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: false,
};
let field_metadata_expected = FieldMetadata {
field_name: "a".to_string(),
typ: crate::schema::Type::Str,
indexed: true,
stored: false,
fast: true,
};
let mut res1 = field_metadata1.clone();
res1 |= field_metadata2.clone();
let mut res2 = field_metadata2.clone();
res2 |= field_metadata1;
assert_eq!(res1, field_metadata_expected);
assert_eq!(res2, field_metadata_expected);
}
#[test]
fn test_num_alive() -> crate::Result<()> {

View File

@@ -1,12 +1,13 @@
use crate::collector::Count;
use crate::directory::{RamDirectory, WatchCallback};
use crate::indexer::NoMergePolicy;
use crate::indexer::{LogMergePolicy, NoMergePolicy};
use crate::json_utils::JsonTermWriter;
use crate::query::TermQuery;
use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
use crate::schema::{Field, IndexRecordOption, Schema, Type, INDEXED, STRING, TEXT};
use crate::tokenizer::TokenizerManager;
use crate::{
Directory, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
SegmentId, TantivyDocument, Term,
Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
ReloadPolicy, SegmentId, TantivyDocument, Term,
};
#[test]
@@ -344,3 +345,132 @@ fn test_merging_segment_update_docfreq() {
let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
assert_eq!(term_info.doc_freq, 12);
}
// motivated by https://github.com/quickwit-oss/quickwit/issues/4130
#[test]
fn test_positions_merge_bug_non_text_json_vint() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_json_field("dynamic", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut writer: IndexWriter = index.writer_for_tests().unwrap();
let mut merge_policy = LogMergePolicy::default();
merge_policy.set_min_num_segments(2);
writer.set_merge_policy(Box::new(merge_policy));
// Here a string would work.
let doc_json = r#"{"tenant_id":75}"#;
let vals = serde_json::from_str(doc_json).unwrap();
let mut doc = TantivyDocument::default();
doc.add_object(field, vals);
writer.add_document(doc.clone()).unwrap();
writer.commit().unwrap();
writer.add_document(doc.clone()).unwrap();
writer.commit().unwrap();
writer.wait_merging_threads().unwrap();
let reader = index.reader().unwrap();
assert_eq!(reader.searcher().segment_readers().len(), 1);
}
// Same as above but with bitpacked blocks
#[test]
fn test_positions_merge_bug_non_text_json_bitpacked_block() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_json_field("dynamic", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut writer: IndexWriter = index.writer_for_tests().unwrap();
let mut merge_policy = LogMergePolicy::default();
merge_policy.set_min_num_segments(2);
writer.set_merge_policy(Box::new(merge_policy));
// Here a string would work.
let doc_json = r#"{"tenant_id":75}"#;
let vals = serde_json::from_str(doc_json).unwrap();
let mut doc = TantivyDocument::default();
doc.add_object(field, vals);
for _ in 0..128 {
writer.add_document(doc.clone()).unwrap();
}
writer.commit().unwrap();
writer.add_document(doc.clone()).unwrap();
writer.commit().unwrap();
writer.wait_merging_threads().unwrap();
let reader = index.reader().unwrap();
assert_eq!(reader.searcher().segment_readers().len(), 1);
}
#[test]
fn test_non_text_json_term_freq() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_json_field("dynamic", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut writer: IndexWriter = index.writer_for_tests().unwrap();
// Here a string would work.
let doc_json = r#"{"tenant_id":75}"#;
let vals = serde_json::from_str(doc_json).unwrap();
let mut doc = TantivyDocument::default();
doc.add_object(field, vals);
writer.add_document(doc.clone()).unwrap();
writer.commit().unwrap();
let reader = index.reader().unwrap();
assert_eq!(reader.searcher().segment_readers().len(), 1);
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let inv_idx = segment_reader.inverted_index(field).unwrap();
let mut term = Term::with_type_and_field(Type::Json, field);
let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
json_term_writer.push_path_segment("tenant_id");
json_term_writer.close_path_and_set_type(Type::U64);
json_term_writer.set_fast_value(75u64);
let postings = inv_idx
.read_postings(
&json_term_writer.term(),
IndexRecordOption::WithFreqsAndPositions,
)
.unwrap()
.unwrap();
assert_eq!(postings.doc(), 0);
assert_eq!(postings.term_freq(), 1u32);
}
#[test]
fn test_non_text_json_term_freq_bitpacked() {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_json_field("dynamic", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let mut writer: IndexWriter = index.writer_for_tests().unwrap();
// Here a string would work.
let doc_json = r#"{"tenant_id":75}"#;
let vals = serde_json::from_str(doc_json).unwrap();
let mut doc = TantivyDocument::default();
doc.add_object(field, vals);
let num_docs = 132;
for _ in 0..num_docs {
writer.add_document(doc.clone()).unwrap();
}
writer.commit().unwrap();
let reader = index.reader().unwrap();
assert_eq!(reader.searcher().segment_readers().len(), 1);
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let inv_idx = segment_reader.inverted_index(field).unwrap();
let mut term = Term::with_type_and_field(Type::Json, field);
let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
json_term_writer.push_path_segment("tenant_id");
json_term_writer.close_path_and_set_type(Type::U64);
json_term_writer.set_fast_value(75u64);
let mut postings = inv_idx
.read_postings(
&json_term_writer.term(),
IndexRecordOption::WithFreqsAndPositions,
)
.unwrap()
.unwrap();
assert_eq!(postings.doc(), 0);
assert_eq!(postings.term_freq(), 1u32);
for i in 1..num_docs {
assert_eq!(postings.advance(), i);
assert_eq!(postings.term_freq(), 1u32);
}
}

View File

@@ -131,7 +131,7 @@ mod tests {
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 105);
assert_eq!(file.len(), 80);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let column = fast_field_readers
.u64("field")
@@ -181,7 +181,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 133);
assert_eq!(file.len(), 108);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
.u64("field")
@@ -214,7 +214,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 106);
assert_eq!(file.len(), 81);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let fast_field_reader = fast_field_readers
.u64("field")
@@ -246,7 +246,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 4501);
assert_eq!(file.len(), 4476);
{
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
@@ -279,7 +279,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 277);
assert_eq!(file.len(), 252);
{
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
@@ -773,7 +773,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 114);
assert_eq!(file.len(), 84);
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = fast_field_readers.bool("field_bool").unwrap();
assert_eq!(bool_col.first(0), Some(true));
@@ -805,7 +805,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 126);
assert_eq!(file.len(), 96);
let readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = readers.bool("field_bool").unwrap();
for i in 0..25 {
@@ -830,7 +830,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 116);
assert_eq!(file.len(), 86);
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
let col = fastfield_readers.bool("field_bool").unwrap();
assert_eq!(col.first(0), None);
@@ -1288,11 +1288,18 @@ mod tests {
index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
// Supported for now, maybe dropped in the future.
let column = fast_field_reader
.column_opt::<i64>("jsonfield.attr.age")
.unwrap()
.unwrap();
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
assert_eq!(&vals, &[33]);
let column = fast_field_reader
.column_opt::<i64>("jsonfield\\.attr.age")
.unwrap()
.unwrap();
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
assert_eq!(&vals, &[33]);
}
}

View File

@@ -238,13 +238,17 @@ impl FastFieldsWriter {
mut self,
wrt: &mut dyn io::Write,
doc_id_map_opt: Option<&DocIdMapping>,
) -> io::Result<()> {
) -> io::Result<Vec<(String, Type)>> {
let num_docs = self.num_docs;
let old_to_new_row_ids =
doc_id_map_opt.map(|doc_id_mapping| doc_id_mapping.old_to_new_ids());
self.columnar_writer
let columns = self
.columnar_writer
.serialize(num_docs, old_to_new_row_ids, wrt)?;
Ok(())
Ok(columns
.into_iter()
.map(|(field_name, column)| (field_name.to_string(), column.into()))
.collect())
}
}

src/field_list/mod.rs (new file, 369 lines)
View File

@@ -0,0 +1,369 @@
//! The list of fields that are stored in a `tantivy` `Index`.
use std::collections::HashSet;
use std::io::{self, ErrorKind, Read};
use columnar::ColumnType;
use common::TinySet;
use fnv::FnvHashMap;
use crate::indexer::path_to_unordered_id::OrderedPathId;
use crate::json_utils::json_path_sep_to_dot;
use crate::postings::IndexingContext;
use crate::schema::{Field, Schema, Type};
use crate::{merge_field_meta_data, FieldMetadata, Term};
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub(crate) struct FieldConfig {
pub typ: Type,
pub indexed: bool,
pub stored: bool,
pub fast: bool,
}
impl FieldConfig {
fn serialize(&self) -> [u8; 2] {
let typ = self.typ.to_code();
let flags = (self.indexed as u8) << 2 | (self.stored as u8) << 1 | (self.fast as u8);
[typ, flags]
}
fn deserialize_from(data: [u8; 2]) -> io::Result<FieldConfig> {
let typ = Type::from_code(data[0]).ok_or_else(|| {
io::Error::new(
ErrorKind::InvalidData,
format!("could not deserialize type {}", data[0]),
)
})?;
let data = data[1];
let indexed = (data & 0b100) != 0;
let stored = (data & 0b010) != 0;
let fast = (data & 0b001) != 0;
Ok(FieldConfig {
typ,
indexed,
stored,
fast,
})
}
}
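// Worked example of the 2-byte encoding above: flags = indexed << 2 | stored << 1 | fast,
// so a fast-only field serializes as [type code, 0b001] and an indexed + fast
// field as [type code, 0b101] (decimal 5).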
/// Serializes the split fields.
pub(crate) fn serialize_segment_fields(
ctx: IndexingContext,
wrt: &mut dyn io::Write,
schema: &Schema,
unordered_id_to_ordered_id: &[(OrderedPathId, TinySet)],
mut columns: Vec<(String, Type)>,
) -> crate::Result<()> {
let mut field_list_set: HashSet<(Field, OrderedPathId, TinySet)> = HashSet::default();
let mut encoded_fields = Vec::new();
let mut map_to_canonical = FnvHashMap::default();
// Replace unordered ids by ordered ids to be able to sort
let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
for (key, _addr) in ctx.term_index.iter() {
let field = Term::wrap(key).field();
let field_entry = schema.get_field_entry(field);
if field_entry.field_type().value_type() == Type::Json {
let byte_range_unordered_id = 5..5 + 4;
let unordered_id =
u32::from_be_bytes(key[byte_range_unordered_id.clone()].try_into().unwrap());
let (path_id, typ_code_bitvec) = unordered_id_to_ordered_id[unordered_id as usize];
if !field_list_set.contains(&(field, path_id, typ_code_bitvec)) {
field_list_set.insert((field, path_id, typ_code_bitvec));
let mut build_path = |field_name: &str, mut json_path: String| {
// In this case we need to map the potential fast field to the field name
// accepted by the query parser.
let create_canonical =
!field_entry.is_expand_dots_enabled() && json_path.contains('.');
if create_canonical {
// Without expand dots enabled dots need to be escaped.
let escaped_json_path = json_path.replace('.', "\\.");
let full_path = format!("{}.{}", field_name, escaped_json_path);
let full_path_unescaped = format!("{}.{}", field_name, &json_path);
map_to_canonical.insert(full_path_unescaped, full_path.to_string());
full_path
} else {
// With expand dots enabled, we can use '.' instead of '\u{1}'.
json_path_sep_to_dot(&mut json_path);
format!("{}.{}", field_name, json_path)
}
};
let path = build_path(
field_entry.name(),
ordered_id_to_path[path_id.path_id() as usize].to_string(), /* String::from_utf8(key[5..].to_vec()).unwrap(), */
);
encoded_fields.push((path, typ_code_bitvec));
}
}
}
let mut indexed_fields: Vec<FieldMetadata> = Vec::new();
for (_field, field_entry) in schema.fields() {
let field_name = field_entry.name().to_string();
let is_indexed = field_entry.is_indexed();
let is_json = field_entry.field_type().value_type() == Type::Json;
if is_indexed && !is_json {
indexed_fields.push(FieldMetadata {
indexed: true,
stored: false,
field_name: field_name.to_string(),
fast: false,
typ: field_entry.field_type().value_type(),
});
}
}
for (field_name, field_type_set) in encoded_fields {
for field_type in field_type_set {
let column_type = ColumnType::try_from_code(field_type as u8).unwrap();
indexed_fields.push(FieldMetadata {
indexed: true,
stored: false,
field_name: field_name.to_string(),
fast: false,
typ: Type::from(column_type),
});
}
}
let mut fast_fields: Vec<FieldMetadata> = columns
.iter_mut()
.map(|(field_name, typ)| {
json_path_sep_to_dot(field_name);
// map to canonical path, to avoid similar but different entries.
// Eventually we should just accept '.' separated for all cases.
let field_name = map_to_canonical
.get(field_name)
.unwrap_or(field_name)
.to_string();
FieldMetadata {
indexed: false,
stored: false,
field_name,
fast: true,
typ: *typ,
}
})
.collect();
// Since the type is encoded differently in the fast field and in the inverted index,
// the order of the fields is not guaranteed to be the same. Therefore, we sort the fields.
// If we are sure that the order is the same, we can remove this sort.
indexed_fields.sort_unstable();
fast_fields.sort_unstable();
let merged = merge_field_meta_data(vec![indexed_fields, fast_fields], schema);
let out = serialize_split_fields(&merged);
wrt.write_all(&out)?;
Ok(())
}
/// Serializes the Split fields.
///
/// `fields_metadata` has to be sorted.
pub fn serialize_split_fields(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
// ensure that fields_metadata is strictly sorted.
debug_assert!(fields_metadata.windows(2).all(|w| w[0] < w[1]));
let mut payload = Vec::new();
// Write Num Fields
let length = fields_metadata.len() as u32;
payload.extend_from_slice(&length.to_le_bytes());
for field_metadata in fields_metadata {
write_field(field_metadata, &mut payload);
}
let compression_level = 3;
let payload_compressed = zstd::stream::encode_all(&mut &payload[..], compression_level)
.expect("zstd encoding failed");
let mut out = Vec::new();
// Write Header -- Format Version
let format_version = 1u8;
out.push(format_version);
// Write Payload
out.extend_from_slice(&payload_compressed);
out
}
fn write_field(field_metadata: &FieldMetadata, out: &mut Vec<u8>) {
let field_config = FieldConfig {
typ: field_metadata.typ,
indexed: field_metadata.indexed,
stored: field_metadata.stored,
fast: field_metadata.fast,
};
// Write Config 2 bytes
out.extend_from_slice(&field_config.serialize());
let str_length = field_metadata.field_name.len() as u16;
// Write String length 2 bytes
out.extend_from_slice(&str_length.to_le_bytes());
out.extend_from_slice(field_metadata.field_name.as_bytes());
}
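// Wire-format sketch, as implemented above: the serialized field list is a single
// format version byte (currently 1) followed by a zstd-compressed payload. The
// payload starts with the number of fields as a little-endian u32, then one entry
// per field:
//   [2-byte FieldConfig][2-byte name length, LE u16][utf-8 field name bytes]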
/// Reads a fixed number of bytes into an array and returns the array.
fn read_exact_array<R: Read, const N: usize>(reader: &mut R) -> io::Result<[u8; N]> {
let mut buffer = [0u8; N];
reader.read_exact(&mut buffer)?;
Ok(buffer)
}
/// Reads the Split fields from a zstd compressed stream of bytes
pub fn read_split_fields<R: Read>(
mut reader: R,
) -> io::Result<impl Iterator<Item = io::Result<FieldMetadata>>> {
let format_version = read_exact_array::<_, 1>(&mut reader)?[0];
assert_eq!(format_version, 1);
let reader = zstd::Decoder::new(reader)?;
read_split_fields_from_zstd(reader)
}
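// Rough usage sketch (not part of this change): a caller holding a `Segment` could
// load the field list along these lines, assuming `FileSlice::read_bytes` to
// materialize the slice:
//     let data = segment.open_read(SegmentComponent::FieldList)?.read_bytes()?;
//     let fields = read_split_fields(&mut &data[..])?
//         .collect::<io::Result<Vec<FieldMetadata>>>()?;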
fn read_field<R: Read>(reader: &mut R) -> io::Result<FieldMetadata> {
// Read FieldConfig (2 bytes)
let config_bytes = read_exact_array::<_, 2>(reader)?;
let field_config = FieldConfig::deserialize_from(config_bytes)?;
// Read field name length and the field name
let name_len = u16::from_le_bytes(read_exact_array::<_, 2>(reader)?) as usize;
let mut data = vec![0; name_len];
reader.read_exact(&mut data)?;
let field_name = String::from_utf8(data).map_err(|err| {
io::Error::new(
ErrorKind::InvalidData,
format!(
"Encountered invalid utf8 when deserializing field name: {}",
err
),
)
})?;
Ok(FieldMetadata {
field_name,
typ: field_config.typ,
indexed: field_config.indexed,
stored: field_config.stored,
fast: field_config.fast,
})
}
/// Reads the Split fields from a stream of bytes
fn read_split_fields_from_zstd<R: Read>(
mut reader: R,
) -> io::Result<impl Iterator<Item = io::Result<FieldMetadata>>> {
let mut num_fields = u32::from_le_bytes(read_exact_array::<_, 4>(&mut reader)?);
Ok(std::iter::from_fn(move || {
if num_fields == 0 {
return None;
}
num_fields -= 1;
Some(read_field(&mut reader))
}))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn field_config_deser_test() {
let field_config = FieldConfig {
typ: Type::Str,
indexed: true,
stored: false,
fast: true,
};
let serialized = field_config.serialize();
let deserialized = FieldConfig::deserialize_from(serialized).unwrap();
assert_eq!(field_config, deserialized);
}
#[test]
fn write_read_field_test() {
for typ in Type::iter_values() {
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ,
indexed: true,
stored: true,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
}
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: false,
stored: true,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: false,
stored: false,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: true,
stored: false,
fast: false,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
}
#[test]
fn write_split_fields_test() {
let fields_metadata = vec![
FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: true,
stored: true,
fast: true,
},
FieldMetadata {
field_name: "test2".to_string(),
typ: Type::Str,
indexed: true,
stored: false,
fast: false,
},
FieldMetadata {
field_name: "test3".to_string(),
typ: Type::U64,
indexed: true,
stored: false,
fast: true,
},
];
let out = serialize_split_fields(&fields_metadata);
let deserialized: Vec<FieldMetadata> = read_split_fields(&mut &out[..])
.unwrap()
.map(|el| el.unwrap())
.collect();
assert_eq!(fields_metadata, deserialized);
}
}

View File

@@ -1,3 +1,4 @@
use std::io::Write;
use std::sync::Arc;
use columnar::{
@@ -13,6 +14,7 @@ use crate::directory::WritePtr;
use crate::docset::{DocSet, TERMINATED};
use crate::error::DataCorruption;
use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
use crate::field_list::serialize_split_fields;
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
use crate::indexer::SegmentSerializer;
@@ -21,8 +23,8 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
use crate::store::StoreWriter;
use crate::termdict::{TermMerger, TermOrdinal};
use crate::{
DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
SegmentComponent, SegmentOrdinal,
merge_field_meta_data, DocAddress, DocId, FieldMetadata, IndexSettings, IndexSortByField,
InvertedIndexReader, Order, SegmentComponent, SegmentOrdinal,
};
/// Segment's max doc must be `< MAX_DOC_LIMIT`.
@@ -255,6 +257,19 @@ impl IndexMerger {
Ok(())
}
fn write_field_list(&self, list_field_wrt: &mut WritePtr) -> crate::Result<()> {
let field_metadatas: Vec<Vec<FieldMetadata>> = self
.readers
.iter()
.map(|reader| reader.fields_metadata())
.collect::<crate::Result<Vec<_>>>()?;
let merged = merge_field_meta_data(field_metadatas, &self.schema);
let out = serialize_split_fields(&merged);
list_field_wrt.write_all(&out)?;
Ok(())
}
fn write_fast_fields(
&self,
fast_field_wrt: &mut WritePtr,
@@ -552,7 +567,41 @@ impl IndexMerger {
continue;
}
field_serializer.new_term(term_bytes, total_doc_freq)?;
// This should never happen as we early exited for total_doc_freq == 0.
assert!(!segment_postings_containing_the_term.is_empty());
let has_term_freq = {
let has_term_freq = !segment_postings_containing_the_term[0]
.1
.block_cursor
.freqs()
.is_empty();
for (_, postings) in &segment_postings_containing_the_term[1..] {
// This may look like a strange way to test whether we have term freqs or not.
// With JSON object, the schema is not sufficient to know whether a term
// has its term frequency encoded or not:
// strings may have term frequencies, while number terms never have one.
//
// Ideally, we should have burnt one bit or two in the `TermInfo`.
// However, we preferred not changing the codec too much and detect this
// instead by
// - looking at the size of the skip data for bitpacked blocks
// - observing the absence of remaining data after reading the docs for vint
// blocks.
//
// Overall the reliable way to know if we have actual frequencies loaded or not
// is to check whether the actual decoded array is empty or not.
if has_term_freq != !postings.block_cursor.freqs().is_empty() {
return Err(DataCorruption::comment_only(
"Term freqs are inconsistent across segments",
)
.into());
}
}
has_term_freq
};
field_serializer.new_term(term_bytes, total_doc_freq, has_term_freq)?;
// We can now serialize this postings, by pushing each document to the
// postings serializer.
@@ -567,8 +616,13 @@ impl IndexMerger {
if let Some(remapped_doc_id) = old_to_new_doc_id[doc as usize] {
// we make sure to only write the term if
// there is at least one document.
let term_freq = segment_postings.term_freq();
segment_postings.positions(&mut positions_buffer);
let term_freq = if has_term_freq {
segment_postings.positions(&mut positions_buffer);
segment_postings.term_freq()
} else {
0u32
};
// if doc_id_mapping exists, the doc_ids are reordered, they are
// not just stacked. The field serializer expects monotonically increasing
// doc_ids, so we collect and sort them first, before writing.
@@ -734,6 +788,7 @@ impl IndexMerger {
self.write_storable_fields(serializer.get_store_writer(), &doc_id_mapping)?;
debug!("write-fastfields");
self.write_fast_fields(serializer.get_fast_field_write(), doc_id_mapping)?;
self.write_field_list(serializer.get_field_list_write())?;
debug!("close-serializer");
serializer.close()?;

View File

@@ -59,10 +59,13 @@ type AddBatchReceiver<D> = channel::Receiver<AddBatch<D>>;
#[cfg(test)]
mod tests_mmap {
use crate::collector::Count;
use crate::query::QueryParser;
use crate::schema::{JsonObjectOptions, Schema, Type, TEXT};
use crate::{Index, IndexWriter, Term};
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::AggregationCollector;
use crate::collector::{Count, TopDocs};
use crate::query::{AllQuery, QueryParser};
use crate::schema::{JsonObjectOptions, Schema, Type, FAST, INDEXED, STORED, TEXT};
use crate::{FieldMetadata, Index, IndexWriter, Term};
#[test]
fn test_advance_delete_bug() -> crate::Result<()> {
@@ -173,8 +176,7 @@ mod tests_mmap {
#[test]
fn test_json_field_list_fields() {
let mut schema_builder = Schema::builder();
let json_options: JsonObjectOptions =
JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
let json_options: JsonObjectOptions = JsonObjectOptions::from(TEXT);
let json_field = schema_builder.add_json_field("json", json_options);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
@@ -193,9 +195,9 @@ mod tests_mmap {
let reader = &searcher.segment_readers()[0];
let inverted_index = reader.inverted_index(json_field).unwrap();
assert_eq!(
inverted_index.list_fields().unwrap(),
inverted_index.list_encoded_fields().unwrap(),
[
("k8s\u{1}container\u{1}name".to_string(), Type::Str),
("k8s.container.name".to_string(), Type::Str),
("sub\u{1}a".to_string(), Type::I64),
("sub\u{1}b".to_string(), Type::I64),
("suber\u{1}a".to_string(), Type::I64),
@@ -205,4 +207,240 @@ mod tests_mmap {
]
);
}
#[test]
fn test_json_fields_metadata_expanded_dots_one_segment() {
test_json_fields_metadata(true, true);
}
#[test]
fn test_json_fields_metadata_expanded_dots_multi_segment() {
test_json_fields_metadata(true, false);
}
#[test]
fn test_json_fields_metadata_no_expanded_dots_one_segment() {
test_json_fields_metadata(false, true);
}
#[test]
fn test_json_fields_metadata_no_expanded_dots_multi_segment() {
test_json_fields_metadata(false, false);
}
fn test_json_fields_metadata(expanded_dots: bool, one_segment: bool) {
use pretty_assertions::assert_eq;
let mut schema_builder = Schema::builder();
let json_options: JsonObjectOptions =
JsonObjectOptions::from(TEXT).set_fast(None).set_stored();
let json_options = if expanded_dots {
json_options.set_expand_dots_enabled()
} else {
json_options
};
schema_builder.add_json_field("json.confusing", json_options.clone());
let json_field = schema_builder.add_json_field("json.shadow", json_options.clone());
let json_field2 = schema_builder.add_json_field("json", json_options.clone());
schema_builder.add_json_field("empty_json", json_options);
let number_field = schema_builder.add_u64_field("numbers", FAST);
schema_builder.add_u64_field("empty", FAST | INDEXED | STORED);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
let json =
serde_json::json!({"k8s.container.name": "a", "val": "a", "sub": {"a": 1, "b": 1}});
index_writer.add_document(doc!(json_field=>json)).unwrap();
let json =
serde_json::json!({"k8s.container.name": "a", "val": "a", "suber": {"a": 1, "b": 1}});
if !one_segment {
index_writer.commit().unwrap();
}
index_writer.add_document(doc!(json_field=>json)).unwrap();
let json = serde_json::json!({"k8s.container.name": "a", "k8s.container.name": "a", "val": "a", "suber": {"a": "a", "b": 1}});
index_writer
.add_document(doc!(number_field => 50u64, json_field=>json, json_field2=>json!({"shadow": {"val": "a"}})))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 3);
let fields_metadata = index.fields_metadata().unwrap();
assert_eq!(
fields_metadata,
[
FieldMetadata {
field_name: "empty".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::U64
},
FieldMetadata {
field_name: if expanded_dots {
"json.shadow.k8s.container.name".to_string()
} else {
"json.shadow.k8s\\.container\\.name".to_string()
},
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "json.shadow.sub.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.sub.b".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.suber.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.suber.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "json.shadow.suber.b".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.val".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "numbers".to_string(),
indexed: false,
stored: false,
fast: true,
typ: Type::U64
}
]
);
let query_parser = QueryParser::for_index(&index, vec![]);
// Test if returned field name can be queried
for indexed_field in fields_metadata.iter().filter(|meta| meta.indexed) {
let val = if indexed_field.typ == Type::Str {
"a"
} else {
"1"
};
let query_str = &format!("{}:{}", indexed_field.field_name, val);
let query = query_parser.parse_query(query_str).unwrap();
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
if indexed_field.field_name.contains("empty") || indexed_field.typ == Type::Json {
assert_eq!(count_docs.len(), 0);
} else {
assert!(!count_docs.is_empty(), "{}", indexed_field.field_name);
}
}
// Test if returned field name can be used for aggregation
for fast_field in fields_metadata.iter().filter(|meta| meta.fast) {
let agg_req_str = json!(
{
"termagg": {
"terms": {
"field": fast_field.field_name,
}
}
});
let agg_req: Aggregations = serde_json::from_value(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res = serde_json::to_value(agg_res).unwrap();
if !fast_field.field_name.contains("empty") && fast_field.typ != Type::Json {
assert!(
!res["termagg"]["buckets"].as_array().unwrap().is_empty(),
"{}",
fast_field.field_name
);
}
}
}
#[test]
fn test_json_field_shadowing_field_name_bug() {
/// This test is only there to demonstrate a bug when addressing a field that gets shadowed.
/// The issue only occurs if the shadowing field name contains a dot.
///
/// It happens independently of the `expand_dots` option, since that option does not
/// affect the field name itself.
use pretty_assertions::assert_eq;
let mut schema_builder = Schema::builder();
let json_options: JsonObjectOptions =
JsonObjectOptions::from(TEXT).set_fast(None).set_stored();
// let json_options = json_options.set_expand_dots_enabled();
let json_field_shadow = schema_builder.add_json_field("json.shadow", json_options.clone());
let json_field = schema_builder.add_json_field("json", json_options.clone());
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer
.add_document(
doc!(json_field_shadow=>json!({"val": "b"}), json_field=>json!({"shadow": {"val": "a"}})),
)
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let fields_and_vals = vec![
// The only way to address the field; otherwise it gets shadowed by the `json.shadow` field
("json.shadow\u{1}val".to_string(), "a"), // Succeeds
//("json.shadow.val".to_string(), "a"), // Fails
("json.shadow.val".to_string(), "b"), // Succeeds
];
let query_parser = QueryParser::for_index(&index, vec![]);
// Test if field name can be queried
for (indexed_field, val) in fields_and_vals.iter() {
let query_str = &format!("{}:{}", indexed_field, val);
let query = query_parser.parse_query(query_str).unwrap();
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
assert!(!count_docs.is_empty(), "{}:{}", indexed_field, val);
}
// Test if field name can be used for aggregation
for (field_name, val) in fields_and_vals.iter() {
let agg_req_str = json!(
{
"termagg": {
"terms": {
"field": field_name,
}
}
});
let agg_req: Aggregations = serde_json::from_value(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res = serde_json::to_value(agg_res).unwrap();
assert_eq!(
res["termagg"]["buckets"].as_array().unwrap()[0]["key"]
.as_str()
.unwrap(),
*val,
"{}",
field_name
);
}
}
}

View File

@@ -1,3 +1,5 @@
use columnar::ColumnType;
use common::TinySet;
use fnv::FnvHashMap;
/// `Field` is represented by an unsigned 32-bit integer type.
@@ -24,34 +26,44 @@ impl From<u32> for OrderedPathId {
#[derive(Default)]
pub(crate) struct PathToUnorderedId {
map: FnvHashMap<String, u32>,
/// TinySet contains the type codes of the columns in the path.
map: FnvHashMap<String, (u32, TinySet)>,
}
impl PathToUnorderedId {
#[inline]
pub(crate) fn get_or_allocate_unordered_id(&mut self, path: &str) -> u32 {
if let Some(id) = self.map.get(path) {
pub(crate) fn get_or_allocate_unordered_id(&mut self, path: &str, typ: ColumnType) -> u32 {
let code_bit = typ.to_code();
if let Some((id, all_codes)) = self.map.get_mut(path) {
*all_codes = all_codes.insert(code_bit as u32);
return *id;
}
self.insert_new_path(path)
self.insert_new_path(path, code_bit)
}
#[cold]
fn insert_new_path(&mut self, path: &str) -> u32 {
fn insert_new_path(&mut self, path: &str, typ_code: u8) -> u32 {
let next_id = self.map.len() as u32;
self.map.insert(path.to_string(), next_id);
self.map.insert(
path.to_string(),
(next_id, TinySet::singleton(typ_code as u32)),
);
next_id
}
/// Returns ids which reflect the lexical order of the paths.
///
/// The returned vec can be indexed with the unordered id to get the ordered id.
pub(crate) fn unordered_id_to_ordered_id(&self) -> Vec<OrderedPathId> {
let mut sorted_ids: Vec<(&str, &u32)> =
self.map.iter().map(|(k, v)| (k.as_str(), v)).collect();
pub(crate) fn unordered_id_to_ordered_id(&self) -> Vec<(OrderedPathId, TinySet)> {
let mut sorted_ids: Vec<(&str, (u32, TinySet))> = self
.map
.iter()
.map(|(k, (id, typ_code))| (k.as_str(), (*id, *typ_code)))
.collect();
sorted_ids.sort_unstable_by_key(|(path, _)| *path);
let mut result = vec![OrderedPathId::default(); sorted_ids.len()];
for (ordered, unordered) in sorted_ids.iter().map(|(_k, v)| v).enumerate() {
result[**unordered as usize] = OrderedPathId::from_ordered_id(ordered as u32);
let mut result = vec![(OrderedPathId::default(), TinySet::empty()); sorted_ids.len()];
for (ordered, (unordered, typ_code)) in sorted_ids.iter().map(|(_k, v)| v).enumerate() {
result[*unordered as usize] =
(OrderedPathId::from_ordered_id(ordered as u32), *typ_code);
}
result
}
@@ -74,12 +86,12 @@ mod tests {
let terms = vec!["b", "a", "b", "c"];
let ids = terms
.iter()
.map(|term| path_to_id.get_or_allocate_unordered_id(term))
.map(|term| path_to_id.get_or_allocate_unordered_id(term, ColumnType::Str))
.collect::<Vec<u32>>();
assert_eq!(ids, vec![0, 1, 0, 2]);
let ordered_ids = ids
.iter()
.map(|id| path_to_id.unordered_id_to_ordered_id()[*id as usize])
.map(|id| path_to_id.unordered_id_to_ordered_id()[*id as usize].0)
.collect::<Vec<OrderedPathId>>();
assert_eq!(ordered_ids, vec![1.into(), 0.into(), 1.into(), 2.into()]);
// Fetch terms

View File

@@ -12,6 +12,7 @@ pub struct SegmentSerializer {
segment: Segment,
pub(crate) store_writer: StoreWriter,
fast_field_write: WritePtr,
field_list_write: WritePtr,
fieldnorms_serializer: Option<FieldNormsSerializer>,
postings_serializer: InvertedIndexSerializer,
}
@@ -49,6 +50,7 @@ impl SegmentSerializer {
};
let fast_field_write = segment.open_write(SegmentComponent::FastFields)?;
let field_list_write = segment.open_write(SegmentComponent::FieldList)?;
let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;
@@ -58,6 +60,7 @@ impl SegmentSerializer {
segment,
store_writer,
fast_field_write,
field_list_write,
fieldnorms_serializer: Some(fieldnorms_serializer),
postings_serializer,
})
@@ -81,6 +84,11 @@ impl SegmentSerializer {
&mut self.postings_serializer
}
/// Accessor to the field list `WritePtr`.
pub fn get_field_list_write(&mut self) -> &mut WritePtr {
&mut self.field_list_write
}
/// Accessor to the `FastFieldSerializer`.
pub fn get_fast_field_write(&mut self) -> &mut WritePtr {
&mut self.fast_field_write
@@ -104,6 +112,7 @@ impl SegmentSerializer {
fieldnorms_serializer.close()?;
}
self.fast_field_write.terminate()?;
self.field_list_write.terminate()?;
self.postings_serializer.close()?;
self.store_writer.close()?;
Ok(())

View File

@@ -8,6 +8,7 @@ use super::operation::AddOperation;
use crate::core::json_utils::index_json_values;
use crate::core::Segment;
use crate::fastfield::FastFieldsWriter;
use crate::field_list::serialize_segment_fields;
use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
use crate::indexer::segment_serializer::SegmentSerializer;
use crate::postings::{
@@ -443,16 +444,29 @@ fn remap_and_write(
.segment()
.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let unordered_id_to_ordered_id = ctx.path_to_unordered_id.unordered_id_to_ordered_id();
serialize_postings(
ctx,
schema,
&ctx,
schema.clone(),
per_field_postings_writers,
fieldnorm_readers,
doc_id_map,
&unordered_id_to_ordered_id,
serializer.get_postings_serializer(),
)?;
debug!("fastfield-serialize");
fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
let columns = fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
let field_list_serializer = serializer.get_field_list_write();
serialize_segment_fields(
ctx,
field_list_serializer,
&schema,
&unordered_id_to_ordered_id,
columns,
)?;
// finalize temp docstore and create version, which reflects the doc_id_map
if let Some(doc_id_map) = doc_id_map {

View File

@@ -188,6 +188,7 @@ pub mod aggregation;
pub mod collector;
pub mod directory;
pub mod fastfield;
pub mod field_list;
pub mod fieldnorm;
pub mod positions;
pub mod postings;
@@ -221,9 +222,9 @@ pub use self::snippet::{Snippet, SnippetGenerator};
#[doc(hidden)]
pub use crate::core::json_utils;
pub use crate::core::{
Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader,
Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta,
SegmentReader, SingleSegmentIndexWriter,
merge_field_meta_data, Executor, FieldMetadata, Index, IndexBuilder, IndexMeta, IndexSettings,
IndexSortByField, InvertedIndexReader, Order, Searcher, SearcherGeneration, Segment,
SegmentComponent, SegmentId, SegmentMeta, SegmentReader, SingleSegmentIndexWriter,
};
pub use crate::directory::Directory;
pub use crate::indexer::IndexWriter;
@@ -238,7 +239,9 @@ pub use crate::schema::DatePrecision;
pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 6;
///
/// Version 7: Add `.fieldlist` file containing the list of fields in a segment.
const INDEX_FORMAT_VERSION: u32 = 7;
/// Oldest index format version this tantivy version can read.
const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;

View File

@@ -11,6 +11,10 @@ use crate::schema::{Field, Type, JSON_END_OF_PATH};
use crate::tokenizer::TokenStream;
use crate::{DocId, Term};
/// The `JsonPostingsWriter` is odd in that it relies on a hidden contract:
///
/// `subscribe` is called directly to index non-text tokens, while
/// `index_text` is used to index text.
#[derive(Default)]
pub(crate) struct JsonPostingsWriter<Rec: Recorder> {
str_posting_writer: SpecializedPostingsWriter<Rec>,

View File

@@ -63,7 +63,7 @@ pub mod tests {
let mut segment = index.new_segment();
let mut posting_serializer = InvertedIndexSerializer::open(&mut segment)?;
let mut field_serializer = posting_serializer.new_field(text_field, 120 * 4, None)?;
field_serializer.new_term("abc".as_bytes(), 12u32)?;
field_serializer.new_term("abc".as_bytes(), 12u32, true)?;
for doc_id in 0u32..120u32 {
let delta_positions = vec![1, 2, 3, 2];
field_serializer.write_doc(doc_id, 4, &delta_positions);

View File

@@ -2,6 +2,7 @@ use std::io;
use std::marker::PhantomData;
use std::ops::Range;
use common::TinySet;
use stacker::Addr;
use crate::fieldnorm::FieldNormReaders;
@@ -46,37 +47,38 @@ fn make_field_partition(
/// It pushes all term, one field at a time, towards the
/// postings serializer.
pub(crate) fn serialize_postings(
ctx: IndexingContext,
ctx: &IndexingContext,
schema: Schema,
per_field_postings_writers: &PerFieldPostingsWriter,
fieldnorm_readers: FieldNormReaders,
doc_id_map: Option<&DocIdMapping>,
unordered_id_to_ordered_id: &[(OrderedPathId, TinySet)],
serializer: &mut InvertedIndexSerializer,
) -> crate::Result<()> {
// Replace unordered ids by ordered ids to be able to sort
let unordered_id_to_ordered_id: Vec<OrderedPathId> =
ctx.path_to_unordered_id.unordered_id_to_ordered_id();
let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
let mut term_offsets: Vec<(Field, OrderedPathId, &[u8], Addr)> =
Vec::with_capacity(ctx.term_index.len());
term_offsets.extend(ctx.term_index.iter().map(|(key, addr)| {
for (key, addr) in ctx.term_index.iter() {
let field = Term::wrap(key).field();
if schema.get_field_entry(field).field_type().value_type() == Type::Json {
let byte_range_path = 5..5 + 4;
let unordered_id = u32::from_be_bytes(key[byte_range_path.clone()].try_into().unwrap());
let path_id = unordered_id_to_ordered_id[unordered_id as usize];
(field, path_id, &key[byte_range_path.end..], addr)
let field_entry = schema.get_field_entry(field);
if field_entry.field_type().value_type() == Type::Json {
let byte_range_unordered_id = 5..5 + 4;
let unordered_id =
u32::from_be_bytes(key[byte_range_unordered_id.clone()].try_into().unwrap());
let (path_id, _typ_code_bitvec) = unordered_id_to_ordered_id[unordered_id as usize];
term_offsets.push((field, path_id, &key[byte_range_unordered_id.end..], addr));
} else {
(field, 0.into(), &key[5..], addr)
term_offsets.push((field, 0.into(), &key[5..], addr));
}
}));
}
// Sort by field, path, and term
term_offsets.sort_unstable_by(
|(field1, path_id1, bytes1, _), (field2, path_id2, bytes2, _)| {
(field1, path_id1, bytes1).cmp(&(field2, path_id2, bytes2))
},
);
let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
let field_offsets = make_field_partition(&term_offsets);
for (field, byte_offsets) in field_offsets {
let postings_writer = per_field_postings_writers.get_for_field(field);
@@ -87,7 +89,7 @@ pub(crate) fn serialize_postings(
&term_offsets[byte_offsets],
&ordered_id_to_path,
doc_id_map,
&ctx,
ctx,
&mut field_serializer,
)?;
field_serializer.close()?;
@@ -194,7 +196,7 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
) -> io::Result<()> {
let recorder: Rec = ctx.term_index.read(addr);
let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
serializer.new_term(term, term_doc_freq)?;
serializer.new_term(term, term_doc_freq, recorder.has_term_freq())?;
recorder.serialize(&ctx.arena, doc_id_map, serializer, buffer_lender);
serializer.close_term()?;
Ok(())

View File

@@ -79,24 +79,20 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
///
/// Returns `None` if not available.
fn term_doc_freq(&self) -> Option<u32>;
#[inline]
fn has_term_freq(&self) -> bool {
true
}
}
/// Only records the doc ids
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Default)]
pub struct DocIdRecorder {
stack: ExpUnrolledLinkedList,
current_doc: DocId,
}
impl Default for DocIdRecorder {
fn default() -> Self {
DocIdRecorder {
stack: ExpUnrolledLinkedList::default(),
current_doc: u32::MAX,
}
}
}
impl Recorder for DocIdRecorder {
#[inline]
fn current_doc(&self) -> DocId {
@@ -105,8 +101,9 @@ impl Recorder for DocIdRecorder {
#[inline]
fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
let delta = doc - self.current_doc;
self.current_doc = doc;
self.stack.writer(arena).write_u32_vint(doc);
self.stack.writer(arena).write_u32_vint(delta);
}
#[inline]
@@ -123,21 +120,20 @@ impl Recorder for DocIdRecorder {
buffer_lender: &mut BufferLender,
) {
let (buffer, doc_ids) = buffer_lender.lend_all();
self.stack.read_to_end(arena, buffer);
// TODO avoid reading twice.
self.stack.read_to_end(arena, buffer);
if let Some(doc_id_map) = doc_id_map {
doc_ids.extend(
VInt32Reader::new(&buffer[..])
.map(|old_doc_id| doc_id_map.get_new_doc_id(old_doc_id)),
);
let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
doc_ids.extend(iter.map(|old_doc_id| doc_id_map.get_new_doc_id(old_doc_id)));
doc_ids.sort_unstable();
for doc in doc_ids {
serializer.write_doc(*doc, 0u32, &[][..]);
}
} else {
for doc in VInt32Reader::new(&buffer[..]) {
serializer.write_doc(doc, 0u32, &[][..]);
let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
for doc_id in iter {
serializer.write_doc(doc_id, 0u32, &[][..]);
}
}
}
@@ -145,6 +141,19 @@ impl Recorder for DocIdRecorder {
fn term_doc_freq(&self) -> Option<u32> {
None
}
fn has_term_freq(&self) -> bool {
false
}
}
/// Takes an iterator of delta-encoded elements and yields the running sum,
/// i.e. the decoded absolute values (e.g. deltas `[3, 2, 5]` yield `[3, 5, 10]`).
fn get_sum_reader(iter: impl Iterator<Item = u32>) -> impl Iterator<Item = u32> {
iter.scan(0, |state, delta| {
*state += delta;
Some(*state)
})
}
/// Recorder encoding document ids, and term frequencies
@@ -164,9 +173,10 @@ impl Recorder for TermFrequencyRecorder {
#[inline]
fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
let delta = doc - self.current_doc;
self.term_doc_freq += 1;
self.current_doc = doc;
self.stack.writer(arena).write_u32_vint(doc);
self.stack.writer(arena).write_u32_vint(delta);
}
#[inline]
@@ -193,9 +203,12 @@ impl Recorder for TermFrequencyRecorder {
let mut u32_it = VInt32Reader::new(&buffer[..]);
if let Some(doc_id_map) = doc_id_map {
let mut doc_id_and_tf = vec![];
while let Some(old_doc_id) = u32_it.next() {
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let term_freq = u32_it.next().unwrap_or(self.current_tf);
doc_id_and_tf.push((doc_id_map.get_new_doc_id(old_doc_id), term_freq));
doc_id_and_tf.push((doc_id_map.get_new_doc_id(doc_id), term_freq));
}
doc_id_and_tf.sort_unstable_by_key(|&(doc_id, _)| doc_id);
@@ -203,9 +216,12 @@ impl Recorder for TermFrequencyRecorder {
serializer.write_doc(doc_id, tf, &[][..]);
}
} else {
while let Some(doc) = u32_it.next() {
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let term_freq = u32_it.next().unwrap_or(self.current_tf);
serializer.write_doc(doc, term_freq, &[][..]);
serializer.write_doc(doc_id, term_freq, &[][..]);
}
}
}
@@ -216,23 +232,13 @@ impl Recorder for TermFrequencyRecorder {
}
/// Recorder encoding term frequencies as well as positions.
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Default)]
pub struct TfAndPositionRecorder {
stack: ExpUnrolledLinkedList,
current_doc: DocId,
term_doc_freq: u32,
}
impl Default for TfAndPositionRecorder {
fn default() -> Self {
TfAndPositionRecorder {
stack: ExpUnrolledLinkedList::default(),
current_doc: u32::MAX,
term_doc_freq: 0u32,
}
}
}
impl Recorder for TfAndPositionRecorder {
#[inline]
fn current_doc(&self) -> DocId {
@@ -241,9 +247,10 @@ impl Recorder for TfAndPositionRecorder {
#[inline]
fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
let delta = doc - self.current_doc;
self.current_doc = doc;
self.term_doc_freq += 1u32;
self.stack.writer(arena).write_u32_vint(doc);
self.stack.writer(arena).write_u32_vint(delta);
}
#[inline]
@@ -269,7 +276,10 @@ impl Recorder for TfAndPositionRecorder {
self.stack.read_to_end(arena, buffer_u8);
let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
let mut doc_id_and_positions = vec![];
while let Some(doc) = u32_it.next() {
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let mut prev_position_plus_one = 1u32;
buffer_positions.clear();
loop {
@@ -287,9 +297,9 @@ impl Recorder for TfAndPositionRecorder {
if let Some(doc_id_map) = doc_id_map {
// this simple variant to remap may consume too much memory
doc_id_and_positions
.push((doc_id_map.get_new_doc_id(doc), buffer_positions.to_vec()));
.push((doc_id_map.get_new_doc_id(doc_id), buffer_positions.to_vec()));
} else {
serializer.write_doc(doc, buffer_positions.len() as u32, buffer_positions);
serializer.write_doc(doc_id, buffer_positions.len() as u32, buffer_positions);
}
}
if doc_id_map.is_some() {

View File

@@ -71,7 +71,7 @@ impl SegmentPostings {
{
let mut postings_serializer =
PostingsSerializer::new(&mut buffer, 0.0, IndexRecordOption::Basic, None);
postings_serializer.new_term(docs.len() as u32);
postings_serializer.new_term(docs.len() as u32, false);
for &doc in docs {
postings_serializer.write_doc(doc, 1u32);
}
@@ -120,7 +120,7 @@ impl SegmentPostings {
IndexRecordOption::WithFreqs,
fieldnorm_reader,
);
postings_serializer.new_term(doc_and_tfs.len() as u32);
postings_serializer.new_term(doc_and_tfs.len() as u32, true);
for &(doc, tf) in doc_and_tfs {
postings_serializer.write_doc(doc, tf);
}
@@ -238,14 +238,18 @@ impl Postings for SegmentPostings {
}
fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
let term_freq = self.term_freq() as usize;
let term_freq = self.term_freq();
if let Some(position_reader) = self.position_reader.as_mut() {
debug_assert!(
!self.block_cursor.freqs().is_empty(),
"No positions available"
);
let read_offset = self.block_cursor.position_offset()
+ (self.block_cursor.freqs()[..self.cur]
.iter()
.cloned()
.sum::<u32>() as u64);
output.resize(term_freq, 0u32);
output.resize(term_freq as usize, 0u32);
position_reader.read(read_offset, &mut output[..]);
let mut cum = offset;
for output_mut in output.iter_mut() {

View File

@@ -168,7 +168,12 @@ impl<'a> FieldSerializer<'a> {
/// * term - the term. It needs to come after the previous term according to the lexicographical
/// order.
/// * term_doc_freq - the number of documents containing the term.
pub fn new_term(&mut self, term: &[u8], term_doc_freq: u32) -> io::Result<()> {
pub fn new_term(
&mut self,
term: &[u8],
term_doc_freq: u32,
record_term_freq: bool,
) -> io::Result<()> {
assert!(
!self.term_open,
"Called new_term, while the previous term was not closed."
@@ -177,7 +182,8 @@ impl<'a> FieldSerializer<'a> {
self.postings_serializer.clear();
self.current_term_info = self.current_term_info();
self.term_dictionary_builder.insert_key(term)?;
self.postings_serializer.new_term(term_doc_freq);
self.postings_serializer
.new_term(term_doc_freq, record_term_freq);
Ok(())
}
@@ -330,10 +336,10 @@ impl<W: Write> PostingsSerializer<W> {
}
}
pub fn new_term(&mut self, term_doc_freq: u32) {
pub fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
self.bm25_weight = None;
self.term_has_freq = self.mode.has_freq() && term_doc_freq != 0;
self.term_has_freq = self.mode.has_freq() && record_term_freq;
if !self.term_has_freq {
return;
}
@@ -349,7 +355,7 @@ impl<W: Write> PostingsSerializer<W> {
return;
}
self.bm25_weight = Some(Bm25Weight::for_one_term(
self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
term_doc_freq as u64,
num_docs_in_segment,
self.avg_fieldnorm,

View File

@@ -77,7 +77,7 @@ pub struct Bm25Params {
/// A struct used for computing BM25 scores.
#[derive(Clone)]
pub struct Bm25Weight {
idf_explain: Explanation,
idf_explain: Option<Explanation>,
weight: Score,
cache: [Score; 256],
average_fieldnorm: Score,
@@ -147,11 +147,30 @@ impl Bm25Weight {
idf_explain.add_const("N, total number of docs", total_num_docs as Score);
Bm25Weight::new(idf_explain, avg_fieldnorm)
}
/// Construct a [Bm25Weight] for a single term.
/// This method does not carry the [Explanation] for the idf.
pub fn for_one_term_without_explain(
term_doc_freq: u64,
total_num_docs: u64,
avg_fieldnorm: Score,
) -> Bm25Weight {
let idf = idf(term_doc_freq, total_num_docs);
Bm25Weight::new_without_explain(idf, avg_fieldnorm)
}
pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf_explain.value() * (1.0 + K1);
Bm25Weight {
idf_explain,
idf_explain: Some(idf_explain),
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
}
}
pub(crate) fn new_without_explain(idf: f32, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf * (1.0 + K1);
Bm25Weight {
idf_explain: None,
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
@@ -202,7 +221,9 @@ impl Bm25Weight {
let mut explanation = Explanation::new("TermQuery, product of...", score);
explanation.add_detail(Explanation::new("(K1+1)", K1 + 1.0));
explanation.add_detail(self.idf_explain.clone());
if let Some(idf_explain) = &self.idf_explain {
explanation.add_detail(idf_explain.clone());
}
explanation.add_detail(tf_explanation);
explanation
}

View File

@@ -74,7 +74,8 @@ impl Weight for BoostWeight {
fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
let underlying_explanation = self.weight.explain(reader, doc)?;
let score = underlying_explanation.value() * self.boost;
let mut explanation = Explanation::new(format!("Boost x{} of ...", self.boost), score);
let mut explanation =
Explanation::new_with_string(format!("Boost x{} of ...", self.boost), score);
explanation.add_detail(underlying_explanation);
Ok(explanation)
}
@@ -151,7 +152,7 @@ mod tests {
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
assert_eq!(
explanation.to_pretty_json(),
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}"
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\"\n }\n ]\n}"
);
Ok(())
}

View File

@@ -164,11 +164,9 @@ mod tests {
"details": [
{
"value": 1.0,
"description": "AllQuery",
"context": []
"description": "AllQuery"
}
],
"context": []
]
}"#
);
Ok(())

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt;
use serde::Serialize;
@@ -16,12 +17,12 @@ pub(crate) fn does_not_match(doc: DocId) -> TantivyError {
#[derive(Clone, Serialize)]
pub struct Explanation {
value: Score,
description: String,
#[serde(skip_serializing_if = "Vec::is_empty")]
details: Vec<Explanation>,
context: Vec<String>,
description: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none")]
details: Option<Vec<Explanation>>,
#[serde(skip_serializing_if = "Option::is_none")]
context: Option<Vec<String>>,
}
impl fmt::Debug for Explanation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Explanation({})", self.to_pretty_json())
@@ -30,12 +31,21 @@ impl fmt::Debug for Explanation {
impl Explanation {
/// Creates a new explanation object.
pub fn new<T: ToString>(description: T, value: Score) -> Explanation {
pub fn new_with_string(description: String, value: Score) -> Explanation {
Explanation {
value,
description: description.to_string(),
details: vec![],
context: vec![],
description: Cow::Owned(description),
details: None,
context: None,
}
}
/// Creates a new explanation object.
pub fn new(description: &'static str, value: Score) -> Explanation {
Explanation {
value,
description: Cow::Borrowed(description),
details: None,
context: None,
}
}
@@ -48,17 +58,21 @@ impl Explanation {
///
/// Details are treated as child of the current node.
pub fn add_detail(&mut self, child_explanation: Explanation) {
self.details.push(child_explanation);
self.details
.get_or_insert_with(Vec::new)
.push(child_explanation);
}
/// Adds some extra context to the explanation.
pub fn add_context(&mut self, context: String) {
self.context.push(context);
self.context.get_or_insert_with(Vec::new).push(context);
}
/// Shortcut for `self.details.push(Explanation::new(name, value));`
pub fn add_const<T: ToString>(&mut self, name: T, value: Score) {
self.details.push(Explanation::new(name, value));
pub fn add_const(&mut self, name: &'static str, value: Score) {
self.details
.get_or_insert_with(Vec::new)
.push(Explanation::new(name, value));
}
/// Returns an indented json representation of the explanation tree for debug usage.

View File

@@ -101,7 +101,7 @@ impl TermQuery {
..
} => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?,
EnableScoring::Disabled { .. } => {
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32)
Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
}
};
let scoring_enabled = enable_scoring.is_scoring_enabled();

View File

@@ -40,21 +40,25 @@ impl From<BytesOptionsDeser> for BytesOptions {
impl BytesOptions {
/// Returns true if the value is indexed.
#[inline]
pub fn is_indexed(&self) -> bool {
self.indexed
}
/// Returns true if and only if the value is normed.
#[inline]
pub fn fieldnorms(&self) -> bool {
self.fieldnorms
}
/// Returns true if the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool {
self.fast
}
/// Returns true if the value is stored.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}

View File

@@ -27,21 +27,25 @@ pub struct DateOptions {
impl DateOptions {
/// Returns true iff the value is stored.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}
/// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool {
self.indexed
}
/// Returns true iff the field has fieldnorm.
#[inline]
pub fn fieldnorms(&self) -> bool {
self.fieldnorms && self.indexed
}
/// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool {
self.fast
}

View File

@@ -131,16 +131,16 @@ impl Facet {
pub fn from_path<Path>(path: Path) -> Facet
where
Path: IntoIterator,
Path::Item: ToString,
Path::Item: AsRef<str>,
{
let mut facet_string: String = String::with_capacity(100);
let mut step_it = path.into_iter();
if let Some(step) = step_it.next() {
facet_string.push_str(&step.to_string());
facet_string.push_str(step.as_ref());
}
for step in step_it {
facet_string.push(FACET_SEP_CHAR);
facet_string.push_str(&step.to_string());
facet_string.push_str(step.as_ref());
}
Facet(facet_string)
}
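// Usage sketch: with `AsRef<str>` items, e.g. `Facet::from_path(["top", "sub"])`
// builds the facet directly from string slices, without allocating an intermediate
// `String` per path step.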

View File

@@ -14,6 +14,7 @@ pub struct FacetOptions {
impl FacetOptions {
/// Returns true if the value is stored.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}

View File

@@ -108,7 +108,16 @@ impl FieldEntry {
self.field_type.is_fast()
}
/// Returns true if the field has the expand dots option set (for json fields)
pub fn is_expand_dots_enabled(&self) -> bool {
match self.field_type {
FieldType::JsonObject(ref options) => options.is_expand_dots_enabled(),
_ => false,
}
}
/// Returns true if the field is stored
#[inline]
pub fn is_stored(&self) -> bool {
match self.field_type {
FieldType::U64(ref options)

View File

@@ -3,6 +3,7 @@ use std::str::FromStr;
use base64::engine::general_purpose::STANDARD as BASE64;
use base64::Engine;
use columnar::ColumnType;
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use thiserror::Error;
@@ -47,7 +48,7 @@ pub enum ValueParsingError {
///
/// Contrary to FieldType, this does
/// not include the way the field must be indexed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(u8)]
pub enum Type {
/// `&str`
@@ -72,6 +73,21 @@ pub enum Type {
IpAddr = b'p',
}
impl From<ColumnType> for Type {
fn from(value: ColumnType) -> Self {
match value {
ColumnType::Str => Type::Str,
ColumnType::U64 => Type::U64,
ColumnType::I64 => Type::I64,
ColumnType::F64 => Type::F64,
ColumnType::Bool => Type::Bool,
ColumnType::DateTime => Type::Date,
ColumnType::Bytes => Type::Bytes,
ColumnType::IpAddr => Type::IpAddr,
}
}
}
const ALL_TYPES: [Type; 10] = [
Type::Str,
Type::U64,

View File

@@ -31,21 +31,25 @@ pub struct IpAddrOptions {
impl IpAddrOptions {
/// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool {
self.fast
}
/// Returns `true` if the ip address should be stored in the doc store.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}
/// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool {
self.indexed
}
/// Returns true if and only if the value is normed.
#[inline]
pub fn fieldnorms(&self) -> bool {
self.fieldnorms
}

View File

@@ -46,17 +46,20 @@ pub struct JsonObjectOptions {
impl JsonObjectOptions {
/// Returns `true` if the json object should be stored.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}
/// Returns `true` iff the json object should be indexed.
#[inline]
pub fn is_indexed(&self) -> bool {
self.indexing.is_some()
}
/// Returns true if and only if the json object fields are
/// to be treated as fast fields.
#[inline]
pub fn is_fast(&self) -> bool {
matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
|| matches!(
@@ -66,6 +69,7 @@ impl JsonObjectOptions {
}
/// Returns the name of the fast field tokenizer, if one is configured.
#[inline]
pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
match &self.fast {
FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
@@ -87,6 +91,7 @@ impl JsonObjectOptions {
///
/// If disabled, the "." needs to be escaped:
/// `k8s\.node\.id:5`.
#[inline]
pub fn is_expand_dots_enabled(&self) -> bool {
self.expand_dots_enabled
}
@@ -103,6 +108,7 @@ impl JsonObjectOptions {
/// If set to `Some` then both int and str values will be indexed.
/// The inner `TextFieldIndexing` will however, only apply to the str values
/// in the json object.
#[inline]
pub fn get_text_indexing_options(&self) -> Option<&TextFieldIndexing> {
self.indexing.as_ref()
}

View File

@@ -57,26 +57,31 @@ impl From<NumericOptionsDeser> for NumericOptions {
impl NumericOptions {
/// Returns true iff the value is stored in the doc store.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}
/// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool {
self.indexed
}
/// Returns true iff the field has fieldnorm.
#[inline]
pub fn fieldnorms(&self) -> bool {
self.fieldnorms && self.indexed
}
/// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool {
self.fast
}
/// Returns true if values should be coerced to numbers.
#[inline]
pub fn should_coerce(&self) -> bool {
self.coerce
}

View File

@@ -8,6 +8,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
use super::ip_options::IpAddrOptions;
use super::*;
use crate::json_utils::split_json_path;
use crate::schema::bytes_options::BytesOptions;
use crate::TantivyError;
@@ -30,7 +31,7 @@ use crate::TantivyError;
/// let body_field = schema_builder.add_text_field("body", TEXT);
/// let schema = schema_builder.build();
/// ```
#[derive(Default)]
#[derive(Debug, Default)]
pub struct SchemaBuilder {
fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>,
@@ -328,12 +329,19 @@ impl Schema {
if let Some(field) = self.0.fields_map.get(full_path) {
return Some((*field, ""));
}
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos);
if let Some(field) = self.0.fields_map.get(prefix) {
return Some((*field, &suffix[1..]));
}
// JSON path may contain a dot, for now we try both variants to find the field.
let prefix = split_json_path(prefix).join(".");
if let Some(field) = self.0.fields_map.get(&prefix) {
return Some((*field, &suffix[1..]));
}
}
None
}
@@ -349,6 +357,7 @@ impl Schema {
pub fn find_field_with_default<'a>(
&self,
full_path: &'a str,
default_field_opt: Option<Field>,
) -> Option<(Field, &'a str)> {
let (field, json_path) = self

View File

@@ -72,16 +72,19 @@ fn is_false(val: &bool) -> bool {
impl TextOptions {
/// Returns the indexing options.
#[inline]
pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> {
self.indexing.as_ref()
}
/// Returns true if the text is to be stored.
#[inline]
pub fn is_stored(&self) -> bool {
self.stored
}
/// Returns true if and only if the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool {
matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
|| matches!(
@@ -91,6 +94,7 @@ impl TextOptions {
}
/// Returns the name of the fast field tokenizer, if one is configured.
#[inline]
pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
match &self.fast {
FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
@@ -101,6 +105,7 @@ impl TextOptions {
}
/// Returns true if values should be coerced to strings (numbers, null).
#[inline]
pub fn should_coerce(&self) -> bool {
self.coerce
}

View File

@@ -117,6 +117,7 @@ impl SegmentSpaceUsage {
use self::ComponentSpaceUsage::*;
use crate::SegmentComponent::*;
match component {
FieldList => ComponentSpaceUsage::Basic(ByteCount::from(0u64)),
Postings => PerField(self.postings().clone()),
Positions => PerField(self.positions().clone()),
FastFields => PerField(self.fast_fields().clone()),

View File

@@ -11,16 +11,22 @@ description = "sstables for tantivy"
[dependencies]
common = {version= "0.6", path="../common", package="tantivy-common"}
tantivy-fst = "0.4"
tantivy-bitpacker = { version= "0.5", path="../bitpacker" }
tantivy-fst = "0.5"
# experimental gives us access to Decompressor::upper_bound
zstd = { version = "0.13", features = ["experimental"] }
[dev-dependencies]
proptest = "1"
criterion = "0.5"
criterion = { version = "0.5", default-features = false }
names = "0.14"
rand = "0.8"
[[bench]]
name = "stream_bench"
harness = false
[[bench]]
name = "ord_to_term"
harness = false

View File

@@ -89,39 +89,71 @@ Note: as the SSTable does not support redundant keys, there is no ambiguity betw
### SSTFooter
```
+-------+-------+-----+------------------+------------+-------------+---------+---------+
| Block | Block | ... | FirstLayerOffset | LayerCount | IndexOffset | NumTerm | Version |
+-------+-------+-----+------------------+------------+-------------+---------+---------+
|----(# of blocks)----|
+-----+----------------+-------------+-------------+---------+---------+
| Fst | BlockAddrStore | StoreOffset | IndexOffset | NumTerm | Version |
+-----+----------------+-------------+-------------+---------+---------+
```
- Block(SSTBlock): uses IndexValue for its Values format
- FirstLayerOffset(u64): Offset between the start of the footer and the start of the top level index
- LayerCount(u32): Number of layers of index (min 1) ## TODO do we want to use 0 as a marker for no layers? It makes small sstables 12 bytes more compact (the 0u32 would alias with the "end of sstable marker")
- Fst(Fst): finite state transducer mapping keys to a block number
- BlockAddrStore(BlockAddrStore): store mapping a block number to its BlockAddr
- StoreOffset(u64): Offset to start of the BlockAddrStore. If zero, see the SingleBlockSStable section
- IndexOffset(u64): Offset to the start of the SSTFooter
- NumTerm(u64): number of terms in the sstable
- Version(u32): Currently equal to 3
Blocks referencing the main table and block referencing the index itself are encoded the same way and
are not directly differentiated. Offsets in blocks referencing the index are relative to the start of
the footer, blocks referencing the main table are relative to the start of that table.
### Fst
### IndexValue
```
+------------+----------+-------+-------+-----+
| EntryCount | StartPos | Entry | Entry | ... |
+------------+----------+-------+-------+-----+
|---( # of entries)---|
```
Fst is in the format of tantivy\_fst
- EntryCount(VInt): number of entries
- StartPos(VInt): the start pos of the first (data) block referenced by this (index) block
- Entry (IndexEntry)
### BlockAddrStore
### Entry
```
+----------+--------------+
| BlockLen | FirstOrdinal |
+----------+--------------+
```
- BlockLen(VInt): length of the block
- FirstOrdinal(VInt): ordinal of the first element in the given block
+---------+-----------+-----------+-----+-----------+-----------+-----+
| MetaLen | BlockMeta | BlockMeta | ... | BlockData | BlockData | ... |
+---------+-----------+-----------+-----+-----------+-----------+-----+
|---------(N blocks)----------|---------(N blocks)----------|
- MetaLen(u64): length of the BlockMeta section
- BlockMeta(BlockAddrBlockMetadata): metadata to seek through BlockData
- BlockData(CompactedBlockAddr): bitpacked per block metadata
### BlockAddrBlockMetadata
+--------+------------+--------------+------------+--------------+-------------------+-----------------+----------+
| Offset | RangeStart | FirstOrdinal | RangeSlope | OrdinalSlope | FirstOrdinalNBits | RangeStartNBits | BlockLen |
+--------+------------+--------------+------------+--------------+-------------------+-----------------+----------+
- Offset(u64): offset of the corresponding BlockData in the datastream
- RangeStart(u64): the start position of the first block
- FirstOrdinal(u64): the first ordinal of the first block
- RangeSlope(u32): slope predicted for start range evolution (see computation in BlockData)
- OrdinalSlope(u64): slope predicted for first ordinal evolution (see computation in BlockData)
- FirstOrdinalNBits(u8): number of bits per ordinal in datastream (see computation in BlockData)
- RangeStartNBits(u8): number of bits per range start in datastream (see computation in BlockData)
### BlockData
+-----------------+-------------------+---------------+
| RangeStartDelta | FirstOrdinalDelta | FinalRangeEnd |
+-----------------+-------------------+---------------+
|------(BlockLen repetitions)---------|
- RangeStartDelta(var): RangeStartNBits *bits* of little endian number. See below for decoding
- FirstOrdinalDelta(var): FirstOrdinalNBits *bits* of little endian number. See below for decoding
- FinalRangeEnd(var): RangeStartNBits *bits* of integer. See below for decoding
Converting a BlockData of index Index and a BlockAddrBlockMetadata to an actual block address is done as follows:
range\_prediction := RangeStart + Index * RangeSlope;
range\_derivation := RangeStartDelta - (1 << (RangeStartNBits-1));
range\_start := range\_prediction + range\_derivation
The same computation can be done for ordinal.
Note that `range_derivation` can take negative values. `RangeStartDelta` is just its translation to a positive range.
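A minimal sketch of this decoding in Rust (names mirror the fields above; illustrative only, not the actual `tantivy-sstable` code):
```rust
/// Reconstructs a block's range start from the linear prediction plus the
/// bit-packed, re-centered delta, as described above.
fn decode_range_start(
    range_start: u64,       // RangeStart from BlockAddrBlockMetadata
    range_slope: u32,       // RangeSlope
    range_start_nbits: u8,  // RangeStartNBits (assumed > 0)
    range_start_delta: u64, // RangeStartDelta read from this BlockData entry
    index: u64,             // position of the entry inside the block
) -> u64 {
    let range_prediction = range_start + index * range_slope as u64;
    // the delta is stored shifted by 2^(nbits - 1) so negative corrections fit
    let range_derivation = range_start_delta as i64 - (1i64 << (range_start_nbits - 1));
    (range_prediction as i64 + range_derivation) as u64
}
```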
## SingleBlockSStable
The format used for the index is meant to be compact; however, it has a constant cost of around 70
bytes, which isn't negligible for a table containing very few keys.
To limit the impact of that constant cost, single-block sstables omit the Fst and BlockAddrStore from
their index. Instead, a block with a first ordinal of 0, a range start of 0 and a range end of IndexOffset
is implicitly used for every operation.
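A minimal sketch of that implicit block, mirroring `SSTableIndexV3Empty` in the diff below (the local `BlockAddr` definition is only here to keep the snippet self-contained):
```
use std::ops::Range;

struct BlockAddr {
    first_ordinal: u64,
    byte_range: Range<usize>,
}

/// With no stored index, every lookup resolves to this single block,
/// which covers the whole data section up to IndexOffset.
fn implicit_block(index_offset: usize) -> BlockAddr {
    BlockAddr {
        first_ordinal: 0,
        byte_range: 0..index_offset,
    }
}
```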

View File

@@ -0,0 +1,110 @@
use std::sync::Arc;
use common::file_slice::FileSlice;
use common::OwnedBytes;
use criterion::{criterion_group, criterion_main, Criterion};
use tantivy_sstable::{self, Dictionary, MonotonicU64SSTable};
fn make_test_sstable(suffix: &str) -> FileSlice {
let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
// 125 million elements
for elem in 0..125_000_000 {
let key = format!("prefix.{elem:07X}{suffix}").into_bytes();
builder.insert(&key, &elem).unwrap();
}
let table = builder.finish().unwrap();
let table = Arc::new(OwnedBytes::new(table));
let slice = common::file_slice::FileSlice::new(table.clone());
slice
}
pub fn criterion_benchmark(c: &mut Criterion) {
{
let slice = make_test_sstable(".suffix");
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
c.bench_function("ord_to_term_suffix", |b| {
let mut res = Vec::new();
b.iter(|| {
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("open_and_ord_to_term_suffix", |b| {
let mut res = Vec::new();
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("term_ord_suffix", |b| {
b.iter(|| {
assert_eq!(
dict.term_ord(b"prefix.00186A0.suffix").unwrap().unwrap(),
100_000
);
assert_eq!(
dict.term_ord(b"prefix.121EAC0.suffix").unwrap().unwrap(),
19_000_000
);
})
});
c.bench_function("open_and_term_ord_suffix", |b| {
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert_eq!(
dict.term_ord(b"prefix.00186A0.suffix").unwrap().unwrap(),
100_000
);
assert_eq!(
dict.term_ord(b"prefix.121EAC0.suffix").unwrap().unwrap(),
19_000_000
);
})
});
}
{
let slice = make_test_sstable("");
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
c.bench_function("ord_to_term", |b| {
let mut res = Vec::new();
b.iter(|| {
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("open_and_ord_to_term", |b| {
let mut res = Vec::new();
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("term_ord", |b| {
b.iter(|| {
assert_eq!(dict.term_ord(b"prefix.00186A0").unwrap().unwrap(), 100_000);
assert_eq!(
dict.term_ord(b"prefix.121EAC0").unwrap().unwrap(),
19_000_000
);
})
});
c.bench_function("open_and_term_ord", |b| {
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert_eq!(dict.term_ord(b"prefix.00186A0").unwrap().unwrap(), 100_000);
assert_eq!(
dict.term_ord(b"prefix.121EAC0").unwrap().unwrap(),
19_000_000
);
})
});
}
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@@ -20,7 +20,6 @@ where W: io::Write
// Only here to avoid allocations.
stateless_buffer: Vec<u8>,
block_len: usize,
compress: bool,
}
impl<W, TValueWriter> DeltaWriter<W, TValueWriter>
@@ -35,18 +34,6 @@ where
value_writer: TValueWriter::default(),
stateless_buffer: Vec::new(),
block_len: BLOCK_LEN,
compress: true,
}
}
pub fn new_no_compression(wrt: W) -> Self {
DeltaWriter {
block: Vec::with_capacity(BLOCK_LEN * 2),
write: CountingWriter::wrap(BufWriter::new(wrt)),
value_writer: TValueWriter::default(),
stateless_buffer: Vec::new(),
block_len: BLOCK_LEN,
compress: false,
}
}
@@ -66,7 +53,7 @@ where
let block_len = buffer.len() + self.block.len();
if block_len > 2048 && self.compress {
if block_len > 2048 {
buffer.extend_from_slice(&self.block);
self.block.clear();

View File

@@ -9,8 +9,11 @@ use common::{BinarySerializable, OwnedBytes};
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton;
use crate::sstable_index_v3::SSTableIndexV3Empty;
use crate::streamer::{Streamer, StreamerBuilder};
use crate::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, TermOrdinal, VoidSSTable};
use crate::{
BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, SSTableIndexV3, TermOrdinal, VoidSSTable,
};
/// An SSTable is a sorted map that associates sorted `&[u8]` keys
/// to any kind of typed values.
@@ -128,144 +131,99 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
key_range: impl RangeBounds<[u8]>,
limit: Option<u64>,
) -> FileSlice {
// we don't perform great when limit is set to a large value, and sometime we use u64::MAX
// as a marker for no limit, so we'd better capture that.
// (not great means we decode up to the whole bottom layer index, which can take dozens of
// ms on a 100m term dictionary)
let limit = limit.filter(|limit| *limit != u64::MAX);
// TODO replace unwraps with proper error handling
let start_key = match key_range.start_bound() {
Bound::Included(key) | Bound::Excluded(key) => key,
Bound::Unbounded => &[],
};
let end_key = match key_range.end_bound() {
Bound::Included(key) | Bound::Excluded(key) => Some(key),
let first_block_id = match key_range.start_bound() {
Bound::Included(key) | Bound::Excluded(key) => {
let Some(first_block_id) = self.sstable_index.locate_with_key(key) else {
return FileSlice::empty();
};
Some(first_block_id)
}
Bound::Unbounded => None,
};
let bounds = if let Some(limit) = limit {
let mut sstable_iterator = self.sstable_index.iterate_from_key(start_key).unwrap();
let Some(start_block) = sstable_iterator.value() else {
// range_start is after end of table
return FileSlice::empty();
};
if let Some(end_key) = end_key {
if sstable_iterator.key().unwrap() >= end_key {
// the start and end keys are in the same block, return just that block
return self.sstable_slice.slice(start_block.byte_range.clone());
}
}
let start_bound = start_block.byte_range.start;
sstable_iterator.advance().unwrap();
let Some(second_block) = sstable_iterator.value() else {
// we reached the end of the sstable, return everything from start_bound
return self.sstable_slice.slice(start_bound..);
};
let mut end_bound = second_block.byte_range.end;
if let Some(end_key) = end_key {
if sstable_iterator.key().unwrap() >= end_key {
return self.sstable_slice.slice(start_bound..end_bound);
}
}
let target_ord = second_block.first_ordinal + limit;
while sstable_iterator.advance().unwrap() {
let block = sstable_iterator.value().unwrap();
if block.first_ordinal >= target_ord {
break;
}
end_bound = block.byte_range.end;
if let Some(end_key) = end_key {
if sstable_iterator.key().unwrap() >= end_key {
break;
}
}
}
let start_bound = Bound::Included(start_bound);
let end_bound = Bound::Excluded(end_bound);
(start_bound, end_bound)
} else {
let Some(start_block) = self.sstable_index.get_block_with_key(start_key).unwrap()
else {
// range_start is after end of table
return FileSlice::empty();
};
let start_bound = Bound::Included(start_block.byte_range.start);
let end_bound = if let Some(end_key) = end_key {
if let Some(end_block) = self.sstable_index.get_block_with_key(end_key).unwrap() {
Bound::Excluded(end_block.byte_range.end)
} else {
Bound::Unbounded
}
} else {
Bound::Unbounded
};
(start_bound, end_bound)
let last_block_id = match key_range.end_bound() {
Bound::Included(key) | Bound::Excluded(key) => self.sstable_index.locate_with_key(key),
Bound::Unbounded => None,
};
self.sstable_slice.slice(bounds)
let start_bound = if let Some(first_block_id) = first_block_id {
let Some(block_addr) = self.sstable_index.get_block(first_block_id) else {
return FileSlice::empty();
};
Bound::Included(block_addr.byte_range.start)
} else {
Bound::Unbounded
};
let last_block_id = if let Some(limit) = limit {
let second_block_id = first_block_id.map(|id| id + 1).unwrap_or(0);
if let Some(block_addr) = self.sstable_index.get_block(second_block_id) {
let ordinal_limit = block_addr.first_ordinal + limit;
let last_block_limit = self.sstable_index.locate_with_ord(ordinal_limit);
if let Some(last_block_id) = last_block_id {
Some(last_block_id.min(last_block_limit))
} else {
Some(last_block_limit)
}
} else {
last_block_id
}
} else {
last_block_id
};
let end_bound = last_block_id
.and_then(|block_id| self.sstable_index.get_block(block_id))
.map(|block_addr| Bound::Excluded(block_addr.byte_range.end))
.unwrap_or(Bound::Unbounded);
self.sstable_slice.slice((start_bound, end_bound))
}
/// Opens a `TermDictionary`.
pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> {
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(20);
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
// let layer_count = u32::deserialize(&mut footer_len_bytes)?;
let index_offset = u64::deserialize(&mut footer_len_bytes)?;
let num_terms = u64::deserialize(&mut footer_len_bytes)?;
let version = u32::deserialize(&mut footer_len_bytes)?;
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
match version {
2 => {
// previous format, kept for backward compatibility
let sstable_index_bytes = index_slice.read_bytes()?;
// on the old format, the 1st layer necessarily start immediately, and there is
// only a single layer
let sstable_index =
SSTableIndex::load(sstable_index_bytes, 1, 0).map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
})?;
Ok(Dictionary {
sstable_slice,
sstable_index,
num_terms,
phantom_data: PhantomData,
})
}
3 => {
let sstable_index_bytes = index_slice.read_bytes()?;
let (sstable_index_bytes, mut v3_footer_bytes) = sstable_index_bytes.rsplit(12);
let first_layer_offset = v3_footer_bytes.read_u64();
let layer_count = v3_footer_bytes.read_u32();
let sstable_index_bytes = index_slice.read_bytes()?;
let sstable_index = SSTableIndex::load(
sstable_index_bytes,
layer_count,
first_layer_offset as usize,
)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption"))?;
Ok(Dictionary {
sstable_slice,
sstable_index,
num_terms,
phantom_data: PhantomData,
})
let sstable_index = match version {
2 => SSTableIndex::V2(
crate::sstable_index_v2::SSTableIndex::load(sstable_index_bytes).map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
})?,
),
3 => {
let (sstable_index_bytes, mut footerv3_len_bytes) = sstable_index_bytes.rsplit(8);
let store_offset = u64::deserialize(&mut footerv3_len_bytes)?;
if store_offset != 0 {
SSTableIndex::V3(
SSTableIndexV3::load(sstable_index_bytes, store_offset).map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
})?,
)
} else {
// if store_offset is zero, there is no index, so we build a pseudo-index
// assuming a single block of sstable covering everything.
SSTableIndex::V3Empty(SSTableIndexV3Empty::load(index_offset as usize))
}
}
_ => {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Unsuported sstable version, expected {}, found {version}",
crate::SSTABLE_VERSION,
),
format!("Unsuported sstable version, expected one of [2, 3], found {version}"),
))
}
}
};
Ok(Dictionary {
sstable_slice,
sstable_index,
num_terms,
phantom_data: PhantomData,
})
}
/// Creates a term dictionary from the supplied bytes.
@@ -289,17 +247,68 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
self.num_terms as usize
}
/// Decode a DeltaReader up to key, returning the number of terms traversed
///
/// If the key was not found, returns Ok(None).
/// After calling this function, it is possible to call `DeltaReader::value` to get the
/// associated value.
fn decode_up_to_key<K: AsRef<[u8]>>(
&self,
key: K,
sstable_delta_reader: &mut DeltaReader<TSSTable::ValueReader>,
) -> io::Result<Option<TermOrdinal>> {
let mut term_ord = 0;
let key_bytes = key.as_ref();
let mut ok_bytes = 0;
while sstable_delta_reader.advance()? {
let prefix_len = sstable_delta_reader.common_prefix_len();
let suffix = sstable_delta_reader.suffix();
match prefix_len.cmp(&ok_bytes) {
Ordering::Less => return Ok(None), // popped bytes already matched => too far
Ordering::Equal => (),
Ordering::Greater => {
// the ok prefix is less than current entry prefix => continue to next elem
term_ord += 1;
continue;
}
}
// we have ok_bytes byte of common prefix, check if this key adds more
for (key_byte, suffix_byte) in key_bytes[ok_bytes..].iter().zip(suffix) {
match suffix_byte.cmp(key_byte) {
Ordering::Less => break, // byte too small
Ordering::Equal => ok_bytes += 1, // new matching byte
Ordering::Greater => return Ok(None), // too far
}
}
if ok_bytes == key_bytes.len() {
if prefix_len + suffix.len() == ok_bytes {
return Ok(Some(term_ord));
} else {
// current key is a prefix of current element, not a match
return Ok(None);
}
}
term_ord += 1;
}
Ok(None)
}
/// Returns the ordinal associated with a given term.
pub fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermOrdinal>> {
let key_bytes = key.as_ref();
let Some(block_addr) = self.sstable_index.get_block_with_key(key_bytes)? else {
let Some(block_addr) = self.sstable_index.get_block_with_key(key_bytes) else {
return Ok(None);
};
let first_ordinal = block_addr.first_ordinal;
let mut sstable_delta_reader = self.sstable_delta_reader_block(block_addr)?;
decode_up_to_key(key_bytes, &mut sstable_delta_reader)
self.decode_up_to_key(key_bytes, &mut sstable_delta_reader)
.map(|opt| opt.map(|ord| ord + first_ordinal))
}
@@ -314,7 +323,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
/// the buffer may be modified.
pub fn ord_to_term(&self, ord: TermOrdinal, bytes: &mut Vec<u8>) -> io::Result<bool> {
// find block in which the term would be
let block_addr = self.sstable_index.get_block_with_ord(ord)?;
let block_addr = self.sstable_index.get_block_with_ord(ord);
let first_ordinal = block_addr.first_ordinal;
// then search inside that block only
@@ -332,7 +341,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
/// Returns the number of terms in the dictionary.
pub fn term_info_from_ord(&self, term_ord: TermOrdinal) -> io::Result<Option<TSSTable::Value>> {
// find block in which the term would be
let block_addr = self.sstable_index.get_block_with_ord(term_ord)?;
let block_addr = self.sstable_index.get_block_with_ord(term_ord);
let first_ordinal = block_addr.first_ordinal;
// then search inside that block only
@@ -347,7 +356,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
/// Lookups the value corresponding to the key.
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TSSTable::Value>> {
if let Some(block_addr) = self.sstable_index.get_block_with_key(key.as_ref())? {
if let Some(block_addr) = self.sstable_index.get_block_with_key(key.as_ref()) {
let sstable_reader = self.sstable_delta_reader_block(block_addr)?;
return self.do_get(key, sstable_reader);
}
@@ -356,7 +365,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
/// Lookups the value corresponding to the key.
pub async fn get_async<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TSSTable::Value>> {
if let Some(block_addr) = self.sstable_index.get_block_with_key(key.as_ref())? {
if let Some(block_addr) = self.sstable_index.get_block_with_key(key.as_ref()) {
let sstable_reader = self.sstable_delta_reader_block_async(block_addr).await?;
return self.do_get(key, sstable_reader);
}
@@ -368,7 +377,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
key: K,
mut reader: DeltaReader<TSSTable::ValueReader>,
) -> io::Result<Option<TSSTable::Value>> {
if let Some(_ord) = decode_up_to_key(key, &mut reader)? {
if let Some(_ord) = self.decode_up_to_key(key, &mut reader)? {
Ok(Some(reader.value().clone()))
} else {
Ok(None)
@@ -405,56 +414,6 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
}
}
/// Decode a DeltaReader up to key, returning the number of terms traversed
///
/// If the key was not found, returns Ok(None).
/// After calling this function, it is possible to call `DeltaReader::value` to get the
/// associated value.
pub(crate) fn decode_up_to_key<K: AsRef<[u8]>, TValueReader: crate::ValueReader>(
key: K,
sstable_delta_reader: &mut DeltaReader<TValueReader>,
) -> io::Result<Option<TermOrdinal>> {
let mut term_ord = 0;
let key_bytes = key.as_ref();
let mut ok_bytes = 0;
while sstable_delta_reader.advance()? {
let prefix_len = sstable_delta_reader.common_prefix_len();
let suffix = sstable_delta_reader.suffix();
match prefix_len.cmp(&ok_bytes) {
Ordering::Less => return Ok(None), // popped bytes already matched => too far
Ordering::Equal => (),
Ordering::Greater => {
// the ok prefix is less than current entry prefix => continue to next elem
term_ord += 1;
continue;
}
}
// we have ok_bytes byte of common prefix, check if this key adds more
for (key_byte, suffix_byte) in key_bytes[ok_bytes..].iter().zip(suffix) {
match suffix_byte.cmp(key_byte) {
Ordering::Less => break, // byte too small
Ordering::Equal => ok_bytes += 1, // new matching byte
Ordering::Greater => return Ok(None), // too far
}
}
if ok_bytes == key_bytes.len() {
if prefix_len + suffix.len() == ok_bytes {
return Ok(Some(term_ord));
} else {
// current key is a prefix of current element, not a match
return Ok(None);
}
}
term_ord += 1;
}
Ok(None)
}
#[cfg(test)]
mod tests {
use std::ops::Range;
@@ -520,6 +479,8 @@ mod tests {
let dictionary = Dictionary::<MonotonicU64SSTable>::open(slice).unwrap();
// if the last block is id 0, tests are meaningless
assert_ne!(dictionary.sstable_index.locate_with_ord(u64::MAX), 0);
assert_eq!(dictionary.num_terms(), 0x3ffff);
(dictionary, table)
}
@@ -528,7 +489,7 @@ mod tests {
fn test_ord_term_conversion() {
let (dic, slice) = make_test_sstable();
let block = dic.sstable_index.get_block_with_ord(100_000).unwrap();
let block = dic.sstable_index.get_block_with_ord(100_000);
slice.restrict(block.byte_range);
let mut res = Vec::new();
@@ -554,11 +515,7 @@ mod tests {
// end of a block
let ordinal = block.first_ordinal - 1;
let new_range = dic
.sstable_index
.get_block_with_ord(ordinal)
.unwrap()
.byte_range;
let new_range = dic.sstable_index.get_block_with_ord(ordinal).byte_range;
slice.restrict(new_range);
assert!(dic.ord_to_term(ordinal, &mut res).unwrap());
assert_eq!(res, format!("{ordinal:05X}").into_bytes());
@@ -568,7 +525,7 @@ mod tests {
// before first block
// 1st block must be loaded for key-related operations
let block = dic.sstable_index.get_block_with_ord(0).unwrap();
let block = dic.sstable_index.get_block_with_ord(0);
slice.restrict(block.byte_range);
assert!(dic.get(b"$$$").unwrap().is_none());
@@ -577,11 +534,7 @@ mod tests {
// after last block
// last block must be loaded for ord related operations
let ordinal = 0x40000 + 10;
let new_range = dic
.sstable_index
.get_block_with_ord(ordinal)
.unwrap()
.byte_range;
let new_range = dic.sstable_index.get_block_with_ord(ordinal).byte_range;
slice.restrict(new_range);
assert!(!dic.ord_to_term(ordinal, &mut res).unwrap());
assert!(dic.term_info_from_ord(ordinal).unwrap().is_none());
@@ -606,13 +559,11 @@ mod tests {
.sstable_index
.get_block_with_key(b"10000")
.unwrap()
.unwrap()
.byte_range;
let end = dic
.sstable_index
.get_block_with_key(b"18000")
.unwrap()
.unwrap()
.byte_range;
slice.restrict(start.start..end.end);

View File

@@ -1,6 +1,6 @@
use std::io::{self, Write};
use std::num::NonZeroU64;
use std::ops::Range;
use std::usize;
use merge::ValueMerger;
@@ -10,8 +10,9 @@ pub mod merge;
mod streamer;
pub mod value;
mod sstable_index;
pub use sstable_index::{BlockAddr, SSTableIndex, SSTableIndexBuilder};
mod sstable_index_v3;
pub use sstable_index_v3::{BlockAddr, SSTableIndex, SSTableIndexBuilder, SSTableIndexV3};
mod sstable_index_v2;
pub(crate) mod vint;
pub use dictionary::Dictionary;
pub use streamer::{Streamer, StreamerBuilder};
@@ -30,12 +31,6 @@ pub type TermOrdinal = u64;
const DEFAULT_KEY_CAPACITY: usize = 50;
const SSTABLE_VERSION: u32 = 3;
// TODO tune that value. Maybe it's too little?
#[cfg(not(test))]
const DEFAULT_MAX_ROOT_BLOCKS: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(32) };
#[cfg(test)]
const DEFAULT_MAX_ROOT_BLOCKS: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(1) };
/// Given two byte string returns the length of
/// the longest common prefix.
fn common_prefix_len(left: &[u8], right: &[u8]) -> usize {
@@ -61,7 +56,7 @@ pub trait SSTable: Sized {
}
fn writer<W: io::Write>(wrt: W) -> Writer<W, Self::ValueWriter> {
Writer::new(wrt, DEFAULT_MAX_ROOT_BLOCKS)
Writer::new(wrt)
}
fn delta_reader(reader: OwnedBytes) -> DeltaReader<Self::ValueReader> {
@@ -184,7 +179,6 @@ where W: io::Write
delta_writer: DeltaWriter<W, TValueWriter>,
num_terms: u64,
first_ordinal_of_the_block: u64,
index_max_root_blocks: NonZeroU64,
}
impl<W, TValueWriter> Writer<W, TValueWriter>
@@ -197,18 +191,17 @@ where
/// TODO remove this function. (See Issue #1727)
#[doc(hidden)]
pub fn create(wrt: W) -> io::Result<Self> {
Ok(Self::new(wrt, DEFAULT_MAX_ROOT_BLOCKS))
Ok(Self::new(wrt))
}
/// Creates a new `TermDictionaryBuilder`.
pub fn new(wrt: W, index_max_root_blocks: NonZeroU64) -> Self {
pub fn new(wrt: W) -> Self {
Writer {
previous_key: Vec::with_capacity(DEFAULT_KEY_CAPACITY),
num_terms: 0u64,
index_builder: SSTableIndexBuilder::default(),
delta_writer: DeltaWriter::new(wrt),
first_ordinal_of_the_block: 0u64,
index_max_root_blocks,
}
}
@@ -310,15 +303,11 @@ where
// add a final empty block as an end marker
wrt.write_all(&0u32.to_le_bytes())?;
let index_offset = wrt.written_bytes();
let offset = wrt.written_bytes();
let (layer_count, layer_offset): (u32, u64) = self
.index_builder
.serialize(&mut wrt, self.index_max_root_blocks)?;
wrt.write_all(&layer_offset.to_le_bytes())?;
wrt.write_all(&layer_count.to_le_bytes())?;
wrt.write_all(&index_offset.to_le_bytes())?;
let fst_len: u64 = self.index_builder.serialize(&mut wrt)?;
wrt.write_all(&fst_len.to_le_bytes())?;
wrt.write_all(&offset.to_le_bytes())?;
wrt.write_all(&self.num_terms.to_le_bytes())?;
SSTABLE_VERSION.serialize(&mut wrt)?;
@@ -398,12 +387,7 @@ mod test {
16, 17, 33, 18, 19, 17, 20, // data block
0, 0, 0, 0, // no more block
// index
8, 0, 0, 0, // size of index block
0, // compression
1, 0, 12, 0, 32, 17, 20, // index block
0, 0, 0, 0, // no more index block
0, 0, 0, 0, 0, 0, 0, 0, // first layer offset
1, 0, 0, 0, // layer count
0, 0, 0, 0, 0, 0, 0, 0, // fst length
16, 0, 0, 0, 0, 0, 0, 0, // index start offset
3, 0, 0, 0, 0, 0, 0, 0, // num term
3, 0, 0, 0, // version

View File

@@ -1,487 +0,0 @@
use std::io::{self, Write};
use std::ops::Range;
use common::OwnedBytes;
use crate::{common_prefix_len, SSTable, SSTableDataCorruption, TermOrdinal};
#[derive(Debug, Clone)]
pub struct SSTableIndex {
root_blocks: Vec<BlockMeta>,
layer_count: u32,
index_bytes: OwnedBytes,
}
impl Default for SSTableIndex {
fn default() -> Self {
SSTableIndex {
root_blocks: Vec::new(),
layer_count: 1,
index_bytes: OwnedBytes::empty(),
}
}
}
impl SSTableIndex {
/// Load an index from its binary representation
pub fn load(
data: OwnedBytes,
layer_count: u32,
first_layer_offset: usize,
) -> Result<SSTableIndex, SSTableDataCorruption> {
let (index_bytes, first_layer_slice) = data.split(first_layer_offset);
let mut reader = IndexSSTable::reader(first_layer_slice);
let mut root_blocks = Vec::new();
while reader.advance().map_err(|_| SSTableDataCorruption)? {
root_blocks.push(BlockMeta {
last_key_or_greater: reader.key().to_vec(),
block_addr: reader.value().clone(),
});
}
Ok(SSTableIndex {
root_blocks,
layer_count,
index_bytes,
// index_bytes: OwnedBytes::empty(),
})
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> io::Result<Option<BlockAddr>> {
self.iterate_from_key(key).map(|iter| iter.value().cloned())
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub fn get_block_with_ord(&self, ord: TermOrdinal) -> io::Result<BlockAddr> {
let pos = self
.root_blocks
.binary_search_by_key(&ord, |block| block.block_addr.first_ordinal);
let root_pos = match pos {
Ok(pos) => pos,
// Err(0) can't happen as the sstable starts with ordinal zero
Err(pos) => pos - 1,
};
if self.layer_count == 1 {
return Ok(self.root_blocks[root_pos].block_addr.clone());
}
let mut next_layer_block_addr = self.root_blocks[root_pos].block_addr.clone();
for _ in 1..self.layer_count {
let mut sstable_delta_reader = IndexSSTable::delta_reader(
self.index_bytes
.slice(next_layer_block_addr.byte_range.clone()),
);
while sstable_delta_reader.advance()? {
if sstable_delta_reader.value().first_ordinal > ord {
break;
}
next_layer_block_addr = sstable_delta_reader.value().clone();
}
}
Ok(next_layer_block_addr)
}
pub(crate) fn iterate_from_key(&self, key: &[u8]) -> io::Result<ReaderOrSlice<'_>> {
let root_pos = self
.root_blocks
.binary_search_by_key(&key, |block| &block.last_key_or_greater);
let root_pos = match root_pos {
Ok(pos) => pos,
Err(pos) => {
if pos < self.root_blocks.len() {
pos
} else {
// after end of last block: no block matches
return Ok(ReaderOrSlice::End);
}
}
};
let mut next_layer_block_addr = self.root_blocks[root_pos].block_addr.clone();
let mut last_delta_reader = None;
for _ in 1..self.layer_count {
// we don't enter this loop for 1 layer index
let mut sstable_delta_reader = IndexSSTable::delta_reader(
self.index_bytes.slice(next_layer_block_addr.byte_range),
);
crate::dictionary::decode_up_to_key(key, &mut sstable_delta_reader)?;
next_layer_block_addr = sstable_delta_reader.value().clone();
last_delta_reader = Some(sstable_delta_reader);
}
if let Some(delta_reader) = last_delta_reader {
// reconstruct the current key. We stopped either on the exact key, or just after
// either way, common_prefix_len is something that did not change between the
// last-key-before-target and the current pos, so those bytes must match the prefix of
// `key`. The next bytes can be obtained from the delta reader
let mut result_key = Vec::with_capacity(crate::DEFAULT_KEY_CAPACITY);
let common_prefix_len = delta_reader.common_prefix_len();
let suffix = delta_reader.suffix();
let new_len = delta_reader.common_prefix_len() + suffix.len();
result_key.resize(new_len, 0u8);
result_key[..common_prefix_len].copy_from_slice(&key[..common_prefix_len]);
result_key[common_prefix_len..].copy_from_slice(suffix);
let reader = crate::Reader {
key: result_key,
delta_reader,
};
Ok(ReaderOrSlice::Reader(reader))
} else {
// self.layer_count == 1, there is no lvl2 sstable to decode.
Ok(ReaderOrSlice::Iter(&self.root_blocks, root_pos))
}
}
}
pub(crate) enum ReaderOrSlice<'a> {
Reader(crate::Reader<crate::value::index::IndexValueReader>),
Iter(&'a [BlockMeta], usize),
End,
}
impl<'a> ReaderOrSlice<'a> {
pub fn advance(&mut self) -> Result<bool, SSTableDataCorruption> {
match self {
ReaderOrSlice::Reader(reader) => {
let res = reader.advance().map_err(|_| SSTableDataCorruption);
if !matches!(res, Ok(true)) {
*self = ReaderOrSlice::End;
}
res
}
ReaderOrSlice::Iter(slice, index) => {
*index += 1;
if *index < slice.len() {
Ok(true)
} else {
*self = ReaderOrSlice::End;
Ok(false)
}
}
ReaderOrSlice::End => Ok(false),
}
}
/// Get current key. Always Some(_) unless last call to advance returned something else than
/// Ok(true)
pub fn key(&self) -> Option<&[u8]> {
match self {
ReaderOrSlice::Reader(reader) => Some(reader.key()),
ReaderOrSlice::Iter(slice, index) => Some(&slice[*index].last_key_or_greater),
ReaderOrSlice::End => None,
}
}
/// Get current value. Always Some(_) unless last call to advance returned something else than
/// Ok(true)
pub fn value(&self) -> Option<&BlockAddr> {
match self {
ReaderOrSlice::Reader(reader) => Some(reader.value()),
ReaderOrSlice::Iter(slice, index) => Some(&slice[*index].block_addr),
ReaderOrSlice::End => None,
}
}
}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BlockAddr {
pub byte_range: Range<usize>,
pub first_ordinal: u64,
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
#[derive(Default)]
pub struct SSTableIndexBuilder {
index: SSTableIndex,
}
/// Given that left < right,
/// mutates `left into a shorter byte string left'` that
/// matches `left <= left' < right`.
fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
assert!(&left[..] < right);
let common_len = common_prefix_len(left, right);
if left.len() == common_len {
return;
}
// It is possible to do one character shorter in some case,
// but it is not worth the extra complexity
for pos in (common_len + 1)..left.len() {
if left[pos] != u8::MAX {
left[pos] += 1;
left.truncate(pos + 1);
return;
}
}
}
impl SSTableIndexBuilder {
/// In order to make the index as light as possible, we
/// try to find a shorter alternative to the last key of the last block
/// that is still smaller than the next key.
pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
if let Some(last_block) = self.index.root_blocks.last_mut() {
find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
}
}
pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
self.index.root_blocks.push(BlockMeta {
last_key_or_greater: last_key.to_vec(),
block_addr: BlockAddr {
byte_range,
first_ordinal,
},
})
}
pub fn serialize<W: std::io::Write>(
&self,
wrt: W,
index_max_root_blocks: std::num::NonZeroU64,
) -> io::Result<(u32, u64)> {
let index_max_root_blocks = index_max_root_blocks.get();
let mut wrt = common::CountingWriter::wrap(wrt);
let mut next_layer = write_sstable_layer(&mut wrt, &self.index.root_blocks, 0)?;
let mut layer_count = 1;
let mut offset = 0;
while next_layer.len() as u64 > index_max_root_blocks {
offset = wrt.written_bytes();
layer_count += 1;
next_layer = write_sstable_layer(&mut wrt, &next_layer, offset as usize)?;
}
Ok((layer_count, offset))
}
}
fn write_sstable_layer<W: std::io::Write>(
wrt: W,
layer_content: &[BlockMeta],
offset: usize,
) -> io::Result<Vec<BlockMeta>> {
// we can't use a plain writer as it would generate an index
// also disable compression, the index is small anyway, and it's the most costly part of
// opening that kind of sstable
let mut sstable_writer =
crate::DeltaWriter::<_, crate::value::index::IndexValueWriter>::new_no_compression(wrt);
// in tests, set a smaller block size to stress-test
#[cfg(test)]
sstable_writer.set_block_len(16);
let mut next_layer = Vec::new();
let mut previous_key = Vec::with_capacity(crate::DEFAULT_KEY_CAPACITY);
let mut first_ordinal = None;
for block in layer_content.iter() {
if first_ordinal.is_none() {
first_ordinal = Some(block.block_addr.first_ordinal);
}
let keep_len = common_prefix_len(&previous_key, &block.last_key_or_greater);
sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]);
sstable_writer.write_value(&block.block_addr);
if let Some(range) = sstable_writer.flush_block_if_required()? {
let real_range = (range.start + offset)..(range.end + offset);
let block_meta = BlockMeta {
last_key_or_greater: block.last_key_or_greater.clone(),
block_addr: BlockAddr {
byte_range: real_range,
first_ordinal: first_ordinal.take().unwrap(),
},
};
next_layer.push(block_meta);
previous_key.clear();
} else {
previous_key.extend_from_slice(&block.last_key_or_greater);
previous_key.resize(block.last_key_or_greater.len(), 0u8);
previous_key[keep_len..].copy_from_slice(&block.last_key_or_greater[keep_len..]);
}
}
if let Some(range) = sstable_writer.flush_block()? {
if let Some(last_block) = layer_content.last() {
// not going here means an empty table (?!)
let real_range = (range.start + offset)..(range.end + offset);
let block_meta = BlockMeta {
last_key_or_greater: last_block.last_key_or_greater.clone(),
block_addr: BlockAddr {
byte_range: real_range,
first_ordinal: first_ordinal.take().unwrap(),
},
};
next_layer.push(block_meta);
}
}
sstable_writer.finish().write_all(&0u32.to_le_bytes())?;
Ok(next_layer)
}
/// SSTable representing an index
///
/// `last_key_or_greater` is used as the key, the value contains the
/// length and first ordinal of each block. The start offset is implicitly
/// obtained from lengths.
struct IndexSSTable;
impl SSTable for IndexSSTable {
type Value = BlockAddr;
type ValueReader = crate::value::index::IndexValueReader;
type ValueWriter = crate::value::index::IndexValueWriter;
}
#[cfg(test)]
mod tests {
use common::OwnedBytes;
use super::{BlockAddr, SSTableIndex, SSTableIndexBuilder};
use crate::SSTableDataCorruption;
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
sstable_builder
.serialize(&mut buffer, crate::DEFAULT_MAX_ROOT_BLOCKS)
.unwrap();
let buffer = OwnedBytes::new(buffer);
let sstable_index = SSTableIndex::load(buffer, 1, 0).unwrap();
assert_eq!(
sstable_index.get_block_with_key(b"bbbde").unwrap(),
Some(BlockAddr {
first_ordinal: 10u64,
byte_range: 30..40
})
);
assert_eq!(
sstable_index
.get_block_with_key(b"aa")
.unwrap()
.unwrap()
.first_ordinal,
0
);
assert_eq!(
sstable_index
.get_block_with_key(b"aaa")
.unwrap()
.unwrap()
.first_ordinal,
0
);
assert_eq!(
sstable_index
.get_block_with_key(b"aab")
.unwrap()
.unwrap()
.first_ordinal,
5
);
assert_eq!(
sstable_index
.get_block_with_key(b"ccc")
.unwrap()
.unwrap()
.first_ordinal,
10
);
assert!(sstable_index.get_block_with_key(b"e").unwrap().is_none());
assert_eq!(
sstable_index.get_block_with_ord(0).unwrap().first_ordinal,
0
);
assert_eq!(
sstable_index.get_block_with_ord(1).unwrap().first_ordinal,
0
);
assert_eq!(
sstable_index.get_block_with_ord(4).unwrap().first_ordinal,
0
);
assert_eq!(
sstable_index.get_block_with_ord(5).unwrap().first_ordinal,
5
);
assert_eq!(
sstable_index.get_block_with_ord(6).unwrap().first_ordinal,
5
);
assert_eq!(
sstable_index.get_block_with_ord(100).unwrap().first_ordinal,
15
);
}
#[test]
fn test_sstable_with_corrupted_data() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
sstable_builder
.serialize(&mut buffer, crate::DEFAULT_MAX_ROOT_BLOCKS)
.unwrap();
buffer[2] = 9u8;
let buffer = OwnedBytes::new(buffer);
let data_corruption_err = SSTableIndex::load(buffer, 1, 0).err().unwrap();
assert!(matches!(data_corruption_err, SSTableDataCorruption));
}
#[track_caller]
fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
let mut left_buf = left.to_vec();
super::find_shorter_str_in_between(&mut left_buf, right);
assert!(left_buf.len() <= left.len());
assert!(left <= &left_buf);
assert!(&left_buf[..] < right);
}
#[test]
fn test_find_shorter_str_in_between() {
test_find_shorter_str_in_between_aux(b"", b"hello");
test_find_shorter_str_in_between_aux(b"abc", b"abcd");
test_find_shorter_str_in_between_aux(b"abcd", b"abd");
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
}
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
if left < right {
test_find_shorter_str_in_between_aux(&left, &right);
}
}
}
}

View File

@@ -0,0 +1,101 @@
use common::OwnedBytes;
use crate::{BlockAddr, SSTable, SSTableDataCorruption, TermOrdinal};
#[derive(Default, Debug, Clone)]
pub struct SSTableIndex {
blocks: Vec<BlockMeta>,
}
impl SSTableIndex {
/// Load an index from its binary representation
pub fn load(data: OwnedBytes) -> Result<SSTableIndex, SSTableDataCorruption> {
let mut reader = IndexSSTable::reader(data);
let mut blocks = Vec::new();
while reader.advance().map_err(|_| SSTableDataCorruption)? {
blocks.push(BlockMeta {
last_key_or_greater: reader.key().to_vec(),
block_addr: reader.value().clone(),
});
}
Ok(SSTableIndex { blocks })
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: usize) -> Option<BlockAddr> {
self.blocks
.get(block_id)
.map(|block_meta| block_meta.block_addr.clone())
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<usize> {
let pos = self
.blocks
.binary_search_by_key(&key, |block| &block.last_key_or_greater);
match pos {
Ok(pos) => Some(pos),
Err(pos) => {
if pos < self.blocks.len() {
Some(pos)
} else {
// after end of last block: no block matches
None
}
}
}
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
self.locate_with_key(key).and_then(|id| self.get_block(id))
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> usize {
let pos = self
.blocks
.binary_search_by_key(&ord, |block| block.block_addr.first_ordinal);
match pos {
Ok(pos) => pos,
// Err(0) can't happen as the sstable starts with ordinal zero
Err(pos) => pos - 1,
}
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
// locate_with_ord always returns an index within range
self.get_block(self.locate_with_ord(ord)).unwrap()
}
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
/// SSTable representing an index
///
/// `last_key_or_greater` is used as the key, the value contains the
/// length and first ordinal of each block. The start offset is implicitly
/// obtained from lengths.
struct IndexSSTable;
impl SSTable for IndexSSTable {
type Value = BlockAddr;
type ValueReader = crate::value::index::IndexValueReader;
type ValueWriter = crate::value::index::IndexValueWriter;
}

View File

@@ -0,0 +1,826 @@
use std::io::{self, Read, Write};
use std::ops::Range;
use std::sync::Arc;
use common::{BinarySerializable, FixedSize, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker};
use tantivy_fst::raw::Fst;
use tantivy_fst::{IntoStreamer, Map, MapBuilder, Streamer};
use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
#[derive(Debug, Clone)]
pub enum SSTableIndex {
V2(crate::sstable_index_v2::SSTableIndex),
V3(SSTableIndexV3),
V3Empty(SSTableIndexV3Empty),
}
impl SSTableIndex {
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block(block_id as usize),
SSTableIndex::V3(v3_index) => v3_index.get_block(block_id),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block(block_id),
}
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
match self {
SSTableIndex::V2(v2_index) => v2_index.locate_with_key(key).map(|i| i as u64),
SSTableIndex::V3(v3_index) => v3_index.locate_with_key(key),
SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_key(key),
}
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block_with_key(key),
SSTableIndex::V3(v3_index) => v3_index.get_block_with_key(key),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_key(key),
}
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
match self {
SSTableIndex::V2(v2_index) => v2_index.locate_with_ord(ord) as u64,
SSTableIndex::V3(v3_index) => v3_index.locate_with_ord(ord),
SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_ord(ord),
}
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block_with_ord(ord),
SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord(ord),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_ord(ord),
}
}
}
#[derive(Debug, Clone)]
pub struct SSTableIndexV3 {
fst_index: Arc<Map<OwnedBytes>>,
block_addr_store: BlockAddrStore,
}
impl SSTableIndexV3 {
/// Load an index from its binary representation
pub fn load(
data: OwnedBytes,
fst_length: u64,
) -> Result<SSTableIndexV3, SSTableDataCorruption> {
let (fst_slice, block_addr_store_slice) = data.split(fst_length as usize);
let fst_index = Fst::new(fst_slice)
.map_err(|_| SSTableDataCorruption)?
.into();
let block_addr_store =
BlockAddrStore::open(block_addr_store_slice).map_err(|_| SSTableDataCorruption)?;
Ok(SSTableIndexV3 {
fst_index: Arc::new(fst_index),
block_addr_store,
})
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
self.block_addr_store.get(block_id)
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
self.fst_index
.range()
.ge(key)
.into_stream()
.next()
.map(|(_key, id)| id)
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
self.locate_with_key(key).and_then(|id| self.get_block(id))
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
self.block_addr_store.binary_search_ord(ord).0
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
self.block_addr_store.binary_search_ord(ord).1
}
}
#[derive(Debug, Clone)]
pub struct SSTableIndexV3Empty {
block_addr: BlockAddr,
}
impl SSTableIndexV3Empty {
pub fn load(index_start_pos: usize) -> SSTableIndexV3Empty {
SSTableIndexV3Empty {
block_addr: BlockAddr {
first_ordinal: 0,
byte_range: 0..index_start_pos,
},
}
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, _block_id: u64) -> Option<BlockAddr> {
Some(self.block_addr.clone())
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, _key: &[u8]) -> Option<u64> {
Some(0)
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, _key: &[u8]) -> Option<BlockAddr> {
Some(self.block_addr.clone())
}
pub(crate) fn locate_with_ord(&self, _ord: TermOrdinal) -> u64 {
0
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, _ord: TermOrdinal) -> BlockAddr {
self.block_addr.clone()
}
}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BlockAddr {
pub first_ordinal: u64,
pub byte_range: Range<usize>,
}
impl BlockAddr {
fn to_block_start(&self) -> BlockStartAddr {
BlockStartAddr {
first_ordinal: self.first_ordinal,
byte_range_start: self.byte_range.start,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct BlockStartAddr {
first_ordinal: u64,
byte_range_start: usize,
}
impl BlockStartAddr {
fn to_block_addr(&self, byte_range_end: usize) -> BlockAddr {
BlockAddr {
first_ordinal: self.first_ordinal,
byte_range: self.byte_range_start..byte_range_end,
}
}
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
impl BinarySerializable for BlockStartAddr {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
let start = self.byte_range_start as u64;
start.serialize(writer)?;
self.first_ordinal.serialize(writer)
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let byte_range_start = u64::deserialize(reader)? as usize;
let first_ordinal = u64::deserialize(reader)?;
Ok(BlockStartAddr {
first_ordinal,
byte_range_start,
})
}
// Provided method
fn num_bytes(&self) -> u64 {
BlockStartAddr::SIZE_IN_BYTES as u64
}
}
impl FixedSize for BlockStartAddr {
const SIZE_IN_BYTES: usize = 2 * u64::SIZE_IN_BYTES;
}
/// Given that left < right,
/// mutates `left into a shorter byte string left'` that
/// matches `left <= left' < right`.
fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
assert!(&left[..] < right);
let common_len = common_prefix_len(left, right);
if left.len() == common_len {
return;
}
// It is possible to do one character shorter in some case,
// but it is not worth the extra complexity
for pos in (common_len + 1)..left.len() {
if left[pos] != u8::MAX {
left[pos] += 1;
left.truncate(pos + 1);
return;
}
}
}
#[derive(Default)]
pub struct SSTableIndexBuilder {
blocks: Vec<BlockMeta>,
}
impl SSTableIndexBuilder {
/// In order to make the index as light as possible, we
/// try to find a shorter alternative to the last key of the last block
/// that is still smaller than the next key.
pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
if let Some(last_block) = self.blocks.last_mut() {
find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
}
}
pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
self.blocks.push(BlockMeta {
last_key_or_greater: last_key.to_vec(),
block_addr: BlockAddr {
byte_range,
first_ordinal,
},
})
}
pub fn serialize<W: std::io::Write>(&self, wrt: W) -> io::Result<u64> {
if self.blocks.len() <= 1 {
return Ok(0);
}
let counting_writer = common::CountingWriter::wrap(wrt);
let mut map_builder = MapBuilder::new(counting_writer).map_err(fst_error_to_io_error)?;
for (i, block) in self.blocks.iter().enumerate() {
map_builder
.insert(&block.last_key_or_greater, i as u64)
.map_err(fst_error_to_io_error)?;
}
let counting_writer = map_builder.into_inner().map_err(fst_error_to_io_error)?;
let written_bytes = counting_writer.written_bytes();
let mut wrt = counting_writer.finish();
let mut block_store_writer = BlockAddrStoreWriter::new();
for block in &self.blocks {
block_store_writer.write_block_meta(block.block_addr.clone())?;
}
block_store_writer.serialize(&mut wrt)?;
Ok(written_bytes)
}
}
fn fst_error_to_io_error(error: tantivy_fst::Error) -> io::Error {
match error {
tantivy_fst::Error::Fst(fst_error) => io::Error::new(io::ErrorKind::Other, fst_error),
tantivy_fst::Error::Io(ioerror) => ioerror,
}
}
const STORE_BLOCK_LEN: usize = 128;
#[derive(Debug)]
struct BlockAddrBlockMetadata {
offset: u64,
ref_block_addr: BlockStartAddr,
range_start_slope: u32,
first_ordinal_slope: u32,
range_start_nbits: u8,
first_ordinal_nbits: u8,
block_len: u16,
// these fields are computed on deserialization, and not stored
range_shift: i64,
ordinal_shift: i64,
}
impl BlockAddrBlockMetadata {
fn num_bits(&self) -> u8 {
self.first_ordinal_nbits + self.range_start_nbits
}
fn deserialize_block_addr(&self, data: &[u8], inner_offset: usize) -> Option<BlockAddr> {
if inner_offset == 0 {
let range_end = self.ref_block_addr.byte_range_start
+ extract_bits(data, 0, self.range_start_nbits) as usize
+ self.range_start_slope as usize
- self.range_shift as usize;
return Some(self.ref_block_addr.to_block_addr(range_end));
}
let inner_offset = inner_offset - 1;
if inner_offset >= self.block_len as usize {
return None;
}
let num_bits = self.num_bits() as usize;
let range_start_addr = num_bits * inner_offset;
let ordinal_addr = range_start_addr + self.range_start_nbits as usize;
let range_end_addr = range_start_addr + num_bits;
if (range_end_addr + self.range_start_nbits as usize + 7) / 8 > data.len() {
return None;
}
let range_start = self.ref_block_addr.byte_range_start
+ extract_bits(data, range_start_addr, self.range_start_nbits) as usize
+ self.range_start_slope as usize * (inner_offset + 1)
- self.range_shift as usize;
let first_ordinal = self.ref_block_addr.first_ordinal
+ extract_bits(data, ordinal_addr, self.first_ordinal_nbits)
+ self.first_ordinal_slope as u64 * (inner_offset + 1) as u64
- self.ordinal_shift as u64;
let range_end = self.ref_block_addr.byte_range_start
+ extract_bits(data, range_end_addr, self.range_start_nbits) as usize
+ self.range_start_slope as usize * (inner_offset + 2)
- self.range_shift as usize;
Some(BlockAddr {
first_ordinal,
byte_range: range_start..range_end,
})
}
fn bisect_for_ord(&self, data: &[u8], target_ord: TermOrdinal) -> (u64, BlockAddr) {
let inner_target_ord = target_ord - self.ref_block_addr.first_ordinal;
let num_bits = self.num_bits() as usize;
let range_start_nbits = self.range_start_nbits as usize;
let get_ord = |index| {
extract_bits(
data,
num_bits * index as usize + range_start_nbits,
self.first_ordinal_nbits,
) + self.first_ordinal_slope as u64 * (index + 1)
- self.ordinal_shift as u64
};
let inner_offset = match binary_search(self.block_len as u64, |index| {
get_ord(index).cmp(&inner_target_ord)
}) {
Ok(inner_offset) => inner_offset + 1,
Err(inner_offset) => inner_offset,
};
// we can unwrap because inner_offset <= self.block_len
(
inner_offset,
self.deserialize_block_addr(data, inner_offset as usize)
.unwrap(),
)
}
}
// TODO move this function to tantivy_common?
#[inline(always)]
fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
assert!(num_bits <= 56);
let addr_byte = addr_bits / 8;
let bit_shift = (addr_bits % 8) as u64;
let val_unshifted_unmasked: u64 = if data.len() >= addr_byte + 8 {
let b = data[addr_byte..addr_byte + 8].try_into().unwrap();
u64::from_le_bytes(b)
} else {
// the buffer is not large enough.
// Let's copy the few remaining bytes to a 8 byte buffer
// padded with 0s.
let mut buf = [0u8; 8];
let data_to_copy = &data[addr_byte..];
let nbytes = data_to_copy.len();
buf[..nbytes].copy_from_slice(data_to_copy);
u64::from_le_bytes(buf)
};
let val_shifted_unmasked = val_unshifted_unmasked >> bit_shift;
let mask = (1u64 << u64::from(num_bits)) - 1;
val_shifted_unmasked & mask
}
impl BinarySerializable for BlockAddrBlockMetadata {
fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
self.offset.serialize(write)?;
self.ref_block_addr.serialize(write)?;
self.range_start_slope.serialize(write)?;
self.first_ordinal_slope.serialize(write)?;
write.write_all(&[self.first_ordinal_nbits, self.range_start_nbits])?;
self.block_len.serialize(write)?;
self.num_bits();
Ok(())
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let offset = u64::deserialize(reader)?;
let ref_block_addr = BlockStartAddr::deserialize(reader)?;
let range_start_slope = u32::deserialize(reader)?;
let first_ordinal_slope = u32::deserialize(reader)?;
let mut buffer = [0u8; 2];
reader.read_exact(&mut buffer)?;
let first_ordinal_nbits = buffer[0];
let range_start_nbits = buffer[1];
let block_len = u16::deserialize(reader)?;
Ok(BlockAddrBlockMetadata {
offset,
ref_block_addr,
range_start_slope,
first_ordinal_slope,
range_start_nbits,
first_ordinal_nbits,
block_len,
range_shift: 1 << (range_start_nbits - 1),
ordinal_shift: 1 << (first_ordinal_nbits - 1),
})
}
}
impl FixedSize for BlockAddrBlockMetadata {
const SIZE_IN_BYTES: usize = u64::SIZE_IN_BYTES
+ BlockStartAddr::SIZE_IN_BYTES
+ 2 * u32::SIZE_IN_BYTES
+ 2 * u8::SIZE_IN_BYTES
+ u16::SIZE_IN_BYTES;
}
#[derive(Debug, Clone)]
struct BlockAddrStore {
block_meta_bytes: OwnedBytes,
addr_bytes: OwnedBytes,
}
impl BlockAddrStore {
fn open(term_info_store_file: OwnedBytes) -> io::Result<BlockAddrStore> {
let (mut len_slice, main_slice) = term_info_store_file.split(8);
let len = u64::deserialize(&mut len_slice)? as usize;
let (block_meta_bytes, addr_bytes) = main_slice.split(len);
Ok(BlockAddrStore {
block_meta_bytes,
addr_bytes,
})
}
fn get_block_meta(&self, store_block_id: usize) -> Option<BlockAddrBlockMetadata> {
let mut block_data: &[u8] = self
.block_meta_bytes
.get(store_block_id * BlockAddrBlockMetadata::SIZE_IN_BYTES..)?;
BlockAddrBlockMetadata::deserialize(&mut block_data).ok()
}
fn get(&self, block_id: u64) -> Option<BlockAddr> {
let store_block_id = (block_id as usize) / STORE_BLOCK_LEN;
let inner_offset = (block_id as usize) % STORE_BLOCK_LEN;
let block_addr_block_data = self.get_block_meta(store_block_id)?;
block_addr_block_data.deserialize_block_addr(
&self.addr_bytes[block_addr_block_data.offset as usize..],
inner_offset,
)
}
fn binary_search_ord(&self, ord: TermOrdinal) -> (u64, BlockAddr) {
let max_block =
(self.block_meta_bytes.len() / BlockAddrBlockMetadata::SIZE_IN_BYTES) as u64;
let get_first_ordinal = |block_id| {
// we can unwrap because block_id < max_block
self.get(block_id * STORE_BLOCK_LEN as u64)
.unwrap()
.first_ordinal
};
let store_block_id =
binary_search(max_block, |block_id| get_first_ordinal(block_id).cmp(&ord));
let store_block_id = match store_block_id {
Ok(store_block_id) => {
let block_id = store_block_id * STORE_BLOCK_LEN as u64;
// we can unwrap because store_block_id < max_block
return (block_id, self.get(block_id).unwrap());
}
Err(store_block_id) => store_block_id - 1,
};
// we can unwrap because store_block_id < max_block
let block_addr_block_data = self.get_block_meta(store_block_id as usize).unwrap();
let (inner_offset, block_addr) = block_addr_block_data.bisect_for_ord(
&self.addr_bytes[block_addr_block_data.offset as usize..],
ord,
);
(
store_block_id * STORE_BLOCK_LEN as u64 + inner_offset,
block_addr,
)
}
}
fn binary_search(max: u64, cmp_fn: impl Fn(u64) -> std::cmp::Ordering) -> Result<u64, u64> {
use std::cmp::Ordering::*;
let mut size = max;
let mut left = 0;
let mut right = size;
while left < right {
let mid = left + size / 2;
let cmp = cmp_fn(mid);
if cmp == Less {
left = mid + 1;
} else if cmp == Greater {
right = mid;
} else {
return Ok(mid);
}
size = right - left;
}
Err(left)
}
struct BlockAddrStoreWriter {
buffer_block_metas: Vec<u8>,
buffer_addrs: Vec<u8>,
block_addrs: Vec<BlockAddr>,
}
impl BlockAddrStoreWriter {
fn new() -> Self {
BlockAddrStoreWriter {
buffer_block_metas: Vec::new(),
buffer_addrs: Vec::new(),
block_addrs: Vec::with_capacity(STORE_BLOCK_LEN),
}
}
fn flush_block(&mut self) -> io::Result<()> {
if self.block_addrs.is_empty() {
return Ok(());
}
let ref_block_addr = self.block_addrs[0].clone();
for block_addr in &mut self.block_addrs {
block_addr.byte_range.start -= ref_block_addr.byte_range.start;
block_addr.first_ordinal -= ref_block_addr.first_ordinal;
}
// we are only called if block_addrs is not empty
let mut last_block_addr = self.block_addrs.last().unwrap().clone();
last_block_addr.byte_range.end -= ref_block_addr.byte_range.start;
// we skip(1), so we never give an index of 0 to find_best_slope
let (range_start_slope, range_start_nbits) = find_best_slope(
self.block_addrs
.iter()
.map(|block| block.byte_range.start as u64)
.chain(std::iter::once(last_block_addr.byte_range.end as u64))
.enumerate()
.skip(1),
);
// we skip(1), so we never give an index of 0 to find_best_slope
let (first_ordinal_slope, first_ordinal_nbits) = find_best_slope(
self.block_addrs
.iter()
.map(|block| block.first_ordinal)
.enumerate()
.skip(1),
);
let range_shift = 1 << (range_start_nbits - 1);
let ordinal_shift = 1 << (first_ordinal_nbits - 1);
let block_addr_block_meta = BlockAddrBlockMetadata {
offset: self.buffer_addrs.len() as u64,
ref_block_addr: ref_block_addr.to_block_start(),
range_start_slope,
first_ordinal_slope,
range_start_nbits,
first_ordinal_nbits,
block_len: self.block_addrs.len() as u16 - 1,
range_shift,
ordinal_shift,
};
block_addr_block_meta.serialize(&mut self.buffer_block_metas)?;
let mut bit_packer = BitPacker::new();
for (i, block_addr) in self.block_addrs.iter().enumerate().skip(1) {
let range_pred = (range_start_slope as usize * i) as i64;
bit_packer.write(
(block_addr.byte_range.start as i64 - range_pred + range_shift) as u64,
range_start_nbits,
&mut self.buffer_addrs,
)?;
let first_ordinal_pred = (first_ordinal_slope as u64 * i as u64) as i64;
bit_packer.write(
(block_addr.first_ordinal as i64 - first_ordinal_pred + ordinal_shift) as u64,
first_ordinal_nbits,
&mut self.buffer_addrs,
)?;
}
let range_pred = (range_start_slope as usize * self.block_addrs.len()) as i64;
bit_packer.write(
(last_block_addr.byte_range.end as i64 - range_pred + range_shift) as u64,
range_start_nbits,
&mut self.buffer_addrs,
)?;
bit_packer.flush(&mut self.buffer_addrs)?;
self.block_addrs.clear();
Ok(())
}
fn write_block_meta(&mut self, block_addr: BlockAddr) -> io::Result<()> {
self.block_addrs.push(block_addr);
if self.block_addrs.len() >= STORE_BLOCK_LEN {
self.flush_block()?;
}
Ok(())
}
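/// Serialized layout (as written below): a `u64` length prefix for the block-meta section,
/// followed by the concatenated `BlockAddrBlockMetadata` entries, then the bitpacked
/// address corrections.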
fn serialize<W: std::io::Write>(&mut self, wrt: &mut W) -> io::Result<()> {
self.flush_block()?;
let len = self.buffer_block_metas.len() as u64;
len.serialize(wrt)?;
wrt.write_all(&self.buffer_block_metas)?;
wrt.write_all(&self.buffer_addrs)?;
Ok(())
}
}
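// A minimal sketch (an assumption about the read path, not code from this change set) of how
// a reader undoes the delta + slope encoding produced by `flush_block` for entry `i` of a
// block, once the `range_start_nbits`-wide correction has been bit-unpacked:
#[allow(dead_code)]
fn decode_range_start(ref_start: u64, slope: u32, shift: i64, i: u64, correction: u64) -> u64 {
// `flush_block` stored `start_rel - slope * i + shift` (with `start_rel` relative to the
// block's reference address), so invert that and re-add the reference start.
let predicted = slope as i64 * i as i64;
(ref_start as i64 + predicted + correction as i64 - shift) as u64
}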
/// Given an iterator over (index, value), returns the slope and the number of bits needed to
/// represent the error of a prediction made with this slope.
///
/// The iterator may be empty, but all indexes in it must be non-zero.
fn find_best_slope(elements: impl Iterator<Item = (usize, u64)> + Clone) -> (u32, u8) {
let slope_iterator = elements.clone();
let derivation_iterator = elements;
let mut min_slope_idx = 1;
let mut min_slope_val = 0;
let mut min_slope = u32::MAX;
let mut max_slope_idx = 1;
let mut max_slope_val = 0;
let mut max_slope = 0;
for (index, value) in slope_iterator {
let slope = (value / index as u64) as u32;
if slope <= min_slope {
min_slope = slope;
min_slope_idx = index;
min_slope_val = value;
}
if slope >= max_slope {
max_slope = slope;
max_slope_idx = index;
max_slope_val = value;
}
}
// The above is a heuristic giving the "highest" and "lowest" points. It's imperfect in that
// a point that appears earlier might have a high slope deviation, but a smaller absolute
// deviation than a later point.
// The actual best values could be obtained with the simplex method, but the improvement is
// likely minimal and the computation far more complex.
//
// Assuming these points are the furthest up and down, we find the slope that would cause the
// same positive deviation for the highest point as negative deviation for the lowest one.
// A is the optimal slope; B is the deviation from that guess.
//
// 0 = min_slope_val - min_slope_idx * A - B
// 0 = max_slope_val - max_slope_idx * A + B
//
// 0 = min_slope_val + max_slope_val - (min_slope_idx + max_slope_idx) * A
// (min_slope_val + max_slope_val) / (min_slope_idx + max_slope_idx) = A
//
// We also add a correcting factor to get proper rounding instead of truncation.
let denominator = (min_slope_idx + max_slope_idx) as u64;
let final_slope = ((min_slope_val + max_slope_val + denominator / 2) / denominator) as u32;
// We don't solve for B: our choice of points is suboptimal, so B is only a lower bound, and
// we iterate over all points to find the actual worst deviation.
let max_derivation: u64 = derivation_iterator
.map(|(index, value)| (value as i64 - final_slope as i64 * index as i64).unsigned_abs())
.max()
.unwrap_or(0);
(final_slope, compute_num_bits(max_derivation) + 1)
}
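// Worked example (a sketch of the arithmetic above, using the same truncating integer
// division): for the points (1, 10), (2, 21), (3, 29), the per-point slopes are 10, 10 and 9,
// so the retained extremes are (3, 29) for the minimum and (2, 21) for the maximum.
// The slope is then (29 + 21 + 5 / 2) / (3 + 2) = 52 / 5 = 10, the worst deviation from
// `10 * index` is 1, and the corrections therefore need `compute_num_bits(1) + 1 = 2` bits.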
#[cfg(test)]
mod tests {
use common::OwnedBytes;
use super::{BlockAddr, SSTableIndexBuilder, SSTableIndexV3};
use crate::SSTableDataCorruption;
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
let fst_len = sstable_builder.serialize(&mut buffer).unwrap();
let buffer = OwnedBytes::new(buffer);
let sstable_index = SSTableIndexV3::load(buffer, fst_len).unwrap();
assert_eq!(
sstable_index.get_block_with_key(b"bbbde"),
Some(BlockAddr {
first_ordinal: 10u64,
byte_range: 30..40
})
);
assert_eq!(sstable_index.locate_with_key(b"aa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aaa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aab").unwrap(), 1);
assert_eq!(sstable_index.locate_with_key(b"ccc").unwrap(), 2);
assert!(sstable_index.locate_with_key(b"e").is_none());
assert_eq!(sstable_index.locate_with_ord(0), 0);
assert_eq!(sstable_index.locate_with_ord(1), 0);
assert_eq!(sstable_index.locate_with_ord(4), 0);
assert_eq!(sstable_index.locate_with_ord(5), 1);
assert_eq!(sstable_index.locate_with_ord(100), 3);
}
#[test]
fn test_sstable_with_corrupted_data() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
let fst_len = sstable_builder.serialize(&mut buffer).unwrap();
buffer[2] = 9u8;
let buffer = OwnedBytes::new(buffer);
let data_corruption_err = SSTableIndexV3::load(buffer, fst_len).err().unwrap();
assert!(matches!(data_corruption_err, SSTableDataCorruption));
}
#[track_caller]
fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
let mut left_buf = left.to_vec();
super::find_shorter_str_in_between(&mut left_buf, right);
assert!(left_buf.len() <= left.len());
assert!(left <= &left_buf);
assert!(&left_buf[..] < right);
}
#[test]
fn test_find_shorter_str_in_between() {
test_find_shorter_str_in_between_aux(b"", b"hello");
test_find_shorter_str_in_between_aux(b"abc", b"abcd");
test_find_shorter_str_in_between_aux(b"abcd", b"abd");
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
}
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
if left < right {
test_find_shorter_str_in_between_aux(&left, &right);
}
}
}
#[test]
fn test_find_best_slop() {
assert_eq!(super::find_best_slope(std::iter::empty()), (0, 1));
assert_eq!(
super::find_best_slope(std::iter::once((1, 12345))),
(12345, 1)
);
}
}

View File

@@ -110,7 +110,7 @@ where
Bound::Included(key) | Bound::Excluded(key) => self
.term_dict
.sstable_index
.get_block_with_key(key)?
.get_block_with_key(key)
.map(|block| block.first_ordinal)
.unwrap_or(0),
Bound::Unbounded => 0,

View File

@@ -3,10 +3,6 @@ use std::io;
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
use crate::{vint, BlockAddr};
// TODO define a LazyIndexValueReader?
// one which keeps state could be useful for ord_to_block fns,
// one which doesn't at all would be perfect for term_to_block fns
// pending bench to assess real impact
#[derive(Default)]
pub(crate) struct IndexValueReader {
vals: Vec<BlockAddr>,

View File

@@ -1,51 +1,5 @@
use std::iter::{Cloned, Filter};
use std::mem;
use super::{Addr, MemoryArena};
use crate::fastcpy::fast_short_slice_copy;
use crate::memory_arena::store;
/// Returns the actual memory size in bytes
/// required to create a table with a given capacity.
pub fn compute_table_memory_size(capacity: usize) -> usize {
capacity * mem::size_of::<KeyValue>()
}
#[cfg(not(feature = "compare_hash_only"))]
type HashType = u32;
#[cfg(feature = "compare_hash_only")]
type HashType = u64;
/// `KeyValue` is the item stored in the hash table.
/// The key is actually a `BytesRef` object stored in an external memory arena.
/// The `value_addr` also points to an address in the memory arena.
#[derive(Copy, Clone)]
struct KeyValue {
pub(crate) key_value_addr: Addr,
hash: HashType,
}
impl Default for KeyValue {
fn default() -> Self {
KeyValue {
key_value_addr: Addr::null_pointer(),
hash: 0,
}
}
}
impl KeyValue {
#[inline]
fn is_empty(&self) -> bool {
self.key_value_addr.is_null()
}
#[inline]
fn is_not_empty_ref(&self) -> bool {
!self.key_value_addr.is_null()
}
}
use crate::shared_arena_hashmap::SharedArenaHashMap;
/// Customized `HashMap` with `&[u8]` keys
///
@@ -56,61 +10,13 @@ impl KeyValue {
/// The quirky API has the benefit of avoiding
/// the computation of the hash of the key twice,
/// or copying the key as long as there is no insert.
pub struct ArenaHashMap {
table: Vec<KeyValue>,
pub memory_arena: MemoryArena,
mask: usize,
len: usize,
}
struct LinearProbing {
pos: usize,
mask: usize,
}
impl LinearProbing {
#[inline]
fn compute(hash: HashType, mask: usize) -> LinearProbing {
LinearProbing {
pos: hash as usize,
mask,
}
}
#[inline]
fn next_probe(&mut self) -> usize {
// Not saving the masked version removes a dependency.
self.pos = self.pos.wrapping_add(1);
self.pos & self.mask
}
}
type IterNonEmpty<'a> = Filter<Cloned<std::slice::Iter<'a, KeyValue>>, fn(&KeyValue) -> bool>;
pub struct Iter<'a> {
hashmap: &'a ArenaHashMap,
inner: IterNonEmpty<'a>,
}
impl<'a> Iterator for Iter<'a> {
type Item = (&'a [u8], Addr);
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(move |kv| {
let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
(key, offset)
})
}
}
/// Returns the greatest power of two lower than or equal to `n`.
///
/// # Panics
/// Panics if `n == 0`.
fn compute_previous_power_of_two(n: usize) -> usize {
assert!(n > 0);
let msb = (63u32 - (n as u64).leading_zeros()) as u8;
1 << msb
/// ArenaHashMap is like SharedArenaHashMap but takes ownership
/// of the memory arena. The memory arena stores the serialized
/// keys and values.
pub struct ArenaHashMap {
shared_arena_hashmap: SharedArenaHashMap,
pub memory_arena: MemoryArena,
}
impl Default for ArenaHashMap {
@@ -121,156 +27,44 @@ impl Default for ArenaHashMap {
impl ArenaHashMap {
pub fn with_capacity(table_size: usize) -> ArenaHashMap {
let table_size_power_of_2 = compute_previous_power_of_two(table_size);
let memory_arena = MemoryArena::default();
let table = vec![KeyValue::default(); table_size_power_of_2];
ArenaHashMap {
table,
shared_arena_hashmap: SharedArenaHashMap::with_capacity(table_size),
memory_arena,
mask: table_size_power_of_2 - 1,
len: 0,
}
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_hash(&self, key: &[u8]) -> HashType {
murmurhash32::murmurhash2(key)
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_hash(&self, key: &[u8]) -> HashType {
// Since we compare only the hash, we need a high-quality hash.
use std::hash::Hasher;
let mut hasher = ahash::AHasher::default();
hasher.write(key);
hasher.finish() as HashType
}
#[inline]
pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
self.memory_arena.read(addr)
}
#[inline]
fn probe(&self, hash: HashType) -> LinearProbing {
LinearProbing::compute(hash, self.mask)
}
#[inline]
pub fn mem_usage(&self) -> usize {
self.table.len() * mem::size_of::<KeyValue>() + self.memory_arena.mem_usage()
}
#[inline]
fn is_saturated(&self) -> bool {
self.table.len() <= self.len * 2
}
#[inline]
fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
let data = self.memory_arena.slice_from(addr);
let key_bytes_len_bytes = unsafe { data.get_unchecked(..2) };
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let key_bytes: &[u8] = unsafe { data.get_unchecked(2..2 + key_bytes_len as usize) };
(key_bytes, addr.offset(2 + key_bytes_len as u32))
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_value_addr_if_key_match(&self, target_key: &[u8], addr: Addr) -> Option<Addr> {
use crate::fastcmp::fast_short_slice_compare;
let (stored_key, value_addr) = self.get_key_value(addr);
if fast_short_slice_compare(stored_key, target_key) {
Some(value_addr)
} else {
None
}
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_value_addr_if_key_match(&self, _target_key: &[u8], addr: Addr) -> Option<Addr> {
// For the compare_hash_only feature, it would make sense to store the keys at a different
// memory location. Here they will just pollute the cache.
let data = self.memory_arena.slice_from(addr);
let key_bytes_len_bytes = &data[..2];
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let value_addr = addr.offset(2 + key_bytes_len as u32);
Some(value_addr)
}
#[inline]
fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) {
self.len += 1;
self.table[bucket] = KeyValue {
key_value_addr,
hash,
};
self.shared_arena_hashmap.mem_usage() + self.memory_arena.mem_usage()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
self.shared_arena_hashmap.is_empty()
}
#[inline]
pub fn len(&self) -> usize {
self.len
self.shared_arena_hashmap.len()
}
#[inline]
pub fn iter(&self) -> Iter<'_> {
Iter {
inner: self
.table
.iter()
.cloned()
.filter(KeyValue::is_not_empty_ref),
hashmap: self,
}
}
fn resize(&mut self) {
let new_len = (self.table.len() * 2).max(1 << 13);
let mask = new_len - 1;
self.mask = mask;
let new_table = vec![KeyValue::default(); new_len];
let old_table = mem::replace(&mut self.table, new_table);
for key_value in old_table.into_iter().filter(KeyValue::is_not_empty_ref) {
let mut probe = LinearProbing::compute(key_value.hash, mask);
loop {
let bucket = probe.next_probe();
if self.table[bucket].is_empty() {
self.table[bucket] = key_value;
break;
}
}
}
pub fn iter(&self) -> impl Iterator<Item = (&[u8], Addr)> {
self.shared_arena_hashmap.iter(&self.memory_arena)
}
/// Get a value associated to a key.
#[inline]
pub fn get<V>(&self, key: &[u8]) -> Option<V>
where V: Copy + 'static {
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
loop {
let bucket = probe.next_probe();
let kv: KeyValue = self.table[bucket];
if kv.is_empty() {
return None;
} else if kv.hash == hash {
if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
let v = self.memory_arena.read(val_addr);
return Some(v);
}
}
}
self.shared_arena_hashmap.get(key, &self.memory_arena)
}
/// `mutate_or_create` creates a new entry for a given key if it does not exist
@@ -284,45 +78,10 @@ impl ArenaHashMap {
/// If the key already has an associated value, then it will be passed
/// `Some(previous_value)`.
#[inline]
pub fn mutate_or_create<V>(&mut self, key: &[u8], mut updater: impl FnMut(Option<V>) -> V)
pub fn mutate_or_create<V>(&mut self, key: &[u8], updater: impl FnMut(Option<V>) -> V)
where V: Copy + 'static {
if self.is_saturated() {
self.resize();
}
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
let mut bucket = probe.next_probe();
let mut kv: KeyValue = self.table[bucket];
loop {
if kv.is_empty() {
// The key does not exist yet.
let val = updater(None);
let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
let key_addr = self.memory_arena.allocate_space(num_bytes);
{
let data = self.memory_arena.slice_mut(key_addr, num_bytes);
let key_len_bytes: [u8; 2] = (key.len() as u16).to_le_bytes();
data[..2].copy_from_slice(&key_len_bytes);
let stop = 2 + key.len();
fast_short_slice_copy(key, &mut data[2..stop]);
store(&mut data[stop..], val);
}
self.set_bucket(hash, key_addr, bucket);
return;
}
if kv.hash == hash {
if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
let v = self.memory_arena.read(val_addr);
let new_v = updater(Some(v));
self.memory_arena.write_at(val_addr, new_v);
return;
}
}
// This allows fetching the next bucket before the loop jmp
bucket = probe.next_probe();
kv = self.table[bucket];
}
self.shared_arena_hashmap
.mutate_or_create(key, &mut self.memory_arena, updater);
}
}
@@ -331,7 +90,7 @@ mod tests {
use std::collections::HashMap;
use super::{compute_previous_power_of_two, ArenaHashMap};
use super::ArenaHashMap;
#[test]
fn test_hash_map() {
@@ -362,14 +121,6 @@ mod tests {
assert_eq!(hash_map.get::<u32>(b"abc"), None);
}
#[test]
fn test_compute_previous_power_of_two() {
assert_eq!(compute_previous_power_of_two(8), 8);
assert_eq!(compute_previous_power_of_two(9), 8);
assert_eq!(compute_previous_power_of_two(7), 4);
assert_eq!(compute_previous_power_of_two(u64::MAX as usize), 1 << 63);
}
#[test]
fn test_many_terms() {
let mut terms: Vec<String> = (0..20_000).map(|val| val.to_string()).collect();

View File

@@ -9,10 +9,12 @@ mod expull;
mod fastcmp;
mod fastcpy;
mod memory_arena;
mod shared_arena_hashmap;
pub use self::arena_hashmap::{compute_table_memory_size, ArenaHashMap};
pub use self::arena_hashmap::ArenaHashMap;
pub use self::expull::ExpUnrolledLinkedList;
pub use self::memory_arena::{Addr, MemoryArena};
pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap};
/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
pub type UnorderedId = u32;

View File

@@ -0,0 +1,420 @@
use std::iter::{Cloned, Filter};
use std::mem;
use super::{Addr, MemoryArena};
use crate::fastcpy::fast_short_slice_copy;
use crate::memory_arena::store;
/// Returns the actual memory size in bytes
/// required to create a table with a given capacity.
pub fn compute_table_memory_size(capacity: usize) -> usize {
capacity * mem::size_of::<KeyValue>()
}
#[cfg(not(feature = "compare_hash_only"))]
type HashType = u32;
#[cfg(feature = "compare_hash_only")]
type HashType = u64;
/// `KeyValue` is the item stored in the hash table.
/// The key is actually a `BytesRef` object stored in an external memory arena.
/// The `value_addr` also points to an address in the memory arena.
#[derive(Copy, Clone)]
struct KeyValue {
key_value_addr: Addr,
hash: HashType,
}
impl Default for KeyValue {
fn default() -> Self {
KeyValue {
key_value_addr: Addr::null_pointer(),
hash: 0,
}
}
}
impl KeyValue {
#[inline]
fn is_empty(&self) -> bool {
self.key_value_addr.is_null()
}
#[inline]
fn is_not_empty_ref(&self) -> bool {
!self.key_value_addr.is_null()
}
}
/// Customized `HashMap` with `&[u8]` keys
///
/// Its main particularity is that rather than storing its
/// keys in the heap, keys are stored in a memory arena
/// inline with the values.
///
/// The quirky API has the benefit of avoiding
/// the computation of the hash of the key twice,
/// or copying the key as long as there is no insert.
///
/// `SharedArenaHashMap` is like `ArenaHashMap`, but the memory arena is
/// passed as an argument to its methods, so one `MemoryArena` can be
/// shared between multiple `SharedArenaHashMap`s.
pub struct SharedArenaHashMap {
table: Vec<KeyValue>,
mask: usize,
len: usize,
}
struct LinearProbing {
pos: usize,
mask: usize,
}
impl LinearProbing {
#[inline]
fn compute(hash: HashType, mask: usize) -> LinearProbing {
LinearProbing {
pos: hash as usize,
mask,
}
}
#[inline]
fn next_probe(&mut self) -> usize {
// Not saving the masked version removes a dependency.
self.pos = self.pos.wrapping_add(1);
self.pos & self.mask
}
}
type IterNonEmpty<'a> = Filter<Cloned<std::slice::Iter<'a, KeyValue>>, fn(&KeyValue) -> bool>;
pub struct Iter<'a> {
hashmap: &'a SharedArenaHashMap,
memory_arena: &'a MemoryArena,
inner: IterNonEmpty<'a>,
}
impl<'a> Iterator for Iter<'a> {
type Item = (&'a [u8], Addr);
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(move |kv| {
let (key, offset): (&'a [u8], Addr) = self
.hashmap
.get_key_value(kv.key_value_addr, self.memory_arena);
(key, offset)
})
}
}
/// Returns the greatest power of two lower than or equal to `n`.
///
/// # Panics
/// Panics if `n == 0`.
fn compute_previous_power_of_two(n: usize) -> usize {
assert!(n > 0);
let msb = (63u32 - (n as u64).leading_zeros()) as u8;
1 << msb
}
impl Default for SharedArenaHashMap {
fn default() -> Self {
SharedArenaHashMap::with_capacity(4)
}
}
impl SharedArenaHashMap {
pub fn with_capacity(table_size: usize) -> SharedArenaHashMap {
let table_size_power_of_2 = compute_previous_power_of_two(table_size);
let table = vec![KeyValue::default(); table_size_power_of_2];
SharedArenaHashMap {
table,
mask: table_size_power_of_2 - 1,
len: 0,
}
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_hash(&self, key: &[u8]) -> HashType {
murmurhash32::murmurhash2(key)
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_hash(&self, key: &[u8]) -> HashType {
// Since we compare only the hash, we need a high-quality hash.
use std::hash::Hasher;
let mut hasher = ahash::AHasher::default();
hasher.write(key);
hasher.finish() as HashType
}
#[inline]
fn probe(&self, hash: HashType) -> LinearProbing {
LinearProbing::compute(hash, self.mask)
}
#[inline]
pub fn mem_usage(&self) -> usize {
self.table.len() * mem::size_of::<KeyValue>()
}
#[inline]
fn is_saturated(&self) -> bool {
self.table.len() <= self.len * 2
}
#[inline]
fn get_key_value<'a>(&'a self, addr: Addr, memory_arena: &'a MemoryArena) -> (&[u8], Addr) {
let data = memory_arena.slice_from(addr);
let key_bytes_len_bytes = unsafe { data.get_unchecked(..2) };
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let key_bytes: &[u8] = unsafe { data.get_unchecked(2..2 + key_bytes_len as usize) };
(key_bytes, addr.offset(2 + key_bytes_len as u32))
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_value_addr_if_key_match(
&self,
target_key: &[u8],
addr: Addr,
memory_arena: &MemoryArena,
) -> Option<Addr> {
use crate::fastcmp::fast_short_slice_compare;
let (stored_key, value_addr) = self.get_key_value(addr, memory_arena);
if fast_short_slice_compare(stored_key, target_key) {
Some(value_addr)
} else {
None
}
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_value_addr_if_key_match(
&self,
_target_key: &[u8],
addr: Addr,
memory_arena: &MemoryArena,
) -> Option<Addr> {
// For the compare_hash_only feature, it would make sense to store the keys at a different
// memory location. Here they will just pollute the cache.
let data = memory_arena.slice_from(addr);
let key_bytes_len_bytes = &data[..2];
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let value_addr = addr.offset(2 + key_bytes_len as u32);
Some(value_addr)
}
#[inline]
fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) {
self.len += 1;
self.table[bucket] = KeyValue {
key_value_addr,
hash,
};
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn iter<'a>(&'a self, memory_arena: &'a MemoryArena) -> Iter<'_> {
Iter {
inner: self
.table
.iter()
.cloned()
.filter(KeyValue::is_not_empty_ref),
hashmap: self,
memory_arena,
}
}
fn resize(&mut self) {
let new_len = (self.table.len() * 2).max(1 << 3);
let mask = new_len - 1;
self.mask = mask;
let new_table = vec![KeyValue::default(); new_len];
let old_table = mem::replace(&mut self.table, new_table);
for key_value in old_table.into_iter().filter(KeyValue::is_not_empty_ref) {
let mut probe = LinearProbing::compute(key_value.hash, mask);
loop {
let bucket = probe.next_probe();
if self.table[bucket].is_empty() {
self.table[bucket] = key_value;
break;
}
}
}
}
/// Get a value associated to a key.
#[inline]
pub fn get<V>(&self, key: &[u8], memory_arena: &MemoryArena) -> Option<V>
where V: Copy + 'static {
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
loop {
let bucket = probe.next_probe();
let kv: KeyValue = self.table[bucket];
if kv.is_empty() {
return None;
} else if kv.hash == hash {
if let Some(val_addr) =
self.get_value_addr_if_key_match(key, kv.key_value_addr, memory_arena)
{
let v = memory_arena.read(val_addr);
return Some(v);
}
}
}
}
/// `mutate_or_create` creates a new entry for a given key if it does not exist,
/// or updates the existing entry.
///
/// The actual logic for this update is defined in the `updater`
/// argument.
///
/// If the key is not present, `updater` will receive `None` and
/// will be in charge of returning a default value.
/// If the key already has an associated value, then it will be passed
/// `Some(previous_value)`.
#[inline]
pub fn mutate_or_create<V>(
&mut self,
key: &[u8],
memory_arena: &mut MemoryArena,
mut updater: impl FnMut(Option<V>) -> V,
) -> V
where
V: Copy + 'static,
{
if self.is_saturated() {
self.resize();
}
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
let mut bucket = probe.next_probe();
let mut kv: KeyValue = self.table[bucket];
loop {
if kv.is_empty() {
// The key does not exist yet.
let val = updater(None);
let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
let key_addr = memory_arena.allocate_space(num_bytes);
{
let data = memory_arena.slice_mut(key_addr, num_bytes);
let key_len_bytes: [u8; 2] = (key.len() as u16).to_le_bytes();
data[..2].copy_from_slice(&key_len_bytes);
let stop = 2 + key.len();
fast_short_slice_copy(key, &mut data[2..stop]);
store(&mut data[stop..], val);
}
self.set_bucket(hash, key_addr, bucket);
return val;
}
if kv.hash == hash {
if let Some(val_addr) =
self.get_value_addr_if_key_match(key, kv.key_value_addr, memory_arena)
{
let v = memory_arena.read(val_addr);
let new_v = updater(Some(v));
memory_arena.write_at(val_addr, new_v);
return new_v;
}
}
// This allows fetching the next bucket before the loop jmp
bucket = probe.next_probe();
kv = self.table[bucket];
}
}
}
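// A minimal usage sketch (an assumption, not part of this change set) of the shared-arena
// design: several maps can index into the same `MemoryArena`, which is what
// `SharedArenaHashMap` enables compared to `ArenaHashMap`.
#[allow(dead_code)]
fn shared_arena_sketch() {
let mut arena = MemoryArena::default();
let mut counts_a = SharedArenaHashMap::default();
let mut counts_b = SharedArenaHashMap::default();
counts_a.mutate_or_create(b"hello", &mut arena, |v: Option<u32>| v.unwrap_or(0) + 1);
counts_b.mutate_or_create(b"hello", &mut arena, |v: Option<u32>| v.unwrap_or(0) + 1);
assert_eq!(counts_a.get::<u32>(b"hello", &arena), Some(1));
assert_eq!(counts_b.get::<u32>(b"world", &arena), None);
}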
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use super::{compute_previous_power_of_two, SharedArenaHashMap};
use crate::MemoryArena;
#[test]
fn test_hash_map() {
let mut memory_arena = MemoryArena::default();
let mut hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
hash_map.mutate_or_create(b"abc", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
3u32
});
hash_map.mutate_or_create(b"abcd", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
4u32
});
hash_map.mutate_or_create(b"abc", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, Some(3u32));
5u32
});
let mut vanilla_hash_map = HashMap::new();
let iter_values = hash_map.iter(&memory_arena);
for (key, addr) in iter_values {
let val: u32 = memory_arena.read(addr);
vanilla_hash_map.insert(key.to_owned(), val);
}
assert_eq!(vanilla_hash_map.len(), 2);
}
#[test]
fn test_empty_hashmap() {
let memory_arena = MemoryArena::default();
let hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
assert_eq!(hash_map.get::<u32>(b"abc", &memory_arena), None);
}
#[test]
fn test_compute_previous_power_of_two() {
assert_eq!(compute_previous_power_of_two(8), 8);
assert_eq!(compute_previous_power_of_two(9), 8);
assert_eq!(compute_previous_power_of_two(7), 4);
assert_eq!(compute_previous_power_of_two(u64::MAX as usize), 1 << 63);
}
#[test]
fn test_many_terms() {
let mut memory_arena = MemoryArena::default();
let mut terms: Vec<String> = (0..20_000).map(|val| val.to_string()).collect();
let mut hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
for term in terms.iter() {
hash_map.mutate_or_create(
term.as_bytes(),
&mut memory_arena,
|_opt_val: Option<u32>| 5u32,
);
}
let mut terms_back: Vec<String> = hash_map
.iter(&memory_arena)
.map(|(bytes, _)| String::from_utf8(bytes.to_vec()).unwrap())
.collect();
terms_back.sort();
terms.sort();
for pos in 0..terms.len() {
assert_eq!(terms[pos], terms_back[pos]);
}
}
}