Compare commits

...

21 Commits

Author SHA1 Message Date
Pascal Seitz
bb57e63522 Store List of Fields in Segment
Fields may be encoded in the columnar storage or in the inverted index
for JSON fields.
Add a new Segment file that contains the list of fields (schema +
encoded)
2023-12-13 15:52:41 +08:00
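The field list is exposed through a `fields_metadata()` accessor on `Index` and `SegmentReader` (see the diff below). A rough usage sketch, assuming an index whose documents populate a JSON field; everything except `fields_metadata`/`FieldMetadata` is ordinary tantivy setup:

use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_json_field("json", TEXT);
    let index = Index::create_in_ram(schema_builder.build());
    // ... index and commit some documents ...

    // Lists schema fields plus the paths discovered inside JSON fields.
    // Expensive on JSON-heavy data: it scans the term dictionaries.
    for field in index.fields_metadata()? {
        println!("{} ({:?})", field.field_name, field.typ);
    }
    Ok(())
}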
PSeitz
bff7c58497 improve indexing benchmark (#2275) 2023-12-11 09:04:42 +01:00
trinity-1686a
9ebc5ed053 use fst for sstable index (#2268)
* read path for new fst based index

* implement BlockAddrStoreWriter

* extract slop/derivation computation

* use better linear approximator and allow negative correction to approximator

* document format and reorder some fields

* optimize single block sstable size

* plug backward compat
2023-12-04 15:13:15 +01:00
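The approximator bullets are the heart of #2268: instead of storing every block address verbatim, the sstable index fits a line through the addresses and stores only a small per-block correction. A standalone sketch of the idea (illustrative names, not the actual sstable types):

/// Reconstruct a block address from a linear model plus a per-block
/// correction. Allowing negative corrections (one of the bullets above)
/// lets the fitted line overshoot, which keeps the stored deltas small.
fn block_addr(slope: u64, intercept: u64, correction: i64, block_id: u64) -> u64 {
    let predicted = intercept + slope * block_id;
    (predicted as i64 + correction) as u64
}

fn main() {
    // Blocks laid out roughly every 512 bytes; block 3 starts 5 bytes early.
    assert_eq!(block_addr(512, 0, -5, 3), 1531);
}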
PSeitz
0b56c88e69 Revert "Preparing for 0.21.2 release." (#2258)
* Revert "Preparing for 0.21.2 release. (#2256)"

This reverts commit 9caab45136.

* bump version to 0.21.1

* set version to 0.22.0-dev
2023-12-01 13:46:12 +01:00
PSeitz
24841f0b2a update bitpacker dep (#2269) 2023-12-01 13:45:52 +01:00
PSeitz
1a9fc10be9 add fields_metadata to SegmentReader, add columnar docs (#2222)
* add fields_metadata to SegmentReader, add columnar docs

* use schema to resolve field, add test

* normalize paths

* merge for FieldsMetadata, add fields_metadata on Index

* Update src/core/segment_reader.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

* merge code paths

* add Hash

* move function outside

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-11-22 12:29:53 +01:00
PSeitz
07573a7f19 update fst (#2267)
update fst to 0.5 (deduplicates regex-syntax in the dep tree)
deps cleanup
2023-11-21 16:06:57 +01:00
BlackHoleFox
daad2dc151 Take string references instead of owned values building Facet paths (#2265) 2023-11-20 09:40:44 +01:00
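This means facet paths can now be built from borrowed `&str` segments instead of owned `String`s. A minimal sketch, assuming `Facet::from_path` accepts any iterator of displayable segments:

use tantivy::schema::Facet;

fn main() {
    // Borrowed &str segments; no owned Strings are allocated up front.
    let facet = Facet::from_path(["category", "electronics"]);
    assert_eq!(facet.to_string(), "/category/electronics");
}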
PSeitz
054f49dc31 support escaped dot, add agg test (#2250)
add agg test for nested JSON
allow escaping of dot
2023-11-20 03:00:57 +01:00
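The new aggregation test (visible in the diff further down) demonstrates the escaping rule: `\.` keeps a literal dot inside a JSON key, while a bare `.` descends into a nested object. A condensed sketch, assuming the `Aggregations` request type from `tantivy::aggregation::agg_req`:

use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;

fn main() -> serde_json::Result<()> {
    // "json.blub" is a single field whose name contains a dot, so the dot
    // is escaped; "color.nested" walks into the nested "color" object.
    let _agg: Aggregations = serde_json::from_value(json!({
        "dots_in_key": { "terms": { "field": "json\\.blub.color\\.dot" } },
        "nested_key": { "terms": { "field": "json\\.blub.color.nested" } }
    }))?;
    Ok(())
}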
PSeitz
47009ed2d3 remove unused deps (#2264)
found with cargo machete
remove pprof (doesn't work)
2023-11-20 02:59:59 +01:00
PSeitz
0aae31d7d7 reduce number of allocations (#2257)
* reduce number of allocations

Explanation makes up around 50% of all allocations (by allocation count, not runtime cost).
It's created during serialization but never actually used.

- Make Explanation optional in BM25
- Avoid allocations when using Explanation

* use Cow
2023-11-16 13:47:36 +01:00
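The `Cow` part of the change follows the usual borrow-by-default pattern: return borrowed data on the common path and allocate only when the value actually needs modification. A generic sketch of the pattern (not the tantivy-internal code):

use std::borrow::Cow;

// Allocate only when the input actually contains a dot to escape.
fn escape_dots(path: &str) -> Cow<'_, str> {
    if path.contains('.') {
        Cow::Owned(path.replace('.', "\\."))
    } else {
        Cow::Borrowed(path)
    }
}

fn main() {
    assert!(matches!(escape_dots("plain"), Cow::Borrowed(_)));
    assert_eq!(escape_dots("a.b"), "a\\.b");
}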
Paul Masurel
9caab45136 Preparing for 0.21.2 release. (#2256) 2023-11-15 10:43:36 +09:00
Chris Tam
6d9a7b7eb0 Derive Debug for SchemaBuilder (#2254) 2023-11-15 01:03:44 +01:00
dependabot[bot]
7a2c5804b1 Update itertools requirement from 0.11.0 to 0.12.0 (#2255)
Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version.
- [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-itertools/itertools/compare/v0.11.0...v0.12.0)

---
updated-dependencies:
- dependency-name: itertools
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-15 01:03:08 +01:00
François Massot
5319977171 Merge pull request #2253 from quickwit-oss/issue/2251-bug-merge-json-object-with-number
Fix bug occurring when merging JSON object indexed with positions.
2023-11-14 17:28:29 +01:00
trinity-1686a
828632e8c4 rustfmt 2023-11-14 15:05:16 +01:00
Paul Masurel
6b59ec6fd5 Fix bug occurring when merging JSON object indexed with positions.
In a JSON object field, the presence of term frequencies depends on the
field. Typically, a string indexed with positions will have positions,
while numbers won't.

The presence or absence of term freqs for a given term is unfortunately
encoded in a very passive way: it is given by the presence of extra
information in the skip info, or by the lack of term freqs after
decoding vint blocks.

Before, after writing a segment, we would encode the segment correctly
(without any term freq for numbers in a JSON object field).
However, during merge, we would get the default term freq=1 value
(the default in the absence of encoded term freqs).

The merger would then proceed and attempt to decode 1 position when
there are in fact none.

This PR requires explicitly telling the posting serializer, for each new
term, whether term frequencies should be serialized (see the sketch
below).
Closes #2251
2023-11-14 22:41:48 +09:00
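Schematically, the fix replaces inference with an explicit contract between writer and merger. The sketch below is illustrative only (tantivy's real posting serializer differs): the point is that whether a term carries frequencies is now an input, not something recovered from the encoded bytes.

// Illustrative sketch, not tantivy's internals: each new term states up
// front whether term frequencies follow, so a merger reading the postings
// back can never fall into an implicit default of freq = 1.
struct PostingSerializer {
    current_term_has_freq: bool,
}

impl PostingSerializer {
    fn new_term(&mut self, _term: &[u8], record_term_freq: bool) {
        self.current_term_has_freq = record_term_freq;
    }

    fn write_doc(&mut self, _doc: u32, term_freq: u32) {
        if self.current_term_has_freq {
            // ... encode term_freq after the doc id ...
            let _ = term_freq;
        }
        // else: no freq bytes are written, and none will be read back.
    }
}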
PSeitz
b60d862150 docid deltas while indexing (#2249)
* docid deltas while indexing

Storing deltas is especially helpful for repetitive data like logs.
In those cases, recording a doc on a term used to cost 4 bytes and now
costs 1 byte (see the sketch below).

HDFS Indexing 1.1GB Total memory consumption:
Before:  760 MB
Now:     590 MB

* use scan for delta decoding
2023-11-13 05:14:27 +01:00
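The saving comes from variable-length encoding: consecutive doc ids hitting the same term tend to be close together, and a small gap fits in one vint byte where an absolute id needs up to four. A minimal sketch of the recording side (illustrative, not tantivy's actual buffer code); the "use scan for delta decoding" bullet refers to the mirror-image decoding pass, which can be written with `Iterator::scan`:

// Record the gap to the previous doc id instead of the absolute id.
fn to_deltas(doc_ids: &[u32]) -> Vec<u32> {
    let mut prev = 0u32;
    doc_ids
        .iter()
        .map(|&doc| {
            let delta = doc - prev;
            prev = doc;
            delta
        })
        .collect()
}

fn main() {
    // The gaps (3 and 4) fit in one vint byte each; the absolute ids need four.
    assert_eq!(to_deltas(&[1_000_000, 1_000_003, 1_000_007]), vec![1_000_000, 3, 4]);
}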
PSeitz
4837c7811a add missing inlines (#2245) 2023-11-10 08:00:42 +01:00
PSeitz
5a2397d57e add sstable ord_to_term benchmark (#2242) 2023-11-10 07:27:48 +01:00
PSeitz
927b4432c9 Perf: use term hashmap in fastfield (#2243)
* add shared arena hashmap

* bench fastfield indexing

* use shared arena hashmap in columnar

lower minimum resize in hashtable

* clippy

* add comments
2023-11-09 13:44:02 +01:00
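The pattern at work here is visible in the DictionaryBuilder diff below: keys live in a shared MemoryArena, and `mutate_or_create` either reads the existing value or installs a new one. A condensed sketch based on that diff (tantivy-stacker API as exercised by the new test; treat the exact signatures as an assumption):

use stacker::{MemoryArena, SharedArenaHashMap};

fn main() {
    let mut arena = MemoryArena::default();
    let mut map = SharedArenaHashMap::default();
    // Assign auto-incremented ids to terms, arena-backed, mirroring the
    // DictionaryBuilder change in this compare.
    for term in [b"hello".as_slice(), b"happy", b"hello"] {
        let next_id = map.len() as u32;
        let id = map.mutate_or_create(term, &mut arena, |existing: Option<u32>| {
            existing.unwrap_or(next_id)
        });
        println!("{:?} -> {}", term, id);
    }
}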
64 changed files with 3246 additions and 906 deletions

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.21.0"
+version = "0.22.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -22,10 +22,10 @@ crc32fast = "1.3.2"
 once_cell = "1.10.0"
 regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
 aho-corasick = "1.0"
-tantivy-fst = "0.4.0"
+tantivy-fst = "0.5"
 memmap2 = { version = "0.9.0", optional = true }
 lz4_flex = { version = "0.11", default-features = false, optional = true }
-zstd = { version = "0.13", optional = true, default-features = false }
+zstd = { version = "0.13", default-features = false }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
@@ -37,21 +37,19 @@ uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { git = "https://github.com/quickwit-oss/bitpacking", rev = "f730b75", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker4x"] }
 census = "0.4.0"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
 htmlescape = "0.3.1"
 fail = { version = "0.5.0", optional = true }
-murmurhash32 = "0.3.0"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 smallvec = "1.8.0"
 rayon = "1.5.2"
 lru = "0.12.0"
 fastdivide = "0.4.0"
-itertools = "0.11.0"
+itertools = "0.12.0"
 measure_time = "0.8.2"
-async-trait = "0.1.53"
 arc-swap = "1.5.0"
 columnar = { version= "0.2", path="./columnar", package ="tantivy-columnar" }
@@ -75,15 +73,13 @@ matches = "0.1.9"
 pretty_assertions = "1.2.1"
 proptest = "1.0.0"
 test-log = "0.2.10"
-env_logger = "0.10.0"
 futures = "0.3.21"
 paste = "1.0.11"
 more-asserts = "0.3.1"
 rand_distr = "0.4.3"

 [target.'cfg(not(windows))'.dev-dependencies]
-criterion = "0.5"
+criterion = { version = "0.5", default-features = false }
-pprof = { git = "https://github.com/PSeitz/pprof-rs/", rev = "53af24b", features = ["flamegraph", "criterion"] } # temp fork that works with criterion 0.5

 [dev-dependencies.fail]
 version = "0.5.0"
@@ -109,7 +105,7 @@ mmap = ["fs4", "tempfile", "memmap2"]
 stopwords = []
 lz4-compression = ["lz4_flex"]
-zstd-compression = ["zstd"]
+zstd-compression = []
 failpoints = ["fail", "fail/failpoints"]
 unstable = [] # useful for benches.

View File

@@ -1,14 +1,99 @@
-use criterion::{criterion_group, criterion_main, Criterion, Throughput};
-use pprof::criterion::{Output, PProfProfiler};
+use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion, Throughput};
 use tantivy::schema::{TantivyDocument, FAST, INDEXED, STORED, STRING, TEXT};
-use tantivy::{Index, IndexWriter};
+use tantivy::{tokenizer, Index, IndexWriter};

 const HDFS_LOGS: &str = include_str!("hdfs.json");
 const GH_LOGS: &str = include_str!("gh.json");
 const WIKI: &str = include_str!("wiki.json");

-fn get_lines(input: &str) -> Vec<&str> {
-    input.trim().split('\n').collect()
+fn benchmark(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    parse_json: bool,
+    is_dynamic: bool,
+) {
+    if is_dynamic {
+        benchmark_dynamic_json(b, input, schema, commit, parse_json)
+    } else {
+        _benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
+            TantivyDocument::parse_json(&schema, doc_json).unwrap()
+        })
+    }
+}
+
+fn get_index(schema: tantivy::schema::Schema) -> Index {
+    let mut index = Index::create_in_ram(schema.clone());
+    let ff_tokenizer_manager = tokenizer::TokenizerManager::default();
+    ff_tokenizer_manager.register(
+        "raw",
+        tokenizer::TextAnalyzer::builder(tokenizer::RawTokenizer::default())
+            .filter(tokenizer::RemoveLongFilter::limit(255))
+            .build(),
+    );
+    index.set_fast_field_tokenizers(ff_tokenizer_manager.clone());
+    index
+}
+
+fn _benchmark(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    include_json_parsing: bool,
+    create_doc: impl Fn(&tantivy::schema::Schema, &str) -> TantivyDocument,
+) {
+    if include_json_parsing {
+        let lines: Vec<&str> = input.trim().split('\n').collect();
+        b.iter(|| {
+            let index = get_index(schema.clone());
+            let mut index_writer: IndexWriter =
+                index.writer_with_num_threads(1, 100_000_000).unwrap();
+            for doc_json in &lines {
+                let doc = create_doc(&schema, doc_json);
+                index_writer.add_document(doc).unwrap();
+            }
+            if commit {
+                index_writer.commit().unwrap();
+            }
+        })
+    } else {
+        let docs: Vec<_> = input
+            .trim()
+            .split('\n')
+            .map(|doc_json| create_doc(&schema, doc_json))
+            .collect();
+        b.iter_batched(
+            || docs.clone(),
+            |docs| {
+                let index = get_index(schema.clone());
+                let mut index_writer: IndexWriter =
+                    index.writer_with_num_threads(1, 100_000_000).unwrap();
+                for doc in docs {
+                    index_writer.add_document(doc).unwrap();
+                }
+                if commit {
+                    index_writer.commit().unwrap();
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    }
+}
+
+fn benchmark_dynamic_json(
+    b: &mut Bencher,
+    input: &str,
+    schema: tantivy::schema::Schema,
+    commit: bool,
+    parse_json: bool,
+) {
+    let json_field = schema.get_field("json").unwrap();
+    _benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
+        let json_val: serde_json::Map<String, serde_json::Value> =
+            serde_json::from_str(doc_json).unwrap();
+        tantivy::doc!(json_field=>json_val)
+    })
 }

 pub fn hdfs_index_benchmark(c: &mut Criterion) {
@@ -19,7 +104,14 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
         schema_builder.add_text_field("severity", STRING);
         schema_builder.build()
     };
-    let schema_with_store = {
+    let schema_only_fast = {
+        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+        schema_builder.add_u64_field("timestamp", FAST);
+        schema_builder.add_text_field("body", FAST);
+        schema_builder.add_text_field("severity", FAST);
+        schema_builder.build()
+    };
+    let _schema_with_store = {
         let mut schema_builder = tantivy::schema::SchemaBuilder::new();
         schema_builder.add_u64_field("timestamp", INDEXED | STORED);
         schema_builder.add_text_field("body", TEXT | STORED);
@@ -28,77 +120,39 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
     };
     let dynamic_schema = {
         let mut schema_builder = tantivy::schema::SchemaBuilder::new();
-        schema_builder.add_json_field("json", TEXT);
+        schema_builder.add_json_field("json", TEXT | FAST);
         schema_builder.build()
     };

     let mut group = c.benchmark_group("index-hdfs");
     group.throughput(Throughput::Bytes(HDFS_LOGS.len() as u64));
     group.sample_size(20);
-    group.bench_function("index-hdfs-no-commit", |b| {
-        let lines = get_lines(HDFS_LOGS);
-        b.iter(|| {
-            let index = Index::create_in_ram(schema.clone());
-            let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-                index_writer.add_document(doc).unwrap();
-            }
-        })
-    });
-    group.bench_function("index-hdfs-with-commit", |b| {
-        let lines = get_lines(HDFS_LOGS);
-        b.iter(|| {
-            let index = Index::create_in_ram(schema.clone());
-            let mut index_writer: IndexWriter =
-                index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-                index_writer.add_document(doc).unwrap();
-            }
-            index_writer.commit().unwrap();
-        })
-    });
-    group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
-        let lines = get_lines(HDFS_LOGS);
-        b.iter(|| {
-            let index = Index::create_in_ram(schema_with_store.clone());
-            let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-                index_writer.add_document(doc).unwrap();
-            }
-        })
-    });
-    group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
-        let lines = get_lines(HDFS_LOGS);
-        b.iter(|| {
-            let index = Index::create_in_ram(schema_with_store.clone());
-            let mut index_writer: IndexWriter =
-                index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let doc = TantivyDocument::parse_json(&schema, doc_json).unwrap();
-                index_writer.add_document(doc).unwrap();
-            }
-            index_writer.commit().unwrap();
-        })
-    });
-    group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
-        let lines = get_lines(HDFS_LOGS);
-        b.iter(|| {
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let mut index_writer: IndexWriter =
-                index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let json_val: serde_json::Map<String, serde_json::Value> =
-                    serde_json::from_str(doc_json).unwrap();
-                let doc = tantivy::doc!(json_field=>json_val);
-                index_writer.add_document(doc).unwrap();
-            }
-            index_writer.commit().unwrap();
-        })
-    });
+    let benches = [
+        ("only-indexed-".to_string(), schema, false),
+        //("stored-".to_string(), _schema_with_store, false),
+        ("only-fast-".to_string(), schema_only_fast, false),
+        ("dynamic-".to_string(), dynamic_schema, true),
+    ];
+
+    for (prefix, schema, is_dynamic) in benches {
+        for commit in [false, true] {
+            let suffix = if commit { "with-commit" } else { "no-commit" };
+            for parse_json in [false] {
+                // for parse_json in [false, true] {
+                let suffix = if parse_json {
+                    format!("{}-with-json-parsing", suffix)
+                } else {
+                    format!("{}", suffix)
+                };
+                let bench_name = format!("{}{}", prefix, suffix);
+                group.bench_function(bench_name, |b| {
+                    benchmark(b, HDFS_LOGS, schema.clone(), commit, parse_json, is_dynamic)
+                });
+            }
+        }
+    }
 }

 pub fn gh_index_benchmark(c: &mut Criterion) {
@@ -107,39 +161,24 @@ pub fn gh_index_benchmark(c: &mut Criterion) {
         schema_builder.add_json_field("json", TEXT | FAST);
         schema_builder.build()
     };
+    let dynamic_schema_fast = {
+        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+        schema_builder.add_json_field("json", FAST);
+        schema_builder.build()
+    };

     let mut group = c.benchmark_group("index-gh");
     group.throughput(Throughput::Bytes(GH_LOGS.len() as u64));

     group.bench_function("index-gh-no-commit", |b| {
-        let lines = get_lines(GH_LOGS);
-        b.iter(|| {
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let json_val: serde_json::Map<String, serde_json::Value> =
-                    serde_json::from_str(doc_json).unwrap();
-                let doc = tantivy::doc!(json_field=>json_val);
-                index_writer.add_document(doc).unwrap();
-            }
-        })
+        benchmark_dynamic_json(b, GH_LOGS, dynamic_schema.clone(), false, false)
     });
-    group.bench_function("index-gh-with-commit", |b| {
-        let lines = get_lines(GH_LOGS);
-        b.iter(|| {
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let mut index_writer: IndexWriter =
-                index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let json_val: serde_json::Map<String, serde_json::Value> =
-                    serde_json::from_str(doc_json).unwrap();
-                let doc = tantivy::doc!(json_field=>json_val);
-                index_writer.add_document(doc).unwrap();
-            }
-            index_writer.commit().unwrap();
-        })
+    group.bench_function("index-gh-fast", |b| {
+        benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), false, false)
+    });
+    group.bench_function("index-gh-fast-with-commit", |b| {
+        benchmark_dynamic_json(b, GH_LOGS, dynamic_schema_fast.clone(), true, false)
     });
 }
@@ -154,34 +193,10 @@ pub fn wiki_index_benchmark(c: &mut Criterion) {
     group.throughput(Throughput::Bytes(WIKI.len() as u64));

     group.bench_function("index-wiki-no-commit", |b| {
-        let lines = get_lines(WIKI);
-        b.iter(|| {
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let index_writer: IndexWriter = index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let json_val: serde_json::Map<String, serde_json::Value> =
-                    serde_json::from_str(doc_json).unwrap();
-                let doc = tantivy::doc!(json_field=>json_val);
-                index_writer.add_document(doc).unwrap();
-            }
-        })
+        benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), false, false)
     });
     group.bench_function("index-wiki-with-commit", |b| {
-        let lines = get_lines(WIKI);
-        b.iter(|| {
-            let json_field = dynamic_schema.get_field("json").unwrap();
-            let index = Index::create_in_ram(dynamic_schema.clone());
-            let mut index_writer: IndexWriter =
-                index.writer_with_num_threads(1, 100_000_000).unwrap();
-            for doc_json in &lines {
-                let json_val: serde_json::Map<String, serde_json::Value> =
-                    serde_json::from_str(doc_json).unwrap();
-                let doc = tantivy::doc!(json_field=>json_val);
-                index_writer.add_document(doc).unwrap();
-            }
-            index_writer.commit().unwrap();
-        })
+        benchmark_dynamic_json(b, WIKI, dynamic_schema.clone(), true, false)
     });
 }
@@ -192,12 +207,12 @@
 }
 criterion_group! {
     name = gh_benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = Criterion::default();
     targets = gh_index_benchmark
 }
 criterion_group! {
     name = wiki_benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = Criterion::default();
     targets = wiki_index_benchmark
 }
 criterion_main!(benches, gh_benches, wiki_benches);

View File

@@ -15,7 +15,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-bitpacking = {version="0.8", default-features=false, features = ["bitpacker1x"]}
+bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }

 [dev-dependencies]
 rand = "0.8"

View File

@@ -9,8 +9,7 @@ description = "column oriented storage for tantivy"
 categories = ["database-implementations", "data-structures", "compression"]

 [dependencies]
-itertools = "0.11.0"
+itertools = "0.12.0"
-fnv = "1.0.7"
 fastdivide = "0.4.0"
 stacker = { version= "0.2", path = "../stacker", package="tantivy-stacker"}

View File

@@ -8,7 +8,6 @@ license = "MIT"
 columnar = {path="../", package="tantivy-columnar"}
 serde_json = "1"
 serde_json_borrow = {git="https://github.com/PSeitz/serde_json_borrow/"}
-serde = "1"

 [workspace]
 members = []

View File

@@ -58,7 +58,7 @@ impl ColumnType {
         self == &ColumnType::DateTime
     }

-    pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
+    pub fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
         COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
     }
 }

View File

@@ -269,7 +269,8 @@ impl StrOrBytesColumnWriter {
         dictionaries: &mut [DictionaryBuilder],
         arena: &mut MemoryArena,
     ) {
-        let unordered_id = dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes);
+        let unordered_id =
+            dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes, arena);
         self.column_writer.record(doc, unordered_id, arena);
     }

View File

@@ -333,7 +333,7 @@ impl ColumnarWriter {
         num_docs: RowId,
         old_to_new_row_ids: Option<&[RowId]>,
         wrt: &mut dyn io::Write,
-    ) -> io::Result<()> {
+    ) -> io::Result<Vec<(String, ColumnType)>> {
         let mut serializer = ColumnarSerializer::new(wrt);
         let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
             .numerical_field_hash_map
@@ -374,7 +374,9 @@ impl ColumnarWriter {
         let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
         let mut symbol_byte_buffer: Vec<u8> = Vec::new();
-        for (column_name, column_type, addr) in columns {
+        for (column_name, column_type, addr) in columns.iter() {
+            let column_type = *column_type;
+            let addr = *addr;
             match column_type {
                 ColumnType::Bool => {
                     let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
@@ -437,6 +439,7 @@ impl ColumnarWriter {
                             &mut symbol_byte_buffer,
                         ),
                         buffers,
+                        &self.arena,
                         &mut column_serializer,
                     )?;
                     column_serializer.finalize()?;
@@ -484,12 +487,21 @@ impl ColumnarWriter {
             };
         }
         serializer.finalize(num_docs)?;
-        Ok(())
+        Ok(columns
+            .into_iter()
+            .map(|(column_name, column_type, _)| {
+                (
+                    String::from_utf8_lossy(column_name).to_string(),
+                    column_type,
+                )
+            })
+            .collect())
     }
 }

 // Serialize [Dictionary, Column, dictionary num bytes U32::LE]
 // Column: [Column Index, Column Values, column index num bytes U32::LE]
+#[allow(clippy::too_many_arguments)]
 fn serialize_bytes_or_str_column(
     cardinality: Cardinality,
     num_docs: RowId,
@@ -497,6 +509,7 @@ fn serialize_bytes_or_str_column(
     dictionary_builder: &DictionaryBuilder,
     operation_it: impl Iterator<Item = ColumnOperation<UnorderedId>>,
     buffers: &mut SpareBuffers,
+    arena: &MemoryArena,
     wrt: impl io::Write,
 ) -> io::Result<()> {
     let SpareBuffers {
@@ -505,7 +518,8 @@ fn serialize_bytes_or_str_column(
         ..
     } = buffers;
     let mut counting_writer = CountingWriter::wrap(wrt);
-    let term_id_mapping: TermIdMapping = dictionary_builder.serialize(&mut counting_writer)?;
+    let term_id_mapping: TermIdMapping =
+        dictionary_builder.serialize(arena, &mut counting_writer)?;
     let dictionary_num_bytes: u32 = counting_writer.written_bytes() as u32;
     let mut wrt = counting_writer.finish();
     let operation_iterator = operation_it.map(|symbol: ColumnOperation<UnorderedId>| {

View File

@@ -1,7 +1,7 @@
 use std::io;

-use fnv::FnvHashMap;
 use sstable::SSTable;
+use stacker::{MemoryArena, SharedArenaHashMap};

 pub(crate) struct TermIdMapping {
     unordered_to_ord: Vec<OrderedId>,
@@ -31,29 +31,38 @@ pub struct OrderedId(pub u32);
 /// mapping.
 #[derive(Default)]
 pub(crate) struct DictionaryBuilder {
-    dict: FnvHashMap<Vec<u8>, UnorderedId>,
-    memory_consumption: usize,
+    dict: SharedArenaHashMap,
 }

 impl DictionaryBuilder {
     /// Get or allocate an unordered id.
     /// (This ID is simply an auto-incremented id.)
-    pub fn get_or_allocate_id(&mut self, term: &[u8]) -> UnorderedId {
-        if let Some(term_id) = self.dict.get(term) {
-            return *term_id;
-        }
-        let new_id = UnorderedId(self.dict.len() as u32);
-        self.dict.insert(term.to_vec(), new_id);
-        self.memory_consumption += term.len();
-        self.memory_consumption += 40; // Term Metadata + HashMap overhead
-        new_id
+    pub fn get_or_allocate_id(&mut self, term: &[u8], arena: &mut MemoryArena) -> UnorderedId {
+        let next_id = self.dict.len() as u32;
+        let unordered_id = self
+            .dict
+            .mutate_or_create(term, arena, |unordered_id: Option<u32>| {
+                if let Some(unordered_id) = unordered_id {
+                    unordered_id
+                } else {
+                    next_id
+                }
+            });
+        UnorderedId(unordered_id)
     }

     /// Serialize the dictionary into an fst, and returns the
     /// `UnorderedId -> TermOrdinal` map.
-    pub fn serialize<'a, W: io::Write + 'a>(&self, wrt: &mut W) -> io::Result<TermIdMapping> {
-        let mut terms: Vec<(&[u8], UnorderedId)> =
-            self.dict.iter().map(|(k, v)| (k.as_slice(), *v)).collect();
+    pub fn serialize<'a, W: io::Write + 'a>(
+        &self,
+        arena: &MemoryArena,
+        wrt: &mut W,
+    ) -> io::Result<TermIdMapping> {
+        let mut terms: Vec<(&[u8], UnorderedId)> = self
+            .dict
+            .iter(arena)
+            .map(|(k, v)| (k, arena.read(v)))
+            .collect();
         terms.sort_unstable_by_key(|(key, _)| *key);
         // TODO Remove the allocation.
         let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
@@ -68,7 +77,7 @@ impl DictionaryBuilder {
     }

     pub(crate) fn mem_usage(&self) -> usize {
-        self.memory_consumption
+        self.dict.mem_usage()
     }
 }
@@ -78,12 +87,13 @@ mod tests {
     #[test]
     fn test_dictionary_builder() {
+        let mut arena = MemoryArena::default();
         let mut dictionary_builder = DictionaryBuilder::default();
-        let hello_uid = dictionary_builder.get_or_allocate_id(b"hello");
-        let happy_uid = dictionary_builder.get_or_allocate_id(b"happy");
-        let tax_uid = dictionary_builder.get_or_allocate_id(b"tax");
+        let hello_uid = dictionary_builder.get_or_allocate_id(b"hello", &mut arena);
+        let happy_uid = dictionary_builder.get_or_allocate_id(b"happy", &mut arena);
+        let tax_uid = dictionary_builder.get_or_allocate_id(b"tax", &mut arena);
         let mut buffer = Vec::new();
-        let id_mapping = dictionary_builder.serialize(&mut buffer).unwrap();
+        let id_mapping = dictionary_builder.serialize(&arena, &mut buffer).unwrap();
         assert_eq!(id_mapping.to_ord(hello_uid), OrderedId(1));
         assert_eq!(id_mapping.to_ord(happy_uid), OrderedId(0));
         assert_eq!(id_mapping.to_ord(tax_uid), OrderedId(2));
View File

@@ -1,3 +1,22 @@
+//! # Tantivy-Columnar
+//!
+//! `tantivy-columnar` provides a columnar storage for tantivy.
+//! The crate allows for efficient read operations on specific columns rather than entire records.
+//!
+//! ## Overview
+//!
+//! - **columnar**: Reading, writing, and merging multiple columns:
+//!     - **[ColumnarWriter]**: Makes it possible to create a new columnar.
+//!     - **[ColumnarReader]**: The ColumnarReader makes it possible to access a set of columns
+//!       associated to field names.
+//!     - **[merge_columnar]**: Contains the functionalities to merge multiple ColumnarReader or
+//!       segments into a single one.
+//!
+//! - **column**: A single column, which contains
+//!     - [column_index]: Resolves the rows for a document id. Manages the cardinality of the
+//!       column.
+//!     - [column_values]: Stores the values of a column in a dense format.
+
 #![cfg_attr(all(feature = "unstable", test), feature(test))]

 #[cfg(test)]

View File

@@ -26,7 +26,7 @@ fn test_dataframe_writer_str() {
     assert_eq!(columnar.num_columns(), 1);
     let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
     assert_eq!(cols.len(), 1);
-    assert_eq!(cols[0].num_bytes(), 87);
+    assert_eq!(cols[0].num_bytes(), 73);
 }

 #[test]
@@ -40,7 +40,7 @@ fn test_dataframe_writer_bytes() {
     assert_eq!(columnar.num_columns(), 1);
     let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
     assert_eq!(cols.len(), 1);
-    assert_eq!(cols[0].num_bytes(), 87);
+    assert_eq!(cols[0].num_bytes(), 73);
 }

 #[test]

View File

@@ -6,7 +6,7 @@ use ownedbytes::OwnedBytes;

 use crate::ByteCount;

-#[derive(Clone, Copy, Eq, PartialEq)]
+#[derive(Clone, Copy, Eq, PartialEq, Hash)]
 pub struct TinySet(u64);

 impl fmt::Debug for TinySet {

View File

@@ -624,6 +624,65 @@ fn test_aggregation_on_json_object() {
     );
 }

+#[test]
+fn test_aggregation_on_nested_json_object() {
+    let mut schema_builder = Schema::builder();
+    let json = schema_builder.add_json_field("json.blub", FAST);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema);
+    let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "red", "color": {"nested":"red"} })))
+        .unwrap();
+    index_writer
+        .add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
+        .unwrap();
+    index_writer.commit().unwrap();
+    let reader = index.reader().unwrap();
+    let searcher = reader.searcher();
+    let agg: Aggregations = serde_json::from_value(json!({
+        "jsonagg1": {
+            "terms": {
+                "field": "json\\.blub.color\\.dot",
+            }
+        },
+        "jsonagg2": {
+            "terms": {
+                "field": "json\\.blub.color.nested",
+            }
+        }
+    }))
+    .unwrap();
+
+    let aggregation_collector = get_collector(agg);
+    let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
+    let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
+    assert_eq!(
+        &aggregation_res_json,
+        &serde_json::json!({
+            "jsonagg1": {
+                "buckets": [
+                    {"doc_count": 1, "key": "blue"},
+                    {"doc_count": 1, "key": "red"}
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+            },
+            "jsonagg2": {
+                "buckets": [
+                    {"doc_count": 1, "key": "blue"},
+                    {"doc_count": 1, "key": "red"}
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+            }
+        })
+    );
+}
+
 #[test]
 fn test_aggregation_on_json_object_empty_columns() {
     let mut schema_builder = Schema::builder();

View File

@@ -23,6 +23,7 @@ use crate::reader::{IndexReader, IndexReaderBuilder};
 use crate::schema::document::Document;
 use crate::schema::{Field, FieldType, Schema};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
+use crate::{merge_field_meta_data, FieldMetadata, SegmentReader};

 fn load_metas(
     directory: &dyn Directory,
@@ -489,6 +490,28 @@ impl Index {
         self.inventory.all()
     }

+    /// Returns the list of fields that have been indexed in the Index.
+    /// The field list includes the fields defined in the schema as well as the fields
+    /// that have been indexed as a part of a JSON field.
+    /// The returned field name is the full field name, including the name of the JSON field.
+    ///
+    /// The returned field names can be used in queries.
+    ///
+    /// Notice: If your data contains JSON fields this is **very expensive**, as it requires
+    /// browsing through the inverted index term dictionary and the columnar field dictionary.
+    ///
+    /// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a
+    /// JSON field that is neither indexed nor a fast field but is stored, the field may not
+    /// be listed.
+    pub fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
+        let segments = self.searchable_segments()?;
+        let fields_metadata: Vec<Vec<FieldMetadata>> = segments
+            .into_iter()
+            .map(|segment| SegmentReader::open(&segment)?.fields_metadata())
+            .collect::<Result<_, _>>()?;
+        Ok(merge_field_meta_data(fields_metadata, &self.schema()))
+    }
+
     /// Creates a new segment_meta (Advanced user only).
     ///
     /// As long as the `SegmentMeta` lives, the files associated with the

View File

@@ -142,6 +142,7 @@ impl SegmentMeta {
             SegmentComponent::FastFields => ".fast".to_string(),
             SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
             SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
+            SegmentComponent::FieldList => ".fieldlist".to_string(),
         });
         PathBuf::from(path)
     }

View File

@@ -70,12 +70,12 @@ impl InvertedIndexReader {
         &self.termdict
     }

-    /// Return the fields and types encoded in the dictionary in lexicographic oder.
+    /// Return the fields and types encoded in the dictionary in lexicographic order.
     /// Only valid on JSON fields.
     ///
     /// Notice: This requires a full scan and therefore **very expensive**.
     /// TODO: Move to sstable to use the index.
-    pub fn list_fields(&self) -> io::Result<Vec<(String, Type)>> {
+    pub fn list_encoded_fields(&self) -> io::Result<Vec<(String, Type)>> {
         let mut stream = self.termdict.stream()?;
         let mut fields = Vec::new();
         let mut fields_set = FnvHashSet::default();

View File

@@ -1,4 +1,4 @@
-use columnar::MonotonicallyMappableToU64;
+use columnar::{ColumnType, MonotonicallyMappableToU64};
 use common::{replace_in_place, JsonPathWriter};
 use rustc_hash::FxHashMap;
@@ -62,6 +62,14 @@
     }
 }

+/// Convert JSON_PATH_SEGMENT_SEP to a dot.
+pub fn json_path_sep_to_dot(path: &mut str) {
+    // This is safe since we are replacing an ASCII character with another ASCII character.
+    unsafe {
+        replace_in_place(JSON_PATH_SEGMENT_SEP, b'.', path.as_bytes_mut());
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn index_json_values<'a, V: Value<'a>>(
     doc: DocId,
@@ -145,7 +153,7 @@ fn index_json_value<'a, V: Value<'a>>(
             let mut token_stream = text_analyzer.token_stream(val);
             let unordered_id = ctx
                 .path_to_unordered_id
-                .get_or_allocate_unordered_id(json_path_writer.as_str());
+                .get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::Str);

             // TODO: make sure the chain position works out.
             set_path_id(term_buffer, unordered_id);
@@ -163,7 +171,7 @@ fn index_json_value<'a, V: Value<'a>>(
                 set_path_id(
                     term_buffer,
                     ctx.path_to_unordered_id
-                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
+                        .get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::U64),
                 );
                 term_buffer.append_type_and_fast_value(val);
                 postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -172,7 +180,7 @@ fn index_json_value<'a, V: Value<'a>>(
                 set_path_id(
                     term_buffer,
                     ctx.path_to_unordered_id
-                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
+                        .get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::I64),
                 );
                 term_buffer.append_type_and_fast_value(val);
                 postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -181,7 +189,7 @@ fn index_json_value<'a, V: Value<'a>>(
                 set_path_id(
                     term_buffer,
                     ctx.path_to_unordered_id
-                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
+                        .get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::F64),
                 );
                 term_buffer.append_type_and_fast_value(val);
                 postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -190,7 +198,7 @@ fn index_json_value<'a, V: Value<'a>>(
                 set_path_id(
                     term_buffer,
                     ctx.path_to_unordered_id
-                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
+                        .get_or_allocate_unordered_id(json_path_writer.as_str(), ColumnType::Bool),
                 );
                 term_buffer.append_type_and_fast_value(val);
                 postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -198,8 +206,10 @@ fn index_json_value<'a, V: Value<'a>>(
             ReferenceValueLeaf::Date(val) => {
                 set_path_id(
                     term_buffer,
-                    ctx.path_to_unordered_id
-                        .get_or_allocate_unordered_id(json_path_writer.as_str()),
+                    ctx.path_to_unordered_id.get_or_allocate_unordered_id(
+                        json_path_writer.as_str(),
+                        ColumnType::DateTime,
+                    ),
                 );
                 term_buffer.append_type_and_fast_value(val);
                 postings_writer.subscribe(doc, 0u32, term_buffer, ctx);
@@ -320,7 +330,7 @@ pub struct JsonTermWriter<'a> {
 /// In other words,
 /// - `k8s.node` ends up as `["k8s", "node"]`.
 /// - `k8s\.node` ends up as `["k8s.node"]`.
-fn split_json_path(json_path: &str) -> Vec<String> {
+pub fn split_json_path(json_path: &str) -> Vec<String> {
     let mut escaped_state: bool = false;
     let mut json_path_segments = Vec::new();
     let mut buffer = String::new();

View File

@@ -25,7 +25,7 @@ pub use self::searcher::{Searcher, SearcherGeneration};
 pub use self::segment::Segment;
 pub use self::segment_component::SegmentComponent;
 pub use self::segment_id::SegmentId;
-pub use self::segment_reader::SegmentReader;
+pub use self::segment_reader::{merge_field_meta_data, FieldMetadata, SegmentReader};
 pub use self::single_segment_index_writer::SingleSegmentIndexWriter;

 /// The meta file contains all the information about the list of segments and the schema

View File

@@ -27,12 +27,14 @@ pub enum SegmentComponent {
     /// Bitset describing which document of the segment is alive.
     /// (It was representing deleted docs but changed to represent alive docs from v0.17)
     Delete,
+    /// Field list describing the fields in the segment.
+    FieldList,
 }

 impl SegmentComponent {
     /// Iterates through the components.
     pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
-        static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
+        static SEGMENT_COMPONENTS: [SegmentComponent; 9] = [
             SegmentComponent::Postings,
             SegmentComponent::Positions,
             SegmentComponent::FastFields,
@@ -41,6 +43,7 @@ impl SegmentComponent {
             SegmentComponent::Store,
             SegmentComponent::TempStore,
             SegmentComponent::Delete,
+            SegmentComponent::FieldList,
         ];
         SEGMENT_COMPONENTS.iter()
     }

View File

@@ -1,11 +1,15 @@
 use std::collections::HashMap;
+use std::ops::BitOrAssign;
 use std::sync::{Arc, RwLock};
 use std::{fmt, io};

+use itertools::Itertools;
+
 use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
 use crate::directory::{CompositeFile, FileSlice};
 use crate::error::DataCorruption;
 use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
+use crate::field_list::read_split_fields;
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
 use crate::schema::{Field, IndexRecordOption, Schema, Type};
 use crate::space_usage::SegmentSpaceUsage;
@@ -39,6 +43,7 @@ pub struct SegmentReader {
     fast_fields_readers: FastFieldReaders,
     fieldnorm_readers: FieldNormReaders,

+    list_fields_file: Option<FileSlice>, // Optional field list file for backwards compatibility
     store_file: FileSlice,
     alive_bitset_opt: Option<AliveBitSet>,
     schema: Schema,
@@ -148,6 +153,7 @@ impl SegmentReader {
         let termdict_composite = CompositeFile::open(&termdict_file)?;

         let store_file = segment.open_read(SegmentComponent::Store)?;
+        let list_fields_file = segment.open_read(SegmentComponent::FieldList).ok();

         crate::fail_point!("SegmentReader::open#middle");
@@ -196,6 +202,7 @@ impl SegmentReader {
             segment_id: segment.id(),
             delete_opstamp: segment.meta().delete_opstamp(),
             store_file,
+            list_fields_file,
             alive_bitset_opt,
             positions_composite,
             schema,
@@ -280,6 +287,41 @@ impl SegmentReader {
         Ok(inv_idx_reader)
     }

+    /// Returns the list of fields that have been indexed in the segment.
+    /// The field list includes the fields defined in the schema as well as the fields
+    /// that have been indexed as a part of a JSON field.
+    /// The returned field name is the full field name, including the name of the JSON field.
+    ///
+    /// The returned field names can be used in queries.
+    ///
+    /// Notice: If your data contains JSON fields this is **very expensive**, as it requires
+    /// browsing through the inverted index term dictionary and the columnar field dictionary.
+    ///
+    /// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a
+    /// JSON field that is neither indexed nor a fast field but is stored, the field may not
+    /// be listed.
+    pub fn fields_metadata(&self) -> crate::Result<Vec<FieldMetadata>> {
+        if let Some(list_fields_file) = self.list_fields_file.as_ref() {
+            let file = list_fields_file.read_bytes()?;
+            let fields_metadata =
+                read_split_fields(file)?.collect::<io::Result<Vec<FieldMetadata>>>();
+            fields_metadata.map_err(|e| e.into())
+        } else {
+            // Schema fallback
+            Ok(self
+                .schema()
+                .fields()
+                .map(|(_field, entry)| FieldMetadata {
+                    field_name: entry.name().to_string(),
+                    typ: entry.field_type().value_type(),
+                    indexed: entry.is_indexed(),
+                    stored: entry.is_stored(),
+                    fast: entry.is_fast(),
+                })
+                .collect())
+        }
+    }
+
     /// Returns the segment id
     pub fn segment_id(&self) -> SegmentId {
         self.segment_id
@@ -330,6 +372,65 @@ impl SegmentReader {
     }
 }

+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+/// FieldMetadata
+pub struct FieldMetadata {
+    /// The field name
+    // Notice: Don't reorder the declaration of 1.field_name 2.typ, as it is used for ordering by
+    // field_name then typ.
+    pub field_name: String,
+    /// The field type
+    // Notice: Don't reorder the declaration of 1.field_name 2.typ, as it is used for ordering by
+    // field_name then typ.
+    pub typ: Type,
+    /// Is the field indexed for search
+    pub indexed: bool,
+    /// Is the field stored in the doc store
+    pub stored: bool,
+    /// Is the field stored in the columnar storage
+    pub fast: bool,
+}
+
+impl BitOrAssign for FieldMetadata {
+    fn bitor_assign(&mut self, rhs: Self) {
+        assert!(self.field_name == rhs.field_name);
+        assert!(self.typ == rhs.typ);
+        self.indexed |= rhs.indexed;
+        self.stored |= rhs.stored;
+        self.fast |= rhs.fast;
+    }
+}
+
+// Maybe too slow for the high cardinality case
+fn is_field_stored(field_name: &str, schema: &Schema) -> bool {
+    schema
+        .find_field(field_name)
+        .map(|(field, _path)| schema.get_field_entry(field).is_stored())
+        .unwrap_or(false)
+}
+
+/// Helper to merge the field metadata from multiple segments.
+pub fn merge_field_meta_data(
+    field_metadatas: Vec<Vec<FieldMetadata>>,
+    schema: &Schema,
+) -> Vec<FieldMetadata> {
+    let mut merged_field_metadata = Vec::new();
+    for (_key, mut group) in &field_metadatas
+        .into_iter()
+        .kmerge_by(|left, right| left < right)
+        // TODO: Remove allocation
+        .group_by(|el| (el.field_name.to_string(), el.typ))
+    {
+        let mut merged: FieldMetadata = group.next().unwrap();
+        for el in group {
+            merged |= el;
+        }
+        // Currently is_field_stored is maybe too slow for the high cardinality case
+        merged.stored = is_field_stored(&merged.field_name, schema);
+        merged_field_metadata.push(merged);
+    }
+    merged_field_metadata
+}
+
 fn intersect_alive_bitset(
     left_opt: Option<AliveBitSet>,
     right_opt: Option<AliveBitSet>,
@@ -353,9 +454,127 @@ impl fmt::Debug for SegmentReader {
 #[cfg(test)]
 mod test {
+    use super::*;
     use crate::core::Index;
-    use crate::schema::{Schema, Term, STORED, TEXT};
-    use crate::{DocId, IndexWriter};
+    use crate::schema::{Schema, SchemaBuilder, Term, STORED, TEXT};
+    use crate::{DocId, FieldMetadata, IndexWriter};
+
+    #[test]
+    fn test_merge_field_meta_data_same() {
+        let schema = SchemaBuilder::new().build();
+        let field_metadata1 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: true,
+        };
+        let field_metadata2 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: true,
+        };
+        let res = merge_field_meta_data(
+            vec![vec![field_metadata1.clone()], vec![field_metadata2]],
+            &schema,
+        );
+        assert_eq!(res, vec![field_metadata1]);
+    }
+
+    #[test]
+    fn test_merge_field_meta_data_different() {
+        let schema = SchemaBuilder::new().build();
+        let field_metadata1 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: false,
+            stored: false,
+            fast: true,
+        };
+        let field_metadata2 = FieldMetadata {
+            field_name: "b".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: false,
+            stored: false,
+            fast: true,
+        };
+        let field_metadata3 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: false,
+        };
+        let res = merge_field_meta_data(
+            vec![
+                vec![field_metadata1.clone(), field_metadata2.clone()],
+                vec![field_metadata3],
+            ],
+            &schema,
+        );
+        let field_metadata_expected1 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: true,
+        };
+        assert_eq!(res, vec![field_metadata_expected1, field_metadata2.clone()]);
+    }
+
+    #[test]
+    fn test_merge_field_meta_data_merge() {
+        use pretty_assertions::assert_eq;
+        let get_meta_data = |name: &str, typ: Type| FieldMetadata {
+            field_name: name.to_string(),
+            typ,
+            indexed: false,
+            stored: false,
+            fast: true,
+        };
+        let schema = SchemaBuilder::new().build();
+        let mut metas = vec![get_meta_data("d", Type::Str), get_meta_data("e", Type::U64)];
+        metas.sort();
+        let res = merge_field_meta_data(vec![vec![get_meta_data("e", Type::Str)], metas], &schema);
+        assert_eq!(
+            res,
+            vec![
+                get_meta_data("d", Type::Str),
+                get_meta_data("e", Type::Str),
+                get_meta_data("e", Type::U64),
+            ]
+        );
+    }
+
+    #[test]
+    fn test_merge_field_meta_data_bitxor() {
+        let field_metadata1 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: false,
+            stored: false,
+            fast: true,
+        };
+        let field_metadata2 = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: false,
+        };
+        let field_metadata_expected = FieldMetadata {
+            field_name: "a".to_string(),
+            typ: crate::schema::Type::Str,
+            indexed: true,
+            stored: false,
+            fast: true,
+        };
+        let mut res1 = field_metadata1.clone();
+        res1 |= field_metadata2.clone();
+        let mut res2 = field_metadata2.clone();
+        res2 |= field_metadata1;
+        assert_eq!(res1, field_metadata_expected);
+        assert_eq!(res2, field_metadata_expected);
+    }

     #[test]
     fn test_num_alive() -> crate::Result<()> {

View File

@@ -1,12 +1,13 @@
 use crate::collector::Count;
 use crate::directory::{RamDirectory, WatchCallback};
-use crate::indexer::NoMergePolicy;
+use crate::indexer::{LogMergePolicy, NoMergePolicy};
+use crate::json_utils::JsonTermWriter;
 use crate::query::TermQuery;
-use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
+use crate::schema::{Field, IndexRecordOption, Schema, Type, INDEXED, STRING, TEXT};
 use crate::tokenizer::TokenizerManager;
 use crate::{
-    Directory, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, ReloadPolicy,
-    SegmentId, TantivyDocument, Term,
+    Directory, DocSet, Index, IndexBuilder, IndexReader, IndexSettings, IndexWriter, Postings,
+    ReloadPolicy, SegmentId, TantivyDocument, Term,
 };

 #[test]
@@ -344,3 +345,132 @@ fn test_merging_segment_update_docfreq() {
     let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
     assert_eq!(term_info.doc_freq, 12);
 }
+
+// motivated by https://github.com/quickwit-oss/quickwit/issues/4130
+#[test]
+fn test_positions_merge_bug_non_text_json_vint() {
+    let mut schema_builder = Schema::builder();
+    let field = schema_builder.add_json_field("dynamic", TEXT);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema.clone());
+    let mut writer: IndexWriter = index.writer_for_tests().unwrap();
+    let mut merge_policy = LogMergePolicy::default();
+    merge_policy.set_min_num_segments(2);
+    writer.set_merge_policy(Box::new(merge_policy));
+    // Here a string would work.
+    let doc_json = r#"{"tenant_id":75}"#;
+    let vals = serde_json::from_str(doc_json).unwrap();
+    let mut doc = TantivyDocument::default();
+    doc.add_object(field, vals);
+    writer.add_document(doc.clone()).unwrap();
+    writer.commit().unwrap();
+    writer.add_document(doc.clone()).unwrap();
+    writer.commit().unwrap();
+    writer.wait_merging_threads().unwrap();
+    let reader = index.reader().unwrap();
+    assert_eq!(reader.searcher().segment_readers().len(), 1);
+}
+
+// Same as above but with bitpacked blocks
+#[test]
+fn test_positions_merge_bug_non_text_json_bitpacked_block() {
+    let mut schema_builder = Schema::builder();
+    let field = schema_builder.add_json_field("dynamic", TEXT);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema.clone());
+    let mut writer: IndexWriter = index.writer_for_tests().unwrap();
+    let mut merge_policy = LogMergePolicy::default();
+    merge_policy.set_min_num_segments(2);
+    writer.set_merge_policy(Box::new(merge_policy));
+    // Here a string would work.
+    let doc_json = r#"{"tenant_id":75}"#;
+    let vals = serde_json::from_str(doc_json).unwrap();
+    let mut doc = TantivyDocument::default();
+    doc.add_object(field, vals);
+    for _ in 0..128 {
+        writer.add_document(doc.clone()).unwrap();
+    }
+    writer.commit().unwrap();
+    writer.add_document(doc.clone()).unwrap();
+    writer.commit().unwrap();
+    writer.wait_merging_threads().unwrap();
+    let reader = index.reader().unwrap();
+    assert_eq!(reader.searcher().segment_readers().len(), 1);
+}
+
+#[test]
+fn test_non_text_json_term_freq() {
+    let mut schema_builder = Schema::builder();
+    let field = schema_builder.add_json_field("dynamic", TEXT);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema.clone());
+    let mut writer: IndexWriter = index.writer_for_tests().unwrap();
+    // Here a string would work.
+    let doc_json = r#"{"tenant_id":75}"#;
+    let vals = serde_json::from_str(doc_json).unwrap();
+    let mut doc = TantivyDocument::default();
+    doc.add_object(field, vals);
+    writer.add_document(doc.clone()).unwrap();
+    writer.commit().unwrap();
+    let reader = index.reader().unwrap();
+    assert_eq!(reader.searcher().segment_readers().len(), 1);
+    let searcher = reader.searcher();
+    let segment_reader = searcher.segment_reader(0u32);
+    let inv_idx = segment_reader.inverted_index(field).unwrap();
+    let mut term = Term::with_type_and_field(Type::Json, field);
+    let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
+    json_term_writer.push_path_segment("tenant_id");
+    json_term_writer.close_path_and_set_type(Type::U64);
+    json_term_writer.set_fast_value(75u64);
+    let postings = inv_idx
+        .read_postings(
+            &json_term_writer.term(),
+            IndexRecordOption::WithFreqsAndPositions,
+        )
+        .unwrap()
+        .unwrap();
+    assert_eq!(postings.doc(), 0);
+    assert_eq!(postings.term_freq(), 1u32);
+}
+
+#[test]
+fn test_non_text_json_term_freq_bitpacked() {
+    let mut schema_builder = Schema::builder();
+    let field = schema_builder.add_json_field("dynamic", TEXT);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema.clone());
+    let mut writer: IndexWriter = index.writer_for_tests().unwrap();
+    // Here a string would work.
+    let doc_json = r#"{"tenant_id":75}"#;
+    let vals = serde_json::from_str(doc_json).unwrap();
+    let mut doc = TantivyDocument::default();
+    doc.add_object(field, vals);
+    let num_docs = 132;
+    for _ in 0..num_docs {
+        writer.add_document(doc.clone()).unwrap();
+    }
+    writer.commit().unwrap();
+    let reader = index.reader().unwrap();
+    assert_eq!(reader.searcher().segment_readers().len(), 1);
+    let searcher = reader.searcher();
+    let segment_reader = searcher.segment_reader(0u32);
+    let inv_idx = segment_reader.inverted_index(field).unwrap();
+    let mut term = Term::with_type_and_field(Type::Json, field);
+    let mut json_term_writer = JsonTermWriter::wrap(&mut term, false);
+    json_term_writer.push_path_segment("tenant_id");
+    json_term_writer.close_path_and_set_type(Type::U64);
+    json_term_writer.set_fast_value(75u64);
+    let mut postings = inv_idx
+        .read_postings(
+            &json_term_writer.term(),
+            IndexRecordOption::WithFreqsAndPositions,
+        )
+        .unwrap()
+        .unwrap();
+    assert_eq!(postings.doc(), 0);
+    assert_eq!(postings.term_freq(), 1u32);
+    for i in 1..num_docs {
+        assert_eq!(postings.advance(), i);
+        assert_eq!(postings.term_freq(), 1u32);
+    }
+}

View File

@@ -131,7 +131,7 @@ mod tests {
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 93);
+        assert_eq!(file.len(), 80);
        let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
        let column = fast_field_readers
            .u64("field")
@@ -181,7 +181,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 121);
+        assert_eq!(file.len(), 108);
        let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
        let col = fast_field_readers
            .u64("field")
@@ -214,7 +214,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 94);
+        assert_eq!(file.len(), 81);
        let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
        let fast_field_reader = fast_field_readers
            .u64("field")
@@ -246,7 +246,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 4489);
+        assert_eq!(file.len(), 4476);
        {
            let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
            let col = fast_field_readers
@@ -279,7 +279,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 265);
+        assert_eq!(file.len(), 252);
        {
            let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
@@ -773,7 +773,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 102);
+        assert_eq!(file.len(), 84);
        let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
        let bool_col = fast_field_readers.bool("field_bool").unwrap();
        assert_eq!(bool_col.first(0), Some(true));
@@ -805,7 +805,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 114);
+        assert_eq!(file.len(), 96);
        let readers = FastFieldReaders::open(file, schema).unwrap();
        let bool_col = readers.bool("field_bool").unwrap();
        for i in 0..25 {
@@ -830,7 +830,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 104);
+        assert_eq!(file.len(), 86);
        let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
        let col = fastfield_readers.bool("field_bool").unwrap();
        assert_eq!(col.first(0), None);
@@ -1288,11 +1288,18 @@ mod tests {
        index_writer.commit().unwrap();
        let searcher = index.reader().unwrap().searcher();
        let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
+        // Supported for now, maybe dropped in the future.
        let column = fast_field_reader
            .column_opt::<i64>("jsonfield.attr.age")
            .unwrap()
            .unwrap();
        let vals: Vec<i64> = column.values_for_doc(0u32).collect();
        assert_eq!(&vals, &[33]);
+        let column = fast_field_reader
+            .column_opt::<i64>("jsonfield\\.attr.age")
+            .unwrap()
+            .unwrap();
+        let vals: Vec<i64> = column.values_for_doc(0u32).collect();
+        assert_eq!(&vals, &[33]);
    }
}

View File

@@ -238,13 +238,17 @@ impl FastFieldsWriter {
        mut self,
        wrt: &mut dyn io::Write,
        doc_id_map_opt: Option<&DocIdMapping>,
-    ) -> io::Result<()> {
+    ) -> io::Result<Vec<(String, Type)>> {
        let num_docs = self.num_docs;
        let old_to_new_row_ids =
            doc_id_map_opt.map(|doc_id_mapping| doc_id_mapping.old_to_new_ids());
-        self.columnar_writer
+        let columns = self
+            .columnar_writer
            .serialize(num_docs, old_to_new_row_ids, wrt)?;
-        Ok(())
+        Ok(columns
+            .into_iter()
+            .map(|(field_name, column)| (field_name.to_string(), column.into()))
+            .collect())
    }
}

src/field_list/mod.rs (new file, 369 lines)
View File

@@ -0,0 +1,369 @@
//! The list of fields that are stored in a `tantivy` `Index`.
use std::collections::HashSet;
use std::io::{self, ErrorKind, Read};
use columnar::ColumnType;
use common::TinySet;
use fnv::FnvHashMap;
use crate::indexer::path_to_unordered_id::OrderedPathId;
use crate::json_utils::json_path_sep_to_dot;
use crate::postings::IndexingContext;
use crate::schema::{Field, Schema, Type};
use crate::{merge_field_meta_data, FieldMetadata, Term};
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub(crate) struct FieldConfig {
pub typ: Type,
pub indexed: bool,
pub stored: bool,
pub fast: bool,
}
impl FieldConfig {
fn serialize(&self) -> [u8; 2] {
let typ = self.typ.to_code();
let flags = (self.indexed as u8) << 2 | (self.stored as u8) << 1 | (self.fast as u8);
[typ, flags]
}
fn deserialize_from(data: [u8; 2]) -> io::Result<FieldConfig> {
let typ = Type::from_code(data[0]).ok_or_else(|| {
io::Error::new(
ErrorKind::InvalidData,
format!("could not deserialize type {}", data[0]),
)
})?;
let data = data[1];
let indexed = (data & 0b100) != 0;
let stored = (data & 0b010) != 0;
let fast = (data & 0b001) != 0;
Ok(FieldConfig {
typ,
indexed,
stored,
fast,
})
}
}
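To make the two-byte layout above concrete, here is a minimal round-trip sketch (illustration only, not part of the patch; it assumes `FieldConfig` and `Type` are in scope):

    // The first byte is the `Type` code; the second packs
    // indexed << 2 | stored << 1 | fast, so indexed + fast (not stored) is 0b101.
    let config = FieldConfig { typ: Type::Str, indexed: true, stored: false, fast: true };
    let bytes = config.serialize();
    assert_eq!(bytes[1], 0b101);
    assert_eq!(FieldConfig::deserialize_from(bytes).unwrap(), config);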
/// Serializes the split fields.
pub(crate) fn serialize_segment_fields(
ctx: IndexingContext,
wrt: &mut dyn io::Write,
schema: &Schema,
unordered_id_to_ordered_id: &[(OrderedPathId, TinySet)],
mut columns: Vec<(String, Type)>,
) -> crate::Result<()> {
let mut field_list_set: HashSet<(Field, OrderedPathId, TinySet)> = HashSet::default();
let mut encoded_fields = Vec::new();
let mut map_to_canonical = FnvHashMap::default();
// Replace unordered ids by ordered ids to be able to sort
let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
for (key, _addr) in ctx.term_index.iter() {
let field = Term::wrap(key).field();
let field_entry = schema.get_field_entry(field);
if field_entry.field_type().value_type() == Type::Json {
let byte_range_unordered_id = 5..5 + 4;
let unordered_id =
u32::from_be_bytes(key[byte_range_unordered_id.clone()].try_into().unwrap());
let (path_id, typ_code_bitvec) = unordered_id_to_ordered_id[unordered_id as usize];
if !field_list_set.contains(&(field, path_id, typ_code_bitvec)) {
field_list_set.insert((field, path_id, typ_code_bitvec));
let mut build_path = |field_name: &str, mut json_path: String| {
// In this case we need to map the potential fast field to the field name
// accepted by the query parser.
let create_canonical =
!field_entry.is_expand_dots_enabled() && json_path.contains('.');
if create_canonical {
// Without expand dots enabled dots need to be escaped.
let escaped_json_path = json_path.replace('.', "\\.");
let full_path = format!("{}.{}", field_name, escaped_json_path);
let full_path_unescaped = format!("{}.{}", field_name, &json_path);
map_to_canonical.insert(full_path_unescaped, full_path.to_string());
full_path
} else {
// With expand dots enabled, we can use '.' instead of '\u{1}'.
json_path_sep_to_dot(&mut json_path);
format!("{}.{}", field_name, json_path)
}
};
let path = build_path(
field_entry.name(),
ordered_id_to_path[path_id.path_id() as usize].to_string(), /* String::from_utf8(key[5..].to_vec()).unwrap(), */
);
encoded_fields.push((path, typ_code_bitvec));
}
}
}
let mut indexed_fields: Vec<FieldMetadata> = Vec::new();
for (_field, field_entry) in schema.fields() {
let field_name = field_entry.name().to_string();
let is_indexed = field_entry.is_indexed();
let is_json = field_entry.field_type().value_type() == Type::Json;
if is_indexed && !is_json {
indexed_fields.push(FieldMetadata {
indexed: true,
stored: false,
field_name: field_name.to_string(),
fast: false,
typ: field_entry.field_type().value_type(),
});
}
}
for (field_name, field_type_set) in encoded_fields {
for field_type in field_type_set {
let column_type = ColumnType::try_from_code(field_type as u8).unwrap();
indexed_fields.push(FieldMetadata {
indexed: true,
stored: false,
field_name: field_name.to_string(),
fast: false,
typ: Type::from(column_type),
});
}
}
let mut fast_fields: Vec<FieldMetadata> = columns
.iter_mut()
.map(|(field_name, typ)| {
json_path_sep_to_dot(field_name);
// map to canonical path, to avoid similar but different entries.
// Eventually we should just accept '.' separated for all cases.
let field_name = map_to_canonical
.get(field_name)
.unwrap_or(field_name)
.to_string();
FieldMetadata {
indexed: false,
stored: false,
field_name,
fast: true,
typ: *typ,
}
})
.collect();
// Since the type is encoded differently in the fast field and in the inverted index,
// the order of the fields is not guaranteed to be the same. Therefore, we sort the fields.
// If we are sure that the order is the same, we can remove this sort.
indexed_fields.sort_unstable();
fast_fields.sort_unstable();
let merged = merge_field_meta_data(vec![indexed_fields, fast_fields], schema);
let out = serialize_split_fields(&merged);
wrt.write_all(&out)?;
Ok(())
}
/// Serializes the Split fields.
///
/// `fields_metadata` has to be sorted.
pub fn serialize_split_fields(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
// ensure that fields_metadata is strictly sorted.
debug_assert!(fields_metadata.windows(2).all(|w| w[0] < w[1]));
let mut payload = Vec::new();
// Write Num Fields
let length = fields_metadata.len() as u32;
payload.extend_from_slice(&length.to_le_bytes());
for field_metadata in fields_metadata {
write_field(field_metadata, &mut payload);
}
let compression_level = 3;
let payload_compressed = zstd::stream::encode_all(&mut &payload[..], compression_level)
.expect("zstd encoding failed");
let mut out = Vec::new();
// Write Header -- Format Version
let format_version = 1u8;
out.push(format_version);
// Write Payload
out.extend_from_slice(&payload_compressed);
out
}
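For reference, the output is a one-byte format version followed by the zstd-compressed payload. A sketch of the uncompressed payload for a single fast-only `u64` field named "ip" (illustration only; the exact type code byte is whatever `Type::to_code` returns for `U64`):

    // [num fields: u32 LE]    01 00 00 00
    // [config: type, flags]   <Type::U64 code> 01   (flags 0b001 = fast only)
    // [name length: u16 LE]   02 00
    // [name bytes]            69 70                 ("ip")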
fn write_field(field_metadata: &FieldMetadata, out: &mut Vec<u8>) {
let field_config = FieldConfig {
typ: field_metadata.typ,
indexed: field_metadata.indexed,
stored: field_metadata.stored,
fast: field_metadata.fast,
};
// Write Config 2 bytes
out.extend_from_slice(&field_config.serialize());
let str_length = field_metadata.field_name.len() as u16;
// Write String length 2 bytes
out.extend_from_slice(&str_length.to_le_bytes());
out.extend_from_slice(field_metadata.field_name.as_bytes());
}
/// Reads a fixed number of bytes into an array and returns the array.
fn read_exact_array<R: Read, const N: usize>(reader: &mut R) -> io::Result<[u8; N]> {
let mut buffer = [0u8; N];
reader.read_exact(&mut buffer)?;
Ok(buffer)
}
/// Reads the Split fields from a zstd compressed stream of bytes
pub fn read_split_fields<R: Read>(
mut reader: R,
) -> io::Result<impl Iterator<Item = io::Result<FieldMetadata>>> {
let format_version = read_exact_array::<_, 1>(&mut reader)?[0];
assert_eq!(format_version, 1);
let reader = zstd::Decoder::new(reader)?;
read_split_fields_from_zstd(reader)
}
fn read_field<R: Read>(reader: &mut R) -> io::Result<FieldMetadata> {
// Read FieldConfig (2 bytes)
let config_bytes = read_exact_array::<_, 2>(reader)?;
let field_config = FieldConfig::deserialize_from(config_bytes)?;
// Read field name length and the field name
let name_len = u16::from_le_bytes(read_exact_array::<_, 2>(reader)?) as usize;
let mut data = vec![0; name_len];
reader.read_exact(&mut data)?;
let field_name = String::from_utf8(data).map_err(|err| {
io::Error::new(
ErrorKind::InvalidData,
format!(
"Encountered invalid utf8 when deserializing field name: {}",
err
),
)
})?;
Ok(FieldMetadata {
field_name,
typ: field_config.typ,
indexed: field_config.indexed,
stored: field_config.stored,
fast: field_config.fast,
})
}
/// Reads the Split fields from a stream of bytes
fn read_split_fields_from_zstd<R: Read>(
mut reader: R,
) -> io::Result<impl Iterator<Item = io::Result<FieldMetadata>>> {
let mut num_fields = u32::from_le_bytes(read_exact_array::<_, 4>(&mut reader)?);
Ok(std::iter::from_fn(move || {
if num_fields == 0 {
return None;
}
num_fields -= 1;
Some(read_field(&mut reader))
}))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn field_config_deser_test() {
let field_config = FieldConfig {
typ: Type::Str,
indexed: true,
stored: false,
fast: true,
};
let serialized = field_config.serialize();
let deserialized = FieldConfig::deserialize_from(serialized).unwrap();
assert_eq!(field_config, deserialized);
}
#[test]
fn write_read_field_test() {
for typ in Type::iter_values() {
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ,
indexed: true,
stored: true,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
}
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: false,
stored: true,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: false,
stored: false,
fast: true,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
let field_metadata = FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: true,
stored: false,
fast: false,
};
let mut out = Vec::new();
write_field(&field_metadata, &mut out);
let deserialized = read_field(&mut &out[..]).unwrap();
assert_eq!(field_metadata, deserialized);
}
#[test]
fn write_split_fields_test() {
let fields_metadata = vec![
FieldMetadata {
field_name: "test".to_string(),
typ: Type::Str,
indexed: true,
stored: true,
fast: true,
},
FieldMetadata {
field_name: "test2".to_string(),
typ: Type::Str,
indexed: true,
stored: false,
fast: false,
},
FieldMetadata {
field_name: "test3".to_string(),
typ: Type::U64,
indexed: true,
stored: false,
fast: true,
},
];
let out = serialize_split_fields(&fields_metadata);
let deserialized: Vec<FieldMetadata> = read_split_fields(&mut &out[..])
.unwrap()
.map(|el| el.unwrap())
.collect();
assert_eq!(fields_metadata, deserialized);
}
}
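A hedged usage sketch of the read path (the `segment` handle and error plumbing are assumed; `SegmentComponent::FieldList` and `read_split_fields` come from this change):

    // List the fields recorded in a segment's field list file.
    let field_list = segment.open_read(SegmentComponent::FieldList)?;
    for field_metadata in read_split_fields(field_list.read_bytes()?.as_slice())? {
        let meta = field_metadata?;
        println!("{} typ={:?} fast={}", meta.field_name, meta.typ, meta.fast);
    }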

View File

@@ -1,3 +1,4 @@
+use std::io::Write;
 use std::sync::Arc;
 use columnar::{
@@ -13,6 +14,7 @@ use crate::directory::WritePtr;
 use crate::docset::{DocSet, TERMINATED};
 use crate::error::DataCorruption;
 use crate::fastfield::{AliveBitSet, FastFieldNotAvailableError};
+use crate::field_list::serialize_split_fields;
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
 use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping};
 use crate::indexer::SegmentSerializer;
@@ -21,8 +23,8 @@ use crate::schema::{value_type_to_column_type, Field, FieldType, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::{TermMerger, TermOrdinal};
 use crate::{
-    DocAddress, DocId, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
-    SegmentComponent, SegmentOrdinal,
+    merge_field_meta_data, DocAddress, DocId, FieldMetadata, IndexSettings, IndexSortByField,
+    InvertedIndexReader, Order, SegmentComponent, SegmentOrdinal,
 };
 /// Segment's max doc must be `< MAX_DOC_LIMIT`.
@@ -255,6 +257,19 @@ impl IndexMerger {
        Ok(())
    }
+    fn write_field_list(&self, list_field_wrt: &mut WritePtr) -> crate::Result<()> {
+        let field_metadatas: Vec<Vec<FieldMetadata>> = self
+            .readers
+            .iter()
+            .map(|reader| reader.fields_metadata())
+            .collect::<crate::Result<Vec<_>>>()?;
+        let merged = merge_field_meta_data(field_metadatas, &self.schema);
+        let out = serialize_split_fields(&merged);
+        list_field_wrt.write_all(&out)?;
+        Ok(())
+    }
    fn write_fast_fields(
        &self,
        fast_field_wrt: &mut WritePtr,
@@ -552,7 +567,41 @@ impl IndexMerger {
                continue;
            }
-            field_serializer.new_term(term_bytes, total_doc_freq)?;
+            // This should never happen as we early exited for total_doc_freq == 0.
assert!(!segment_postings_containing_the_term.is_empty());
let has_term_freq = {
let has_term_freq = !segment_postings_containing_the_term[0]
.1
.block_cursor
.freqs()
.is_empty();
for (_, postings) in &segment_postings_containing_the_term[1..] {
// This may look like a strange way to test whether we have term freqs or not.
// With JSON object, the schema is not sufficient to know whether a term
// has its term frequency encoded or not:
// strings may have term frequencies, while number terms never have one.
//
// Ideally, we should have burnt one bit or two in the `TermInfo`.
// However, we preferred not changing the codec too much and detect this
// instead by
// - looking at the size of the skip data for bitpacked blocks
// - observing the absence of remaining data after reading the docs for vint
// blocks.
//
// Overall the reliable way to know if we have actual frequencies loaded or not
// is to check whether the actual decoded array is empty or not.
if has_term_freq != !postings.block_cursor.freqs().is_empty() {
return Err(DataCorruption::comment_only(
"Term freqs are inconsistent across segments",
)
.into());
}
}
has_term_freq
};
field_serializer.new_term(term_bytes, total_doc_freq, has_term_freq)?;
            // We can now serialize these postings by pushing each document to the
            // postings serializer.
@@ -567,8 +616,13 @@ impl IndexMerger {
            if let Some(remapped_doc_id) = old_to_new_doc_id[doc as usize] {
                // we make sure to only write the term if
                // there is at least one document.
-                let term_freq = segment_postings.term_freq();
+                let term_freq = if has_term_freq {
                    segment_postings.positions(&mut positions_buffer);
+                    segment_postings.term_freq()
+                } else {
+                    0u32
+                };
                // if doc_id_mapping exists, the doc_ids are reordered, they are
                // not just stacked. The field serializer expects monotonically increasing
                // doc_ids, so we collect and sort them first, before writing.
@@ -734,6 +788,7 @@ impl IndexMerger {
        self.write_storable_fields(serializer.get_store_writer(), &doc_id_mapping)?;
        debug!("write-fastfields");
        self.write_fast_fields(serializer.get_fast_field_write(), doc_id_mapping)?;
+        self.write_field_list(serializer.get_field_list_write())?;
        debug!("close-serializer");
        serializer.close()?;

View File

@@ -59,10 +59,13 @@ type AddBatchReceiver<D> = channel::Receiver<AddBatch<D>>;
#[cfg(test)]
mod tests_mmap {
-    use crate::collector::Count;
-    use crate::query::QueryParser;
-    use crate::schema::{JsonObjectOptions, Schema, Type, TEXT};
-    use crate::{Index, IndexWriter, Term};
+    use crate::aggregation::agg_req::Aggregations;
+    use crate::aggregation::agg_result::AggregationResults;
+    use crate::aggregation::AggregationCollector;
+    use crate::collector::{Count, TopDocs};
+    use crate::query::{AllQuery, QueryParser};
+    use crate::schema::{JsonObjectOptions, Schema, Type, FAST, INDEXED, STORED, TEXT};
+    use crate::{FieldMetadata, Index, IndexWriter, Term};
    #[test]
    fn test_advance_delete_bug() -> crate::Result<()> {
@@ -173,8 +176,7 @@ mod tests_mmap {
    #[test]
    fn test_json_field_list_fields() {
        let mut schema_builder = Schema::builder();
-        let json_options: JsonObjectOptions =
-            JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
+        let json_options: JsonObjectOptions = JsonObjectOptions::from(TEXT);
        let json_field = schema_builder.add_json_field("json", json_options);
        let index = Index::create_in_ram(schema_builder.build());
        let mut index_writer = index.writer_for_tests().unwrap();
@@ -193,9 +195,9 @@ mod tests_mmap {
        let reader = &searcher.segment_readers()[0];
        let inverted_index = reader.inverted_index(json_field).unwrap();
        assert_eq!(
-            inverted_index.list_fields().unwrap(),
+            inverted_index.list_encoded_fields().unwrap(),
            [
-                ("k8s\u{1}container\u{1}name".to_string(), Type::Str),
+                ("k8s.container.name".to_string(), Type::Str),
                ("sub\u{1}a".to_string(), Type::I64),
                ("sub\u{1}b".to_string(), Type::I64),
                ("suber\u{1}a".to_string(), Type::I64),
@@ -205,4 +207,240 @@
            ]
        );
    }
#[test]
fn test_json_fields_metadata_expanded_dots_one_segment() {
test_json_fields_metadata(true, true);
}
#[test]
fn test_json_fields_metadata_expanded_dots_multi_segment() {
test_json_fields_metadata(true, false);
}
#[test]
fn test_json_fields_metadata_no_expanded_dots_one_segment() {
test_json_fields_metadata(false, true);
}
#[test]
fn test_json_fields_metadata_no_expanded_dots_multi_segment() {
test_json_fields_metadata(false, false);
}
fn test_json_fields_metadata(expanded_dots: bool, one_segment: bool) {
use pretty_assertions::assert_eq;
let mut schema_builder = Schema::builder();
let json_options: JsonObjectOptions =
JsonObjectOptions::from(TEXT).set_fast(None).set_stored();
let json_options = if expanded_dots {
json_options.set_expand_dots_enabled()
} else {
json_options
};
schema_builder.add_json_field("json.confusing", json_options.clone());
let json_field = schema_builder.add_json_field("json.shadow", json_options.clone());
let json_field2 = schema_builder.add_json_field("json", json_options.clone());
schema_builder.add_json_field("empty_json", json_options);
let number_field = schema_builder.add_u64_field("numbers", FAST);
schema_builder.add_u64_field("empty", FAST | INDEXED | STORED);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
let json =
serde_json::json!({"k8s.container.name": "a", "val": "a", "sub": {"a": 1, "b": 1}});
index_writer.add_document(doc!(json_field=>json)).unwrap();
let json =
serde_json::json!({"k8s.container.name": "a", "val": "a", "suber": {"a": 1, "b": 1}});
if !one_segment {
index_writer.commit().unwrap();
}
index_writer.add_document(doc!(json_field=>json)).unwrap();
let json = serde_json::json!({"k8s.container.name": "a", "k8s.container.name": "a", "val": "a", "suber": {"a": "a", "b": 1}});
index_writer
.add_document(doc!(number_field => 50u64, json_field=>json, json_field2=>json!({"shadow": {"val": "a"}})))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 3);
let fields_metadata = index.fields_metadata().unwrap();
assert_eq!(
fields_metadata,
[
FieldMetadata {
field_name: "empty".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::U64
},
FieldMetadata {
field_name: if expanded_dots {
"json.shadow.k8s.container.name".to_string()
} else {
"json.shadow.k8s\\.container\\.name".to_string()
},
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "json.shadow.sub.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.sub.b".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.suber.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.suber.a".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "json.shadow.suber.b".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::I64
},
FieldMetadata {
field_name: "json.shadow.val".to_string(),
indexed: true,
stored: true,
fast: true,
typ: Type::Str
},
FieldMetadata {
field_name: "numbers".to_string(),
indexed: false,
stored: false,
fast: true,
typ: Type::U64
}
]
);
let query_parser = QueryParser::for_index(&index, vec![]);
// Test if returned field name can be queried
for indexed_field in fields_metadata.iter().filter(|meta| meta.indexed) {
let val = if indexed_field.typ == Type::Str {
"a"
} else {
"1"
};
let query_str = &format!("{}:{}", indexed_field.field_name, val);
let query = query_parser.parse_query(query_str).unwrap();
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
if indexed_field.field_name.contains("empty") || indexed_field.typ == Type::Json {
assert_eq!(count_docs.len(), 0);
} else {
assert!(!count_docs.is_empty(), "{}", indexed_field.field_name);
}
}
// Test if returned field name can be used for aggregation
for fast_field in fields_metadata.iter().filter(|meta| meta.fast) {
let agg_req_str = json!(
{
"termagg": {
"terms": {
"field": fast_field.field_name,
}
}
});
let agg_req: Aggregations = serde_json::from_value(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res = serde_json::to_value(agg_res).unwrap();
if !fast_field.field_name.contains("empty") && fast_field.typ != Type::Json {
assert!(
!res["termagg"]["buckets"].as_array().unwrap().is_empty(),
"{}",
fast_field.field_name
);
}
}
}
#[test]
fn test_json_field_shadowing_field_name_bug() {
        /// This test is only there to demonstrate a bug in addressing a field when it gets shadowed.
        /// The issue only occurs if the shadowing field name contains a dot.
        ///
        /// It happens independently of the `expand_dots` option, since that option does not
        /// affect the field name itself.
use pretty_assertions::assert_eq;
let mut schema_builder = Schema::builder();
let json_options: JsonObjectOptions =
JsonObjectOptions::from(TEXT).set_fast(None).set_stored();
// let json_options = json_options.set_expand_dots_enabled();
let json_field_shadow = schema_builder.add_json_field("json.shadow", json_options.clone());
let json_field = schema_builder.add_json_field("json", json_options.clone());
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer
.add_document(
doc!(json_field_shadow=>json!({"val": "b"}), json_field=>json!({"shadow": {"val": "a"}})),
)
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let fields_and_vals = vec![
// Only way to address or it gets shadowed by `json.shadow` field
("json.shadow\u{1}val".to_string(), "a"), // Succeeds
//("json.shadow.val".to_string(), "a"), // Fails
("json.shadow.val".to_string(), "b"), // Succeeds
];
let query_parser = QueryParser::for_index(&index, vec![]);
// Test if field name can be queried
for (indexed_field, val) in fields_and_vals.iter() {
let query_str = &format!("{}:{}", indexed_field, val);
let query = query_parser.parse_query(query_str).unwrap();
let count_docs = searcher.search(&*query, &TopDocs::with_limit(2)).unwrap();
assert!(!count_docs.is_empty(), "{}:{}", indexed_field, val);
}
// Test if field name can be used for aggregation
for (field_name, val) in fields_and_vals.iter() {
let agg_req_str = json!(
{
"termagg": {
"terms": {
"field": field_name,
}
}
});
let agg_req: Aggregations = serde_json::from_value(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res = serde_json::to_value(agg_res).unwrap();
assert_eq!(
res["termagg"]["buckets"].as_array().unwrap()[0]["key"]
.as_str()
.unwrap(),
*val,
"{}",
field_name
);
}
}
}

View File

@@ -1,3 +1,5 @@
use columnar::ColumnType;
use common::TinySet;
 use fnv::FnvHashMap;
 /// `Field` is represented by an unsigned 32-bit integer type.
@@ -24,34 +26,44 @@ impl From<u32> for OrderedPathId {
#[derive(Default)]
pub(crate) struct PathToUnorderedId {
-    map: FnvHashMap<String, u32>,
+    /// TinySet contains the type codes of the columns in the path.
+    map: FnvHashMap<String, (u32, TinySet)>,
}
impl PathToUnorderedId {
    #[inline]
-    pub(crate) fn get_or_allocate_unordered_id(&mut self, path: &str) -> u32 {
-        if let Some(id) = self.map.get(path) {
+    pub(crate) fn get_or_allocate_unordered_id(&mut self, path: &str, typ: ColumnType) -> u32 {
+        let code_bit = typ.to_code();
+        if let Some((id, all_codes)) = self.map.get_mut(path) {
+            *all_codes = all_codes.insert(code_bit as u32);
            return *id;
        }
-        self.insert_new_path(path)
+        self.insert_new_path(path, code_bit)
    }
    #[cold]
-    fn insert_new_path(&mut self, path: &str) -> u32 {
+    fn insert_new_path(&mut self, path: &str, typ_code: u8) -> u32 {
        let next_id = self.map.len() as u32;
-        self.map.insert(path.to_string(), next_id);
+        self.map.insert(
+            path.to_string(),
+            (next_id, TinySet::singleton(typ_code as u32)),
+        );
        next_id
    }
    /// Returns ids which reflect the lexical order of the paths.
    ///
    /// The returned vec can be indexed with the unordered id to get the ordered id.
-    pub(crate) fn unordered_id_to_ordered_id(&self) -> Vec<OrderedPathId> {
-        let mut sorted_ids: Vec<(&str, &u32)> =
-            self.map.iter().map(|(k, v)| (k.as_str(), v)).collect();
+    pub(crate) fn unordered_id_to_ordered_id(&self) -> Vec<(OrderedPathId, TinySet)> {
+        let mut sorted_ids: Vec<(&str, (u32, TinySet))> = self
+            .map
+            .iter()
+            .map(|(k, (id, typ_code))| (k.as_str(), (*id, *typ_code)))
+            .collect();
        sorted_ids.sort_unstable_by_key(|(path, _)| *path);
-        let mut result = vec![OrderedPathId::default(); sorted_ids.len()];
-        for (ordered, unordered) in sorted_ids.iter().map(|(_k, v)| v).enumerate() {
-            result[**unordered as usize] = OrderedPathId::from_ordered_id(ordered as u32);
+        let mut result = vec![(OrderedPathId::default(), TinySet::empty()); sorted_ids.len()];
+        for (ordered, (unordered, typ_code)) in sorted_ids.iter().map(|(_k, v)| v).enumerate() {
+            result[*unordered as usize] =
+                (OrderedPathId::from_ordered_id(ordered as u32), *typ_code);
        }
        result
    }
@@ -74,12 +86,12 @@ mod tests {
        let terms = vec!["b", "a", "b", "c"];
        let ids = terms
            .iter()
-            .map(|term| path_to_id.get_or_allocate_unordered_id(term))
+            .map(|term| path_to_id.get_or_allocate_unordered_id(term, ColumnType::Str))
            .collect::<Vec<u32>>();
        assert_eq!(ids, vec![0, 1, 0, 2]);
        let ordered_ids = ids
            .iter()
-            .map(|id| path_to_id.unordered_id_to_ordered_id()[*id as usize])
+            .map(|id| path_to_id.unordered_id_to_ordered_id()[*id as usize].0)
            .collect::<Vec<OrderedPathId>>();
        assert_eq!(ordered_ids, vec![1.into(), 0.into(), 1.into(), 2.into()]);
        // Fetch terms

View File

@@ -12,6 +12,7 @@ pub struct SegmentSerializer {
    segment: Segment,
    pub(crate) store_writer: StoreWriter,
    fast_field_write: WritePtr,
+    field_list_write: WritePtr,
    fieldnorms_serializer: Option<FieldNormsSerializer>,
    postings_serializer: InvertedIndexSerializer,
}
@@ -49,6 +50,7 @@ impl SegmentSerializer {
        };
        let fast_field_write = segment.open_write(SegmentComponent::FastFields)?;
+        let field_list_write = segment.open_write(SegmentComponent::FieldList)?;
        let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
        let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;
@@ -58,6 +60,7 @@ impl SegmentSerializer {
            segment,
            store_writer,
            fast_field_write,
+            field_list_write,
            fieldnorms_serializer: Some(fieldnorms_serializer),
            postings_serializer,
        })
@@ -81,6 +84,11 @@ impl SegmentSerializer {
        &mut self.postings_serializer
    }
+    /// Accessor to the field list write.
+    pub fn get_field_list_write(&mut self) -> &mut WritePtr {
+        &mut self.field_list_write
+    }
    /// Accessor to the `FastFieldSerializer`.
    pub fn get_fast_field_write(&mut self) -> &mut WritePtr {
        &mut self.fast_field_write
@@ -104,6 +112,7 @@ impl SegmentSerializer {
            fieldnorms_serializer.close()?;
        }
        self.fast_field_write.terminate()?;
+        self.field_list_write.terminate()?;
        self.postings_serializer.close()?;
        self.store_writer.close()?;
        Ok(())

View File

@@ -8,6 +8,7 @@ use super::operation::AddOperation;
 use crate::core::json_utils::index_json_values;
 use crate::core::Segment;
 use crate::fastfield::FastFieldsWriter;
+use crate::field_list::serialize_segment_fields;
 use crate::fieldnorm::{FieldNormReaders, FieldNormsWriter};
 use crate::indexer::segment_serializer::SegmentSerializer;
 use crate::postings::{
@@ -443,16 +444,29 @@ fn remap_and_write(
        .segment()
        .open_read(SegmentComponent::FieldNorms)?;
    let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
+    let unordered_id_to_ordered_id = ctx.path_to_unordered_id.unordered_id_to_ordered_id();
    serialize_postings(
-        ctx,
-        schema,
+        &ctx,
+        schema.clone(),
        per_field_postings_writers,
        fieldnorm_readers,
        doc_id_map,
+        &unordered_id_to_ordered_id,
        serializer.get_postings_serializer(),
    )?;
    debug!("fastfield-serialize");
-    fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
+    let columns = fast_field_writers.serialize(serializer.get_fast_field_write(), doc_id_map)?;
+    let field_list_serializer = serializer.get_field_list_write();
+    serialize_segment_fields(
+        ctx,
+        field_list_serializer,
+        &schema,
+        &unordered_id_to_ordered_id,
+        columns,
+    )?;
    // finalize temp docstore and create version, which reflects the doc_id_map
    if let Some(doc_id_map) = doc_id_map {

View File

@@ -188,6 +188,7 @@ pub mod aggregation;
 pub mod collector;
 pub mod directory;
 pub mod fastfield;
+pub mod field_list;
 pub mod fieldnorm;
 pub mod positions;
 pub mod postings;
@@ -221,9 +222,9 @@ pub use self::snippet::{Snippet, SnippetGenerator};
 #[doc(hidden)]
 pub use crate::core::json_utils;
 pub use crate::core::{
-    Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader,
-    Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta,
-    SegmentReader, SingleSegmentIndexWriter,
+    merge_field_meta_data, Executor, FieldMetadata, Index, IndexBuilder, IndexMeta, IndexSettings,
+    IndexSortByField, InvertedIndexReader, Order, Searcher, SearcherGeneration, Segment,
+    SegmentComponent, SegmentId, SegmentMeta, SegmentReader, SingleSegmentIndexWriter,
 };
 pub use crate::directory::Directory;
 pub use crate::indexer::IndexWriter;
@@ -238,7 +239,9 @@ pub use crate::schema::DatePrecision;
 pub use crate::schema::{DateOptions, DateTimePrecision, Document, TantivyDocument, Term};
 /// Index format version.
-const INDEX_FORMAT_VERSION: u32 = 6;
+///
+/// Version 7: Add `.fieldlist` file containing the list of fields in a segment.
+const INDEX_FORMAT_VERSION: u32 = 7;
 /// Oldest index format version this tantivy version can read.
 const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;

View File

@@ -11,6 +11,10 @@ use crate::schema::{Field, Type, JSON_END_OF_PATH};
 use crate::tokenizer::TokenStream;
 use crate::{DocId, Term};
/// The `JsonPostingsWriter` is odd in that it relies on a hidden contract:
///
/// `subscribe` is called directly to index non-text tokens, while
/// `index_text` is used to index text.
 #[derive(Default)]
 pub(crate) struct JsonPostingsWriter<Rec: Recorder> {
     str_posting_writer: SpecializedPostingsWriter<Rec>,
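In other words (an illustrative reading of that contract, not additional source): for a JSON value such as {"tenant_id": 75, "name": "a"}, the number 75 reaches the writer through `subscribe` and is recorded without term frequencies or positions, while the string "a" goes through `index_text` and may carry both. This asymmetry is precisely what the merge logic earlier in this changeset has to detect per term.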

View File

@@ -63,7 +63,7 @@ pub mod tests {
        let mut segment = index.new_segment();
        let mut posting_serializer = InvertedIndexSerializer::open(&mut segment)?;
        let mut field_serializer = posting_serializer.new_field(text_field, 120 * 4, None)?;
-        field_serializer.new_term("abc".as_bytes(), 12u32)?;
+        field_serializer.new_term("abc".as_bytes(), 12u32, true)?;
        for doc_id in 0u32..120u32 {
            let delta_positions = vec![1, 2, 3, 2];
            field_serializer.write_doc(doc_id, 4, &delta_positions);

View File

@@ -2,6 +2,7 @@ use std::io;
 use std::marker::PhantomData;
 use std::ops::Range;
+use common::TinySet;
 use stacker::Addr;
 use crate::fieldnorm::FieldNormReaders;
@@ -46,37 +47,38 @@ fn make_field_partition(
/// It pushes all terms, one field at a time, towards the
/// postings serializer.
pub(crate) fn serialize_postings(
-    ctx: IndexingContext,
+    ctx: &IndexingContext,
    schema: Schema,
    per_field_postings_writers: &PerFieldPostingsWriter,
    fieldnorm_readers: FieldNormReaders,
    doc_id_map: Option<&DocIdMapping>,
+    unordered_id_to_ordered_id: &[(OrderedPathId, TinySet)],
    serializer: &mut InvertedIndexSerializer,
) -> crate::Result<()> {
    // Replace unordered ids by ordered ids to be able to sort
-    let unordered_id_to_ordered_id: Vec<OrderedPathId> =
-        ctx.path_to_unordered_id.unordered_id_to_ordered_id();
+    let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
    let mut term_offsets: Vec<(Field, OrderedPathId, &[u8], Addr)> =
        Vec::with_capacity(ctx.term_index.len());
-    term_offsets.extend(ctx.term_index.iter().map(|(key, addr)| {
-        let field = Term::wrap(key).field();
-        if schema.get_field_entry(field).field_type().value_type() == Type::Json {
-            let byte_range_path = 5..5 + 4;
-            let unordered_id = u32::from_be_bytes(key[byte_range_path.clone()].try_into().unwrap());
-            let path_id = unordered_id_to_ordered_id[unordered_id as usize];
-            (field, path_id, &key[byte_range_path.end..], addr)
-        } else {
-            (field, 0.into(), &key[5..], addr)
-        }
-    }));
+    for (key, addr) in ctx.term_index.iter() {
+        let field = Term::wrap(key).field();
+        let field_entry = schema.get_field_entry(field);
+        if field_entry.field_type().value_type() == Type::Json {
+            let byte_range_unordered_id = 5..5 + 4;
+            let unordered_id =
+                u32::from_be_bytes(key[byte_range_unordered_id.clone()].try_into().unwrap());
+            let (path_id, _typ_code_bitvec) = unordered_id_to_ordered_id[unordered_id as usize];
+            term_offsets.push((field, path_id, &key[byte_range_unordered_id.end..], addr));
+        } else {
+            term_offsets.push((field, 0.into(), &key[5..], addr));
+        }
+    }
    // Sort by field, path, and term
    term_offsets.sort_unstable_by(
        |(field1, path_id1, bytes1, _), (field2, path_id2, bytes2, _)| {
            (field1, path_id1, bytes1).cmp(&(field2, path_id2, bytes2))
        },
    );
-    let ordered_id_to_path = ctx.path_to_unordered_id.ordered_id_to_path();
    let field_offsets = make_field_partition(&term_offsets);
    for (field, byte_offsets) in field_offsets {
        let postings_writer = per_field_postings_writers.get_for_field(field);
@@ -87,7 +89,7 @@ pub(crate) fn serialize_postings(
            &term_offsets[byte_offsets],
            &ordered_id_to_path,
            doc_id_map,
-            &ctx,
+            ctx,
            &mut field_serializer,
        )?;
        field_serializer.close()?;
@@ -194,7 +196,7 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
    ) -> io::Result<()> {
        let recorder: Rec = ctx.term_index.read(addr);
        let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
-        serializer.new_term(term, term_doc_freq)?;
+        serializer.new_term(term, term_doc_freq, recorder.has_term_freq())?;
        recorder.serialize(&ctx.arena, doc_id_map, serializer, buffer_lender);
        serializer.close_term()?;
        Ok(())

View File

@@ -79,24 +79,20 @@ pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
    ///
    /// Returns `None` if not available.
    fn term_doc_freq(&self) -> Option<u32>;
+    #[inline]
+    fn has_term_freq(&self) -> bool {
+        true
+    }
}
/// Only records the doc ids
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Default)]
pub struct DocIdRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
}
-impl Default for DocIdRecorder {
-    fn default() -> Self {
-        DocIdRecorder {
-            stack: ExpUnrolledLinkedList::default(),
-            current_doc: u32::MAX,
-        }
-    }
-}
impl Recorder for DocIdRecorder {
    #[inline]
    fn current_doc(&self) -> DocId {
@@ -105,8 +101,9 @@ impl Recorder for DocIdRecorder {
    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
+        let delta = doc - self.current_doc;
        self.current_doc = doc;
-        self.stack.writer(arena).write_u32_vint(doc);
+        self.stack.writer(arena).write_u32_vint(delta);
    }
    #[inline]
@@ -123,21 +120,20 @@ impl Recorder for DocIdRecorder {
        buffer_lender: &mut BufferLender,
    ) {
        let (buffer, doc_ids) = buffer_lender.lend_all();
-        self.stack.read_to_end(arena, buffer);
        // TODO avoid reading twice.
+        self.stack.read_to_end(arena, buffer);
        if let Some(doc_id_map) = doc_id_map {
-            doc_ids.extend(
-                VInt32Reader::new(&buffer[..])
-                    .map(|old_doc_id| doc_id_map.get_new_doc_id(old_doc_id)),
-            );
+            let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
+            doc_ids.extend(iter.map(|old_doc_id| doc_id_map.get_new_doc_id(old_doc_id)));
            doc_ids.sort_unstable();
            for doc in doc_ids {
                serializer.write_doc(*doc, 0u32, &[][..]);
            }
        } else {
-            for doc in VInt32Reader::new(&buffer[..]) {
-                serializer.write_doc(doc, 0u32, &[][..]);
+            let iter = get_sum_reader(VInt32Reader::new(&buffer[..]));
+            for doc_id in iter {
+                serializer.write_doc(doc_id, 0u32, &[][..]);
            }
        }
    }
@@ -145,6 +141,19 @@ impl Recorder for DocIdRecorder {
    fn term_doc_freq(&self) -> Option<u32> {
        None
    }
+    fn has_term_freq(&self) -> bool {
+        false
+    }
}
+/// Takes an Iterator of delta encoded elements and returns an iterator
+/// that yields the sum of the elements.
+fn get_sum_reader(iter: impl Iterator<Item = u32>) -> impl Iterator<Item = u32> {
+    iter.scan(0, |state, delta| {
+        *state += delta;
+        Some(*state)
+    })
+}
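A freestanding sketch of the delta decoding (illustration only): `new_doc` now writes doc id gaps, and `get_sum_reader` restores the absolute doc ids via a prefix sum:

    let deltas = [3u32, 2, 5]; // written when indexing doc ids 3, 5 and 10
    let doc_ids: Vec<u32> = get_sum_reader(deltas.iter().copied()).collect();
    assert_eq!(doc_ids, vec![3, 5, 10]);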
/// Recorder encoding document ids, and term frequencies
@@ -164,9 +173,10 @@ impl Recorder for TermFrequencyRecorder {
    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
+        let delta = doc - self.current_doc;
        self.term_doc_freq += 1;
        self.current_doc = doc;
-        self.stack.writer(arena).write_u32_vint(doc);
+        self.stack.writer(arena).write_u32_vint(delta);
    }
    #[inline]
@@ -193,9 +203,12 @@ impl Recorder for TermFrequencyRecorder {
        let mut u32_it = VInt32Reader::new(&buffer[..]);
        if let Some(doc_id_map) = doc_id_map {
            let mut doc_id_and_tf = vec![];
-            while let Some(old_doc_id) = u32_it.next() {
+            let mut prev_doc = 0;
+            while let Some(delta_doc_id) = u32_it.next() {
+                let doc_id = prev_doc + delta_doc_id;
+                prev_doc = doc_id;
                let term_freq = u32_it.next().unwrap_or(self.current_tf);
-                doc_id_and_tf.push((doc_id_map.get_new_doc_id(old_doc_id), term_freq));
+                doc_id_and_tf.push((doc_id_map.get_new_doc_id(doc_id), term_freq));
            }
            doc_id_and_tf.sort_unstable_by_key(|&(doc_id, _)| doc_id);
@@ -203,9 +216,12 @@ impl Recorder for TermFrequencyRecorder {
                serializer.write_doc(doc_id, tf, &[][..]);
            }
        } else {
-            while let Some(doc) = u32_it.next() {
+            let mut prev_doc = 0;
+            while let Some(delta_doc_id) = u32_it.next() {
+                let doc_id = prev_doc + delta_doc_id;
+                prev_doc = doc_id;
                let term_freq = u32_it.next().unwrap_or(self.current_tf);
-                serializer.write_doc(doc, term_freq, &[][..]);
+                serializer.write_doc(doc_id, term_freq, &[][..]);
            }
        }
    }
@@ -216,23 +232,13 @@ impl Recorder for TermFrequencyRecorder {
}
/// Recorder encoding term frequencies as well as positions.
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Default)]
pub struct TfAndPositionRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
    term_doc_freq: u32,
}
-impl Default for TfAndPositionRecorder {
-    fn default() -> Self {
-        TfAndPositionRecorder {
-            stack: ExpUnrolledLinkedList::default(),
-            current_doc: u32::MAX,
-            term_doc_freq: 0u32,
-        }
-    }
-}
impl Recorder for TfAndPositionRecorder {
    #[inline]
    fn current_doc(&self) -> DocId {
@@ -241,9 +247,10 @@ impl Recorder for TfAndPositionRecorder {
    #[inline]
    fn new_doc(&mut self, doc: DocId, arena: &mut MemoryArena) {
+        let delta = doc - self.current_doc;
        self.current_doc = doc;
        self.term_doc_freq += 1u32;
-        self.stack.writer(arena).write_u32_vint(doc);
+        self.stack.writer(arena).write_u32_vint(delta);
    }
    #[inline]
@@ -269,7 +276,10 @@ impl Recorder for TfAndPositionRecorder {
        self.stack.read_to_end(arena, buffer_u8);
        let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
        let mut doc_id_and_positions = vec![];
-        while let Some(doc) = u32_it.next() {
+        let mut prev_doc = 0;
+        while let Some(delta_doc_id) = u32_it.next() {
+            let doc_id = prev_doc + delta_doc_id;
+            prev_doc = doc_id;
            let mut prev_position_plus_one = 1u32;
            buffer_positions.clear();
            loop {
@@ -287,9 +297,9 @@
            if let Some(doc_id_map) = doc_id_map {
                // this simple variant to remap may consume too much memory
                doc_id_and_positions
-                    .push((doc_id_map.get_new_doc_id(doc), buffer_positions.to_vec()));
+                    .push((doc_id_map.get_new_doc_id(doc_id), buffer_positions.to_vec()));
            } else {
-                serializer.write_doc(doc, buffer_positions.len() as u32, buffer_positions);
+                serializer.write_doc(doc_id, buffer_positions.len() as u32, buffer_positions);
            }
        }
        if doc_id_map.is_some() {

View File

@@ -71,7 +71,7 @@ impl SegmentPostings {
        {
            let mut postings_serializer =
                PostingsSerializer::new(&mut buffer, 0.0, IndexRecordOption::Basic, None);
-            postings_serializer.new_term(docs.len() as u32);
+            postings_serializer.new_term(docs.len() as u32, false);
            for &doc in docs {
                postings_serializer.write_doc(doc, 1u32);
            }
@@ -120,7 +120,7 @@ impl SegmentPostings {
                IndexRecordOption::WithFreqs,
                fieldnorm_reader,
            );
-            postings_serializer.new_term(doc_and_tfs.len() as u32);
+            postings_serializer.new_term(doc_and_tfs.len() as u32, true);
            for &(doc, tf) in doc_and_tfs {
                postings_serializer.write_doc(doc, tf);
            }
@@ -238,14 +238,18 @@ impl Postings for SegmentPostings {
    }
    fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
-        let term_freq = self.term_freq() as usize;
+        let term_freq = self.term_freq();
        if let Some(position_reader) = self.position_reader.as_mut() {
+            debug_assert!(
+                !self.block_cursor.freqs().is_empty(),
+                "No positions available"
+            );
            let read_offset = self.block_cursor.position_offset()
                + (self.block_cursor.freqs()[..self.cur]
                    .iter()
                    .cloned()
                    .sum::<u32>() as u64);
-            output.resize(term_freq, 0u32);
+            output.resize(term_freq as usize, 0u32);
            position_reader.read(read_offset, &mut output[..]);
            let mut cum = offset;
            for output_mut in output.iter_mut() {

View File

@@ -168,7 +168,12 @@ impl<'a> FieldSerializer<'a> {
/// * term - the term. It needs to come after the previous term according to the lexicographical /// * term - the term. It needs to come after the previous term according to the lexicographical
/// order. /// order.
/// * term_doc_freq - returns the number of documents containing the term. /// * term_doc_freq - returns the number of documents containing the term.
pub fn new_term(&mut self, term: &[u8], term_doc_freq: u32) -> io::Result<()> { pub fn new_term(
&mut self,
term: &[u8],
term_doc_freq: u32,
record_term_freq: bool,
) -> io::Result<()> {
assert!( assert!(
!self.term_open, !self.term_open,
"Called new_term, while the previous term was not closed." "Called new_term, while the previous term was not closed."
@@ -177,7 +182,8 @@ impl<'a> FieldSerializer<'a> {
self.postings_serializer.clear(); self.postings_serializer.clear();
self.current_term_info = self.current_term_info(); self.current_term_info = self.current_term_info();
self.term_dictionary_builder.insert_key(term)?; self.term_dictionary_builder.insert_key(term)?;
self.postings_serializer.new_term(term_doc_freq); self.postings_serializer
.new_term(term_doc_freq, record_term_freq);
Ok(()) Ok(())
} }
@@ -330,10 +336,10 @@ impl<W: Write> PostingsSerializer<W> {
} }
} }
pub fn new_term(&mut self, term_doc_freq: u32) { pub fn new_term(&mut self, term_doc_freq: u32, record_term_freq: bool) {
self.bm25_weight = None; self.bm25_weight = None;
self.term_has_freq = self.mode.has_freq() && term_doc_freq != 0; self.term_has_freq = self.mode.has_freq() && record_term_freq;
if !self.term_has_freq { if !self.term_has_freq {
return; return;
} }
@@ -349,7 +355,7 @@ impl<W: Write> PostingsSerializer<W> {
return; return;
} }
self.bm25_weight = Some(Bm25Weight::for_one_term( self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
term_doc_freq as u64, term_doc_freq as u64,
num_docs_in_segment, num_docs_in_segment,
self.avg_fieldnorm, self.avg_fieldnorm,

View File

@@ -77,7 +77,7 @@ pub struct Bm25Params {
/// A struct used for computing BM25 scores. /// A struct used for computing BM25 scores.
#[derive(Clone)] #[derive(Clone)]
pub struct Bm25Weight { pub struct Bm25Weight {
idf_explain: Explanation, idf_explain: Option<Explanation>,
weight: Score, weight: Score,
cache: [Score; 256], cache: [Score; 256],
average_fieldnorm: Score, average_fieldnorm: Score,
@@ -147,11 +147,30 @@ impl Bm25Weight {
idf_explain.add_const("N, total number of docs", total_num_docs as Score); idf_explain.add_const("N, total number of docs", total_num_docs as Score);
Bm25Weight::new(idf_explain, avg_fieldnorm) Bm25Weight::new(idf_explain, avg_fieldnorm)
} }
/// Construct a [Bm25Weight] for a single term.
/// This method does not carry the [Explanation] for the idf.
pub fn for_one_term_without_explain(
term_doc_freq: u64,
total_num_docs: u64,
avg_fieldnorm: Score,
) -> Bm25Weight {
let idf = idf(term_doc_freq, total_num_docs);
Bm25Weight::new_without_explain(idf, avg_fieldnorm)
}
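`for_one_term_without_explain` reuses the same crate-private `idf` helper as the explained variant; a hedged sketch of the usual BM25 formulation it is assumed to implement:

```
// Assumed shape of the idf helper used above (Lucene-style BM25 idf):
// idf = ln(1 + (N - n + 0.5) / (n + 0.5))
fn idf(term_doc_freq: u64, total_num_docs: u64) -> f32 {
    let n = term_doc_freq as f32;
    let total = total_num_docs as f32;
    (1.0 + (total - n + 0.5) / (n + 0.5)).ln()
}
```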
pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight { pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf_explain.value() * (1.0 + K1); let weight = idf_explain.value() * (1.0 + K1);
Bm25Weight { Bm25Weight {
idf_explain, idf_explain: Some(idf_explain),
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
}
}
pub(crate) fn new_without_explain(idf: f32, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf * (1.0 + K1);
Bm25Weight {
idf_explain: None,
weight, weight,
cache: compute_tf_cache(average_fieldnorm), cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm, average_fieldnorm,
@@ -202,7 +221,9 @@ impl Bm25Weight {
let mut explanation = Explanation::new("TermQuery, product of...", score); let mut explanation = Explanation::new("TermQuery, product of...", score);
explanation.add_detail(Explanation::new("(K1+1)", K1 + 1.0)); explanation.add_detail(Explanation::new("(K1+1)", K1 + 1.0));
explanation.add_detail(self.idf_explain.clone()); if let Some(idf_explain) = &self.idf_explain {
explanation.add_detail(idf_explain.clone());
}
explanation.add_detail(tf_explanation); explanation.add_detail(tf_explanation);
explanation explanation
} }

View File

@@ -74,7 +74,8 @@ impl Weight for BoostWeight {
fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> { fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
let underlying_explanation = self.weight.explain(reader, doc)?; let underlying_explanation = self.weight.explain(reader, doc)?;
let score = underlying_explanation.value() * self.boost; let score = underlying_explanation.value() * self.boost;
let mut explanation = Explanation::new(format!("Boost x{} of ...", self.boost), score); let mut explanation =
Explanation::new_with_string(format!("Boost x{} of ...", self.boost), score);
explanation.add_detail(underlying_explanation); explanation.add_detail(underlying_explanation);
Ok(explanation) Ok(explanation)
} }
@@ -151,7 +152,7 @@ mod tests {
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap(); let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
assert_eq!( assert_eq!(
explanation.to_pretty_json(), explanation.to_pretty_json(),
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}" "{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\"\n }\n ]\n}"
); );
Ok(()) Ok(())
} }

View File

@@ -164,11 +164,9 @@ mod tests {
"details": [ "details": [
{ {
"value": 1.0, "value": 1.0,
"description": "AllQuery", "description": "AllQuery"
"context": []
} }
], ]
"context": []
}"# }"#
); );
Ok(()) Ok(())

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt; use std::fmt;
use serde::Serialize; use serde::Serialize;
@@ -16,12 +17,12 @@ pub(crate) fn does_not_match(doc: DocId) -> TantivyError {
#[derive(Clone, Serialize)] #[derive(Clone, Serialize)]
pub struct Explanation { pub struct Explanation {
value: Score, value: Score,
description: String, description: Cow<'static, str>,
#[serde(skip_serializing_if = "Vec::is_empty")] #[serde(skip_serializing_if = "Option::is_none")]
details: Vec<Explanation>, details: Option<Vec<Explanation>>,
context: Vec<String>, #[serde(skip_serializing_if = "Option::is_none")]
context: Option<Vec<String>>,
} }
impl fmt::Debug for Explanation { impl fmt::Debug for Explanation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Explanation({})", self.to_pretty_json()) write!(f, "Explanation({})", self.to_pretty_json())
@@ -30,12 +31,21 @@ impl fmt::Debug for Explanation {
impl Explanation { impl Explanation {
/// Creates a new explanation object. /// Creates a new explanation object.
pub fn new<T: ToString>(description: T, value: Score) -> Explanation { pub fn new_with_string(description: String, value: Score) -> Explanation {
Explanation { Explanation {
value, value,
description: description.to_string(), description: Cow::Owned(description),
details: vec![], details: None,
context: vec![], context: None,
}
}
/// Creates a new explanation object from a static string description.
pub fn new(description: &'static str, value: Score) -> Explanation {
Explanation {
value,
description: Cow::Borrowed(description),
details: None,
context: None,
} }
} }
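A small usage sketch of the two constructors: static descriptions go through the allocation-free `new`, dynamic ones through `new_with_string`.

```
fn boost_explanation(boost: Score, score: Score) -> Explanation {
    // &'static str: borrowed, no allocation
    let mut explanation = Explanation::new("TermQuery, product of...", score);
    // formatted String: owned variant
    explanation.add_detail(Explanation::new_with_string(
        format!("Boost x{boost} of ..."),
        score * boost,
    ));
    explanation
}
```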
@@ -48,17 +58,21 @@ impl Explanation {
/// ///
/// Details are treated as child of the current node. /// Details are treated as child of the current node.
pub fn add_detail(&mut self, child_explanation: Explanation) { pub fn add_detail(&mut self, child_explanation: Explanation) {
self.details.push(child_explanation); self.details
.get_or_insert_with(Vec::new)
.push(child_explanation);
} }
/// Adds some extra context to the explanation. /// Adds some extra context to the explanation.
pub fn add_context(&mut self, context: String) { pub fn add_context(&mut self, context: String) {
self.context.push(context); self.context.get_or_insert_with(Vec::new).push(context);
} }
/// Shortcut for `self.details.push(Explanation::new(name, value));` /// Shortcut for `self.details.push(Explanation::new(name, value));`
pub fn add_const<T: ToString>(&mut self, name: T, value: Score) { pub fn add_const(&mut self, name: &'static str, value: Score) {
self.details.push(Explanation::new(name, value)); self.details
.get_or_insert_with(Vec::new)
.push(Explanation::new(name, value));
} }
/// Returns an indented json representation of the explanation tree for debug usage. /// Returns an indented json representation of the explanation tree for debug usage.

View File

@@ -101,7 +101,7 @@ impl TermQuery {
.. ..
} => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?, } => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?,
EnableScoring::Disabled { .. } => { EnableScoring::Disabled { .. } => {
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32) Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
} }
}; };
let scoring_enabled = enable_scoring.is_scoring_enabled(); let scoring_enabled = enable_scoring.is_scoring_enabled();

View File

@@ -40,21 +40,25 @@ impl From<BytesOptionsDeser> for BytesOptions {
impl BytesOptions { impl BytesOptions {
/// Returns true if the value is indexed. /// Returns true if the value is indexed.
#[inline]
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
self.indexed self.indexed
} }
/// Returns true if and only if the value is normed. /// Returns true if and only if the value is normed.
#[inline]
pub fn fieldnorms(&self) -> bool { pub fn fieldnorms(&self) -> bool {
self.fieldnorms self.fieldnorms
} }
/// Returns true if the value is a fast field. /// Returns true if the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
self.fast self.fast
} }
/// Returns true if the value is stored. /// Returns true if the value is stored.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }

View File

@@ -27,21 +27,25 @@ pub struct DateOptions {
impl DateOptions { impl DateOptions {
/// Returns true iff the value is stored. /// Returns true iff the value is stored.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }
/// Returns true iff the value is indexed and therefore searchable. /// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
self.indexed self.indexed
} }
/// Returns true iff the field has fieldnorm. /// Returns true iff the field has fieldnorm.
#[inline]
pub fn fieldnorms(&self) -> bool { pub fn fieldnorms(&self) -> bool {
self.fieldnorms && self.indexed self.fieldnorms && self.indexed
} }
/// Returns true iff the value is a fast field. /// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
self.fast self.fast
} }

View File

@@ -131,16 +131,16 @@ impl Facet {
pub fn from_path<Path>(path: Path) -> Facet pub fn from_path<Path>(path: Path) -> Facet
where where
Path: IntoIterator, Path: IntoIterator,
Path::Item: ToString, Path::Item: AsRef<str>,
{ {
let mut facet_string: String = String::with_capacity(100); let mut facet_string: String = String::with_capacity(100);
let mut step_it = path.into_iter(); let mut step_it = path.into_iter();
if let Some(step) = step_it.next() { if let Some(step) = step_it.next() {
facet_string.push_str(&step.to_string()); facet_string.push_str(step.as_ref());
} }
for step in step_it { for step in step_it {
facet_string.push(FACET_SEP_CHAR); facet_string.push(FACET_SEP_CHAR);
facet_string.push_str(&step.to_string()); facet_string.push_str(step.as_ref());
} }
Facet(facet_string) Facet(facet_string)
} }
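With the `AsRef<str>` bound, callers can pass plain string slices without building intermediate `String`s; a usage sketch:

```
// Arrays and Vecs of &str both satisfy IntoIterator<Item: AsRef<str>>.
fn category_facet() -> Facet {
    Facet::from_path(["category", "electronics"])
}
```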

View File

@@ -14,6 +14,7 @@ pub struct FacetOptions {
impl FacetOptions { impl FacetOptions {
/// Returns true if the value is stored. /// Returns true if the value is stored.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }

View File

@@ -108,7 +108,16 @@ impl FieldEntry {
self.field_type.is_fast() self.field_type.is_fast()
} }
/// Returns true if the field has the expand dots option set (for json fields)
pub fn is_expand_dots_enabled(&self) -> bool {
match self.field_type {
FieldType::JsonObject(ref options) => options.is_expand_dots_enabled(),
_ => false,
}
}
/// Returns true if the field is stored /// Returns true if the field is stored
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
match self.field_type { match self.field_type {
FieldType::U64(ref options) FieldType::U64(ref options)

View File

@@ -3,6 +3,7 @@ use std::str::FromStr;
use base64::engine::general_purpose::STANDARD as BASE64; use base64::engine::general_purpose::STANDARD as BASE64;
use base64::Engine; use base64::Engine;
use columnar::ColumnType;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue; use serde_json::Value as JsonValue;
use thiserror::Error; use thiserror::Error;
@@ -47,7 +48,7 @@ pub enum ValueParsingError {
/// ///
/// Contrary to FieldType, this does /// Contrary to FieldType, this does
/// not include the way the field must be indexed. /// not include the way the field must be indexed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(u8)] #[repr(u8)]
pub enum Type { pub enum Type {
/// `&str` /// `&str`
@@ -72,6 +73,21 @@ pub enum Type {
IpAddr = b'p', IpAddr = b'p',
} }
impl From<ColumnType> for Type {
fn from(value: ColumnType) -> Self {
match value {
ColumnType::Str => Type::Str,
ColumnType::U64 => Type::U64,
ColumnType::I64 => Type::I64,
ColumnType::F64 => Type::F64,
ColumnType::Bool => Type::Bool,
ColumnType::DateTime => Type::Date,
ColumnType::Bytes => Type::Bytes,
ColumnType::IpAddr => Type::IpAddr,
}
}
}
const ALL_TYPES: [Type; 10] = [ const ALL_TYPES: [Type; 10] = [
Type::Str, Type::Str,
Type::U64, Type::U64,

View File

@@ -31,21 +31,25 @@ pub struct IpAddrOptions {
impl IpAddrOptions { impl IpAddrOptions {
/// Returns true iff the value is a fast field. /// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
self.fast self.fast
} }
/// Returns `true` if the ip address should be stored in the doc store. /// Returns `true` if the ip address should be stored in the doc store.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }
/// Returns true iff the value is indexed and therefore searchable. /// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
self.indexed self.indexed
} }
/// Returns true if and only if the value is normed. /// Returns true if and only if the value is normed.
#[inline]
pub fn fieldnorms(&self) -> bool { pub fn fieldnorms(&self) -> bool {
self.fieldnorms self.fieldnorms
} }

View File

@@ -46,17 +46,20 @@ pub struct JsonObjectOptions {
impl JsonObjectOptions { impl JsonObjectOptions {
/// Returns `true` if the json object should be stored. /// Returns `true` if the json object should be stored.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }
/// Returns `true` iff the json object should be indexed. /// Returns `true` iff the json object should be indexed.
#[inline]
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
self.indexing.is_some() self.indexing.is_some()
} }
/// Returns true if and only if the json object fields are /// Returns true if and only if the json object fields are
/// to be treated as fast fields. /// to be treated as fast fields.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
matches!(self.fast, FastFieldTextOptions::IsEnabled(true)) matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
|| matches!( || matches!(
@@ -66,6 +69,7 @@ impl JsonObjectOptions {
} }
/// Returns the fast field tokenizer name, if one is configured. /// Returns the fast field tokenizer name, if one is configured.
#[inline]
pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> { pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
match &self.fast { match &self.fast {
FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None, FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
@@ -87,6 +91,7 @@ impl JsonObjectOptions {
/// ///
/// If disabled, the "." needs to be escaped: /// If disabled, the "." needs to be escaped:
/// `k8s\.node\.id:5`. /// `k8s\.node\.id:5`.
#[inline]
pub fn is_expand_dots_enabled(&self) -> bool { pub fn is_expand_dots_enabled(&self) -> bool {
self.expand_dots_enabled self.expand_dots_enabled
} }
@@ -103,6 +108,7 @@ impl JsonObjectOptions {
/// If set to `Some` then both int and str values will be indexed. /// If set to `Some` then both int and str values will be indexed.
/// The inner `TextFieldIndexing` will however, only apply to the str values /// The inner `TextFieldIndexing` will however, only apply to the str values
/// in the json object. /// in the json object.
#[inline]
pub fn get_text_indexing_options(&self) -> Option<&TextFieldIndexing> { pub fn get_text_indexing_options(&self) -> Option<&TextFieldIndexing> {
self.indexing.as_ref() self.indexing.as_ref()
} }

View File

@@ -57,26 +57,31 @@ impl From<NumericOptionsDeser> for NumericOptions {
impl NumericOptions { impl NumericOptions {
/// Returns true iff the value is stored in the doc store. /// Returns true iff the value is stored in the doc store.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }
/// Returns true iff the value is indexed and therefore searchable. /// Returns true iff the value is indexed and therefore searchable.
#[inline]
pub fn is_indexed(&self) -> bool { pub fn is_indexed(&self) -> bool {
self.indexed self.indexed
} }
/// Returns true iff the field has fieldnorm. /// Returns true iff the field has fieldnorm.
#[inline]
pub fn fieldnorms(&self) -> bool { pub fn fieldnorms(&self) -> bool {
self.fieldnorms && self.indexed self.fieldnorms && self.indexed
} }
/// Returns true iff the value is a fast field. /// Returns true iff the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
self.fast self.fast
} }
/// Returns true if values should be coerced to numbers. /// Returns true if values should be coerced to numbers.
#[inline]
pub fn should_coerce(&self) -> bool { pub fn should_coerce(&self) -> bool {
self.coerce self.coerce
} }

View File

@@ -8,6 +8,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
use super::ip_options::IpAddrOptions; use super::ip_options::IpAddrOptions;
use super::*; use super::*;
use crate::json_utils::split_json_path;
use crate::schema::bytes_options::BytesOptions; use crate::schema::bytes_options::BytesOptions;
use crate::TantivyError; use crate::TantivyError;
@@ -30,7 +31,7 @@ use crate::TantivyError;
/// let body_field = schema_builder.add_text_field("body", TEXT); /// let body_field = schema_builder.add_text_field("body", TEXT);
/// let schema = schema_builder.build(); /// let schema = schema_builder.build();
/// ``` /// ```
#[derive(Default)] #[derive(Debug, Default)]
pub struct SchemaBuilder { pub struct SchemaBuilder {
fields: Vec<FieldEntry>, fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>, fields_map: HashMap<String, Field>,
@@ -328,12 +329,19 @@ impl Schema {
if let Some(field) = self.0.fields_map.get(full_path) { if let Some(field) = self.0.fields_map.get(full_path) {
return Some((*field, "")); return Some((*field, ""));
} }
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path); let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() { while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos); let (prefix, suffix) = full_path.split_at(pos);
if let Some(field) = self.0.fields_map.get(prefix) { if let Some(field) = self.0.fields_map.get(prefix) {
return Some((*field, &suffix[1..])); return Some((*field, &suffix[1..]));
} }
// A JSON path may contain a literal dot; for now we try both variants to find the field.
let prefix = split_json_path(prefix).join(".");
if let Some(field) = self.0.fields_map.get(&prefix) {
return Some((*field, &suffix[1..]));
}
} }
None None
} }
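As a hypothetical illustration of the fallback: a JSON field whose name contains a literal dot can now be resolved from a path that escapes it (this assumes `find_field` is the public entry point to this lookup, and that `TEXT` converts into JSON options as elsewhere in tantivy):

```
fn resolve_dotted_field() {
    let mut builder = Schema::builder();
    builder.add_json_field("k8s.node", TEXT);
    let schema = builder.build();
    // "k8s\.node.id": the escaped prefix "k8s\.node" is normalized to the
    // field name "k8s.node" via split_json_path, leaving "id" as json path.
    let (_field, json_path) = schema.find_field("k8s\\.node.id").unwrap();
    assert_eq!(json_path, "id");
}
```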
@@ -349,6 +357,7 @@ impl Schema {
pub fn find_field_with_default<'a>( pub fn find_field_with_default<'a>(
&self, &self,
full_path: &'a str, full_path: &'a str,
default_field_opt: Option<Field>, default_field_opt: Option<Field>,
) -> Option<(Field, &'a str)> { ) -> Option<(Field, &'a str)> {
let (field, json_path) = self let (field, json_path) = self

View File

@@ -72,16 +72,19 @@ fn is_false(val: &bool) -> bool {
impl TextOptions { impl TextOptions {
/// Returns the indexing options. /// Returns the indexing options.
#[inline]
pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> { pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> {
self.indexing.as_ref() self.indexing.as_ref()
} }
/// Returns true if the text is to be stored. /// Returns true if the text is to be stored.
#[inline]
pub fn is_stored(&self) -> bool { pub fn is_stored(&self) -> bool {
self.stored self.stored
} }
/// Returns true if and only if the value is a fast field. /// Returns true if and only if the value is a fast field.
#[inline]
pub fn is_fast(&self) -> bool { pub fn is_fast(&self) -> bool {
matches!(self.fast, FastFieldTextOptions::IsEnabled(true)) matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
|| matches!( || matches!(
@@ -91,6 +94,7 @@ impl TextOptions {
} }
/// Returns the fast field tokenizer name, if one is configured. /// Returns the fast field tokenizer name, if one is configured.
#[inline]
pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> { pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
match &self.fast { match &self.fast {
FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None, FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
@@ -101,6 +105,7 @@ impl TextOptions {
} }
/// Returns true if values should be coerced to strings (numbers, null). /// Returns true if values should be coerced to strings (numbers, null).
#[inline]
pub fn should_coerce(&self) -> bool { pub fn should_coerce(&self) -> bool {
self.coerce self.coerce
} }

View File

@@ -117,6 +117,7 @@ impl SegmentSpaceUsage {
use self::ComponentSpaceUsage::*; use self::ComponentSpaceUsage::*;
use crate::SegmentComponent::*; use crate::SegmentComponent::*;
match component { match component {
FieldList => ComponentSpaceUsage::Basic(ByteCount::from(0u64)),
Postings => PerField(self.postings().clone()), Postings => PerField(self.postings().clone()),
Positions => PerField(self.positions().clone()), Positions => PerField(self.positions().clone()),
FastFields => PerField(self.fast_fields().clone()), FastFields => PerField(self.fast_fields().clone()),

View File

@@ -11,16 +11,22 @@ description = "sstables for tantivy"
[dependencies] [dependencies]
common = {version= "0.6", path="../common", package="tantivy-common"} common = {version= "0.6", path="../common", package="tantivy-common"}
tantivy-fst = "0.4" tantivy-bitpacker = { version= "0.5", path="../bitpacker" }
tantivy-fst = "0.5"
# experimental gives us access to Decompressor::upper_bound # experimental gives us access to Decompressor::upper_bound
zstd = { version = "0.13", features = ["experimental"] } zstd = { version = "0.13", features = ["experimental"] }
[dev-dependencies] [dev-dependencies]
proptest = "1" proptest = "1"
criterion = "0.5" criterion = { version = "0.5", default-features = false }
names = "0.14" names = "0.14"
rand = "0.8" rand = "0.8"
[[bench]] [[bench]]
name = "stream_bench" name = "stream_bench"
harness = false harness = false
[[bench]]
name = "ord_to_term"
harness = false
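Assuming a standard Criterion setup, the new benchmark can be run with `cargo bench --bench ord_to_term`.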

View File

@@ -89,33 +89,71 @@ Note: as the SSTable does not support redundant keys, there is no ambiguity betw
### SSTFooter ### SSTFooter
``` ```
+-------+-------+-----+-------------+---------+---------+ +-----+----------------+-------------+-------------+---------+---------+
| Block | Block | ... | IndexOffset | NumTerm | Version | | Fst | BlockAddrStore | StoreOffset | IndexOffset | NumTerm | Version |
+-------+-------+-----+-------------+---------+---------+ +-----+----------------+-------------+-------------+---------+---------+
|----( # of blocks)---|
``` ```
- Block(SSTBlock): uses IndexValue for its Values format - Fst(Fst): finite state transducer mapping keys to a block number
- BlockAddrStore(BlockAddrStore): store mapping a block number to its BlockAddr
- StoreOffset(u64): Offset to start of the BlockAddrStore. If zero, see the SingleBlockSStable section
- IndexOffset(u64): Offset to the start of the SSTFooter - IndexOffset(u64): Offset to the start of the SSTFooter
- NumTerm(u64): number of terms in the sstable - NumTerm(u64): number of terms in the sstable
- Version(u32): Currently equal to 2 - Version(u32): Currently equal to 3
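A sketch of decoding that trailing fixed-size footer, assuming the little-endian layout written by the serializer later in this diff (names illustrative):

```
// Reads the fixed 20-byte trailer: IndexOffset | NumTerm | Version.
fn read_sst_footer(trailer: &[u8; 20]) -> (u64, u64, u32) {
    let index_offset = u64::from_le_bytes(trailer[0..8].try_into().unwrap());
    let num_terms = u64::from_le_bytes(trailer[8..16].try_into().unwrap());
    let version = u32::from_le_bytes(trailer[16..20].try_into().unwrap());
    (index_offset, num_terms, version)
}
```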
### IndexValue ### Fst
```
+------------+----------+-------+-------+-----+
| EntryCount | StartPos | Entry | Entry | ... |
+------------+----------+-------+-------+-----+
|---( # of entries)---|
```
- EntryCount(VInt): number of entries Fst is in the format of tantivy\_fst
- StartPos(VInt): the start pos of the first (data) block referenced by this (index) block
- Entry (IndexEntry)
### Entry ### BlockAddrStore
```
+----------+--------------+ +---------+-----------+-----------+-----+-----------+-----------+-----+
| BlockLen | FirstOrdinal | | MetaLen | BlockMeta | BlockMeta | ... | BlockData | BlockData | ... |
+----------+--------------+ +---------+-----------+-----------+-----+-----------+-----------+-----+
``` |---------(N blocks)----------|---------(N blocks)----------|
- BlockLen(VInt): length of the block
- FirstOrdinal(VInt): ordinal of the first element in the given block - MetaLen(u64): length of the BlockMeta section
- BlockMeta(BlockAddrBlockMetadata): metadata to seek through BlockData
- BlockData(CompactedBlockAddr): bitpacked per block metadata
### BlockAddrBlockMetadata
```
+--------+------------+--------------+------------+--------------+-------------------+-----------------+----------+
| Offset | RangeStart | FirstOrdinal | RangeSlope | OrdinalSlope | FirstOrdinalNBits | RangeStartNBits | BlockLen |
+--------+------------+--------------+------------+--------------+-------------------+-----------------+----------+
```
- Offset(u64): offset of the corresponding BlockData in the datastream
- RangeStart(u64): the start position of the first block
- FirstOrdinal(u64): the first ordinal of the first block
- RangeSlope(u32): slope predicted for start range evolution (see computation in BlockData)
- OrdinalSlope(u32): slope predicted for first ordinal evolution (see computation in BlockData)
- FirstOrdinalNBits(u8): number of bits per ordinal in datastream (see computation in BlockData)
- RangeStartNBits(u8): number of bits per range start in datastream (see computation in BlockData)
### BlockData
```
+-----------------+-------------------+---------------+
| RangeStartDelta | FirstOrdinalDelta | FinalRangeEnd |
+-----------------+-------------------+---------------+
|------(BlockLen repetitions)---------|
```
- RangeStartDelta(var): RangeStartNBits *bits* of a little-endian number. See below for decoding
- FirstOrdinalDelta(var): FirstOrdinalNBits *bits* of a little-endian number. See below for decoding
- FinalRangeEnd(var): RangeStartNBits *bits* of a little-endian number. See below for decoding
Converting a BlockData entry at index Index, together with its BlockAddrBlockMetadata, to an actual block address is done as follows:
range\_prediction := RangeStart + Index * RangeSlope;
range\_derivation := RangeStartDelta - (1 << (RangeStartNBits-1));
range\_start := range\_prediction + range\_derivation
The same computation can be done for the first ordinal.
Note that `range_derivation` can take negative values; `RangeStartDelta` is just its translation to a positive range.
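A sketch of that computation for the range start (the first ordinal follows the same pattern):

```
// Decodes one range start, with parameters named as in the text above.
fn decode_range_start(
    range_start: u64,       // RangeStart
    index: u64,             // index of the entry inside the block
    range_slope: u64,       // RangeSlope
    range_start_delta: u64, // RangeStartDelta, RangeStartNBits bits wide
    range_start_nbits: u8,  // RangeStartNBits
) -> u64 {
    let prediction = range_start + index * range_slope;
    // the stored delta is shifted so that a negative correction fits
    // in an unsigned bit-packed field
    let derivation = range_start_delta as i64 - (1i64 << (range_start_nbits - 1));
    (prediction as i64 + derivation) as u64
}
```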
## SingleBlockSStable
The format used for the index is meant to be compact; however, it has a constant cost of around 70
bytes, which isn't negligible for a table containing very few keys.
To limit the impact of that constant cost, single-block sstables omit the Fst and BlockAddrStore from
their index. Instead, a block with a first ordinal of 0, a range start of 0 and a range end of IndexOffset
is implicitly used for every operation.
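In code, the implicit block mirrors the pseudo-index built in `Dictionary::open` later in this diff:

```
// Everything from offset 0 up to the index acts as a single block.
fn single_block_pseudo_index(index_offset: usize) -> BlockAddr {
    BlockAddr {
        first_ordinal: 0,
        byte_range: 0..index_offset,
    }
}
```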

View File

@@ -0,0 +1,110 @@
use std::sync::Arc;
use common::file_slice::FileSlice;
use common::OwnedBytes;
use criterion::{criterion_group, criterion_main, Criterion};
use tantivy_sstable::{self, Dictionary, MonotonicU64SSTable};
fn make_test_sstable(suffix: &str) -> FileSlice {
let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();
// 125 million elements
for elem in 0..125_000_000 {
let key = format!("prefix.{elem:07X}{suffix}").into_bytes();
builder.insert(&key, &elem).unwrap();
}
let table = builder.finish().unwrap();
let table = Arc::new(OwnedBytes::new(table));
common::file_slice::FileSlice::new(table)
}
pub fn criterion_benchmark(c: &mut Criterion) {
{
let slice = make_test_sstable(".suffix");
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
c.bench_function("ord_to_term_suffix", |b| {
let mut res = Vec::new();
b.iter(|| {
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("open_and_ord_to_term_suffix", |b| {
let mut res = Vec::new();
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("term_ord_suffix", |b| {
b.iter(|| {
assert_eq!(
dict.term_ord(b"prefix.00186A0.suffix").unwrap().unwrap(),
100_000
);
assert_eq!(
dict.term_ord(b"prefix.121EAC0.suffix").unwrap().unwrap(),
19_000_000
);
})
});
c.bench_function("open_and_term_ord_suffix", |b| {
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert_eq!(
dict.term_ord(b"prefix.00186A0.suffix").unwrap().unwrap(),
100_000
);
assert_eq!(
dict.term_ord(b"prefix.121EAC0.suffix").unwrap().unwrap(),
19_000_000
);
})
});
}
{
let slice = make_test_sstable("");
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
c.bench_function("ord_to_term", |b| {
let mut res = Vec::new();
b.iter(|| {
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("open_and_ord_to_term", |b| {
let mut res = Vec::new();
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert!(dict.ord_to_term(100_000, &mut res).unwrap());
assert!(dict.ord_to_term(19_000_000, &mut res).unwrap());
})
});
c.bench_function("term_ord", |b| {
b.iter(|| {
assert_eq!(dict.term_ord(b"prefix.00186A0").unwrap().unwrap(), 100_000);
assert_eq!(
dict.term_ord(b"prefix.121EAC0").unwrap().unwrap(),
19_000_000
);
})
});
c.bench_function("open_and_term_ord", |b| {
b.iter(|| {
let dict = Dictionary::<MonotonicU64SSTable>::open(slice.clone()).unwrap();
assert_eq!(dict.term_ord(b"prefix.00186A0").unwrap().unwrap(), 100_000);
assert_eq!(
dict.term_ord(b"prefix.121EAC0").unwrap().unwrap(),
19_000_000
);
})
});
}
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@@ -9,8 +9,11 @@ use common::{BinarySerializable, OwnedBytes};
use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton; use tantivy_fst::Automaton;
use crate::sstable_index_v3::SSTableIndexV3Empty;
use crate::streamer::{Streamer, StreamerBuilder}; use crate::streamer::{Streamer, StreamerBuilder};
use crate::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, TermOrdinal, VoidSSTable}; use crate::{
BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, SSTableIndexV3, TermOrdinal, VoidSSTable,
};
/// An SSTable is a sorted map that associates sorted `&[u8]` keys /// An SSTable is a sorted map that associates sorted `&[u8]` keys
/// to any kind of typed values. /// to any kind of typed values.
@@ -180,24 +183,41 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> { pub fn open(term_dictionary_file: FileSlice) -> io::Result<Self> {
let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(20); let (main_slice, footer_len_slice) = term_dictionary_file.split_from_end(20);
let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?; let mut footer_len_bytes: OwnedBytes = footer_len_slice.read_bytes()?;
let index_offset = u64::deserialize(&mut footer_len_bytes)?; let index_offset = u64::deserialize(&mut footer_len_bytes)?;
let num_terms = u64::deserialize(&mut footer_len_bytes)?; let num_terms = u64::deserialize(&mut footer_len_bytes)?;
let version = u32::deserialize(&mut footer_len_bytes)?; let version = u32::deserialize(&mut footer_len_bytes)?;
if version != crate::SSTABLE_VERSION {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Unsuported sstable version, expected {version}, found {}",
crate::SSTABLE_VERSION,
),
));
}
let (sstable_slice, index_slice) = main_slice.split(index_offset as usize); let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
let sstable_index_bytes = index_slice.read_bytes()?; let sstable_index_bytes = index_slice.read_bytes()?;
let sstable_index = SSTableIndex::load(sstable_index_bytes)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption"))?; let sstable_index = match version {
2 => SSTableIndex::V2(
crate::sstable_index_v2::SSTableIndex::load(sstable_index_bytes).map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
})?,
),
3 => {
let (sstable_index_bytes, mut footerv3_len_bytes) = sstable_index_bytes.rsplit(8);
let store_offset = u64::deserialize(&mut footerv3_len_bytes)?;
if store_offset != 0 {
SSTableIndex::V3(
SSTableIndexV3::load(sstable_index_bytes, store_offset).map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
})?,
)
} else {
// if store_offset is zero, there is no index, so we build a pseudo-index
// assuming a single block of sstable covering everything.
SSTableIndex::V3Empty(SSTableIndexV3Empty::load(index_offset as usize))
}
}
_ => {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("Unsuported sstable version, expected one of [2, 3], found {version}"),
))
}
};
Ok(Dictionary { Ok(Dictionary {
sstable_slice, sstable_slice,
sstable_index, sstable_index,

View File

@@ -10,8 +10,9 @@ pub mod merge;
mod streamer; mod streamer;
pub mod value; pub mod value;
mod sstable_index; mod sstable_index_v3;
pub use sstable_index::{BlockAddr, SSTableIndex, SSTableIndexBuilder}; pub use sstable_index_v3::{BlockAddr, SSTableIndex, SSTableIndexBuilder, SSTableIndexV3};
mod sstable_index_v2;
pub(crate) mod vint; pub(crate) mod vint;
pub use dictionary::Dictionary; pub use dictionary::Dictionary;
pub use streamer::{Streamer, StreamerBuilder}; pub use streamer::{Streamer, StreamerBuilder};
@@ -28,7 +29,7 @@ use crate::value::{RangeValueReader, RangeValueWriter};
pub type TermOrdinal = u64; pub type TermOrdinal = u64;
const DEFAULT_KEY_CAPACITY: usize = 50; const DEFAULT_KEY_CAPACITY: usize = 50;
const SSTABLE_VERSION: u32 = 2; const SSTABLE_VERSION: u32 = 3;
/// Given two byte strings, returns the length of /// Given two byte strings, returns the length of
/// the longest common prefix. /// the longest common prefix.
@@ -304,7 +305,8 @@ where
let offset = wrt.written_bytes(); let offset = wrt.written_bytes();
self.index_builder.serialize(&mut wrt)?; let fst_len: u64 = self.index_builder.serialize(&mut wrt)?;
wrt.write_all(&fst_len.to_le_bytes())?;
wrt.write_all(&offset.to_le_bytes())?; wrt.write_all(&offset.to_le_bytes())?;
wrt.write_all(&self.num_terms.to_le_bytes())?; wrt.write_all(&self.num_terms.to_le_bytes())?;
@@ -385,13 +387,10 @@ mod test {
16, 17, 33, 18, 19, 17, 20, // data block 16, 17, 33, 18, 19, 17, 20, // data block
0, 0, 0, 0, // no more block 0, 0, 0, 0, // no more block
// index // index
8, 0, 0, 0, // size of index block 0, 0, 0, 0, 0, 0, 0, 0, // fst length
0, // compression
1, 0, 12, 0, 32, 17, 20, // index block
0, 0, 0, 0, // no more index block
16, 0, 0, 0, 0, 0, 0, 0, // index start offset 16, 0, 0, 0, 0, 0, 0, 0, // index start offset
3, 0, 0, 0, 0, 0, 0, 0, // num term 3, 0, 0, 0, 0, 0, 0, 0, // num term
2, 0, 0, 0, // version 3, 0, 0, 0, // version
] ]
); );
let buffer = OwnedBytes::new(buffer); let buffer = OwnedBytes::new(buffer);

View File

@@ -1,266 +0,0 @@
use std::io::{self, Write};
use std::ops::Range;
use common::OwnedBytes;
use crate::{common_prefix_len, SSTable, SSTableDataCorruption, TermOrdinal};
#[derive(Default, Debug, Clone)]
pub struct SSTableIndex {
blocks: Vec<BlockMeta>,
}
impl SSTableIndex {
/// Load an index from its binary representation
pub fn load(data: OwnedBytes) -> Result<SSTableIndex, SSTableDataCorruption> {
let mut reader = IndexSSTable::reader(data);
let mut blocks = Vec::new();
while reader.advance().map_err(|_| SSTableDataCorruption)? {
blocks.push(BlockMeta {
last_key_or_greater: reader.key().to_vec(),
block_addr: reader.value().clone(),
});
}
Ok(SSTableIndex { blocks })
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: usize) -> Option<BlockAddr> {
self.blocks
.get(block_id)
.map(|block_meta| block_meta.block_addr.clone())
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<usize> {
let pos = self
.blocks
.binary_search_by_key(&key, |block| &block.last_key_or_greater);
match pos {
Ok(pos) => Some(pos),
Err(pos) => {
if pos < self.blocks.len() {
Some(pos)
} else {
// after end of last block: no block matches
None
}
}
}
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
self.locate_with_key(key).and_then(|id| self.get_block(id))
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> usize {
let pos = self
.blocks
.binary_search_by_key(&ord, |block| block.block_addr.first_ordinal);
match pos {
Ok(pos) => pos,
// Err(0) can't happen as the sstable starts with ordinal zero
Err(pos) => pos - 1,
}
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
// locate_with_ord always returns an index within range
self.get_block(self.locate_with_ord(ord)).unwrap()
}
}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BlockAddr {
pub byte_range: Range<usize>,
pub first_ordinal: u64,
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
#[derive(Default)]
pub struct SSTableIndexBuilder {
index: SSTableIndex,
}
/// Given that left < right,
/// mutates `left` into a shorter byte string `left'` that
/// matches `left <= left' < right`.
fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
assert!(&left[..] < right);
let common_len = common_prefix_len(left, right);
if left.len() == common_len {
return;
}
// It is possible to go one character shorter in some cases,
// but it is not worth the extra complexity
for pos in (common_len + 1)..left.len() {
if left[pos] != u8::MAX {
left[pos] += 1;
left.truncate(pos + 1);
return;
}
}
}
impl SSTableIndexBuilder {
/// In order to make the index as light as possible, we
/// try to find a shorter alternative to the last key of the last block
/// that is still smaller than the next key.
pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
if let Some(last_block) = self.index.blocks.last_mut() {
find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
}
}
pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
self.index.blocks.push(BlockMeta {
last_key_or_greater: last_key.to_vec(),
block_addr: BlockAddr {
byte_range,
first_ordinal,
},
})
}
pub fn serialize<W: std::io::Write>(&self, wrt: W) -> io::Result<()> {
// we can't use a plain writer as it would generate an index
let mut sstable_writer = IndexSSTable::delta_writer(wrt);
// in tests, set a smaller block size to stress-test
#[cfg(test)]
sstable_writer.set_block_len(16);
let mut previous_key = Vec::with_capacity(crate::DEFAULT_KEY_CAPACITY);
for block in self.index.blocks.iter() {
let keep_len = common_prefix_len(&previous_key, &block.last_key_or_greater);
sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]);
sstable_writer.write_value(&block.block_addr);
sstable_writer.flush_block_if_required()?;
previous_key.clear();
previous_key.extend_from_slice(&block.last_key_or_greater);
}
sstable_writer.flush_block()?;
sstable_writer.finish().write_all(&0u32.to_le_bytes())?;
Ok(())
}
}
/// SSTable representing an index
///
/// `last_key_or_greater` is used as the key, the value contains the
/// length and first ordinal of each block. The start offset is implicitly
/// obtained from lengths.
struct IndexSSTable;
impl SSTable for IndexSSTable {
type Value = BlockAddr;
type ValueReader = crate::value::index::IndexValueReader;
type ValueWriter = crate::value::index::IndexValueWriter;
}
#[cfg(test)]
mod tests {
use common::OwnedBytes;
use super::{BlockAddr, SSTableIndex, SSTableIndexBuilder};
use crate::SSTableDataCorruption;
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
sstable_builder.serialize(&mut buffer).unwrap();
let buffer = OwnedBytes::new(buffer);
let sstable_index = SSTableIndex::load(buffer).unwrap();
assert_eq!(
sstable_index.get_block_with_key(b"bbbde"),
Some(BlockAddr {
first_ordinal: 10u64,
byte_range: 30..40
})
);
assert_eq!(sstable_index.locate_with_key(b"aa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aaa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aab").unwrap(), 1);
assert_eq!(sstable_index.locate_with_key(b"ccc").unwrap(), 2);
assert!(sstable_index.locate_with_key(b"e").is_none());
assert_eq!(sstable_index.locate_with_ord(0), 0);
assert_eq!(sstable_index.locate_with_ord(1), 0);
assert_eq!(sstable_index.locate_with_ord(4), 0);
assert_eq!(sstable_index.locate_with_ord(5), 1);
assert_eq!(sstable_index.locate_with_ord(100), 3);
}
#[test]
fn test_sstable_with_corrupted_data() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
sstable_builder.serialize(&mut buffer).unwrap();
buffer[2] = 9u8;
let buffer = OwnedBytes::new(buffer);
let data_corruption_err = SSTableIndex::load(buffer).err().unwrap();
assert!(matches!(data_corruption_err, SSTableDataCorruption));
}
#[track_caller]
fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
let mut left_buf = left.to_vec();
super::find_shorter_str_in_between(&mut left_buf, right);
assert!(left_buf.len() <= left.len());
assert!(left <= &left_buf);
assert!(&left_buf[..] < right);
}
#[test]
fn test_find_shorter_str_in_between() {
test_find_shorter_str_in_between_aux(b"", b"hello");
test_find_shorter_str_in_between_aux(b"abc", b"abcd");
test_find_shorter_str_in_between_aux(b"abcd", b"abd");
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
}
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
if left < right {
test_find_shorter_str_in_between_aux(&left, &right);
}
}
}
}

View File

@@ -0,0 +1,101 @@
use common::OwnedBytes;
use crate::{BlockAddr, SSTable, SSTableDataCorruption, TermOrdinal};
#[derive(Default, Debug, Clone)]
pub struct SSTableIndex {
blocks: Vec<BlockMeta>,
}
impl SSTableIndex {
/// Load an index from its binary representation
pub fn load(data: OwnedBytes) -> Result<SSTableIndex, SSTableDataCorruption> {
let mut reader = IndexSSTable::reader(data);
let mut blocks = Vec::new();
while reader.advance().map_err(|_| SSTableDataCorruption)? {
blocks.push(BlockMeta {
last_key_or_greater: reader.key().to_vec(),
block_addr: reader.value().clone(),
});
}
Ok(SSTableIndex { blocks })
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: usize) -> Option<BlockAddr> {
self.blocks
.get(block_id)
.map(|block_meta| block_meta.block_addr.clone())
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<usize> {
let pos = self
.blocks
.binary_search_by_key(&key, |block| &block.last_key_or_greater);
match pos {
Ok(pos) => Some(pos),
Err(pos) => {
if pos < self.blocks.len() {
Some(pos)
} else {
// after end of last block: no block matches
None
}
}
}
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
self.locate_with_key(key).and_then(|id| self.get_block(id))
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> usize {
let pos = self
.blocks
.binary_search_by_key(&ord, |block| block.block_addr.first_ordinal);
match pos {
Ok(pos) => pos,
// Err(0) can't happen as the sstable starts with ordinal zero
Err(pos) => pos - 1,
}
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
// locate_with_ord always returns an index within range
self.get_block(self.locate_with_ord(ord)).unwrap()
}
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
/// SSTable representing an index
///
/// `last_key_or_greater` is used as the key, the value contains the
/// length and first ordinal of each block. The start offset is implicitly
/// obtained from lengths.
struct IndexSSTable;
impl SSTable for IndexSSTable {
type Value = BlockAddr;
type ValueReader = crate::value::index::IndexValueReader;
type ValueWriter = crate::value::index::IndexValueWriter;
}

View File

@@ -0,0 +1,826 @@
use std::io::{self, Read, Write};
use std::ops::Range;
use std::sync::Arc;
use common::{BinarySerializable, FixedSize, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker};
use tantivy_fst::raw::Fst;
use tantivy_fst::{IntoStreamer, Map, MapBuilder, Streamer};
use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
#[derive(Debug, Clone)]
pub enum SSTableIndex {
V2(crate::sstable_index_v2::SSTableIndex),
V3(SSTableIndexV3),
V3Empty(SSTableIndexV3Empty),
}
impl SSTableIndex {
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block(block_id as usize),
SSTableIndex::V3(v3_index) => v3_index.get_block(block_id),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block(block_id),
}
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
match self {
SSTableIndex::V2(v2_index) => v2_index.locate_with_key(key).map(|i| i as u64),
SSTableIndex::V3(v3_index) => v3_index.locate_with_key(key),
SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_key(key),
}
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block_with_key(key),
SSTableIndex::V3(v3_index) => v3_index.get_block_with_key(key),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_key(key),
}
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
match self {
SSTableIndex::V2(v2_index) => v2_index.locate_with_ord(ord) as u64,
SSTableIndex::V3(v3_index) => v3_index.locate_with_ord(ord),
SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_ord(ord),
}
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
match self {
SSTableIndex::V2(v2_index) => v2_index.get_block_with_ord(ord),
SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord(ord),
SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_ord(ord),
}
}
}
#[derive(Debug, Clone)]
pub struct SSTableIndexV3 {
fst_index: Arc<Map<OwnedBytes>>,
block_addr_store: BlockAddrStore,
}
impl SSTableIndexV3 {
/// Load an index from its binary representation
pub fn load(
data: OwnedBytes,
fst_length: u64,
) -> Result<SSTableIndexV3, SSTableDataCorruption> {
let (fst_slice, block_addr_store_slice) = data.split(fst_length as usize);
let fst_index = Fst::new(fst_slice)
.map_err(|_| SSTableDataCorruption)?
.into();
let block_addr_store =
BlockAddrStore::open(block_addr_store_slice).map_err(|_| SSTableDataCorruption)?;
Ok(SSTableIndexV3 {
fst_index: Arc::new(fst_index),
block_addr_store,
})
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
self.block_addr_store.get(block_id)
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
self.fst_index
.range()
.ge(key)
.into_stream()
.next()
.map(|(_key, id)| id)
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
self.locate_with_key(key).and_then(|id| self.get_block(id))
}
pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
self.block_addr_store.binary_search_ord(ord).0
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
self.block_addr_store.binary_search_ord(ord).1
}
}
#[derive(Debug, Clone)]
pub struct SSTableIndexV3Empty {
block_addr: BlockAddr,
}
impl SSTableIndexV3Empty {
pub fn load(index_start_pos: usize) -> SSTableIndexV3Empty {
SSTableIndexV3Empty {
block_addr: BlockAddr {
first_ordinal: 0,
byte_range: 0..index_start_pos,
},
}
}
/// Get the [`BlockAddr`] of the requested block.
pub(crate) fn get_block(&self, _block_id: u64) -> Option<BlockAddr> {
Some(self.block_addr.clone())
}
/// Get the block id of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub(crate) fn locate_with_key(&self, _key: &[u8]) -> Option<u64> {
Some(0)
}
/// Get the [`BlockAddr`] of the block that would contain `key`.
///
/// Returns None if `key` is lexicographically after the last key recorded.
pub fn get_block_with_key(&self, _key: &[u8]) -> Option<BlockAddr> {
Some(self.block_addr.clone())
}
pub(crate) fn locate_with_ord(&self, _ord: TermOrdinal) -> u64 {
0
}
/// Get the [`BlockAddr`] of the block containing the `ord`-th term.
pub(crate) fn get_block_with_ord(&self, _ord: TermOrdinal) -> BlockAddr {
self.block_addr.clone()
}
}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct BlockAddr {
pub first_ordinal: u64,
pub byte_range: Range<usize>,
}
impl BlockAddr {
fn to_block_start(&self) -> BlockStartAddr {
BlockStartAddr {
first_ordinal: self.first_ordinal,
byte_range_start: self.byte_range.start,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct BlockStartAddr {
first_ordinal: u64,
byte_range_start: usize,
}
impl BlockStartAddr {
fn to_block_addr(&self, byte_range_end: usize) -> BlockAddr {
BlockAddr {
first_ordinal: self.first_ordinal,
byte_range: self.byte_range_start..byte_range_end,
}
}
}
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
/// Any byte string that is lexicographically greater or equal to
/// the last key in the block,
/// and yet strictly smaller than the first key in the next block.
pub last_key_or_greater: Vec<u8>,
pub block_addr: BlockAddr,
}
impl BinarySerializable for BlockStartAddr {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
let start = self.byte_range_start as u64;
start.serialize(writer)?;
self.first_ordinal.serialize(writer)
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let byte_range_start = u64::deserialize(reader)? as usize;
let first_ordinal = u64::deserialize(reader)?;
Ok(BlockStartAddr {
first_ordinal,
byte_range_start,
})
}
// Provided method
fn num_bytes(&self) -> u64 {
BlockStartAddr::SIZE_IN_BYTES as u64
}
}
impl FixedSize for BlockStartAddr {
const SIZE_IN_BYTES: usize = 2 * u64::SIZE_IN_BYTES;
}
/// Given that left < right,
/// mutates `left` into a shorter byte string `left'` that
/// matches `left <= left' < right`.
fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
assert!(&left[..] < right);
let common_len = common_prefix_len(left, right);
if left.len() == common_len {
return;
}
// It is possible to go one character shorter in some cases,
// but it is not worth the extra complexity
for pos in (common_len + 1)..left.len() {
if left[pos] != u8::MAX {
left[pos] += 1;
left.truncate(pos + 1);
return;
}
}
}
#[derive(Default)]
pub struct SSTableIndexBuilder {
blocks: Vec<BlockMeta>,
}
impl SSTableIndexBuilder {
/// In order to make the index as light as possible, we
/// try to find a shorter alternative to the last key of the last block
/// that is still smaller than the next key.
pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
if let Some(last_block) = self.blocks.last_mut() {
find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
}
}
pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
self.blocks.push(BlockMeta {
last_key_or_greater: last_key.to_vec(),
block_addr: BlockAddr {
byte_range,
first_ordinal,
},
})
}
pub fn serialize<W: std::io::Write>(&self, wrt: W) -> io::Result<u64> {
if self.blocks.len() <= 1 {
return Ok(0);
}
let counting_writer = common::CountingWriter::wrap(wrt);
let mut map_builder = MapBuilder::new(counting_writer).map_err(fst_error_to_io_error)?;
for (i, block) in self.blocks.iter().enumerate() {
map_builder
.insert(&block.last_key_or_greater, i as u64)
.map_err(fst_error_to_io_error)?;
}
let counting_writer = map_builder.into_inner().map_err(fst_error_to_io_error)?;
let written_bytes = counting_writer.written_bytes();
let mut wrt = counting_writer.finish();
let mut block_store_writer = BlockAddrStoreWriter::new();
for block in &self.blocks {
block_store_writer.write_block_meta(block.block_addr.clone())?;
}
block_store_writer.serialize(&mut wrt)?;
Ok(written_bytes)
}
}
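// Resulting layout, as far as it can be read from `serialize` above and from
// `BlockAddrStoreWriter::serialize` below: [fst map: key -> block id]
// [u64: length of the block-meta section][fixed-size BlockAddrBlockMetadata
// records][bit-packed block addresses]. The returned value is the fst length,
// which the loader needs in order to split the file back into these parts.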
fn fst_error_to_io_error(error: tantivy_fst::Error) -> io::Error {
match error {
tantivy_fst::Error::Fst(fst_error) => io::Error::new(io::ErrorKind::Other, fst_error),
tantivy_fst::Error::Io(ioerror) => ioerror,
}
}
const STORE_BLOCK_LEN: usize = 128;
#[derive(Debug)]
struct BlockAddrBlockMetadata {
offset: u64,
ref_block_addr: BlockStartAddr,
range_start_slope: u32,
first_ordinal_slope: u32,
range_start_nbits: u8,
first_ordinal_nbits: u8,
block_len: u16,
// these fields are computed on deserialization, and not stored
range_shift: i64,
ordinal_shift: i64,
}
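// Decoding sketch (illustrative numbers, not from the original source): each
// entry stores only the error of a linear prediction. With range_start_slope = 10
// and range_start_nbits = 4 (hence range_shift = 1 << 3 = 8), a stored 4-bit
// value v for entry i decodes to
// ref_block_addr.byte_range_start + v + 10 * (i + 1) - 8,
// so prediction errors in [-8, 7] fit in 4 bits per entry.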
impl BlockAddrBlockMetadata {
fn num_bits(&self) -> u8 {
self.first_ordinal_nbits + self.range_start_nbits
}
fn deserialize_block_addr(&self, data: &[u8], inner_offset: usize) -> Option<BlockAddr> {
if inner_offset == 0 {
let range_end = self.ref_block_addr.byte_range_start
+ extract_bits(data, 0, self.range_start_nbits) as usize
+ self.range_start_slope as usize
- self.range_shift as usize;
return Some(self.ref_block_addr.to_block_addr(range_end));
}
let inner_offset = inner_offset - 1;
if inner_offset >= self.block_len as usize {
return None;
}
let num_bits = self.num_bits() as usize;
let range_start_addr = num_bits * inner_offset;
let ordinal_addr = range_start_addr + self.range_start_nbits as usize;
let range_end_addr = range_start_addr + num_bits;
if (range_end_addr + self.range_start_nbits as usize + 7) / 8 > data.len() {
return None;
}
let range_start = self.ref_block_addr.byte_range_start
+ extract_bits(data, range_start_addr, self.range_start_nbits) as usize
+ self.range_start_slope as usize * (inner_offset + 1)
- self.range_shift as usize;
let first_ordinal = self.ref_block_addr.first_ordinal
+ extract_bits(data, ordinal_addr, self.first_ordinal_nbits)
+ self.first_ordinal_slope as u64 * (inner_offset + 1) as u64
- self.ordinal_shift as u64;
let range_end = self.ref_block_addr.byte_range_start
+ extract_bits(data, range_end_addr, self.range_start_nbits) as usize
+ self.range_start_slope as usize * (inner_offset + 2)
- self.range_shift as usize;
Some(BlockAddr {
first_ordinal,
byte_range: range_start..range_end,
})
}
fn bisect_for_ord(&self, data: &[u8], target_ord: TermOrdinal) -> (u64, BlockAddr) {
let inner_target_ord = target_ord - self.ref_block_addr.first_ordinal;
let num_bits = self.num_bits() as usize;
let range_start_nbits = self.range_start_nbits as usize;
let get_ord = |index| {
extract_bits(
data,
num_bits * index as usize + range_start_nbits,
self.first_ordinal_nbits,
) + self.first_ordinal_slope as u64 * (index + 1)
- self.ordinal_shift as u64
};
let inner_offset = match binary_search(self.block_len as u64, |index| {
get_ord(index).cmp(&inner_target_ord)
}) {
Ok(inner_offset) => inner_offset + 1,
Err(inner_offset) => inner_offset,
};
// we can unwrap because inner_offset <= self.block_len
(
inner_offset,
self.deserialize_block_addr(data, inner_offset as usize)
.unwrap(),
)
}
}
// TODO move this function to tantivy_common?
#[inline(always)]
fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
assert!(num_bits <= 56);
let addr_byte = addr_bits / 8;
let bit_shift = (addr_bits % 8) as u64;
let val_unshifted_unmasked: u64 = if data.len() >= addr_byte + 8 {
let b = data[addr_byte..addr_byte + 8].try_into().unwrap();
u64::from_le_bytes(b)
} else {
// the buffer is not large enough.
// Let's copy the few remaining bytes to a 8 byte buffer
// padded with 0s.
let mut buf = [0u8; 8];
let data_to_copy = &data[addr_byte..];
let nbytes = data_to_copy.len();
buf[..nbytes].copy_from_slice(data_to_copy);
u64::from_le_bytes(buf)
};
let val_shifted_unmasked = val_unshifted_unmasked >> bit_shift;
let mask = (1u64 << u64::from(num_bits)) - 1;
val_shifted_unmasked & mask
}
impl BinarySerializable for BlockAddrBlockMetadata {
fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
self.offset.serialize(write)?;
self.ref_block_addr.serialize(write)?;
self.range_start_slope.serialize(write)?;
self.first_ordinal_slope.serialize(write)?;
write.write_all(&[self.first_ordinal_nbits, self.range_start_nbits])?;
self.block_len.serialize(write)?;
Ok(())
}
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let offset = u64::deserialize(reader)?;
let ref_block_addr = BlockStartAddr::deserialize(reader)?;
let range_start_slope = u32::deserialize(reader)?;
let first_ordinal_slope = u32::deserialize(reader)?;
let mut buffer = [0u8; 2];
reader.read_exact(&mut buffer)?;
let first_ordinal_nbits = buffer[0];
let range_start_nbits = buffer[1];
let block_len = u16::deserialize(reader)?;
Ok(BlockAddrBlockMetadata {
offset,
ref_block_addr,
range_start_slope,
first_ordinal_slope,
range_start_nbits,
first_ordinal_nbits,
block_len,
range_shift: 1 << (range_start_nbits - 1),
ordinal_shift: 1 << (first_ordinal_nbits - 1),
})
}
}
impl FixedSize for BlockAddrBlockMetadata {
const SIZE_IN_BYTES: usize = u64::SIZE_IN_BYTES
+ BlockStartAddr::SIZE_IN_BYTES
+ 2 * u32::SIZE_IN_BYTES
+ 2 * u8::SIZE_IN_BYTES
+ u16::SIZE_IN_BYTES;
}
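// With the field sizes above this works out to 8 + 16 + 2 * 4 + 2 * 1 + 2 = 36
// bytes: one fixed-size record per STORE_BLOCK_LEN-sized group of blocks, which
// is what makes direct indexing into the meta section possible.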
#[derive(Debug, Clone)]
struct BlockAddrStore {
block_meta_bytes: OwnedBytes,
addr_bytes: OwnedBytes,
}
impl BlockAddrStore {
fn open(term_info_store_file: OwnedBytes) -> io::Result<BlockAddrStore> {
let (mut len_slice, main_slice) = term_info_store_file.split(8);
let len = u64::deserialize(&mut len_slice)? as usize;
let (block_meta_bytes, addr_bytes) = main_slice.split(len);
Ok(BlockAddrStore {
block_meta_bytes,
addr_bytes,
})
}
fn get_block_meta(&self, store_block_id: usize) -> Option<BlockAddrBlockMetadata> {
let mut block_data: &[u8] = self
.block_meta_bytes
.get(store_block_id * BlockAddrBlockMetadata::SIZE_IN_BYTES..)?;
BlockAddrBlockMetadata::deserialize(&mut block_data).ok()
}
fn get(&self, block_id: u64) -> Option<BlockAddr> {
let store_block_id = (block_id as usize) / STORE_BLOCK_LEN;
let inner_offset = (block_id as usize) % STORE_BLOCK_LEN;
let block_addr_block_data = self.get_block_meta(store_block_id)?;
block_addr_block_data.deserialize_block_addr(
&self.addr_bytes[block_addr_block_data.offset as usize..],
inner_offset,
)
}
fn binary_search_ord(&self, ord: TermOrdinal) -> (u64, BlockAddr) {
let max_block =
(self.block_meta_bytes.len() / BlockAddrBlockMetadata::SIZE_IN_BYTES) as u64;
let get_first_ordinal = |block_id| {
// we can unwrap because block_id < max_block
self.get(block_id * STORE_BLOCK_LEN as u64)
.unwrap()
.first_ordinal
};
let store_block_id =
binary_search(max_block, |block_id| get_first_ordinal(block_id).cmp(&ord));
let store_block_id = match store_block_id {
Ok(store_block_id) => {
let block_id = store_block_id * STORE_BLOCK_LEN as u64;
// we can unwrap because store_block_id < max_block
return (block_id, self.get(block_id).unwrap());
}
Err(store_block_id) => store_block_id - 1,
};
// we can unwrap because store_block_id < max_block
let block_addr_block_data = self.get_block_meta(store_block_id as usize).unwrap();
let (inner_offset, block_addr) = block_addr_block_data.bisect_for_ord(
&self.addr_bytes[block_addr_block_data.offset as usize..],
ord,
);
(
store_block_id * STORE_BLOCK_LEN as u64 + inner_offset,
block_addr,
)
}
}
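// Lookup sketch (illustrative numbers): for block_id = 300 with STORE_BLOCK_LEN =
// 128, `get` reads fixed-size meta record 300 / 128 = 2, then decodes entry
// 300 % 128 = 44 from that record's bit-packed region.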
fn binary_search(max: u64, cmp_fn: impl Fn(u64) -> std::cmp::Ordering) -> Result<u64, u64> {
use std::cmp::Ordering::*;
let mut size = max;
let mut left = 0;
let mut right = size;
while left < right {
let mid = left + size / 2;
let cmp = cmp_fn(mid);
if cmp == Less {
left = mid + 1;
} else if cmp == Greater {
right = mid;
} else {
return Ok(mid);
}
size = right - left;
}
Err(left)
}
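// The semantics mirror `slice::binary_search_by` (illustrative example):
// `binary_search(8, |i| i.cmp(&5))` returns Ok(5), while a comparator that never
// returns Equal yields Err(insertion_point), which the callers above turn into
// "the block just before".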
struct BlockAddrStoreWriter {
buffer_block_metas: Vec<u8>,
buffer_addrs: Vec<u8>,
block_addrs: Vec<BlockAddr>,
}
impl BlockAddrStoreWriter {
fn new() -> Self {
BlockAddrStoreWriter {
buffer_block_metas: Vec::new(),
buffer_addrs: Vec::new(),
block_addrs: Vec::with_capacity(STORE_BLOCK_LEN),
}
}
fn flush_block(&mut self) -> io::Result<()> {
if self.block_addrs.is_empty() {
return Ok(());
}
let ref_block_addr = self.block_addrs[0].clone();
for block_addr in &mut self.block_addrs {
block_addr.byte_range.start -= ref_block_addr.byte_range.start;
block_addr.first_ordinal -= ref_block_addr.first_ordinal;
}
// we are only called if block_addrs is not empty
let mut last_block_addr = self.block_addrs.last().unwrap().clone();
last_block_addr.byte_range.end -= ref_block_addr.byte_range.start;
// we skip(1), so we never give an index of 0 to find_best_slope
let (range_start_slope, range_start_nbits) = find_best_slope(
self.block_addrs
.iter()
.map(|block| block.byte_range.start as u64)
.chain(std::iter::once(last_block_addr.byte_range.end as u64))
.enumerate()
.skip(1),
);
// we skip(1), so we never give an index of 0 to find_best_slope
let (first_ordinal_slope, first_ordinal_nbits) = find_best_slope(
self.block_addrs
.iter()
.map(|block| block.first_ordinal)
.enumerate()
.skip(1),
);
let range_shift = 1 << (range_start_nbits - 1);
let ordinal_shift = 1 << (first_ordinal_nbits - 1);
let block_addr_block_meta = BlockAddrBlockMetadata {
offset: self.buffer_addrs.len() as u64,
ref_block_addr: ref_block_addr.to_block_start(),
range_start_slope,
first_ordinal_slope,
range_start_nbits,
first_ordinal_nbits,
block_len: self.block_addrs.len() as u16 - 1,
range_shift,
ordinal_shift,
};
block_addr_block_meta.serialize(&mut self.buffer_block_metas)?;
let mut bit_packer = BitPacker::new();
for (i, block_addr) in self.block_addrs.iter().enumerate().skip(1) {
let range_pred = (range_start_slope as usize * i) as i64;
bit_packer.write(
(block_addr.byte_range.start as i64 - range_pred + range_shift) as u64,
range_start_nbits,
&mut self.buffer_addrs,
)?;
let first_ordinal_pred = (first_ordinal_slope as u64 * i as u64) as i64;
bit_packer.write(
(block_addr.first_ordinal as i64 - first_ordinal_pred + ordinal_shift) as u64,
first_ordinal_nbits,
&mut self.buffer_addrs,
)?;
}
let range_pred = (range_start_slope as usize * self.block_addrs.len()) as i64;
bit_packer.write(
(last_block_addr.byte_range.end as i64 - range_pred + range_shift) as u64,
range_start_nbits,
&mut self.buffer_addrs,
)?;
bit_packer.flush(&mut self.buffer_addrs)?;
self.block_addrs.clear();
Ok(())
}
fn write_block_meta(&mut self, block_addr: BlockAddr) -> io::Result<()> {
self.block_addrs.push(block_addr);
if self.block_addrs.len() >= STORE_BLOCK_LEN {
self.flush_block()?;
}
Ok(())
}
fn serialize<W: std::io::Write>(&mut self, wrt: &mut W) -> io::Result<()> {
self.flush_block()?;
let len = self.buffer_block_metas.len() as u64;
len.serialize(wrt)?;
wrt.write_all(&self.buffer_block_metas)?;
wrt.write_all(&self.buffer_addrs)?;
Ok(())
}
}
/// Given an iterator over (index, value), returns the slope, and the number of bits needed
/// to represent the error of a prediction made with this slope.
///
/// The iterator may be empty, but all indexes in it must be non-zero.
fn find_best_slope(elements: impl Iterator<Item = (usize, u64)> + Clone) -> (u32, u8) {
let slope_iterator = elements.clone();
let derivation_iterator = elements;
let mut min_slope_idx = 1;
let mut min_slope_val = 0;
let mut min_slope = u32::MAX;
let mut max_slope_idx = 1;
let mut max_slope_val = 0;
let mut max_slope = 0;
for (index, value) in slope_iterator {
let slope = (value / index as u64) as u32;
if slope <= min_slope {
min_slope = slope;
min_slope_idx = index;
min_slope_val = value;
}
if slope >= max_slope {
max_slope = slope;
max_slope_idx = index;
max_slope_val = value;
}
}
// The above is a heuristic giving the "highest" and "lowest" points. It's imperfect in that
// a point that appears earlier might have a high slope deviation, but a smaller absolute
// deviation than a later point.
// The actual best values could be obtained with the simplex method, but the improvement is
// likely minimal and the computation far more complex.
//
// Assuming these points are the furthest up and down, we find the slope that causes the
// same positive deviation for the highest point as negative deviation for the lowest.
// A is the optimal slope. B is the deviation of the prediction.
//
// 0 = min_slope_val - min_slope_idx * A - B
// 0 = max_slope_val - max_slope_idx * A + B
//
// 0 = min_slope_val + max_slope_val - (min_slope_idx + max_slope_idx) * A
// (min_slope_val + max_slope_val) / (min_slope_idx + max_slope_idx) = A
//
// we actually add some correcting factor to have proper rounding, not truncation.
let denominator = (min_slope_idx + max_slope_idx) as u64;
let final_slope = ((min_slope_val + max_slope_val + denominator / 2) / denominator) as u32;
// we don't solve for B because our choice of points is suboptimal: B would only be a lower
// bound, so we iterate to find the actual worst deviation.
let max_derivation: u64 = derivation_iterator
.map(|(index, value)| (value as i64 - final_slope as i64 * index as i64).unsigned_abs())
.max()
.unwrap_or(0);
(final_slope, compute_num_bits(max_derivation) + 1)
}
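// Worked example (illustrative): for elements (1, 10), (2, 22), (3, 30) the
// per-point slopes are 10, 11 and 10, so the minimum is taken at (3, 30) and the
// maximum at (2, 22). final_slope = (30 + 22 + 5 / 2) / 5 = 10, the worst
// absolute error is |22 - 10 * 2| = 2, and compute_num_bits(2) + 1 = 3 bits
// are reserved per entry.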
#[cfg(test)]
mod tests {
use common::OwnedBytes;
use super::{BlockAddr, SSTableIndexBuilder, SSTableIndexV3};
use crate::SSTableDataCorruption;
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
let fst_len = sstable_builder.serialize(&mut buffer).unwrap();
let buffer = OwnedBytes::new(buffer);
let sstable_index = SSTableIndexV3::load(buffer, fst_len).unwrap();
assert_eq!(
sstable_index.get_block_with_key(b"bbbde"),
Some(BlockAddr {
first_ordinal: 10u64,
byte_range: 30..40
})
);
assert_eq!(sstable_index.locate_with_key(b"aa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aaa").unwrap(), 0);
assert_eq!(sstable_index.locate_with_key(b"aab").unwrap(), 1);
assert_eq!(sstable_index.locate_with_key(b"ccc").unwrap(), 2);
assert!(sstable_index.locate_with_key(b"e").is_none());
assert_eq!(sstable_index.locate_with_ord(0), 0);
assert_eq!(sstable_index.locate_with_ord(1), 0);
assert_eq!(sstable_index.locate_with_ord(4), 0);
assert_eq!(sstable_index.locate_with_ord(5), 1);
assert_eq!(sstable_index.locate_with_ord(100), 3);
}
#[test]
fn test_sstable_with_corrupted_data() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);
let mut buffer: Vec<u8> = Vec::new();
let fst_len = sstable_builder.serialize(&mut buffer).unwrap();
buffer[2] = 9u8;
let buffer = OwnedBytes::new(buffer);
let data_corruption_err = SSTableIndexV3::load(buffer, fst_len).err().unwrap();
assert!(matches!(data_corruption_err, SSTableDataCorruption));
}
#[track_caller]
fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
let mut left_buf = left.to_vec();
super::find_shorter_str_in_between(&mut left_buf, right);
assert!(left_buf.len() <= left.len());
assert!(left <= &left_buf);
assert!(&left_buf[..] < right);
}
#[test]
fn test_find_shorter_str_in_between() {
test_find_shorter_str_in_between_aux(b"", b"hello");
test_find_shorter_str_in_between_aux(b"abc", b"abcd");
test_find_shorter_str_in_between_aux(b"abcd", b"abd");
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
}
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(100))]
#[test]
fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
if left < right {
test_find_shorter_str_in_between_aux(&left, &right);
}
}
}
#[test]
fn test_find_best_slope() {
assert_eq!(super::find_best_slope(std::iter::empty()), (0, 1));
assert_eq!(
super::find_best_slope(std::iter::once((1, 12345))),
(12345, 1)
);
}
}

View File

@@ -1,51 +1,5 @@
-use std::iter::{Cloned, Filter};
-use std::mem;
 use super::{Addr, MemoryArena};
-use crate::fastcpy::fast_short_slice_copy;
+use crate::shared_arena_hashmap::SharedArenaHashMap;
-use crate::memory_arena::store;
-/// Returns the actual memory size in bytes
-/// required to create a table with a given capacity.
-/// required to create a table of size
-pub fn compute_table_memory_size(capacity: usize) -> usize {
-capacity * mem::size_of::<KeyValue>()
-}
-#[cfg(not(feature = "compare_hash_only"))]
-type HashType = u32;
-#[cfg(feature = "compare_hash_only")]
-type HashType = u64;
-/// `KeyValue` is the item stored in the hash table.
-/// The key is actually a `BytesRef` object stored in an external memory arena.
-/// The `value_addr` also points to an address in the memory arena.
-#[derive(Copy, Clone)]
-struct KeyValue {
-pub(crate) key_value_addr: Addr,
-hash: HashType,
-}
-impl Default for KeyValue {
-fn default() -> Self {
-KeyValue {
-key_value_addr: Addr::null_pointer(),
-hash: 0,
-}
-}
-}
-impl KeyValue {
-#[inline]
-fn is_empty(&self) -> bool {
-self.key_value_addr.is_null()
-}
-#[inline]
-fn is_not_empty_ref(&self) -> bool {
-!self.key_value_addr.is_null()
-}
-}
 /// Customized `HashMap` with `&[u8]` keys
 ///
@@ -56,61 +10,13 @@ impl KeyValue {
 /// The quirky API has the benefit of avoiding
 /// the computation of the hash of the key twice,
 /// or copying the key as long as there is no insert.
-pub struct ArenaHashMap {
-table: Vec<KeyValue>,
-pub memory_arena: MemoryArena,
-mask: usize,
-len: usize,
-}
-struct LinearProbing {
-pos: usize,
-mask: usize,
-}
-impl LinearProbing {
-#[inline]
-fn compute(hash: HashType, mask: usize) -> LinearProbing {
-LinearProbing {
-pos: hash as usize,
-mask,
-}
-}
-#[inline]
-fn next_probe(&mut self) -> usize {
-// Not saving the masked version removes a dependency.
-self.pos = self.pos.wrapping_add(1);
-self.pos & self.mask
-}
-}
-type IterNonEmpty<'a> = Filter<Cloned<std::slice::Iter<'a, KeyValue>>, fn(&KeyValue) -> bool>;
-pub struct Iter<'a> {
-hashmap: &'a ArenaHashMap,
-inner: IterNonEmpty<'a>,
-}
-impl<'a> Iterator for Iter<'a> {
-type Item = (&'a [u8], Addr);
-fn next(&mut self) -> Option<Self::Item> {
-self.inner.next().map(move |kv| {
-let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
-(key, offset)
-})
-}
-}
-/// Returns the greatest power of two lower or equal to `n`.
-/// Except if n == 0, in that case, return 1.
 ///
-/// # Panics if n == 0
-fn compute_previous_power_of_two(n: usize) -> usize {
-assert!(n > 0);
-let msb = (63u32 - (n as u64).leading_zeros()) as u8;
-1 << msb
+/// ArenaHashMap is like SharedArenaHashMap but takes ownership
+/// of the memory arena. The memory arena stores the serialized
+/// keys and values.
+pub struct ArenaHashMap {
+shared_arena_hashmap: SharedArenaHashMap,
+pub memory_arena: MemoryArena,
 }
 impl Default for ArenaHashMap {
@@ -121,156 +27,44 @@ impl Default for ArenaHashMap {
 impl ArenaHashMap {
 pub fn with_capacity(table_size: usize) -> ArenaHashMap {
-let table_size_power_of_2 = compute_previous_power_of_two(table_size);
 let memory_arena = MemoryArena::default();
-let table = vec![KeyValue::default(); table_size_power_of_2];
 ArenaHashMap {
-table,
+shared_arena_hashmap: SharedArenaHashMap::with_capacity(table_size),
 memory_arena,
-mask: table_size_power_of_2 - 1,
-len: 0,
 }
 }
-#[inline]
-#[cfg(not(feature = "compare_hash_only"))]
-fn get_hash(&self, key: &[u8]) -> HashType {
-murmurhash32::murmurhash2(key)
-}
-#[inline]
-#[cfg(feature = "compare_hash_only")]
-fn get_hash(&self, key: &[u8]) -> HashType {
-/// Since we compare only the hash we need a high quality hash.
-use std::hash::Hasher;
-let mut hasher = ahash::AHasher::default();
-hasher.write(key);
-hasher.finish() as HashType
-}
 #[inline]
 pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
 self.memory_arena.read(addr)
 }
-#[inline]
-fn probe(&self, hash: HashType) -> LinearProbing {
-LinearProbing::compute(hash, self.mask)
-}
 #[inline]
 pub fn mem_usage(&self) -> usize {
-self.table.len() * mem::size_of::<KeyValue>() + self.memory_arena.mem_usage()
+self.shared_arena_hashmap.mem_usage() + self.memory_arena.mem_usage()
 }
-#[inline]
-fn is_saturated(&self) -> bool {
-self.table.len() <= self.len * 2
-}
-#[inline]
-fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
-let data = self.memory_arena.slice_from(addr);
-let key_bytes_len_bytes = unsafe { data.get_unchecked(..2) };
-let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
-let key_bytes: &[u8] = unsafe { data.get_unchecked(2..2 + key_bytes_len as usize) };
-(key_bytes, addr.offset(2 + key_bytes_len as u32))
-}
-#[inline]
-#[cfg(not(feature = "compare_hash_only"))]
-fn get_value_addr_if_key_match(&self, target_key: &[u8], addr: Addr) -> Option<Addr> {
-use crate::fastcmp::fast_short_slice_compare;
-let (stored_key, value_addr) = self.get_key_value(addr);
-if fast_short_slice_compare(stored_key, target_key) {
-Some(value_addr)
-} else {
-None
-}
-}
-#[inline]
-#[cfg(feature = "compare_hash_only")]
-fn get_value_addr_if_key_match(&self, _target_key: &[u8], addr: Addr) -> Option<Addr> {
-// For the compare_hash_only feature, it would make sense to store the keys at a different
-// memory location. Here they will just pollute the cache.
-let data = self.memory_arena.slice_from(addr);
-let key_bytes_len_bytes = &data[..2];
-let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
-let value_addr = addr.offset(2 + key_bytes_len as u32);
-Some(value_addr)
-}
-#[inline]
-fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) {
-self.len += 1;
-self.table[bucket] = KeyValue {
-key_value_addr,
-hash,
-};
-}
 #[inline]
 pub fn is_empty(&self) -> bool {
-self.len() == 0
+self.shared_arena_hashmap.is_empty()
 }
 #[inline]
 pub fn len(&self) -> usize {
-self.len
+self.shared_arena_hashmap.len()
 }
 #[inline]
-pub fn iter(&self) -> Iter<'_> {
-Iter {
-inner: self
-.table
-.iter()
-.cloned()
-.filter(KeyValue::is_not_empty_ref),
-hashmap: self,
-}
-}
-fn resize(&mut self) {
-let new_len = (self.table.len() * 2).max(1 << 13);
-let mask = new_len - 1;
-self.mask = mask;
-let new_table = vec![KeyValue::default(); new_len];
-let old_table = mem::replace(&mut self.table, new_table);
-for key_value in old_table.into_iter().filter(KeyValue::is_not_empty_ref) {
-let mut probe = LinearProbing::compute(key_value.hash, mask);
-loop {
-let bucket = probe.next_probe();
-if self.table[bucket].is_empty() {
-self.table[bucket] = key_value;
-break;
-}
-}
-}
-}
+pub fn iter(&self) -> impl Iterator<Item = (&[u8], Addr)> {
+self.shared_arena_hashmap.iter(&self.memory_arena)
+}
 /// Get a value associated to a key.
 #[inline]
 pub fn get<V>(&self, key: &[u8]) -> Option<V>
 where V: Copy + 'static {
-let hash = self.get_hash(key);
-let mut probe = self.probe(hash);
-loop {
-let bucket = probe.next_probe();
-let kv: KeyValue = self.table[bucket];
-if kv.is_empty() {
-return None;
-} else if kv.hash == hash {
-if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
-let v = self.memory_arena.read(val_addr);
-return Some(v);
-}
-}
-}
+self.shared_arena_hashmap.get(key, &self.memory_arena)
 }
 /// `update` create a new entry for a given key if it does not exist
@@ -284,45 +78,10 @@ impl ArenaHashMap {
 /// If the key already as an associated value, then it will be passed
 /// `Some(previous_value)`.
 #[inline]
-pub fn mutate_or_create<V>(&mut self, key: &[u8], mut updater: impl FnMut(Option<V>) -> V)
+pub fn mutate_or_create<V>(&mut self, key: &[u8], updater: impl FnMut(Option<V>) -> V)
 where V: Copy + 'static {
-if self.is_saturated() {
-self.resize();
-}
-let hash = self.get_hash(key);
-let mut probe = self.probe(hash);
-let mut bucket = probe.next_probe();
-let mut kv: KeyValue = self.table[bucket];
-loop {
-if kv.is_empty() {
-// The key does not exist yet.
-let val = updater(None);
-let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
-let key_addr = self.memory_arena.allocate_space(num_bytes);
-{
-let data = self.memory_arena.slice_mut(key_addr, num_bytes);
-let key_len_bytes: [u8; 2] = (key.len() as u16).to_le_bytes();
-data[..2].copy_from_slice(&key_len_bytes);
-let stop = 2 + key.len();
-fast_short_slice_copy(key, &mut data[2..stop]);
-store(&mut data[stop..], val);
-}
-self.set_bucket(hash, key_addr, bucket);
-return;
-}
-if kv.hash == hash {
-if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
-let v = self.memory_arena.read(val_addr);
-let new_v = updater(Some(v));
-self.memory_arena.write_at(val_addr, new_v);
-return;
-}
-}
-// This allows fetching the next bucket before the loop jmp
-bucket = probe.next_probe();
-kv = self.table[bucket];
-}
+self.shared_arena_hashmap
+.mutate_or_create(key, &mut self.memory_arena, updater);
 }
 }
@@ -331,7 +90,7 @@ mod tests {
 use std::collections::HashMap;
-use super::{compute_previous_power_of_two, ArenaHashMap};
+use super::ArenaHashMap;
 #[test]
 fn test_hash_map() {
@@ -362,14 +121,6 @@
 assert_eq!(hash_map.get::<u32>(b"abc"), None);
 }
-#[test]
-fn test_compute_previous_power_of_two() {
-assert_eq!(compute_previous_power_of_two(8), 8);
-assert_eq!(compute_previous_power_of_two(9), 8);
-assert_eq!(compute_previous_power_of_two(7), 4);
-assert_eq!(compute_previous_power_of_two(u64::MAX as usize), 1 << 63);
-}
 #[test]
 fn test_many_terms() {
 let mut terms: Vec<String> = (0..20_000).map(|val| val.to_string()).collect();

View File

@@ -9,10 +9,12 @@ mod expull;
 mod fastcmp;
 mod fastcpy;
 mod memory_arena;
+mod shared_arena_hashmap;
-pub use self::arena_hashmap::{compute_table_memory_size, ArenaHashMap};
+pub use self::arena_hashmap::ArenaHashMap;
 pub use self::expull::ExpUnrolledLinkedList;
 pub use self::memory_arena::{Addr, MemoryArena};
+pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap};
 /// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
 pub type UnorderedId = u32;

View File

@@ -0,0 +1,420 @@
use std::iter::{Cloned, Filter};
use std::mem;
use super::{Addr, MemoryArena};
use crate::fastcpy::fast_short_slice_copy;
use crate::memory_arena::store;
/// Returns the actual memory size in bytes
/// required to create a table with a given capacity.
pub fn compute_table_memory_size(capacity: usize) -> usize {
capacity * mem::size_of::<KeyValue>()
}
#[cfg(not(feature = "compare_hash_only"))]
type HashType = u32;
#[cfg(feature = "compare_hash_only")]
type HashType = u64;
/// `KeyValue` is the item stored in the hash table.
/// The key is actually a `BytesRef` object stored in an external memory arena.
/// The `value_addr` also points to an address in the memory arena.
#[derive(Copy, Clone)]
struct KeyValue {
key_value_addr: Addr,
hash: HashType,
}
impl Default for KeyValue {
fn default() -> Self {
KeyValue {
key_value_addr: Addr::null_pointer(),
hash: 0,
}
}
}
impl KeyValue {
#[inline]
fn is_empty(&self) -> bool {
self.key_value_addr.is_null()
}
#[inline]
fn is_not_empty_ref(&self) -> bool {
!self.key_value_addr.is_null()
}
}
/// Customized `HashMap` with `&[u8]` keys
///
/// Its main particularity is that rather than storing its
/// keys in the heap, keys are stored in a memory arena
/// inline with the values.
///
/// The quirky API has the benefit of avoiding
/// the computation of the hash of the key twice,
/// or copying the key as long as there is no insert.
///
/// SharedArenaHashMap is like ArenaHashMap but takes the memory arena
/// as an argument to its methods, so a single MemoryArena can be shared
/// across multiple SharedArenaHashMaps.
pub struct SharedArenaHashMap {
table: Vec<KeyValue>,
mask: usize,
len: usize,
}
struct LinearProbing {
pos: usize,
mask: usize,
}
impl LinearProbing {
#[inline]
fn compute(hash: HashType, mask: usize) -> LinearProbing {
LinearProbing {
pos: hash as usize,
mask,
}
}
#[inline]
fn next_probe(&mut self) -> usize {
// Not saving the masked version removes a dependency.
self.pos = self.pos.wrapping_add(1);
self.pos & self.mask
}
}
type IterNonEmpty<'a> = Filter<Cloned<std::slice::Iter<'a, KeyValue>>, fn(&KeyValue) -> bool>;
pub struct Iter<'a> {
hashmap: &'a SharedArenaHashMap,
memory_arena: &'a MemoryArena,
inner: IterNonEmpty<'a>,
}
impl<'a> Iterator for Iter<'a> {
type Item = (&'a [u8], Addr);
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(move |kv| {
let (key, offset): (&'a [u8], Addr) = self
.hashmap
.get_key_value(kv.key_value_addr, self.memory_arena);
(key, offset)
})
}
}
/// Returns the greatest power of two lower or equal to `n`.
///
/// # Panics
/// Panics if `n == 0`.
fn compute_previous_power_of_two(n: usize) -> usize {
assert!(n > 0);
let msb = (63u32 - (n as u64).leading_zeros()) as u8;
1 << msb
}
impl Default for SharedArenaHashMap {
fn default() -> Self {
SharedArenaHashMap::with_capacity(4)
}
}
impl SharedArenaHashMap {
pub fn with_capacity(table_size: usize) -> SharedArenaHashMap {
let table_size_power_of_2 = compute_previous_power_of_two(table_size);
let table = vec![KeyValue::default(); table_size_power_of_2];
SharedArenaHashMap {
table,
mask: table_size_power_of_2 - 1,
len: 0,
}
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_hash(&self, key: &[u8]) -> HashType {
murmurhash32::murmurhash2(key)
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_hash(&self, key: &[u8]) -> HashType {
// Since we compare only the hash, we need a high-quality hash.
use std::hash::Hasher;
let mut hasher = ahash::AHasher::default();
hasher.write(key);
hasher.finish() as HashType
}
#[inline]
fn probe(&self, hash: HashType) -> LinearProbing {
LinearProbing::compute(hash, self.mask)
}
#[inline]
pub fn mem_usage(&self) -> usize {
self.table.len() * mem::size_of::<KeyValue>()
}
#[inline]
fn is_saturated(&self) -> bool {
self.table.len() <= self.len * 2
}
#[inline]
fn get_key_value<'a>(&'a self, addr: Addr, memory_arena: &'a MemoryArena) -> (&[u8], Addr) {
let data = memory_arena.slice_from(addr);
let key_bytes_len_bytes = unsafe { data.get_unchecked(..2) };
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let key_bytes: &[u8] = unsafe { data.get_unchecked(2..2 + key_bytes_len as usize) };
(key_bytes, addr.offset(2 + key_bytes_len as u32))
}
#[inline]
#[cfg(not(feature = "compare_hash_only"))]
fn get_value_addr_if_key_match(
&self,
target_key: &[u8],
addr: Addr,
memory_arena: &MemoryArena,
) -> Option<Addr> {
use crate::fastcmp::fast_short_slice_compare;
let (stored_key, value_addr) = self.get_key_value(addr, memory_arena);
if fast_short_slice_compare(stored_key, target_key) {
Some(value_addr)
} else {
None
}
}
#[inline]
#[cfg(feature = "compare_hash_only")]
fn get_value_addr_if_key_match(
&self,
_target_key: &[u8],
addr: Addr,
memory_arena: &MemoryArena,
) -> Option<Addr> {
// For the compare_hash_only feature, it would make sense to store the keys at a different
// memory location. Here they will just pollute the cache.
let data = memory_arena.slice_from(addr);
let key_bytes_len_bytes = &data[..2];
let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());
let value_addr = addr.offset(2 + key_bytes_len as u32);
Some(value_addr)
}
#[inline]
fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) {
self.len += 1;
self.table[bucket] = KeyValue {
key_value_addr,
hash,
};
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn iter<'a>(&'a self, memory_arena: &'a MemoryArena) -> Iter<'_> {
Iter {
inner: self
.table
.iter()
.cloned()
.filter(KeyValue::is_not_empty_ref),
hashmap: self,
memory_arena,
}
}
fn resize(&mut self) {
let new_len = (self.table.len() * 2).max(1 << 3);
let mask = new_len - 1;
self.mask = mask;
let new_table = vec![KeyValue::default(); new_len];
let old_table = mem::replace(&mut self.table, new_table);
for key_value in old_table.into_iter().filter(KeyValue::is_not_empty_ref) {
let mut probe = LinearProbing::compute(key_value.hash, mask);
loop {
let bucket = probe.next_probe();
if self.table[bucket].is_empty() {
self.table[bucket] = key_value;
break;
}
}
}
}
/// Get a value associated to a key.
#[inline]
pub fn get<V>(&self, key: &[u8], memory_arena: &MemoryArena) -> Option<V>
where V: Copy + 'static {
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
loop {
let bucket = probe.next_probe();
let kv: KeyValue = self.table[bucket];
if kv.is_empty() {
return None;
} else if kv.hash == hash {
if let Some(val_addr) =
self.get_value_addr_if_key_match(key, kv.key_value_addr, memory_arena)
{
let v = memory_arena.read(val_addr);
return Some(v);
}
}
}
}
/// `mutate_or_create` creates a new entry for a given key if it does not exist
/// or updates the existing entry.
///
/// The actual logic for this update is defined by the `updater`
/// argument.
///
/// If the key is not present, `updater` will receive `None` and
/// will be in charge of returning a default value.
/// If the key already has an associated value, then it will be passed
/// `Some(previous_value)`.
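///
/// # Example
///
/// A minimal sketch (not from the original file), counting key occurrences:
///
/// ```ignore
/// let mut arena = MemoryArena::default();
/// let mut map = SharedArenaHashMap::default();
/// // First call: the key is absent, so the updater receives None.
/// let count = map.mutate_or_create(b"tree", &mut arena, |count: Option<u32>| {
///     count.map_or(1, |c| c + 1)
/// });
/// assert_eq!(count, 1);
/// ```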
#[inline]
pub fn mutate_or_create<V>(
&mut self,
key: &[u8],
memory_arena: &mut MemoryArena,
mut updater: impl FnMut(Option<V>) -> V,
) -> V
where
V: Copy + 'static,
{
if self.is_saturated() {
self.resize();
}
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
let mut bucket = probe.next_probe();
let mut kv: KeyValue = self.table[bucket];
loop {
if kv.is_empty() {
// The key does not exist yet.
let val = updater(None);
let num_bytes = std::mem::size_of::<u16>() + key.len() + std::mem::size_of::<V>();
let key_addr = memory_arena.allocate_space(num_bytes);
{
let data = memory_arena.slice_mut(key_addr, num_bytes);
let key_len_bytes: [u8; 2] = (key.len() as u16).to_le_bytes();
data[..2].copy_from_slice(&key_len_bytes);
let stop = 2 + key.len();
fast_short_slice_copy(key, &mut data[2..stop]);
store(&mut data[stop..], val);
}
self.set_bucket(hash, key_addr, bucket);
return val;
}
if kv.hash == hash {
if let Some(val_addr) =
self.get_value_addr_if_key_match(key, kv.key_value_addr, memory_arena)
{
let v = memory_arena.read(val_addr);
let new_v = updater(Some(v));
memory_arena.write_at(val_addr, new_v);
return new_v;
}
}
// This allows fetching the next bucket before the loop jmp
bucket = probe.next_probe();
kv = self.table[bucket];
}
}
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use super::{compute_previous_power_of_two, SharedArenaHashMap};
use crate::MemoryArena;
#[test]
fn test_hash_map() {
let mut memory_arena = MemoryArena::default();
let mut hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
hash_map.mutate_or_create(b"abc", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
3u32
});
hash_map.mutate_or_create(b"abcd", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, None);
4u32
});
hash_map.mutate_or_create(b"abc", &mut memory_arena, |opt_val: Option<u32>| {
assert_eq!(opt_val, Some(3u32));
5u32
});
let mut vanilla_hash_map = HashMap::new();
let iter_values = hash_map.iter(&memory_arena);
for (key, addr) in iter_values {
let val: u32 = memory_arena.read(addr);
vanilla_hash_map.insert(key.to_owned(), val);
}
assert_eq!(vanilla_hash_map.len(), 2);
}
#[test]
fn test_empty_hashmap() {
let memory_arena = MemoryArena::default();
let hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
assert_eq!(hash_map.get::<u32>(b"abc", &memory_arena), None);
}
#[test]
fn test_compute_previous_power_of_two() {
assert_eq!(compute_previous_power_of_two(8), 8);
assert_eq!(compute_previous_power_of_two(9), 8);
assert_eq!(compute_previous_power_of_two(7), 4);
assert_eq!(compute_previous_power_of_two(u64::MAX as usize), 1 << 63);
}
#[test]
fn test_many_terms() {
let mut memory_arena = MemoryArena::default();
let mut terms: Vec<String> = (0..20_000).map(|val| val.to_string()).collect();
let mut hash_map: SharedArenaHashMap = SharedArenaHashMap::default();
for term in terms.iter() {
hash_map.mutate_or_create(
term.as_bytes(),
&mut memory_arena,
|_opt_val: Option<u32>| 5u32,
);
}
let mut terms_back: Vec<String> = hash_map
.iter(&memory_arena)
.map(|(bytes, _)| String::from_utf8(bytes.to_vec()).unwrap())
.collect();
terms_back.sort();
terms.sort();
for pos in 0..terms.len() {
assert_eq!(terms[pos], terms_back[pos]);
}
}
}