compact doc (#2402)

* compact doc

* add any value type

* pass references when building CompactDoc

* remove OwnedValue from API

* clippy

* clippy

* fail on large documents

* fmt

* cleanup

* cleanup

* implement Value for different types

fix serde_json date Value implementation

* fmt

* cleanup

* fmt

* cleanup

* store positions instead of pos+len

* remove nodes array

* remove mediumvec

* cleanup

* infallible serialize into vec

* remove positions indirection

* remove 24MB limitation in document

use u32 for Addr
Remove the 3 byte addressing limitation and use VInt instead

* cleanup

* extend test

* cleanup, add comments

* rename, remove pub
This commit is contained in:
PSeitz
2024-05-21 10:16:08 +02:00
committed by GitHub
parent 5a80420b10
commit e1679f3fb9
24 changed files with 887 additions and 238 deletions

View File

@@ -18,7 +18,7 @@ fn benchmark(
benchmark_dynamic_json(b, input, schema, commit, parse_json)
} else {
_benchmark(b, input, schema, commit, parse_json, |schema, doc_json| {
TantivyDocument::parse_json(&schema, doc_json).unwrap()
TantivyDocument::parse_json(schema, doc_json).unwrap()
})
}
}
@@ -90,8 +90,7 @@ fn benchmark_dynamic_json(
) {
let json_field = schema.get_field("json").unwrap();
_benchmark(b, input, schema, commit, parse_json, |_schema, doc_json| {
let json_val: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(doc_json).unwrap();
let json_val: serde_json::Value = serde_json::from_str(doc_json).unwrap();
tantivy::doc!(json_field=>json_val)
})
}
@@ -138,12 +137,13 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
for (prefix, schema, is_dynamic) in benches {
for commit in [false, true] {
let suffix = if commit { "with-commit" } else { "no-commit" };
for parse_json in [false] {
{
let parse_json = false;
// for parse_json in [false, true] {
let suffix = if parse_json {
format!("{}-with-json-parsing", suffix)
} else {
format!("{}", suffix)
suffix.to_string()
};
let bench_name = format!("{}{}", prefix, suffix);