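// Criterion benchmarks for tantivy indexing throughput over a corpus of HDFS
// log records (hdfs.json). They compare an indexed-only schema, a schema with
// the docstore enabled, and a dynamic JSON field, each with and without a
// final commit. pprof is wired in as the criterion profiler so flamegraphs
// can be produced.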
use criterion::{criterion_group, criterion_main, Criterion};
use pprof::criterion::{Output, PProfProfiler};
use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
use tantivy::Index;

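// One JSON-encoded HDFS log record per line; each benchmark iteration indexes
// the whole corpus NUM_REPEATS times.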
const HDFS_LOGS: &str = include_str!("hdfs.json");
const NUM_REPEATS: usize = 2;

pub fn hdfs_index_benchmark(c: &mut Criterion) {
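    // Three schema variants: indexed-only fields, the same fields with the
    // docstore enabled (STORED), and a single dynamic JSON field.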
    let schema = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_u64_field("timestamp", INDEXED);
        schema_builder.add_text_field("body", TEXT);
        schema_builder.add_text_field("severity", STRING);
        schema_builder.build()
    };
    let schema_with_store = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_u64_field("timestamp", INDEXED | STORED);
        schema_builder.add_text_field("body", TEXT | STORED);
        schema_builder.add_text_field("severity", STRING | STORED);
        schema_builder.build()
    };
    let dynamic_schema = {
        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
        schema_builder.add_json_field("json", TEXT);
        schema_builder.build()
    };

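    // All benchmarks share one group; a reduced sample size keeps the total
    // run time manageable.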
    let mut group = c.benchmark_group("index-hdfs");
    group.sample_size(20);
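    // Baseline: index into RAM with a single writer thread and never commit,
    // so the final segment serialization cost is excluded.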
group.bench_function("index-hdfs-no-commit", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(schema.clone());
|
|
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let doc = schema.parse_document(doc_json).unwrap();
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
})
|
|
});
|
|
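    // Same workload, but the cost of a final commit is included.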
group.bench_function("index-hdfs-with-commit", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(schema.clone());
|
|
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let doc = schema.parse_document(doc_json).unwrap();
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
index_writer.commit().unwrap();
|
|
})
|
|
});
|
|
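    // Docstore variant: the schema marks every field as STORED; no commit at
    // the end.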
group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(schema_with_store.clone());
|
|
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let doc = schema.parse_document(doc_json).unwrap();
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
})
|
|
});
|
|
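    // Docstore variant with the final commit included.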
group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(schema_with_store.clone());
|
|
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let doc = schema.parse_document(doc_json).unwrap();
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
index_writer.commit().unwrap();
|
|
})
|
|
});
|
|
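    // Dynamic JSON field: each log record is parsed with serde_json and
    // indexed under a single JSON field, with no stored fields.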
group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(dynamic_schema.clone());
|
|
let json_field = dynamic_schema.get_field("json").unwrap();
|
|
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let json_val: serde_json::Map<String, serde_json::Value> =
|
|
serde_json::from_str(doc_json).unwrap();
|
|
let doc = tantivy::doc!(json_field=>json_val);
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
index_writer.commit().unwrap();
|
|
})
|
|
});
|
|
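    // Same JSON-field workload, with the final commit included.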
group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
|
|
b.iter(|| {
|
|
let index = Index::create_in_ram(dynamic_schema.clone());
|
|
let json_field = dynamic_schema.get_field("json").unwrap();
|
|
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
|
for _ in 0..NUM_REPEATS {
|
|
for doc_json in HDFS_LOGS.trim().split("\n") {
|
|
let json_val: serde_json::Map<String, serde_json::Value> =
|
|
serde_json::from_str(doc_json).unwrap();
|
|
let doc = tantivy::doc!(json_field=>json_val);
|
|
index_writer.add_document(doc).unwrap();
|
|
}
|
|
}
|
|
index_writer.commit().unwrap();
|
|
})
|
|
});
|
|
}

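// Wire pprof into criterion so benchmark runs can emit flamegraphs.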
criterion_group! {
    name = benches;
    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
    targets = hdfs_index_benchmark
}
criterion_main!(benches);