Add bench to reproduce performance drop on array of texts.

This commit is contained in:
François Massot
2022-10-29 02:54:07 +02:00
parent 4e46f4f8c4
commit 003722d831
2 changed files with 100013 additions and 0 deletions

100000
benches/hdfs_with_array.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,7 @@ use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
use tantivy::Index;
// Benchmark corpus embedded into the binary at compile time from `hdfs.json`
// (path is resolved relative to this source file).
// NOTE(review): presumably newline-delimited JSON HDFS log records — confirm.
const HDFS_LOGS: &str = include_str!("hdfs.json");
// Second corpus, embedded from `hdfs_with_array.json`. The indexing benchmark
// consumes it as one JSON document per line.
const HDFS_LOGS_WITH_ARRAY: &str = include_str!("hdfs_with_array.json");
// How many times a benchmark iteration feeds the full corpus to the writer.
const NUM_REPEATS: usize = 2;
pub fn hdfs_index_benchmark(c: &mut Criterion) {
@@ -41,6 +42,18 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
}
})
});
// Benchmark: index the `HDFS_LOGS_WITH_ARRAY` corpus without calling commit.
// Index and writer creation happen inside `b.iter`, so they are part of every
// measured iteration.
group.bench_function("index-hdfs-with-array-no-commit", |b| {
b.iter(|| {
// Start each iteration from a fresh index built from a clone of `schema`.
let index = Index::create_in_ram(schema.clone());
// Writer restricted to one thread.
// NOTE(review): the second argument is presumably the writer's memory
// budget in bytes (100 MB) — confirm against the tantivy API docs.
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
// Feed the whole corpus NUM_REPEATS times.
for _ in 0..NUM_REPEATS {
// `trim()` strips leading/trailing whitespace first, so a trailing newline
// in the file does not produce an empty last line to parse.
for doc_json in HDFS_LOGS_WITH_ARRAY.trim().split("\n") {
// Both steps panic via `unwrap` if a line fails to parse or to be added.
let doc = schema.parse_document(doc_json).unwrap();
index_writer.add_document(doc).unwrap();
}
}
// No commit is issued here, unlike the "with-commit" benchmark variant.
})
});
group.bench_function("index-hdfs-with-commit", |b| {
b.iter(|| {
let index = Index::create_in_ram(schema.clone());