Add a benchmark to reproduce the performance drop when indexing arrays of text values.

2026-01-06 09:12:55 +00:00 · 2022-10-29 02:54:07 +02:00
parent 4e46f4f8c4
commit 003722d831
2 changed files with 100013 additions and 0 deletions
--- a/benches/hdfs_with_array.json
+++ b/benches/hdfs_with_array.json
--- a/benches/index-bench.rs
+++ b/benches/index-bench.rs
@@ -4,6 +4,7 @@ use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
 use tantivy::Index;

 const HDFS_LOGS: &str = include_str!("hdfs.json");
+const HDFS_LOGS_WITH_ARRAY: &str = include_str!("hdfs_with_array.json");
 const NUM_REPEATS: usize = 2;

 pub fn hdfs_index_benchmark(c: &mut Criterion) {
@@ -41,6 +42,18 @@ pub fn hdfs_index_benchmark(c: &mut Criterion) {
            }
        })
    });
+    group.bench_function("index-hdfs-with-array-no-commit", |b| {
+        b.iter(|| {
+            let index = Index::create_in_ram(schema.clone());
+            let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
+            for _ in 0..NUM_REPEATS {
+                for doc_json in HDFS_LOGS_WITH_ARRAY.trim().split("\n") {
+                    let doc = schema.parse_document(doc_json).unwrap();
+                    index_writer.add_document(doc).unwrap();
+                }
+            }
+        })
+    });
    group.bench_function("index-hdfs-with-commit", |b| {
        b.iter(|| {
            let index = Index::create_in_ram(schema.clone());